In [1]:
import numpy as np
from scipy import stats

In [2]:
# Observed counts arranged as a 2x4 contingency table:
# one row per group, one column per response category.
observed = np.array(
    [
        [40, 45, 25, 10],  # Group 1 counts across the four categories
        [35, 30, 20, 30],  # Group 2 counts across the four categories
    ]
)

In [3]:
# Step 1: Marginal totals of the contingency table
total = np.sum(observed)              # Grand total over every cell
col_sums = np.sum(observed, axis=0)   # Per-column totals (collapse the rows)
row_sums = np.sum(observed, axis=1)   # Per-row totals (collapse the columns)

In [4]:
# Report each marginal total on its own line, in row / column / grand-total order.
for label, value in (
    ("Row sums:", row_sums),
    ("Column sums:", col_sums),
    ("Total:", total),
):
    print(label, value)

Row sums: [120 115]
Column sums: [75 75 45 40]
Total: 235


In [5]:
# Step 2: Expected count for every cell:
#         (row total * column total) / grand total
# Broadcasting a column vector of row totals against a row vector of column
# totals produces the full table of pairwise products.
expected = (row_sums[:, np.newaxis] * col_sums[np.newaxis, :]) / total
print("Expected frequencies:\n", expected)

Expected frequencies:
 [[38.29787234 38.29787234 22.9787234  20.42553191]
 [36.70212766 36.70212766 22.0212766  19.57446809]]


In [6]:
# Step 3: Chi-Square statistic = sum over all cells of (observed - expected)^2 / expected
cell_contributions = (observed - expected) ** 2 / expected  # per-cell term of the sum
chi_square_stat = cell_contributions.sum()
print("Calculated Chi-Square value:", round(chi_square_stat, 3))

Calculated Chi-Square value: 13.789


In [7]:
# Step 4: Degrees of freedom = (number of rows - 1) * (number of columns - 1)
n_rows, n_cols = observed.shape
df = (n_rows - 1) * (n_cols - 1)
print("Degrees of freedom:", df)

Degrees of freedom: 3


In [8]:
# Step 5: Critical Chi-Square value at significance level alpha
# stats.chi2.ppf(1 - alpha, df) is the (1 - alpha) quantile of the chi-square
# distribution with df degrees of freedom, i.e. the upper-tail cutoff that the
# statistic is compared against in the decision step.
alpha = 0.05
chi_square_critical = stats.chi2.ppf(1 - alpha, df)
# Build the label from alpha itself so the printed text cannot go stale when
# alpha is changed (previously the "0.05" was hard-coded in the message).
print(f"Critical Chi-Square value (alpha={alpha}):", round(chi_square_critical, 3))

Critical Chi-Square value (alpha=0.05): 7.815


In [9]:
# Step 6: Decision rule
# H0 is rejected only when the statistic is strictly greater than the critical
# value; every other outcome falls through to "fail to reject".
print(
    "Reject the null hypothesis (H0): There is an association between the groups and categories"
    if chi_square_stat > chi_square_critical
    else "Fail to reject the null hypothesis (H0): No evidence of association"
)

Reject the null hypothesis (H0): There is an association between the groups and categories
