Test Chi-Squared

In [3]:
import numpy as np
import scipy.stats as stats

In [4]:
# Observed frequencies in each category
observed_frequencies = np.array([30, 40, 50, 80])

# Expected probabilities for each category under H0
expected_probabilities = np.array([0.2, 0.3, 0.3, 0.2])

In [11]:
# Total number of observations
n = np.sum(observed_frequencies)
print("N:", n)

# Expected frequencies under H0
expected_frequencies = n * expected_probabilities
print(expected_frequencies)

# Calculate the Chi-squared statistic by hand
chi_squared_statistic = np.sum((observed_frequencies - expected_frequencies) ** 2 / expected_frequencies)
print(f"Chi-squared statistic (by hand): {round(chi_squared_statistic, 3)}")

N: 200
[40. 60. 60. 40.]
Chi-squared statistic (by hand): 50.833


In [12]:
# Degrees of freedom (number of categories - 1)
degrees_of_freedom = len(observed_frequencies) - 1

# Get the critical value at a significance level (e.g., alpha = 0.05)
#alpha = 0.05
#critical_value = stats.chi2.ppf(1 - alpha, degrees_of_freedom)

# Calculate the p-value
p_value = 1 - stats.chi2.cdf(chi_squared_statistic, degrees_of_freedom)

#print(f"Critical value at alpha = {alpha}: {critical_value}")
print(f"P-value: {p_value}")

P-value: 5.308853356922327e-11


In [6]:
# Perform the Chi-squared test using scipy
chi2_statistic_scipy, p_value_scipy = stats.chisquare(f_obs=observed_frequencies, f_exp=expected_frequencies)

print(f"Chi-squared statistic (scipy): {chi2_statistic_scipy}")
print(f"P-value (scipy): {p_value_scipy}")

Chi-squared statistic (by hand): 50.833333333333336
Critical value at alpha = 0.05: 7.814727903251179
P-value: 5.308853356922327e-11
Chi-squared statistic (scipy): 50.833333333333336
P-value (scipy): 5.308854866224734e-11
