#### Goodness of Fit Chi-Squared Test:
This test is used to determine whether the observed frequency distribution of a variable matches a theoretically expected distribution.

In [12]:
import numpy as np
from scipy.stats import chisquare
import scipy.stats as stats

In [13]:
# Dummy data
observed = np.array([25, 20, 15, 10, 30])
expected = np.array([20, 20, 20, 20, 20])  # Expected equal frequencies

In [14]:
# Degrees of freedom (k - 1)
df = len(observed) - 1
df

4

In [15]:
# Critical value for 95% significance level
critical_value = stats.chi2.ppf(0.95, df)

print("Degrees of freedom:", df)
print("Critical value:", critical_value)

Degrees of freedom: 4
Critical value: 9.487729036781154


In [16]:
# Perform chi-squared test
chi2, p = chisquare(observed, expected)


In [17]:
print("Chi-squared statistic:", chi2)
print("p-value:", p)

Chi-squared statistic: 12.5
p-value: 0.013995792487650894


#### Chi-Squared Test for Independence

In [22]:
from scipy.stats import chi2_contingency

In [23]:
# Dummy data
observed = np.array([[10, 20, 30],
                     [15, 25, 35]])

In [24]:
# Degrees of freedom
df = (observed.shape[0] - 1) * (observed.shape[1] - 1)

In [25]:
# Critical value for 95% significance level
critical_value = stats.chi2.ppf(0.95, df)

print("Degrees of freedom:", df)
print("Critical value:", critical_value)

Degrees of freedom: 2
Critical value: 5.991464547107979


In [26]:
# Perform chi-squared test for independence
chi2, p, dof, expected = chi2_contingency(observed)


In [27]:
print("Null hypothesis: There is no association between the two categorical variables.")
print("Chi-squared statistic:", chi2)
print("p-value:", p)
print("Degrees of freedom:", dof)
print("Expected frequencies table:\n", expected)
print("Critical value:", critical_value)

Null hypothesis: There is no association between the two categorical variables.
Chi-squared statistic: 0.27692307692307694
p-value: 0.870696738961232
Degrees of freedom: 2
Expected frequencies table:
 [[11.11111111 20.         28.88888889]
 [13.88888889 25.         36.11111111]]
Critical value: 5.991464547107979


#### Chi-Squared Test for Homogeneity

In [28]:
# Dummy data
observed = np.array([[10, 20, 30],
                     [15, 25, 35],
                     [5, 10, 15]])

In [29]:
# Degrees of freedom
df = (observed.shape[0] - 1) * (observed.shape[1] - 1)

In [30]:
# Critical value for 95% significance level
critical_value = stats.chi2.ppf(0.95, df)

print("Degrees of freedom:", df)
print("Critical value:", critical_value)

Degrees of freedom: 4
Critical value: 9.487729036781154


In [31]:
print("Null hypothesis: The distributions of the categorical variables are homogeneous across populations.")
print("Chi-squared statistic:", chi2)
print("p-value:", p)
print("Degrees of freedom:", dof)
print("Expected frequencies table:\n", expected)
print("Critical value:", critical_value)

Null hypothesis: The distributions of the categorical variables are homogeneous across populations.
Chi-squared statistic: 0.27692307692307694
p-value: 0.870696738961232
Degrees of freedom: 2
Expected frequencies table:
 [[11.11111111 20.         28.88888889]
 [13.88888889 25.         36.11111111]]
Critical value: 9.487729036781154
