## Statistical Tests (t-test, ANOVA, chi-square)

In [3]:
!pip install numpy pandas scipy statsmodels --break-system-packages

Defaulting to user installation because normal site-packages is not writeable
Collecting pandas
  Downloading pandas-2.3.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (91 kB)
Collecting statsmodels
  Downloading statsmodels-0.14.5-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl.metadata (9.5 kB)
Collecting tzdata>=2022.7 (from pandas)
  Downloading tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting patsy>=0.5.6 (from statsmodels)
  Downloading patsy-1.0.2-py2.py3-none-any.whl.metadata (3.6 kB)
Downloading pandas-2.3.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl (12.4 MB)
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.4/12.4 MB[0m [31m7.2 MB/s[0m  [33m0:00:01[0m7.4 MB/s[0m eta [36m0:00:01[0m01[0m
[?25hDownloading statsmodels-0.14.5-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (10.4 MB)
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━

In [4]:
import numpy as np
import pandas as pd
from scipy import stats
import statsmodels.api as sm
import statsmodels.formula.api as smf

# --- Two-sample t-test -------------------------------------------------------
# Draw both samples from a dedicated, seeded generator so the printed statistic
# is identical on every Restart & Run All, independent of how many random
# numbers any other cell or section has consumed.
TTEST_SEED = 42
ttest_rng = np.random.default_rng(TTEST_SEED)

# Uniform samples: group1 on [0, 10), group2 on [2, 12) (same spread, shifted
# by +2 so the two means differ slightly).
group1 = ttest_rng.random(20) * 10
group2 = ttest_rng.random(25) * 10 + 2

# equal_var=False selects Welch's t-test, which does not assume that the two
# groups share a common variance.
t_stat, p_value = stats.ttest_ind(group1, group2, equal_var=False)
print(f"Two-sample t-test: t-statistic = {t_stat:.3f}, p-value = {p_value:.3f}")

# --- Chi-square test of independence -----------------------------------------
# Observed counts laid out as a 2x2 contingency table (rows x columns).
contingency_table = np.array(
    [
        [10, 20],
        [15, 5],
    ]
)

# For a 2x2 table (dof = 1) SciPy applies Yates' continuity correction by
# default; it is spelled out here so the reported chi2 is not mistaken for the
# uncorrected Pearson statistic.
chi2, p, dof, expected = stats.chi2_contingency(
    observed=contingency_table,
    correction=True,
)

# Emit the summary line followed by the expected-frequency matrix.
print(
    f"\nChi-square test: chi2 = {chi2:.3f}, p-value = {p:.3f}, dof = {dof}",
    f"Expected frequencies:\n{expected}",
    sep="\n",
)

# --- One-way ANOVA -------------------------------------------------------------
# Use a dedicated, seeded generator so the ANOVA table is reproducible on every
# run and does not depend on random draws made elsewhere in the notebook.
ANOVA_SEED = 7
anova_rng = np.random.default_rng(ANOVA_SEED)

# Three groups of 10 uniform observations each, with the same spread but
# different offsets: A on [0, 5), B on [2, 7), C on [1, 6).
data = {
    'y': np.concatenate([
        anova_rng.random(10) * 5,
        anova_rng.random(10) * 5 + 2,
        anova_rng.random(10) * 5 + 1,
    ]),
    'group': ['A'] * 10 + ['B'] * 10 + ['C'] * 10,
}
df = pd.DataFrame(data)

# Fit y against the group label; C(group) tells the formula interface to treat
# the column as categorical. anova_lm(typ=2) then reports the Type II
# sums-of-squares table (between-group row plus the residual row).
model = smf.ols('y ~ C(group)', data=df).fit()
anova_table = sm.stats.anova_lm(model, typ=2)
print(f"\nOne-way ANOVA:\n{anova_table}")

Two-sample t-test: t-statistic = -1.287, p-value = 0.205

Chi-square test: chi2 = 6.750, p-value = 0.009, dof = 1
Expected frequencies:
[[15. 15.]
 [10. 10.]]

One-way ANOVA:
             sum_sq    df         F    PR(>F)
C(group)  27.894763   2.0  7.460603  0.002634
Residual  50.475717  27.0       NaN       NaN
