In [16]:
import pandas as pd
import numpy as np
from scipy.stats import norm, ttest_1samp, ttest_ind, chi2_contingency, chi2, f_oneway
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from statsmodels.stats.multitest import multipletests

- Z-test used when population variance is known or sample size > 30.
- T-test used when population variance is known or sample size < 30.

**Z-test**

In [None]:
# Example: Test if sample mean differs from population mean
# H0: μ = 100
# H1: μ ≠ 100

sample_mean = 105
population_mean = 100
population_std = 15
sample_size = 100
alpha = 0.05

# Calculate Z-statistic (if Z-score bigger then 1.96 or less then -1.96 then we call here is differs)
z_stat = (sample_mean - population_mean) / (population_std / np.sqrt(sample_size)) # z = (x̄ - μ) / (σ / √n)
print(f"Z-statistic: {z_stat:.4f}")

# calculate p-value (two-tailed test)
p_value = 2 * (1 - norm.cdf(abs(z_stat)))
print(f"P-value: {p_value:.4f}")

# Critical value
z_critical = norm.ppf(1 - alpha / 2)
print(f"Critical value: ±{z_critical:.4f}") 

# Decision
if p_value < alpha:
    print("Reject H0: Sample mean is significantly different from population mean")
else:
    print("Fail to reject H0: No significant difference")

Z-statistic: 3.3333
P-value: 0.0009
Critical value: ±1.9600
Reject H0: Sample mean is significantly different from population mean


**T-test**

In [None]:
# One-sample t-test
# H0: μ = 50
# H1: μ ≠ 50

sample = np.random.normal(55, 10, 30)
# t = (x̄ - μ) / (s / √n)
t_stat, p_value = ttest_1samp(sample, 50)

print(f"t-statistic: {t_stat:.4f}")
print(f"p-value: {p_value:.4f}")

alpha = 0.05
if p_value < alpha:
    print("Reject H0: Mean is significantly different from 50")
else:
    print("Fail to reject H0: No significant difference")
    

# Two-sample t-test
sample1 = np.random.normal(50, 10, 30)
sample2 = np.random.normal(55, 10, 30)
# t = (x̄1 - x̄2) / √((s^2↓1 / n↓1) + (s^2↓2 / n↓2))
t_stat, p_value = ttest_ind(sample1, sample2)

print(f"\nTwo-sample t-test:")
print(f"t-statistic: {t_stat:.4f}")
print(f"p-value: {p_value:.4f}")

t-statistic: 2.9679
p-value: 0.0060
Reject H0: Mean is significantly different from 50

Two-sample t-test:
t-statistic: -2.1360
p-value: 0.0369


**Chi-Square**

*Formula: χ² = Σ [(O - E)² / E]*

- O = Observed value
- E = Expected value

In [12]:
# Test independence between two categorical variables
observed = np.array([[10, 20, 30],
                      [15, 25, 35]])

chi2_stat, p_value, dof, expected = chi2_contingency(observed)

print(f"Chi-square statistic: {chi2_stat:.4f}")
print(f"P-value: {p_value:.4f}")
print(f"Degrees of freedom: {dof}")
print(f"\nExpected values:\n{expected}")


# Manual calculation
chi2_manual = np.sum((observed - expected) ** 2 / expected)
print(f"\nManual calculation: {chi2_manual:.4f}")

# Critical value
alpha = 0.05
chi2_critical = chi2.ppf(1 - alpha, dof)
print(f"Critical value: {chi2_critical:.4f}")

if p_value < alpha:
    print("\nReject H0: Variables are NOT independent")
else:
    print("\nFail to reject H0: Variables are independent")

Chi-square statistic: 0.2769
P-value: 0.8707
Degrees of freedom: 2

Expected values:
[[11.11111111 20.         28.88888889]
 [13.88888889 25.         36.11111111]]

Manual calculation: 0.2769
Critical value: 5.9915

Fail to reject H0: Variables are independent


**ANOVA (Analysis of Variance)**

- Based on F-statistic: F = Variance between groups / Variance within groups

In [13]:
# Example: Compare test scores across three teaching methods
method1 = np.random.normal(75, 10, 30)
method2 = np.random.normal(80, 10, 30)
method3 = np.random.normal(85, 10, 30)

# One-way ANOVA
f_stat, p_value = f_oneway(method1, method2, method3)

print(f"F-statistic: {f_stat:.4f}")
print(f"P-value: {p_value:.4f}")

alpha = 0.05
if p_value < alpha:
    print("Reject H0: At least one group mean is significantly different")
else:
    print("Fail to reject H0: No significant difference between groups")

F-statistic: 7.5692
P-value: 0.0009
Reject H0: At least one group mean is significantly different


**Post-hoc Tests**

In [14]:
# Pairwise comparisons (if ANOVA is significant)
# Note: In practice, use Tukey's HSD or Bonferroni correction
p12 = ttest_ind(method1, method2)[1]
p13 = ttest_ind(method1, method3)[1]
p23 = ttest_ind(method2, method3)[1]

print(f"\nPairwise comparisons (p-values):")
print(f"Method 1 vs Method 2: {p12:.4f}")
print(f"Method 1 vs Method 3: {p13:.4f}")
print(f"Method 2 vs Method 3: {p23:.4f}")


Pairwise comparisons (p-values):
Method 1 vs Method 2: 0.0622
Method 1 vs Method 3: 0.0002
Method 2 vs Method 3: 0.0521


**Tukey's HSD(Important)**

In [21]:
np.random.seed(42)
method1 = np.random.normal(75, 5, 30)
method2 = np.random.normal(80, 5, 30)
method3 = np.random.normal(85, 5, 30)

all_data = [method1, method2, method3]

df = pd.DataFrame({
    'score': np.concatenate([method1, method2, method3]),
    'method': ['Method1']*30 + ['Method2']*30 + ['Method3']*30
})

# Tukey's HSD
tukey = pairwise_tukeyhsd(endog=df['score'], groups=df['method'], alpha=0.05)

print("--- Tukey's HSD Results ---")
print(tukey)

--- Tukey's HSD Results ---
 Multiple Comparison of Means - Tukey HSD, FWER=0.05 
 group1  group2 meandiff p-adj  lower   upper  reject
-----------------------------------------------------
Method1 Method2   5.3349 0.0001 2.4357  8.2341   True
Method1 Method3  11.0052    0.0  8.106 13.9044   True
Method2 Method3   5.6702    0.0  2.771  8.5694   True
-----------------------------------------------------


**Bonferroni Correction**

In [22]:
p_values = []
pairs = []

# Method 1 vs 2
t, p = ttest_ind(method1, method2)
p_values.append(p)
pairs.append("Method1 vs Method2")

# Method 1 vs 3
t, p = ttest_ind(method1, method3)
p_values.append(p)
pairs.append("Method1 vs Method3")

# Method 2 vs 3
t, p = ttest_ind(method2, method3)
p_values.append(p)
pairs.append("Method2 vs Method3")

# Bonferroni Correction
reject, p_adjusted, _, _ = multipletests(p_values, alpha=0.05, method='bonferroni')

print("\n--- Bonferroni Correction Results ---")
for i in range(len(pairs)):
    print(f"{pairs[i]}:")
    print(f"  Original P-value: {p_values[i]:.4f}")
    print(f"  Adjusted P-value: {p_adjusted[i]:.4f}")
    print(f"  Significant (Reject H0)? {reject[i]}")


--- Bonferroni Correction Results ---
Method1 vs Method2:
  Original P-value: 0.0000
  Adjusted P-value: 0.0001
  Significant (Reject H0)? True
Method1 vs Method3:
  Original P-value: 0.0000
  Adjusted P-value: 0.0000
  Significant (Reject H0)? True
Method2 vs Method3:
  Original P-value: 0.0000
  Adjusted P-value: 0.0001
  Significant (Reject H0)? True
