## Chi-square Hypothesis Test

In [1]:
import scipy.stats as stats
import numpy as np

In [2]:
# Observed counts arranged as a 3x2 contingency table
# (each inner list is one row of the table).
observed = np.array([
    [50, 80],
    [30, 50],
    [40, 50],
])

In [3]:
# Display the contingency table to confirm its layout.
observed

array([[50, 80],
       [30, 50],
       [40, 50]])

In [6]:
# Run the chi-square test on the contingency table and display the full result:
# test statistic, p-value, degrees of freedom and expected frequencies.
stats.chi2_contingency(observed)

Chi2ContingencyResult(statistic=np.float64(1.0772792022792022), pvalue=np.float64(0.5835415619187718), dof=2, expected_freq=array([[52., 78.],
       [32., 48.],
       [36., 54.]]))

In [7]:
# Unpack the result in the order it is returned: i is the chi-square statistic,
# p the p-value, dof the degrees of freedom, and expected the table of
# expected frequencies.
i, p, dof, expected = stats.chi2_contingency(observed)

In [8]:
# Expected cell counts returned by chi2_contingency
# (row total * column total / grand total for each cell).
expected

array([[52., 78.],
       [32., 48.],
       [36., 54.]])

In [11]:
# p-value of the chi-square test.
p

np.float64(0.5835415619187718)

In [9]:
# Significance level (alpha) that the chi-square p-value is compared against.
p1 = 0.05

In [12]:
# Decision rule: reject the null hypothesis only when the p-value is below the
# significance level p1.
if p < p1:
    print("Reject the null hypothesis")
else:
    print("Fail to reject the null hypothesis")
# Here the p-value (about 0.58) is greater than .05, so we fail to reject the
# null hypothesis: the data do not show a significant association between the
# row and column variables of the contingency table.

Fail to reject the null hypothesis


## ANOVA Hypothesis Test (analysis of variance)

In [13]:
import numpy as np
import pandas as pd
from scipy import stats

# One-way ANOVA worked by hand for three fertilizer groups.
#   H0: all group means are equal.
#   H1: at least one group mean differs.
# The intermediate quantities (SSB, SSW, SST, MSB, MSW, F, p) are kept as
# separate variables so each step of the calculation can be inspected.

ALPHA = 0.05  # significance level used for the final decision

# Given data
fertilizer_A = np.array([55, 60, 52, 58, 62])
fertilizer_B = np.array([65, 70, 68, 66, 72])
fertilizer_C = np.array([75, 78, 74, 76, 80])
fertilizer_groups = [fertilizer_A, fertilizer_B, fertilizer_C]

# Step 1: Compute the Group Means and the grand mean of the pooled data
mean_A = np.mean(fertilizer_A)
mean_B = np.mean(fertilizer_B)
mean_C = np.mean(fertilizer_C)

data_combined = np.concatenate(fertilizer_groups)
overall_mean = np.mean(data_combined)

# Step 2: Compute Sum of Squares Between Groups (SSB)
# Each group's squared distance from the grand mean, weighted by its size.
n_A = len(fertilizer_A)
n_B = len(fertilizer_B)
n_C = len(fertilizer_C)

SSB = (n_A * (mean_A - overall_mean) ** 2 +
       n_B * (mean_B - overall_mean) ** 2 +
       n_C * (mean_C - overall_mean) ** 2)

# Step 3: Compute Sum of Squares Within Groups (SSW)
# Squared deviations of every observation from its own group mean.
SSW_A = np.sum((fertilizer_A - mean_A) ** 2)
SSW_B = np.sum((fertilizer_B - mean_B) ** 2)
SSW_C = np.sum((fertilizer_C - mean_C) ** 2)
SSW = SSW_A + SSW_B + SSW_C

# Step 4: Compute Total Sum of Squares (SST) via the identity SST = SSB + SSW
SST = SSB + SSW
# Sanity check: the identity must agree with SST computed directly from the data.
assert np.isclose(SST, np.sum((data_combined - overall_mean) ** 2))

# Step 5: Compute Mean Squares (MSB and MSW)
k = len(fertilizer_groups)  # Number of groups, derived from the data
N = len(data_combined)  # Total number of observations
df_between = k - 1
df_within = N - k

MSB = SSB / df_between
MSW = SSW / df_within

# Step 6: Compute F-statistic
F_statistic = MSB / MSW

# Step 7: Compute p-value
# Use the survival function rather than 1 - cdf: for large F the cdf is so
# close to 1 that the subtraction loses precision (and can round to 0).
p_value = stats.f.sf(F_statistic, df_between, df_within)

# Sanity check: the manual calculation must match SciPy's one-way ANOVA.
reference_F, reference_p = stats.f_oneway(*fertilizer_groups)
assert np.isclose(F_statistic, reference_F)
assert np.isclose(p_value, reference_p, rtol=1e-6, atol=0.0)

# Organizing the results into a readable DataFrame
# (None marks cells that do not apply to a row; pandas shows them as NaN.)
anova_manual_results = pd.DataFrame({
    "Sum of Squares": [SSB, SSW, SST],
    "Degrees of Freedom": [df_between, df_within, N - 1],
    "Mean Squares": [MSB, MSW, None],
    "F-Statistic": [F_statistic, None, None],
    "P-Value": [p_value, None, None]
}, index=["Between Groups (SSB)", "Within Groups (SSW)", "Total (SST)"])

# Display the detailed manual ANOVA calculation results
print(anova_manual_results)

# Final Interpretation
if p_value < ALPHA:
    manual_result = "Since the p-value is < 0.05, we reject the null hypothesis. At least one fertilizer significantly affects plant growth."
else:
    manual_result = "Since the p-value is ≥ 0.05, we fail to reject the null hypothesis. The fertilizers have similar effects on plant growth."

# Display final interpretation
print(manual_result)

                      Sum of Squares  Degrees of Freedom  Mean Squares  \
Between Groups (SSB)           926.4                   2    463.200000   
Within Groups (SSW)            119.2                  12      9.933333   
Total (SST)                   1045.6                  14           NaN   

                      F-Statistic   P-Value  
Between Groups (SSB)    46.630872  0.000002  
Within Groups (SSW)           NaN       NaN  
Total (SST)                   NaN       NaN  
Since the p-value is < 0.05, we reject the null hypothesis. At least one fertilizer significantly affects plant growth.
