In [None]:
# Install the packages this notebook uses into the kernel's environment.
# Only scipy is version-pinned here; pandas, openpyxl and statsmodels are not.
%pip install pandas openpyxl scipy==1.15.3 statsmodels

In [3]:
import pandas as pd

# Read the first worksheet of the workbook into the frame that every later
# cell works from, then preview its first five rows.
df = pd.read_excel('data.xlsx', sheet_name=0)
df.head(5)

Unnamed: 0,sample_id,treatment,pH,temperature,relative_humidity,grade
0,1,A,5.7,35.6,90,1
1,2,B,6.0,36.0,89,1
2,3,A,5.6,36.1,88,0
3,4,A,5.7,35.3,92,2
4,5,B,6.3,36.2,85,0


In [4]:
import statsmodels.api as sm
from statsmodels.formula.api import ols

# One-way ANOVA: grade explained by treatment (treated as a categorical
# factor), summarised with Type II sums of squares.
model = ols(formula='grade ~ C(treatment)', data=df).fit()
anova_table = sm.stats.anova_lm(model, typ=2)
anova_table

Unnamed: 0,sum_sq,df,F,PR(>F)
C(treatment),0.1,1.0,0.117647,0.740439
Residual,6.8,8.0,,


In [5]:
# Bucket relative humidity into three ordered categories. No `right=` is
# passed, so pd.cut uses its default right-closed bins: (79, 85], (85, 90],
# (90, 95]. Readings outside that range would come back as NaN.
humidity_edges = [79, 85, 90, 95]
humidity_names = ['Low', 'Medium', 'High']
df['humidity_cat'] = pd.cut(
    df['relative_humidity'],
    bins=humidity_edges,
    labels=humidity_names,
)

# Two-way ANOVA: both main effects plus their interaction, Type II sums of squares.
model = ols(
    'grade ~ C(treatment) + C(humidity_cat) + C(treatment):C(humidity_cat)',
    data=df,
).fit()
anova_table2 = sm.stats.anova_lm(model, typ=2)
anova_table2

Unnamed: 0,sum_sq,df,F,PR(>F)
C(treatment),0.030047,1.0,0.040063,0.851123
C(humidity_cat),3.36338,2.0,2.242254,0.222263
C(treatment):C(humidity_cat),0.43662,2.0,0.29108,0.762043
Residual,3.0,4.0,,


In [6]:
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# Tukey HSD on grade across treatment groups at the 5% family-wise level.
tukey = pairwise_tukeyhsd(df['grade'], df['treatment'], alpha=0.05)
tukey.summary()

group1,group2,meandiff,p-adj,lower,upper,reject
A,B,0.2,0.7404,-1.1446,1.5446,False


In [7]:
import pandas as pd
import numpy as np
from itertools import combinations
from scipy.stats import studentized_range
import statsmodels.api as sm
from statsmodels.formula.api import ols


# --- One-Way ANOVA ---
# Fit grade ~ treatment; the Residual row of the table supplies the pooled
# error variance and its degrees of freedom for the range test below.
model = ols('grade ~ C(treatment)', data=df).fit()  # <--- Grade vs Treatment
anova = sm.stats.anova_lm(model, typ=2)

print("ANOVA:\n", anova)

# --- Step 1: Group Means ---
groups = df.groupby('treatment')['grade']
means = groups.mean()
n_groups = groups.size()
group_labels = means.index.tolist()  # not used further in this cell
# Rank the means from largest to smallest: the distance between two ranks
# determines how many means a comparison spans.
sorted_means = means.sort_values(ascending=False)

print("\nGroup means:\n", sorted_means)

# --- Step 2: Mean Square Error (MSE) from ANOVA ---
df_error = anova.loc['Residual', 'df']
mse = anova.loc['Residual', 'sum_sq'] / df_error

# --- Step 3: Perform pairwise comparisons ---
# NOTE: every pair is tested independently here. The full Duncan step-down
# procedure additionally refuses to declare a pair significant when it lies
# inside a wider range that was already found non-significant.
alpha = 0.05
results = []

for (i, g1), (j, g2) in combinations(enumerate(sorted_means.index), 2):
    m1, m2 = sorted_means[g1], sorted_means[g2]
    n1, n2 = n_groups[g1], n_groups[g2]
    k = abs(i - j) + 1  # number of ranked means spanned by this pair (Duncan's p)
    # Duncan's test evaluates the studentized range at the protection level
    # (1 - alpha)^(k - 1). Using 1 - alpha for every k would give the
    # Student-Newman-Keuls critical value instead; the two coincide only
    # for adjacent means (k == 2).
    protection_level = (1 - alpha) ** (k - 1)
    q_crit = studentized_range.ppf(protection_level, k, df_error)
    # Kramer-style standard error so unequal group sizes are handled.
    se = np.sqrt(mse * (1 / n1 + 1 / n2))
    diff = abs(m1 - m2)
    critical_range = q_crit * se / np.sqrt(2)
    sig = diff > critical_range
    results.append({
        'Comparison': f"{g1} vs {g2}",
        'Mean Diff': round(diff, 3),
        'Critical Range': round(critical_range, 3),
        'Significant': 'Yes' if sig else 'No'
    })

# --- Step 4: Display Results ---
print("\nDuncan’s Multiple Range Test Results:")
duncan_df = pd.DataFrame(results)
print(duncan_df)


ANOVA:
               sum_sq   df         F    PR(>F)
C(treatment)     0.1  1.0  0.117647  0.740439
Residual         6.8  8.0       NaN       NaN

Group means:
 treatment
B    1.0
A    0.8
Name: grade, dtype: float64

Duncan’s Multiple Range Test Results:
  Comparison  Mean Diff  Critical Range Significant
0     B vs A        0.2           1.345          No


In [8]:
from scipy.stats import ttest_ind

# Pull the grade values for each treatment arm.
groupA = df.loc[df['treatment'] == 'A', 'grade']
groupB = df.loc[df['treatment'] == 'B', 'grade']

# Independent two-sample t-test with pooled variance (scipy's default).
t_stat, p_val = ttest_ind(groupA, groupB, equal_var=True)
print(f't = {t_stat:.3f}, p = {p_val:.3f}')

t = -0.343, p = 0.740
