In [None]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.stats.anova import anova_lm
from scipy import stats

In [None]:
# Rise-height measurements, one list of four replicates per baking-powder level.
group1 = [11.4, 11.0, 11.3, 9.5]    # level: 0.25 tsp
group2 = [27.8, 29.2, 26.8, 26.0]   # level: 0.5 tsp
group3 = [47.6, 47.0, 47.3, 45.5]   # level: 0.75 tsp
group4 = [61.6, 62.4, 63.0, 63.9]   # level: 1 tsp

# Long-format frame: one row per measurement, labelled with its level.
# Rows are emitted group by group, in the order the levels are listed here.
dose_groups = {
    '0.25 tsp': group1,
    '0.5 tsp': group2,
    '0.75 tsp': group3,
    '1 tsp': group4,
}
data = pd.DataFrame(
    [(height, dose) for dose, heights in dose_groups.items() for height in heights],
    columns=['RiseHeight', 'BakingPowder'],
)
data

In [None]:
# One-way ANOVA expressed as an OLS fit of RiseHeight on the BakingPowder labels.
# BakingPowder holds strings, so the formula interface treats it as a factor.
formula = 'RiseHeight ~ BakingPowder'
model = smf.ols(formula, data=data).fit()

# Derive the ANOVA table from the fitted model.
anova_results = anova_lm(model)

# Last expression of the cell, so the table is rendered as the cell output.
anova_results

In [None]:
# Linear-trend contrast across the four baking-powder groups, tested against
# the within-group (residual) mean square of the one-way layout.
# Relies on group1..group4 defined in an earlier cell.
groups = [np.asarray(g, dtype=float) for g in (group1, group2, group3, group4)]

# Replicates per treatment. The contrast sum-of-squares formula below assumes
# every group has this same size (balanced design).
n = len(groups[0])

# Treatment means
means = np.array([g.mean() for g in groups])

# Contrast coefficients for linear trend over four levels (they sum to zero)
c = np.array([-3, -1, 1, 3])

# Compute contrast value
L = np.sum(c * means)

# Compute sum of squares for the contrast: L^2 / (sum(c_i^2) / n)
SS_contrast = (L**2) / (np.sum(c**2) / n)

# Mean square for the contrast (df=1, so MS equals SS)
MS_contrast = SS_contrast

# Residual sum of squares and its degrees of freedom, computed from the data
# instead of being copied by hand from the earlier ANOVA output, so they
# cannot go stale if the measurements change.
SS_residual = sum(((g - g.mean())**2).sum() for g in groups)
df_residual = sum(len(g) for g in groups) - len(groups)

# Mean square error
MSE = SS_residual / df_residual

# F-statistic for the contrast
F = MS_contrast / MSE

# p-value: use the survival function. `1 - cdf(F)` cancels catastrophically
# when F is large and the true p-value is far below machine epsilon.
p_value = stats.f.sf(F, 1, df_residual)

print("Linear contrast L =", L)
print("SS_contrast =", SS_contrast)
print("F-statistic =", F)
print("p-value =", p_value)

In [None]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Data: four rise measurements at each of the four baking-powder levels
rise_by_level = {
    '0.25': [11.4, 11.0, 11.3, 9.5],
    '0.5': [27.8, 29.2, 26.8, 26.0],
    '0.75': [47.6, 47.0, 47.3, 45.5],
    '1': [61.6, 62.4, 63.0, 63.9],
}
data = {
    'Rise': [value for values in rise_by_level.values() for value in values],
    'BakingPowder': [level for level, values in rise_by_level.items() for _ in values],
}

df = pd.DataFrame(data)

# Attach an integer score 1..4 to each level label, in the order listed above.
# The result is a plain numeric column, not a pandas categorical.
level_scores = {level: score for score, level in enumerate(rise_by_level, start=1)}
df['BakingPowder_num'] = df['BakingPowder'].map(level_scores)

# One-way ANOVA with BakingPowder entered as a factor via C(...)
model = ols('Rise ~ C(BakingPowder)', data=df).fit()
anova_table = sm.stats.anova_lm(model)
print("ANOVA Table:\n", anova_table)

# Linear-trend contrast coefficients for 4 levels: -3, -1, 1, 3
contrast = np.array([-3, -1, 1, 3])

from statsmodels.stats.contrast import Contrast

# The same coefficients as a 1 x 4 float row matrix.
# NOTE(review): `contrast`, `Contrast` and `contrast_matrix` are not used by
# anything below in this cell. Applying the contrast to `model` would require
# re-expressing it in terms of the fitted dummy-coded parameters, so the
# numeric regression below is used for the trend instead.
contrast_matrix = contrast.astype(float).reshape(1, 4)

# Straight-line regression of Rise on the integer level score
model_linear = ols('Rise ~ BakingPowder_num', data=df).fit()
print("\nLinear trend model summary:\n", model_linear.summary())

# With a single regressor, the model's overall F-test is the test that the
# slope is zero. Both values come from `model_linear`, so the error term is
# that of the straight-line fit, not that of the factor model `model`.
F_linear, p_linear = model_linear.fvalue, model_linear.f_pvalue
print("\nF-statistic for linear trend:", F_linear)
print("p-value for linear trend:", p_linear)


In [None]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Create the dataset: five SpearLength observations for each treatment
spear_lengths = {
    'Control': [94.7, 96.1, 86.5, 98.5, 94.9],
    'IAA': [89.9, 94.0, 99.1, 92.8, 99.4],
    'ABA': [96.8, 87.8, 89.1, 91.1, 89.4],
    'GA3': [99.1, 95.3, 94.6, 93.1, 95.7],
    'CPPU': [104.4, 98.9, 98.9, 106.5, 104.8],
}
data = {
    'SpearLength': [length for obs in spear_lengths.values() for length in obs],
    'Treatment': [name for name, obs in spear_lengths.items() for _ in obs],
}

df = pd.DataFrame(data)

# Fit the linear model with Treatment as a categorical factor
treatment_formula = 'SpearLength ~ C(Treatment)'
model = ols(treatment_formula, data=df).fit()

# ANOVA table for the fitted model, requested with typ=2
anova_table = sm.stats.anova_lm(model, typ=2)
print(anova_table)


In [None]:
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# Tukey HSD: all pairwise comparisons of SpearLength between Treatment groups,
# at significance level alpha = 0.01. Uses `df` from the preceding ANOVA cell.
spear_length = df['SpearLength']
treatment = df['Treatment']
tukey = pairwise_tukeyhsd(endog=spear_length, groups=treatment, alpha=0.01)

print(tukey)

In [None]:
import pandas as pd
from scipy.stats import kruskal

# Data: one list of five measurements per treatment group
control = [94.7, 96.1, 86.5, 98.5, 94.9]
IAA = [89.9, 94.0, 99.1, 92.8, 99.4]
ABA = [96.8, 87.8, 89.1, 91.1, 89.4]
GA3 = [99.1, 95.3, 94.6, 93.1, 95.7]
CPPU = [104.4, 98.9, 98.9, 106.5, 104.8]

# Kruskal-Wallis H test across the five groups, passed in this fixed order.
samples = (control, IAA, ABA, GA3, CPPU)
kw_result = kruskal(*samples)

# The result unpacks into the H statistic and its p-value.
H, p = kw_result
print("Kruskal-Wallis H-statistic:", H)
print("p-value:", p)


In [None]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats as stats

# Residual diagnostics for a randomized block model of Score on Method,
# with Block as the blocking factor.

# Step 1: Create the dataset (10 blocks x 3 methods, one Score per combination)
data = {
    'Block': [1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7,8,8,8,9,9,9,10,10,10],
    'Method': [1,2,3]*10,
    'Score': [73,81,92,76,78,89,72,80,87,74,79,90,76,71,88,
              75,75,86,68,72,88,72,84,87,65,73,81,62,69,78]
}

df = pd.DataFrame(data)

# Step 2: Fit the randomized block model (Method and Block both categorical,
# additive, no interaction term)
model = ols('Score ~ C(Method) + C(Block)', data=df).fit()

# Step 3: Extract residuals and fitted values as new columns of df
df['Fitted'] = model.fittedvalues
df['Residuals'] = model.resid

# Step 4: Residuals vs Fitted plot, with a dashed reference line at zero
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(x='Fitted', y='Residuals', data=df, ax=ax)
ax.axhline(0, color='red', linestyle='--')
ax.set_xlabel('Fitted Values')
ax.set_ylabel('Residuals')
ax.set_title('Residuals vs Fitted Values')
plt.show()

# Step 5: Normal probability plot of residuals.
# qqplot opens its own figure unless it is given an Axes. Without `ax=`, a
# preceding plt.figure(...) only produces an extra blank figure and the
# requested figsize never reaches the Q-Q plot.
fig, ax = plt.subplots(figsize=(8, 5))
sm.qqplot(df['Residuals'], line='45', fit=True, ax=ax)
ax.set_title('Normal Q-Q Plot of Residuals')
plt.show()


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Profile plot of Score against Method, one coloured line per Block.
# Expects `df` (with Method, Score and Block columns) from an earlier cell.
fig, ax = plt.subplots(figsize=(8, 6))
sns.lineplot(
    data=df,
    x='Method',
    y='Score',
    hue='Block',
    marker='o',
    palette='tab10',
    ax=ax,
)
ax.set_xlabel('Training Method')
ax.set_ylabel('Proficiency Score')
# Anchor the legend just outside the right edge so it does not cover the lines.
ax.legend(title='Block', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.show()
