In [1]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Create a new dataset: a balanced 2x2 factorial layout with three Yield
# observations for every Fertilizer (A/B) x Irrigation (Low/High) combination.
data = {
    'Fertilizer': ['A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'A', 'A', 'B', 'B'],
    'Irrigation': ['Low', 'Low', 'High', 'High', 'Low', 'Low', 'High', 'High', 'Low', 'High', 'Low', 'High'],
    'Yield': [45, 48, 60, 63, 50, 53, 68, 70, 47, 62, 51, 69]
}

df = pd.DataFrame(data)

# Perform Two-Way ANOVA: both main effects plus their interaction term.
model = ols('Yield ~ C(Fertilizer) + C(Irrigation) + C(Fertilizer):C(Irrigation)', data=df).fit()
anova_table = sm.stats.anova_lm(model, typ=2)

# Display the ANOVA table
print("ANOVA Table:")
print(anova_table)

# Interpretation of results
alpha = 0.05
print("\nInterpretation:")
# The Residual row has no F-test, so its p-value is NaN. It is not an effect
# and must not be reported as "NOT statistically significant"; only rows that
# actually carry a p-value are interpreted.
tested_effects = anova_table['PR(>F)'].dropna()
for factor, p_value in tested_effects.items():
    if p_value <= alpha:
        print(f"The effect of {factor} is statistically significant (p = {p_value:.4f}).")
    else:
        print(f"The effect of {factor} is NOT statistically significant (p = {p_value:.4f}).")

ANOVA Table:
                                 sum_sq   df           F        PR(>F)
C(Fertilizer)                108.000000  1.0   54.000000  8.005674e-05
C(Irrigation)                800.333333  1.0  400.166667  4.067255e-08
C(Fertilizer):C(Irrigation)    5.333333  1.0    2.666667  1.411133e-01
Residual                      16.000000  8.0         NaN           NaN

Interpretation:
The effect of C(Fertilizer) is statistically significant (p = 0.0001).
The effect of C(Irrigation) is statistically significant (p = 0.0000).
The effect of C(Fertilizer):C(Irrigation) is NOT statistically significant (p = 0.1411).
The effect of Residual is NOT statistically significant (p = nan).


In [2]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Balanced 2x2 factorial dataset: three WeightLoss observations for every
# Exercise (Aerobic/Strength) x Diet (Vegetarian/Non-Vegetarian) combination.
data = {
    'Exercise': ['Aerobic', 'Aerobic', 'Aerobic', 'Aerobic',
                 'Strength', 'Strength', 'Strength', 'Strength',
                 'Aerobic', 'Aerobic', 'Strength', 'Strength'],
    'Diet': ['Vegetarian', 'Vegetarian', 'Non-Vegetarian', 'Non-Vegetarian',
             'Vegetarian', 'Vegetarian', 'Non-Vegetarian', 'Non-Vegetarian',
             'Vegetarian', 'Non-Vegetarian', 'Vegetarian', 'Non-Vegetarian'],
    'WeightLoss': [3.2, 3.5, 2.8, 3.0, 4.0, 4.2, 3.6, 3.9, 3.3, 2.9,
                   4.1, 3.8]
}

df = pd.DataFrame(data)

# Two-way ANOVA: both main effects plus their interaction term.
model = ols('WeightLoss~C(Exercise)+C(Diet)+C(Exercise):C(Diet)', data=df).fit()
anova_table = sm.stats.anova_lm(model, typ=2)
print("ANOVA_table")
print(anova_table)

alpha = 0.05
print("\nInterpretation:")
# The Residual row has no F-test, so its p-value is NaN. It is not an effect
# and must not be reported as "NOT statistically significant"; only rows that
# actually carry a p-value are interpreted.
tested_effects = anova_table['PR(>F)'].dropna()
for factor, p_value in tested_effects.items():
    if p_value <= alpha:
        print(f"The effect of {factor} is statistically significant (p = {p_value:.4f}).")
    else:
        print(f"The effect of {factor} is NOT statistically significant (p = {p_value:.4f}).")

ANOVA_table
                       sum_sq   df       F    PR(>F)
C(Exercise)          2.000833  1.0  120.05  0.000004
C(Diet)              0.440833  1.0   26.45  0.000882
C(Exercise):C(Diet)  0.007500  1.0    0.45  0.521227
Residual             0.133333  8.0     NaN       NaN

Interpretation:
The effect of C(Exercise) is statistically significant (p = 0.0000).
The effect of C(Diet) is statistically significant (p = 0.0009).
The effect of C(Exercise):C(Diet) is NOT statistically significant (p = 0.5212).
The effect of Residual is NOT statistically significant (p = nan).


In [3]:
import pandas as pd
from scipy import stats


# Synthetic A/B data: 100 rows per email variant, each with a 0/1 click flag
# produced by repeating a ten-element pattern ten times.
email_a_clicks = [1, 0, 0, 1, 1, 0, 0, 1, 1, 0] * 10
email_b_clicks = [1, 1, 0, 1, 1, 1, 1, 0, 1, 1] * 10
data = {
    'group': ['email_A'] * 100 + ['email_B'] * 100,
    'clicked': email_a_clicks + email_b_clicks,
}
df = pd.DataFrame(data)

# Split the rows by variant: email_A is the control arm, email_B the treatment arm.
control = df.loc[df['group'].eq('email_A')]
treatment = df.loc[df['group'].eq('email_B')]

# Click-Through Rate (CTR) is the mean of the 0/1 click flag within each arm.
control_clicks = control['clicked']
treatment_clicks = treatment['clicked']
control_ctr = control_clicks.mean()
treatment_ctr = treatment_clicks.mean()

# Welch's t-test (equal_var=False): the two arms are not assumed to share a variance.
welch_result = stats.ttest_ind(control_clicks, treatment_clicks, equal_var=False)
t_stat, p_value = welch_result.statistic, welch_result.pvalue

print("Metric: Click-Through Rate (CTR)")
for label, value in (
    ("Control CTR (email_A):", control_ctr),
    ("Treatment CTR (email_B):", treatment_ctr),
    ("t-statistic:", t_stat),
    ("p-value:", p_value),
):
    print(label, value)

# Decision at the 5% significance level; when significant, name the better variant.
alpha = 0.05
is_significant = p_value < alpha
if not is_significant:
    print("Fail to reject H0: The CTR is not significantly different between email_A and email_B.")
else:
    print("Reject H0: The CTR is significantly different between email_A and email_B.")
    winner_message = (
        "email_B has a higher CTR."
        if treatment_ctr > control_ctr
        else "email_A has a higher CTR."
    )
    print(winner_message)

Metric: Click-Through Rate (CTR)
Control CTR (email_A): 0.5
Treatment CTR (email_B): 0.8
t-statistic: -4.66172793254531
p-value: 5.9132881491861184e-06
Reject H0: The CTR is significantly different between email_A and email_B.
email_B has a higher CTR.
