In [2]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Example data
data = {'Group': ['A', 'A', 'B', 'B', 'C', 'C'],
        'Values': [ 10, 13, 16, 12, 15, 8]}
df = pd.DataFrame(data)

#fit one-way Anova model
model = ols('Values~Group', data=df).fit()
anova_table = sm.stats.anova_lm(model)

#Extract sums of squares from the ANOVA table
SST = anova_table['sum_sq'].sum() # Total sum of squares
SSE = anova_table['sum_sq'][0] #Explained sum of squares (b/w-group sum of squares)
SSR = anova_table['sum_sq'][1]# Residual sum of squares (within-group sum of squares)

print("SST:", SST)
print("SSE:",SSE)
print("SSR:", SSR)

SST: 45.33333333333331
SSE: 8.333333333333309
SSR: 37.0


In [6]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Example data
data = {'Group1' : ['A', 'A', 'B', 'B', 'C', 'C'],
        'Group2' : ['X', 'Y', 'X', 'Y', 'X', 'Y'],
        'Values' : [9, 14, 12, 13, 5, 10]}
df = pd.DataFrame(data)

#fit two-way anova model
model = ols('Values ~ Group1 + Group2 + Group1:Group2', data = df).fit()
anova_table = sm.stats.anova_lm(model)

# Extract main effects and interaction effect from the ANOVA table
main_effect1 = anova_table['sum_sq']['Group1']
main_effect2 = anova_table['sum_sq']['Group2']
interaction_effect = anova_table['sum_sq']['Group1:Group2']

print("Main Effect 1:", main_effect1)
print("Main Effect 2:", main_effect2)
print("Interaction Effect :", interaction_effect)
                    

Main Effect 1: 28.00000000000002
Main Effect 2: 20.166666666666647
Interaction Effect : 5.333333333333318


  (model.ssr / model.df_resid))


In [5]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Example data
data = {'Group': ['A', 'A', 'B', 'B', 'C', 'C'],
        'Values': [10, 12, 8, 9, 11, 13]}
df = pd.DataFrame(data)

# Fit one-way ANOVA model
model = ols('Values ~ Group', data=df).fit()
anova_table = sm.stats.anova_lm(model)

# Extract F-statistic and p-value from the ANOVA table
F_statistic = anova_table['F'][0]
p_value = anova_table['PR(>F)'][0]

print("F-statistic:", F_statistic)
print("p-value:", p_value)

# Check significance level at 0.05
if p_value < 0.05:
    print("There are significant differences between the groups.")
else:
    print("There are no significant differences between the groups.")


F-statistic: 4.333333333333333
p-value: 0.1303952278723997
There are no significant differences between the groups.


In [3]:
import numpy as np
import scipy.stats as stats

diet_A = np.array([2, 3, 4, 3, 5, 4, 3, 2, 1, 2, 3, 4, 3, 4, 2, 3, 4, 5, 3, 4,
                   2, 3, 4, 3, 5, 4, 3, 2, 1, 2, 3, 4, 3, 4, 2, 3, 4, 5, 3, 4,
                   2, 3, 4, 3, 5, 4, 3, 2, 1, 2])
diet_B = np.array([1, 2, 1, 2, 3, 2, 1, 2, 3, 2, 1, 2, 1, 2, 3, 2, 1, 2, 1, 2,
                   3, 2, 1, 2, 3, 2, 1, 2, 3, 2, 1, 2, 1, 2, 3, 2, 1, 2, 1, 2,
                   3, 2, 1, 2, 3, 2, 1, 2, 3])
diet_C = np.array([3, 3, 2, 3, 2, 1, 2, 3, 2, 1, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
                   2, 3, 2, 3, 2, 1, 2, 3, 2, 1, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
                   2, 3, 2, 3, 2, 1, 2, 3])

# prfrom one-way ANOVA
f_statistic, p_value = stats.f_oneway(diet_A, diet_B, diet_C)

print("F-statistic :", f_statistic)
print("p-value:" , p_value)

F-statistic : 28.308230737350062
p-value: 4.282591817456252e-11


In [2]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols

data = {
    'Program': ['A', 'A', 'B', 'B', 'C', 'C', 'A', 'B', 'C', 'A', 'A', 'B', 'C', 'C', 'A', 'B', 'B', 'C', 'A', 'B', 'C', 'A', 'A', 'B', 'C', 'C', 'A', 'B', 'C'],
    'Experience': ['Novice', 'Novice', 'Experienced', 'Experienced', 'Novice', 'Novice', 'Experienced', 'Experienced', 'Novice', 'Novice', 'Experienced', 'Experienced', 'Novice', 'Novice', 'Experienced', 'Experienced', 'Novice', 'Novice', 'Experienced', 'Experienced', 'Novice', 'Novice', 'Experienced', 'Experienced', 'Novice', 'Novice', 'Experienced', 'Experienced'],
    'Time': [10, 12, 8, 9, 11, 13, 15, 14, 12, 10, 11, 9, 13, 12, 11, 10, 9, 8, 12, 10, 11, 13, 14, 9, 8, 7, 13, 12, 11]
}

df = pd.DataFrame(data)
model = ols('Time ~ Program + Experience + Program:Experience' , data = df).fit()
anova_table = sm.stats.anova_lm(model)

print(anova_table)
print(model)

ValueError: All arrays must be of the same length

In [1]:
import numpy as np
import scipy.stats as stats

# traditional teaching method
control_group = np.array([78, 85, 72, 90, 82, 88, 76, 80, 85, 79, 84, 77, 81, 87, 83, 79, 86, 75, 80, 88, 84, 76, 79, 83, 85, 82, 81, 87, 80, 84])

# new teaching method
experimental_group = np.array([82, 88, 85, 91, 87, 84, 76, 79, 83, 79, 85, 80, 87, 90, 83, 78, 81, 85, 89, 86, 83, 80, 84, 88, 81, 86, 89, 82, 85, 90])


t_stat, p_value = stats.ttest_ind(control_group, experimental_group)

print("t_statistic : " , t_stat)
print("p_value :", p_value)


t_statistic :  -2.1892700609774267
p_value : 0.03261489978774544


In [1]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Create a DataFrame with the data
data = {
    'Day': list(range(30)),
    'Store A': [100, 120, 110, 90, 95, 105, 115, 100, 105, 110, 115, 105, 100, 120, 110, 95, 105, 115, 100, 105, 110, 115, 105, 100, 120, 110, 90, 95, 105, 115],
    'Store B': [90, 95, 105, 115, 105, 100, 120, 110, 115, 105, 100, 120, 110, 95, 105, 115, 100, 105, 110, 115, 105, 100, 90, 95, 105, 115, 105, 100, 120, 110],
    'Store C': [100, 105, 110, 115, 105, 100, 120, 110, 115, 105, 100, 120, 110, 95, 105, 115, 100, 105, 110, 115, 105, 100, 90, 95, 105, 115, 105, 100, 120, 110]
}

df = pd.DataFrame(data)

# Reshape the data into long format
df_long = pd.melt(df, id_vars='Day', var_name='Store', value_name='Sales')

# Fit the repeated measures ANOVA model
model = ols('Sales ~ Store + C(Day)', data=df_long).fit()
anova_table = sm.stats.anova_lm(model)

# Print the ANOVA table
print(anova_table)
import statsmodels.stats.multicomp as mc

# Perform the Tukey HSD test
tukey_results = mc.MultiComparison(df_long['Sales'], df_long['Store']).tukeyhsd()

# Print the pairwise group comparisons and p-values
print(tukey_results.summary())


            df       sum_sq    mean_sq         F    PR(>F)
Store      2.0    11.666667   5.833333  0.106674  0.898995
C(Day)    29.0  2856.666667  98.505747  1.801366  0.028515
Residual  58.0  3171.666667  54.683908       NaN       NaN
 Multiple Comparison of Means - Tukey HSD, FWER=0.05 
 group1  group2 meandiff p-adj   lower  upper  reject
-----------------------------------------------------
Store A Store B  -0.1667 0.9967 -5.2916 4.9583  False
Store A Store C   0.6667 0.9484 -4.4583 5.7916  False
Store B Store C   0.8333 0.9206 -4.2916 5.9583  False
-----------------------------------------------------
