In [None]:
import numpy as np
import scipy.stats as stats

# Example data (DV and IV)
np.random.seed(42)

# Generate example data for three groups
group1 = np.random.normal(10, 2, 20)
group2 = np.random.normal(12, 2, 20)
group3 = np.random.normal(15, 2, 20)

# Combine data into a single array
data = np.concatenate([group1, group2, group3])

# Define corresponding group labels
labels = ['Group1'] * 20 + ['Group2'] * 20 + ['Group3'] * 20

# Calculate overall mean of the data
overall_mean = np.mean(data)

# Calculate group means
group_means = []
for group in [group1, group2, group3]:
    group_means.append(np.mean(group))

# Calculate SST (Total Sum of Squares)
SST = np.sum((data - overall_mean)**2)

# Calculate SSE (Explained Sum of Squares)
SSE = np.sum([len(group) * (mean - overall_mean)**2 for group, mean in zip([group1, group2, group3], group_means)])

# Calculate SSR (Residual Sum of Squares)
SSR = np.sum([(x - group_means[labels[i]])**2 for i, x in enumerate(data)])

# Output the results
print(f"Total Sum of Squares (SST): {SST}")
print(f"Explained Sum of Squares (SSE): {SSE}")
print(f"Residual Sum of Squares (SSR): {SSR}")


In [2]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Example data (DV and two IVs)
np.random.seed(42)

# Generate example data for two factors (IVs)
factor_A = np.repeat(['A1', 'A2', 'A3'], 10)  # Factor A with 3 levels
factor_B = np.tile(['B1', 'B2', 'B3'], 10)    # Factor B with 3 levels
data = pd.DataFrame({
    'Factor_A': factor_A,
    'Factor_B': factor_B,
    'DV': np.random.normal(50, 10, 30)  # Example DV data
})

# Fit the ANOVA model
model = ols('DV ~ C(Factor_A) + C(Factor_B) + C(Factor_A):C(Factor_B)', data=data).fit()

# Extract ANOVA table
anova_table = sm.stats.anova_lm(model, typ=2)

# Print ANOVA table to see main effects and interaction effect
print(anova_table)

# Extract main effects and interaction effect
main_effect_A = anova_table.loc['C(Factor_A)', 'sum_sq'] / anova_table.loc['C(Factor_A)', 'df']
main_effect_B = anova_table.loc['C(Factor_B)', 'sum_sq'] / anova_table.loc['C(Factor_B)', 'df']
interaction_effect = anova_table.loc['C(Factor_A):C(Factor_B)', 'sum_sq'] / anova_table.loc['C(Factor_A):C(Factor_B)', 'df']

# Output the results
print(f"Main effect of Factor A: {main_effect_A}")
print(f"Main effect of Factor B: {main_effect_B}")
print(f"Interaction effect: {interaction_effect}")


                              sum_sq    df         F    PR(>F)
C(Factor_A)               686.309084   2.0  5.922432  0.009128
C(Factor_B)               218.989778   2.0  1.889749  0.175926
C(Factor_A):C(Factor_B)   144.356529   4.0  0.622855  0.651294
Residual                 1216.771236  21.0       NaN       NaN
Main effect of Factor A: 343.1545419535651
Main effect of Factor B: 109.4948891801507
Interaction effect: 36.089132306337945


In [3]:
import numpy as np
from scipy.stats import f_oneway

# Example data (mean weight loss for each diet)
diet_A = np.random.normal(5, 1, 50)  # Mean weight loss for diet A
diet_B = np.random.normal(4.5, 1.2, 50)  # Mean weight loss for diet B
diet_C = np.random.normal(4.8, 1.1, 50)  # Mean weight loss for diet C

# Perform one-way ANOVA
f_statistic, p_value = f_oneway(diet_A, diet_B, diet_C)

# Print the results
print(f"One-way ANOVA results:")
print(f"F-statistic: {f_statistic}")
print(f"P-value: {p_value}")

# Interpret the results
alpha = 0.05
if p_value < alpha:
    print("The one-way ANOVA result is significant, indicating that there are likely significant differences "
          "in mean weight loss between at least two of the diets (A, B, or C).")
else:
    print("The one-way ANOVA result is not significant, indicating that there are no significant differences "
          "in mean weight loss between the diets (A, B, and C).")

One-way ANOVA results:
F-statistic: 2.1998458860662016
P-value: 0.11445565766520953
The one-way ANOVA result is not significant, indicating that there are no significant differences in mean weight loss between the diets (A, B, and C).


In [4]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Example data generation
np.random.seed(0)

# Generate example data
n = 30
software_programs = np.random.choice(['Program A', 'Program B', 'Program C'], n)
experience_levels = np.random.choice(['Novice', 'Experienced'], n)
times = np.random.normal(10, 2, n)  # Example times, normally distributed

# Create DataFrame
data = pd.DataFrame({
    'Software': software_programs,
    'Experience': experience_levels,
    'Time': times
})

# Fit the ANOVA model
model = ols('Time ~ C(Software) + C(Experience) + C(Software):C(Experience)', data=data).fit()

# Print ANOVA table
anova_table = sm.stats.anova_lm(model, typ=2)
print("Two-way ANOVA results:")
print(anova_table)

# Interpret the results
alpha = 0.05
print("\nInterpretation:")
if anova_table.loc['C(Software):C(Experience)', 'PR(>F)'] < alpha:
    print("There is a significant interaction effect between software programs and experience level.")
else:
    print("There is no significant interaction effect between software programs and experience level.")
    
if anova_table.loc['C(Software)', 'PR(>F)'] < alpha:
    print("There is a significant main effect of software programs.")
else:
    print("There is no significant main effect of software programs.")
    
if anova_table.loc['C(Experience)', 'PR(>F)'] < alpha:
    print("There is a significant main effect of experience level.")
else:
    print("There is no significant main effect of experience level.")


Two-way ANOVA results:
                              sum_sq    df         F    PR(>F)
C(Software)                11.141545   2.0  2.113814  0.142706
C(Experience)               2.102143   1.0  0.797652  0.380665
C(Software):C(Experience)   6.013261   2.0  1.140857  0.336272
Residual                   63.249921  24.0       NaN       NaN

Interpretation:
There is no significant interaction effect between software programs and experience level.
There is no significant main effect of software programs.
There is no significant main effect of experience level.


In [5]:
import numpy as np
from scipy.stats import ttest_ind
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# Example data (test scores)
np.random.seed(0)

control_scores = np.random.normal(70, 10, 50)   # Control group (traditional method)
experimental_scores = np.random.normal(75, 10, 50)  # Experimental group (new method)

# Perform two-sample t-test
t_statistic, p_value = ttest_ind(control_scores, experimental_scores)

# Print t-test results
print("Two-sample t-test results:")
print(f"T-statistic: {t_statistic}")
print(f"P-value: {p_value}")

# Interpret the t-test results
alpha = 0.05
if p_value < alpha:
    print("The two-sample t-test result is significant, indicating that there is a significant difference "
          "in test scores between the control and experimental groups.")
else:
    print("The two-sample t-test result is not significant, indicating that there is no significant difference "
          "in test scores between the control and experimental groups.")

# Perform Tukey's HSD post-hoc test if t-test is significant
if p_value < alpha:
    data = np.concatenate([control_scores, experimental_scores])
    groups = ['Control'] * 50 + ['Experimental'] * 50
    tukey_results = pairwise_tukeyhsd(data, groups, alpha=0.05)
    print("\nTukey's HSD post-hoc test results:")
    print(tukey_results)



Two-sample t-test results:
T-statistic: -1.6677351961320235
P-value: 0.09856078338184605
The two-sample t-test result is not significant, indicating that there is no significant difference in test scores between the control and experimental groups.


In [6]:
import pandas as pd
import numpy as np
from statsmodels.stats.anova import AnovaRM
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# Example data generation
np.random.seed(0)

# Generate example data
days = np.arange(1, 31)
store_A_sales = np.random.normal(1000, 100, 30)  # Example sales for Store A
store_B_sales = np.random.normal(1100, 120, 30)  # Example sales for Store B
store_C_sales = np.random.normal(1050, 110, 30)  # Example sales for Store C

# Create DataFrame
data = pd.DataFrame({
    'Day': np.repeat(days, 3),
    'Store': np.tile(['A', 'B', 'C'], 30),
    'Sales': np.concatenate([store_A_sales, store_B_sales, store_C_sales])
})

# Convert 'Day' and 'Store' to categorical variables
data['Day'] = pd.Categorical(data['Day'])
data['Store'] = pd.Categorical(data['Store'])

# Perform repeated measures ANOVA
anova_rm = AnovaRM(data, 'Sales', 'Day', within=['Store'])
results = anova_rm.fit()

# Print ANOVA table
print("Repeated measures ANOVA results:")
print(results)

# Perform Tukey's HSD post-hoc test if ANOVA is significant
if results.anova_table['Pr > F'][0] < 0.05:  # Check p-value for the 'Store' factor
    tukey_results = pairwise_tukeyhsd(data['Sales'], data['Store'], alpha=0.05)
    print("\nTukey's HSD post-hoc test results:")
    print(tukey_results)


Repeated measures ANOVA results:
               Anova
      F Value Num DF  Den DF Pr > F
-----------------------------------
Store  1.5049 2.0000 58.0000 0.2306

