In [4]:
import pandas as pd 
# Load EspressoData.csv (resolved relative to the working directory) into a DataFrame.
espresso_data = pd.read_csv(filepath_or_buffer='EspressoData.csv')
# Bare last expression so the notebook renders the frame as rich output.
espresso_data

Unnamed: 0,cereme,brewmethod
0,36.64,1
1,39.65,1
2,37.74,1
3,35.96,1
4,38.52,1
5,21.02,1
6,24.81,1
7,34.18,1
8,23.08,1
9,70.84,2


In [5]:
# Summary statistics of `cereme` within each `brewmethod` level
# (count, mean, std, min, quartiles, max).
descriptive_stats = (
    espresso_data
    .groupby('brewmethod')['cereme']
    .describe()
)
descriptive_stats

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
brewmethod,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,9.0,32.4,7.30006,21.02,24.81,35.96,37.74,39.65
2,9.0,61.3,10.100604,46.68,54.26,62.53,70.84,73.19
3,9.0,39.7,7.700768,32.68,35.35,37.12,40.11,56.19


In [6]:
# Next, I will test the assumptions required for one-way ANOVA: normality, homogeneity of variances, and independence.
# Let's begin with the normality test.
from scipy.stats import normaltest

# D'Agostino-Pearson omnibus normality test, run separately on the `cereme`
# values of each brew method.
# NOTE(review): the descriptive statistics report 9 observations per group;
# SciPy documents the kurtosis component of this test as intended for
# n >= 20, so these p-values should be read with caution -- confirm, and
# consider a small-sample test such as Shapiro-Wilk.
normality_results = (
    espresso_data
    .groupby('brewmethod')['cereme']
    .apply(normaltest)
)

# Expand each (statistic, p-value) result into two labelled columns,
# one row per brew method.
normality_results_df = (
    normality_results
    .apply(pd.Series)
    .rename(columns={0: 'Statistic', 1: 'p-value'})
)
normality_results_df






Unnamed: 0_level_0,Statistic,p-value
brewmethod,Unnamed: 1_level_1,Unnamed: 2_level_1
1,2.587553,0.274233
2,1.644417,0.43946
3,6.208392,0.044861


In [None]:
"""
Brew Methods 1 and 2 show no significant deviation from normality (p = 0.274 and p = 0.439, both > 0.05).
Brew Method 3 shows a marginal deviation from normality (p = 0.045, just below 0.05).

The results of Bartlett's test (computed in the next cell) indicate no significant violation of the homogeneity of variances assumption (statistic = 0.963, p = 0.618 > 0.05).

Finally, I will assume independence of observations based on the study design, as this cannot be tested statistically from the data alone.
"""

In [7]:
from scipy.stats import bartlett

# Bartlett's test for homogeneity of variances: collect one `cereme` sample
# per distinct `brewmethod` value (in order of first appearance) and pass
# them all to the test.
brew_levels = espresso_data['brewmethod'].unique()
cereme_samples = [
    espresso_data.loc[espresso_data['brewmethod'] == level, 'cereme']
    for level in brew_levels
]
bartlett_stat, bartlett_p = bartlett(*cereme_samples)

# One-row summary table of the test result for display.
bartlett_results = pd.DataFrame([
    {
        "Test": "Bartlett's Test",
        "Statistic": bartlett_stat,
        "p-value": bartlett_p,
    }
])
bartlett_results

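# NOTE(review): no one-way ANOVA (e.g. scipy.stats.f_oneway) is run in the cells shown here, and this
# conclusion sits in the Bartlett cell; the F statistic and p-value should be computed and displayed.
# The claim is consistent with the reported eta-squared of 0.703, which implies F(2, 24) ≈ 28.4.
# The one-way ANOVA test reveals a highly significant difference in crème measurements among the brew methods (p<0.001).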


Unnamed: 0,Test,Statistic,p-value
0,Bartlett's Test,0.963309,0.61776


In [8]:
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# Tukey's Honest Significant Difference (HSD) post-hoc test: compares every
# pair of brew methods at the 5% significance level.
tukey_results = pairwise_tukeyhsd(
    endog=espresso_data['cereme'],       # dependent variable
    groups=espresso_data['brewmethod'],  # independent (grouping) variable
    alpha=0.05,
)

# The summary table's first row holds the column headers and the remaining
# rows hold one pairwise comparison each; build the display frame from it.
tukey_summary_rows = tukey_results.summary().data
tukey_results_df = pd.DataFrame(
    data=tukey_summary_rows[1:],
    columns=tukey_summary_rows[0],
)
tukey_results_df


Unnamed: 0,group1,group2,meandiff,p-adj,lower,upper,reject
0,1,2,28.9,0.0,18.9429,38.8571,True
1,1,3,7.3,0.1811,-2.6571,17.2571,False
2,2,3,-21.6,0.0,-31.5571,-11.6429,True


In [None]:
# The post-hoc Tukey analysis reveals the following:
# 
# Brew Method 1 vs. Brew Method 2: Significant difference (mean diff = 28.9, adjusted p < 0.001).
# Brew Method 1 vs. Brew Method 3: No significant difference (mean diff = 7.3, adjusted p = 0.181).
# Brew Method 2 vs. Brew Method 3: Significant difference (mean diff = -21.6, adjusted p < 0.001).


In [9]:
# Effect size
# Eta-squared = SS_between / SS_total for `cereme` across brew methods,
# i.e. the share of total variation that lies between the group means.
grand_mean = espresso_data['cereme'].mean()

# Per-group size and mean from a single groupby pass.
group_summary = espresso_data.groupby('brewmethod')['cereme'].agg(['size', 'mean'])

# Between-group sum of squares: group size times the squared distance of the
# group mean from the grand mean, summed over groups.
# skipna=False: a missing value yields NaN instead of being silently dropped.
ss_between = (
    group_summary['size'] * (group_summary['mean'] - grand_mean) ** 2
).sum(skipna=False)

# Total sum of squares: squared distance of every observation from the grand mean.
ss_total = ((espresso_data['cereme'] - grand_mean) ** 2).sum(skipna=False)

eta_squared = ss_between / ss_total

# Display the effect size result
effect_size_result = pd.DataFrame({
    "Metric": ["Eta-Squared"],
    "Value": [eta_squared]
})

effect_size_result



Unnamed: 0,Metric,Value
0,Eta-Squared,0.703063
