In [1]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from statsmodels.formula.api import ols

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
# Load the song rankings and discard the row labelled 0 in a single step.
# NOTE(review): row 0 is presumably a non-song row (e.g. a weights/header
# row) -- confirm against the CSV before relying on this.
df = pd.read_csv('tsrankings.csv').drop(index=0)

# Quick look at the first few remaining rows.
print(df.head())

      Album                          Song  Total Weighted Score  Enjoyability  \
1  folklore               illicit affairs                 10.00          10.0   
2  evermore  no body, no crime (ft. HAIM)                 10.00          10.0   
3  folklore                      cardigan                  9.80          10.0   
4  folklore             my tears ricochet                  9.54          10.0   
5  evermore                        willow                  9.52          10.0   

   Emotional Impact  Messaging  Lyricism  Nostalgia / Longetivity  \
1              10.0       10.0      10.0                     10.0   
2              10.0       10.0      10.0                     10.0   
3              10.0        9.0      10.0                     10.0   
4              10.0       10.0       9.0                     10.0   
5              10.0        9.0       9.0                      9.0   

   Overall Sound  Total RAW Score  ... Nostalgia Overall Sound.1  \
1           10.0             6

In [3]:
# Regex pattern -> canonical album label.
#
# Each key matches an album name followed by any trailing text (`.*`), so
# variants that start with the same name collapse onto one label
# (presumably edition suffixes -- confirm against the raw 'Album' values).
# Keys are raw strings so that regex escapes such as `\(` reach the regex
# engine unchanged and do not trigger Python's invalid-escape-sequence
# warning. Patterns are not anchored with `^`; they match wherever the name
# occurs in the value.
album_mapping = {
    r"Debut \(Taylor Swift\).*": "Debut (Taylor Swift)",
    r"Fearless.*": "Fearless",
    r"Speak Now.*": "Speak Now",
    r"Red.*": "Red",
    r"1989.*": "1989",
    r"Reputation.*": "Reputation",
    r"Lover.*": "Lover",
    r"folklore.*": "folklore",
    r"evermore.*": "evermore",
    r"Midnights.*": "Midnights",
    r"The Tortured Poets Department.*": "The Tortured Poets Department",
    # The trailing colon is kept in this label.
    r"SINGLE.*": "SINGLE:",
}

# Normalise the 'Album' column: wherever a regex key of album_mapping
# matches, the matched text is replaced by the mapped label.
normalized_albums = df['Album'].replace(regex=True, to_replace=album_mapping)
df['Album'] = normalized_albums

# Distinct album labels present after normalisation.
unique_albums = df['Album'].unique()

In [4]:
# Summary statistics computed separately for each value of 'Album'.
album_groups = df.groupby('Album')
descriptive_stats = album_groups.describe()
print(descriptive_stats)

                              Total Weighted Score                            \
                                             count      mean       std   min   
Album                                                                          
1989                                          21.0  3.494762  1.449405  1.75   
Debut (Taylor Swift)                          14.0  1.820000  1.001645  1.00   
Fearless                                      25.0  2.060400  1.441953  1.00   
Lover                                         19.0  4.676316  1.541310  1.86   
Midnights                                     22.0  4.593636  1.971684  1.92   
Red                                           29.0  3.931379  1.951567  1.60   
Reputation                                    15.0  4.452000  1.573432  2.64   
SINGLE:                                       13.0  3.856154  2.193169  1.00   
Speak Now                                     23.0  4.208696  1.965984  2.05   
The Tortured Poets Department           

In [6]:
# One-way ANOVA of the total weighted score by album, followed by Tukey's HSD
# pairwise comparisons when the ANOVA p-value is below ALPHA.

# Significance level shared by the ANOVA check and Tukey's HSD.
ALPHA = 0.05

# Give the score column a space-free name so it can be referenced directly in
# the model formula below.
df = df.rename(columns={'Total Weighted Score': 'Total_Weighted_Score'})

# Restrict both analyses to rows where the score and the album are present.
# The formula-based OLS fit drops incomplete rows on its own, whereas
# pairwise_tukeyhsd is given the columns as-is; filtering once keeps the two
# steps on the same sample. With no missing values this is a no-op.
anova_data = df.dropna(subset=['Total_Weighted_Score', 'Album'])

model = ols('Total_Weighted_Score ~ C(Album)', data=anova_data).fit()
anova_table = sm.stats.anova_lm(model, typ=2)

print(anova_table)

# Only run the post-hoc test when the album effect is significant.
album_p_value = anova_table.loc['C(Album)', 'PR(>F)']
if album_p_value < ALPHA:
    tukey = pairwise_tukeyhsd(
        endog=anova_data['Total_Weighted_Score'],
        groups=anova_data['Album'],
        alpha=ALPHA,
    )
    print(tukey)
else:
    print("ANOVA was not significant; Tukey's HSD is not necessary.")


              sum_sq     df          F        PR(>F)
C(Album)  565.866628   11.0  13.679855  3.929953e-20
Residual  879.945443  234.0        NaN           NaN
                       Multiple Comparison of Means - Tukey HSD, FWER=0.05                       
            group1                        group2            meandiff p-adj   lower  upper  reject
-------------------------------------------------------------------------------------------------
                         1989          Debut (Taylor Swift)  -1.6748 0.3433 -3.8838 0.5342  False
                         1989                      Fearless  -1.4344 0.3459 -3.3295 0.4607  False
                         1989                         Lover   1.1816 0.7428 -0.8456 3.2087  False
                         1989                     Midnights   1.0989 0.7839 -0.8543 3.0521  False
                         1989                           Red   0.4366 0.9997 -1.3978 2.2711  False
                         1989                    Reputati