In [1]:

import pandas as pd
import scipy.stats as stats
from statsmodels.stats.multicomp import pairwise_tukeyhsd


In [4]:
# Load your dataset

df = pd.read_csv("vader.csv", usecols=["Holiday", "Source", "content_compound"])
df = df.dropna()
print(df.head())

  Holiday Source  content_compound
0     MLK    FOX            0.9916
1     MLK    FOX           -0.8316
2     MLK    FOX            0.6543
3     MLK    FOX            0.8745
4     MLK    FOX            0.9109


In [9]:
def run_anova_for_holiday(holiday_name, df):
    print(f"\nRunning ANOVA for {holiday_name}...\n")
    
    # Filter data for the given holiday
    holiday_df = df[df["Holiday"] == holiday_name]
    print(holiday_df)
    
    # Ensure at least 2 sources exist
    sources = holiday_df["Source"].unique()
    if len(sources) < 2:
        print(f"Not enough sources for {holiday_name} ANOVA.")
        return
    print(sources)
    
    # Run ANOVA
    f_stat, p_anova = stats.f_oneway(
        *[holiday_df[holiday_df["Source"] == source]["content_compound"] for source in sources]
    )
    print(f"ANOVA F-statistic: {f_stat:.4f}, p-value: {p_anova:.4f}")





In [10]:
run_anova_for_holiday("MLK", df)



Running ANOVA for MLK...

   Holiday Source  content_compound
0      MLK    FOX            0.9916
1      MLK    FOX           -0.8316
2      MLK    FOX            0.6543
3      MLK    FOX            0.8745
4      MLK    FOX            0.9109
5      MLK    FOX            0.5859
6      MLK    FOX            0.9827
7      MLK    FOX           -0.3532
8      MLK    FOX            0.9955
9      MLK     AP            0.9618
10     MLK     AP           -0.9713
11     MLK     AP            0.8829
12     MLK     AP            0.9785
13     MLK     AP           -0.9884
14     MLK     AP            0.9879
15     MLK     AP           -0.9958
16     MLK     AP            0.8842
17     MLK     AP           -0.4782
18     MLK    CNN           -0.2071
19     MLK    CNN            0.9975
20     MLK    CNN            0.9912
21     MLK    CNN            0.9997
22     MLK    CNN            0.9804
23     MLK    CNN            0.9218
24     MLK    CNN            0.9981
25     MLK    CNN            0.8555
2

In [8]:
# Run ANOVA separately for MLK Day and July 4th
run_anova_for_holiday("MLK", df)
run_anova_for_holiday("July4th", df)
run_anova_for_holiday("Veterans", df)



Running ANOVA for MLK...

ANOVA F-statistic: 1.0026, p-value: 0.4186

Running ANOVA for July4th...

ANOVA F-statistic: 1.4087, p-value: 0.2444

Running ANOVA for Veterans...

ANOVA F-statistic: 1.3622, p-value: 0.2625
