In [5]:
import os
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt
from scipy.stats import pearsonr, spearmanr
from scipy.stats import ttest_ind

# Anchor every path on the current working directory of the running process.
THIS_DIR = os.path.abspath(os.curdir)
# The project root sits two directory levels above the working directory.
ROOT_DIR = os.path.abspath(os.path.join(THIS_DIR, os.pardir, os.pardir))
# All input data lives under <root>/data.
DATA_DIR = os.path.join(ROOT_DIR, "data")

# Correlation

In [6]:
def _significance_marker(p_value):
    """Map a p-value to the star marker shown in the printed summary.

    Returns "***" for p < 0.01, "**" for p < 0.05, "*" for p < 0.1 and an
    empty string otherwise. A NaN p-value fails every comparison and
    therefore also yields the empty marker.
    """
    if p_value < 0.01:
        return "***"
    if p_value < 0.05:
        return "**"
    if p_value < 0.1:
        return "*"
    return ""


sources = ["AP","ABC","CBS","CNN","DailyMail","Express","Fox","Guardian","Mirror","NYT","Reuters"]
df_ntr = pd.read_parquet(os.path.join(DATA_DIR, "processed", "All_n10.parquet"))

# Pooled (all rows) averages: the baseline every source is compared against.
r_avg_system = df_ntr["Resonance"].mean()
n_avg_system = df_ntr["Novelty"].mean()

# Fit Resonance as a linear function of Novelty on the pooled data and store
# the fitted value for every row.
model = smf.ols(formula="Resonance ~ Novelty", data=df_ntr)
results = model.fit()
df_ntr["Resonance_Predicted"] = results.predict(df_ntr[["Novelty"]])
# Pooled mean of (Resonance - predicted Resonance).
delta_r_avg_system = r_avg_system - df_ntr["Resonance_Predicted"].mean()

# The pooled mean Resonance does not depend on the source, so it is printed
# once here rather than once per loop iteration.
print(r_avg_system)

source_rows = []
for source in sources:
    df_filter = df_ntr[df_ntr["Source"] == source]
    r_avg = df_filter["Resonance"].mean()
    n_avg = df_filter["Novelty"].mean()
    delta_r_avg = r_avg - df_filter["Resonance_Predicted"].mean()
    diff_r_avg = r_avg - r_avg_system
    diff_n_avg = n_avg - n_avg_system
    diff_delta_r_avg = delta_r_avg - delta_r_avg_system

    # Two-sample t-test of this source's rows against the pooled rows, per
    # variable; only the significance level is reported.
    # NOTE(review): the pooled sample contains the source's own rows, so the
    # two samples are not independent -- confirm this is intended.
    # NOTE(review): Resonance_Predicted is a linear function of Novelty, so
    # its test carries the same significance as the Novelty test; possibly
    # the residual (Resonance - Resonance_Predicted) was meant -- confirm.
    labels = []
    for variable in ["Resonance", "Novelty", "Resonance_Predicted"]:
        p_value = ttest_ind(df_filter[variable], df_ntr[variable]).pvalue
        labels.append(f"{variable} ({_significance_marker(p_value)}) ")
    print(source + " -> " + "".join(labels))

    source_rows.append(
        {
            "Source": source,
            "diff_r_avg": diff_r_avg,
            "diff_n_avg": diff_n_avg,
            "diff_delta_r_avg": diff_delta_r_avg,
        }
    )

# One row per source; explicit columns keep the frame's shape even when
# `sources` is empty.
df = pd.DataFrame(
    source_rows,
    columns=["Source", "diff_r_avg", "diff_n_avg", "diff_delta_r_avg"],
)
df.round(4).head(len(sources))

AP -> Resonance () Novelty (***) Resonance_Predicted (***) 
0.00025632972591696306
ABC -> Resonance () Novelty (***) Resonance_Predicted (***) 
0.00025632972591696306
CBS -> Resonance () Novelty () Resonance_Predicted () 
0.00025632972591696306
CNN -> Resonance () Novelty (***) Resonance_Predicted (***) 
0.00025632972591696306
DailyMail -> Resonance () Novelty (***) Resonance_Predicted (***) 
0.00025632972591696306
Express -> Resonance () Novelty (***) Resonance_Predicted (***) 
0.00025632972591696306
Fox -> Resonance () Novelty (***) Resonance_Predicted (***) 
0.00025632972591696306
Guardian -> Resonance () Novelty (***) Resonance_Predicted (***) 
0.00025632972591696306
Mirror -> Resonance () Novelty () Resonance_Predicted () 
0.00025632972591696306
NYT -> Resonance () Novelty (**) Resonance_Predicted (**) 
0.00025632972591696306
Reuters -> Resonance () Novelty (***) Resonance_Predicted (***) 
0.00025632972591696306


Unnamed: 0,Source,diff_r_avg,diff_n_avg,diff_delta_r_avg
0,AP,-0.0037,0.6715,-0.1787
1,ABC,0.0012,0.3408,-0.0876
2,CBS,0.0036,0.0206,-0.0018
3,CNN,0.0095,0.1845,-0.0386
4,DailyMail,0.0032,0.3072,-0.0769
5,Express,0.0093,-0.1994,0.0613
6,Fox,0.0017,-0.1596,0.0433
7,Guardian,-0.014,0.0546,-0.0282
8,Mirror,0.0053,-0.0003,0.0054
9,NYT,-0.0154,-0.0388,-0.0053
