In [None]:
from glob import glob

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

import iesta.loader as loader
import iesta.processor as proc
import iesta.properties as prop
import iesta.stats.significance
from iesta.machine_learning.dataloader import IESTAData, METHODOLOGY
from iesta.machine_learning.feature_extraction import get_features_df

#from .autonotebook import tqdm as notebook_tqdm

%matplotlib inline

# Reset rc params to their defaults, then apply seaborn's theme with a
# compact 3 x 2 figure size for every plot in this notebook.
# (Pass style="white" to sns.set for a plainer layout.)
sns.reset_defaults()
sns.set(rc={"figure.figsize": (3, 2)})


In [None]:
# Lower-cased ideology label; it selects the data loader, the feature
# directories and is passed to every significance call in this notebook.
ideology = prop.CONSERVATIVE_IDEOLOGY.lower()
ideology

In [None]:
# Root directory of the extracted feature parquet files.
path = "../data/extracted_features/"

# Data loader for the selected ideology, using the EACH methodology.
dataloader = IESTAData(
    ideology=ideology,
    methodology=METHODOLOGY.EACH,
)

# get_training_data() returns a pair whose second element is the path of the
# training parquet file; the frame used downstream is read from that path.
training_df, training_data_path = dataloader.get_training_data()
training_data = pd.read_parquet(training_data_path)

In [None]:
# Number of rows in the training data read from parquet.
len(training_data)

## Fetching style (MPQA and EMPATH) and transformer features

In [None]:
# Collect the feature parquet files for this ideology. glob() gives no
# ordering guarantee (it depends on the filesystem), so sort the matches to
# get the same file order on every machine and every run.
style_features_path = sorted(
    glob(f"{path}/{ideology}_style-features_1000/*.parquet")
)
transformer_features_path = sorted(
    glob(f"{path}/{ideology}_transformer-features_100/*.parquet")
)

# Fail here with a clear message instead of somewhere inside feature loading
# when a directory is missing or empty.
assert style_features_path, (
    f"no style feature files found for '{ideology}' under {path}"
)
assert transformer_features_path, (
    f"no transformer feature files found for '{ideology}' under {path}"
)

In [None]:
# Load the style and the transformer features from the parquet files globbed
# above, passing the training data along to get_features_df.
# NOTE(review): 1000 / 100 presumably mirror the `_1000` / `_100` suffixes of
# the feature directories — confirm their meaning against get_features_df.
style_features_df = get_features_df(style_features_path, 1000, training_data)
transformers_features_df = get_features_df(transformer_features_path, 100, training_data)


## Running significance tests

### All Effects

In [None]:

# `iesta.stats.significance` is imported with the other imports at the top of
# the notebook, so this and the later cells no longer depend on an import
# hidden in the middle of the analysis.

# Run calc_sign_effects on the style (empath-mpqa) features with "effect" as
# the variable under test (presumably the independent variable, given the
# `exclude_iv_vals` keyword) and no values excluded.
significance_empath_mpqa_effects_df = iesta.stats.significance.calc_sign_effects(
    style_features_df,
    ideology,
    "empath-mpqa",
    "effect",
    exclude_iv_vals=[],
)

# Same configuration for the transformer features.
significance_transformers_effects_df = iesta.stats.significance.calc_sign_effects(
    transformers_features_df,
    ideology,
    "transformers",
    "effect",
    exclude_iv_vals=[],
)

In [None]:
# Display the "effect" results for the empath-mpqa (style) features.
significance_empath_mpqa_effects_df

In [None]:
# Display the "effect" results for the transformer features.
significance_transformers_effects_df

### All Effects - Excluding *Okay*

In [None]:
# Same call as the all-effects run on the style (empath-mpqa) features, but
# with "okay" handed to `exclude_iv_vals`.
significance_empath_mpqa_NOOKAY_effects_df = iesta.stats.significance.calc_sign_effects(
    style_features_df,
    ideology,
    "empath-mpqa",
    "effect",
    exclude_iv_vals=["okay"],
)

# Transformer features, again excluding "okay".
significance_transformers_NOOKAY_effects_df = iesta.stats.significance.calc_sign_effects(
    transformers_features_df,
    ideology,
    "transformers",
    "effect",
    exclude_iv_vals=["okay"],
)

In [None]:
# Display the "effect" results (with "okay" excluded) for the empath-mpqa features.
significance_empath_mpqa_NOOKAY_effects_df

In [None]:
# Display the "effect" results (with "okay" excluded) for the transformer features.
significance_transformers_NOOKAY_effects_df

### Binary Effects

In [None]:
# Style (empath-mpqa) features tested against "binary_effect" instead of
# "effect"; nothing is excluded.
significance_empath_mpqa_binaryeffects_df = iesta.stats.significance.calc_sign_effects(
    style_features_df,
    ideology,
    "empath-mpqa",
    "binary_effect",
    exclude_iv_vals=[],
)

# Transformer features against "binary_effect".
significance_transformers_binaryeffects_df = iesta.stats.significance.calc_sign_effects(
    transformers_features_df,
    ideology,
    "transformers",
    "binary_effect",
    exclude_iv_vals=[],
)

In [None]:
# Display the "binary_effect" results for the empath-mpqa (style) features.
significance_empath_mpqa_binaryeffects_df

In [None]:
# Display the "binary_effect" results for the transformer features.
significance_transformers_binaryeffects_df

### Binary Effects - Excluding *Okay*

In [None]:
# Rows whose "effect" equals "okay" are dropped from the frame before the call
# (rather than via `exclude_iv_vals`), then "binary_effect" is tested.
# NOTE(review): presumably because "okay" is a value of `effect`, not of
# `binary_effect` — confirm.
significance_empath_mpqa_NOOKAY_binaryeffects_df = iesta.stats.significance.calc_sign_effects(
    style_features_df.loc[lambda frame: frame["effect"] != "okay"],
    ideology,
    "empath-mpqa",
    "binary_effect",
    exclude_iv_vals=[],
)

# Same row filter applied to the transformer features.
significance_transformers_NOOKAY_binaryeffects_df = iesta.stats.significance.calc_sign_effects(
    transformers_features_df.loc[lambda frame: frame["effect"] != "okay"],
    ideology,
    "transformers",
    "binary_effect",
    exclude_iv_vals=[],
)

In [None]:
# Display the "binary_effect" results (rows with effect "okay" removed) for the empath-mpqa features.
significance_empath_mpqa_NOOKAY_binaryeffects_df

In [None]:
# Display the "binary_effect" results (rows with effect "okay" removed) for the transformer features.
significance_transformers_NOOKAY_binaryeffects_df