In [None]:
import pandas as pd
import numpy as np
import pickle
import shap
shap.initjs()

# Show cell contents at full width instead of truncating them.
# `None` is the supported "no limit" value; the legacy `-1` was deprecated in
# pandas 1.0 and is no longer accepted by current pandas releases.
pd.set_option('display.max_colwidth', None)

from helper.data_loading import *
from helper.explainability_helper import *

# Load the pickled GermEval frame and expose its `label_1` column as `label`.
germEval = (
    pd.read_pickle('data/GERMEVAL_with_topic_distribution.pkl')
    .rename(columns={'label_1': 'label'})
)

# The topic distribution is computed over the rows labelled "OTHER" only.
germEval_topic_distribution = get_topic_distribution_over_dataset(
    germEval[germEval["label"] == "OTHER"]
)
print("germEval topic distribution:\n", germEval_topic_distribution, "\n")


# Retraining is switched off by default: the models, word indices and training
# frames already stored under output/shap/ are loaded further down in this
# cell. Set RETRAIN_MODELS = True to rebuild and overwrite them.
RETRAIN_MODELS = False


def train_and_save_model(name, reference_df, topic_distribution, sample_factor=5):
    """Train the model for one corpus and persist its artefacts.

    Reads ``data/<NAME>_with_topic_distribution.pkl`` (``name`` upper-cased),
    draws a sample pool from it, builds the training frame together with
    ``reference_df``, trains a model and writes three files:
    ``output/shap/<name>_model.h5``, ``output/shap/<name>_dict`` (pickled word
    index) and ``output/shap/<name>_df`` (pickled training frame).

    Parameters
    ----------
    name : str
        Corpus name in lower case, e.g. ``"left"``, ``"right"``, ``"neutral"``.
    reference_df : pandas.DataFrame
        Frame passed to ``get_dataset`` next to the sample pool (GermEval here).
    topic_distribution
        Topic distribution passed to ``create_sample_pool`` and ``get_dataset``.
    sample_factor : int, default 5
        Forwarded to ``create_sample_pool``.

    Returns
    -------
    tuple
        ``(model, word_index, train_df)`` as produced by the helpers.
    """
    corpus = pd.read_pickle(f'data/{name.upper()}_with_topic_distribution.pkl')
    pool = create_sample_pool(corpus, topic_distribution, sample_factor=sample_factor)
    del corpus  # only the sample pool is needed from here on

    train_df = get_dataset(pool, reference_df, topic_distribution)
    tweets, labels = create_train_label(train_df)
    model, word_index = train_model(tweets, labels)

    model.save(f'output/shap/{name}_model.h5')
    with open(f"output/shap/{name}_dict", "wb") as f:
        pickle.dump(word_index, f)
    with open(f"output/shap/{name}_df", "wb") as f:
        pickle.dump(train_df, f)

    return model, word_index, train_df


if RETRAIN_MODELS:
    for corpus_name in ("left", "right", "neutral"):
        train_and_save_model(corpus_name, germEval, germEval_topic_distribution)

# Load the persisted model, training frame and word index of each corpus.
left_model, left_df, left_dict = load_model_dict_data("output/shap/left")
right_model, right_df, right_dict = load_model_dict_data("output/shap/right")
neutral_model, neutral_df, neutral_dict = load_model_dict_data("output/shap/neutral")

# Pre-process GermEval and convert its labels (replace_label_to_binary), then
# split off tweets / labels *before* the frame is narrowed to three columns.
germEval = replace_label_to_binary(prepare_data(germEval))
germEval_tweet, germEval_labels = create_train_label(germEval)
germEval = germEval.filter(items=["text", "token", "label"])

# Each model comes with its own word index, so the tweets are encoded once
# per model; the second return value of the helper is not used.
left_germEval_tweet, __ = prepare_data_for_training_single(germEval_tweet, left_dict)
right_germEval_tweet, __ = prepare_data_for_training_single(germEval_tweet, right_dict)
neutral_germEval_tweet, __ = prepare_data_for_training_single(germEval_tweet, neutral_dict)

# Attach one prediction column per model.
germEval = germEval.assign(
    left_pred=left_model.predict(left_germEval_tweet),
    right_pred=right_model.predict(right_germEval_tweet),
    neutral_pred=neutral_model.predict(neutral_germEval_tweet),
)

def select_single_model_misses(scored, missed_by, caught_by, threshold=0.5):
    """Select label-1 rows that one model scored low and the others scored high.

    Parameters
    ----------
    scored : pandas.DataFrame
        Frame with a ``label`` column and one prediction column per model.
    missed_by : str
        Prediction column that must be strictly below ``threshold``.
    caught_by : iterable of str
        Prediction columns that must all be strictly above ``threshold``.
    threshold : float, default 0.5
        Decision boundary applied to every prediction column.

    Returns
    -------
    pandas.DataFrame
        The matching rows, sorted ascending by ``missed_by``.
    """
    selected = (scored["label"] == 1) & (scored[missed_by] < threshold)
    for column in caught_by:
        selected = selected & (scored[column] > threshold)
    return scored.loc[selected].sort_values(missed_by)


# For each model: tweets with label 1 that this model scored below 0.5 while
# both other models scored above 0.5.
right_wrong = select_single_model_misses(germEval, "right_pred", ["left_pred", "neutral_pred"])
left_wrong = select_single_model_misses(germEval, "left_pred", ["right_pred", "neutral_pred"])
neutral_wrong = select_single_model_misses(germEval, "neutral_pred", ["right_pred", "left_pred"])

# Look at right_wrong

In [None]:
# First 10 rows of right_wrong; the frame was sorted ascending by right_pred
# when it was built, so these are the lowest right-model scores.
right_wrong.head(10)

In [None]:
# RIGHT model on the first 1000 rows of right_wrong (label-1 tweets this model
# scored below 0.5 while the left and neutral models scored above 0.5).
# NOTE(review): look_first=10 presumably limits how many examples are shown —
# confirm against do_explain_model.
do_explain_model(right_model, right_dict, right_wrong[:1000], look_first=10 )

In [None]:
# LEFT model on the same right_wrong rows; this model scored them above 0.5.
# NOTE(review): look_first=10 presumably limits how many examples are shown —
# confirm against do_explain_model.
do_explain_model(left_model, left_dict, right_wrong[:1000], look_first=10 )

In [None]:
# NEUTRAL model on the same right_wrong rows; this model scored them above 0.5.
# NOTE(review): look_first=10 presumably limits how many examples are shown —
# confirm against do_explain_model.
do_explain_model(neutral_model, neutral_dict, right_wrong[:1000], look_first=10 )

In [None]:
# Investigation: per training frame, count the rows with label == 0 whose `token` value contains "gutmenschen"

In [None]:
# RIGHT: number of label-0 rows in right_df whose `token` value contains "gutmenschen"
counter = sum(
    1
    for tokens, label in zip(right_df["token"], right_df["label"])
    if "gutmenschen" in tokens and label == 0
)
counter

In [None]:
# LEFT: number of label-0 rows in left_df whose `token` value contains "gutmenschen"
counter = sum(
    1
    for tokens, label in zip(left_df["token"], left_df["label"])
    if "gutmenschen" in tokens and label == 0
)
counter

In [None]:
# NEUTRAL: number of label-0 rows in neutral_df whose `token` value contains "gutmenschen"
counter = sum(
    1
    for tokens, label in zip(neutral_df["token"], neutral_df["label"])
    if "gutmenschen" in tokens and label == 0
)
counter

# Look at neutral_wrong

In [None]:
# First 10 rows of neutral_wrong; the frame was sorted ascending by
# neutral_pred when it was built, so these are the lowest neutral-model scores.
neutral_wrong.head(10)

In [None]:
# RIGHT model on the first 1000 rows of neutral_wrong (label-1 tweets the
# neutral model scored below 0.5); this model scored them above 0.5.
# NOTE(review): look_first=10 presumably limits how many examples are shown —
# confirm against do_explain_model.
do_explain_model(right_model, right_dict, neutral_wrong[:1000], look_first=10 )

In [None]:
# LEFT model on the same neutral_wrong rows; this model scored them above 0.5.
# NOTE(review): look_first=10 presumably limits how many examples are shown —
# confirm against do_explain_model.
do_explain_model(left_model, left_dict, neutral_wrong[:1000], look_first=10 )

In [None]:
# NEUTRAL model on the neutral_wrong rows, i.e. the label-1 tweets it scored
# below 0.5 while the right and left models scored above 0.5.
# NOTE(review): look_first=10 presumably limits how many examples are shown —
# confirm against do_explain_model.
do_explain_model(neutral_model, neutral_dict, neutral_wrong[:1000], look_first=10 )

# Look at left_wrong

In [None]:
# First 10 rows of left_wrong; the frame was sorted ascending by left_pred
# when it was built, so these are the lowest left-model scores.
left_wrong.head(10)

In [None]:
# RIGHT model on the first 1000 rows of left_wrong (label-1 tweets the left
# model scored below 0.5); this model scored them above 0.5.
# NOTE(review): look_first=10 presumably limits how many examples are shown —
# confirm against do_explain_model.
do_explain_model(right_model, right_dict, left_wrong[:1000], look_first=10 )

In [None]:
# LEFT model on the left_wrong rows, i.e. the label-1 tweets it scored below
# 0.5 while the right and neutral models scored above 0.5.
# NOTE(review): look_first=10 presumably limits how many examples are shown —
# confirm against do_explain_model.
do_explain_model(left_model, left_dict, left_wrong[:1000], look_first=10 )

In [None]:
# NEUTRAL model on the same left_wrong rows; this model scored them above 0.5.
# NOTE(review): look_first=10 presumably limits how many examples are shown —
# confirm against do_explain_model.
do_explain_model(neutral_model, neutral_dict, left_wrong[:1000], look_first=10 )

In [None]:
# RIGHT: number of label-0 rows in right_df whose `token` value contains "heil"
counter = sum(
    1
    for tokens, label in zip(right_df["token"], right_df["label"])
    if "heil" in tokens and label == 0
)
counter

In [None]:
# LEFT: number of label-0 rows in left_df whose `token` value contains "heil"
counter = sum(
    1
    for tokens, label in zip(left_df["token"], left_df["label"])
    if "heil" in tokens and label == 0
)
counter

In [None]:
# NEUTRAL: number of label-0 rows in neutral_df whose `token` value contains "heil"
counter = sum(
    1
    for tokens, label in zip(neutral_df["token"], neutral_df["label"])
    if "heil" in tokens and label == 0
)
counter