# Evaluation of Model Performance: Classification Report, Confusion Matrix, Distribution of Probabilities, Sub-Labels and Stance

## Imports, Load Model

In [None]:
import os 
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import TextClassificationPipeline as pipeline
import pandas as pd
import torch
import yaml
import wandb
import import_ipynb
from sklearn.metrics import classification_report
import plotly.express as px

In [None]:
import helpers

In [None]:
# Read the run configuration. The context manager closes the file handle as soon
# as parsing finishes instead of leaving it open until garbage collection.
with open('config_ct.yaml') as config_file:
    config = yaml.safe_load(config_file)

In [None]:
# Model location read from the 'path_to_model' entry of the config; it is passed
# to the from_pretrained calls that load the tokenizer and the model.
model_path = config['path_to_model']

In [None]:
# Load tokenizer and model from local files only, so both artifacts are resolved
# the same way and neither call reaches out to the Hugging Face Hub.
tokenizer = AutoTokenizer.from_pretrained(model_path, local_files_only=True)
model = AutoModelForSequenceClassification.from_pretrained(model_path, local_files_only=True)

# Keep the original preference for GPU 1, but fall back to GPU 0 or to the CPU
# (-1) so the notebook still runs on machines with fewer CUDA devices.
if torch.cuda.device_count() > 1:
    device = 1
elif torch.cuda.is_available():
    device = 0
else:
    device = -1

classifier = pipeline(task="text-classification", model=model, tokenizer=tokenizer, device=device)

In [None]:
def get_prediction(pipeline, text):
    """Call ``pipeline`` on ``text`` with padding and truncation enabled.

    Args:
        pipeline: Callable classifier; invoked as
            ``pipeline(text, padding=True, truncation=True)``.
        text: Input handed to the pipeline unchanged.

    Returns:
        Whatever the pipeline call returns, unmodified.
    """
    call_options = {"padding": True, "truncation": True}
    return pipeline(text, **call_options)

In [None]:
def get_predictions(pipeline, texts):
    """Classify every text and collect the first prediction of each in a DataFrame.

    Args:
        pipeline: Callable classifier forwarded to ``get_prediction``. For each
            text it must return a sequence whose first item is a dict with
            ``'label'`` and ``'score'`` keys.
        texts: Iterable of inputs, classified one at a time.

    Returns:
        DataFrame with the columns ``text``, ``predicted_label`` and ``score``,
        one row per input in input order, indexed 0..n-1. An empty ``texts``
        yields an empty frame with the same columns.
    """
    rows = []
    for text in texts:
        top = get_prediction(pipeline, text)[0]
        rows.append({'text': text, 'predicted_label': top['label'], 'score': top['score']})
    # Build the frame once: concatenating inside the loop copies all earlier rows
    # on every iteration and leaves every row with the index label 0.
    return pd.DataFrame(rows, columns=['text', 'predicted_label', 'score'])

## Make predictions using Pipeline()

In [None]:
# NOTE(security): read_pickle unpickles the file, and unpickling can execute
# arbitrary code. Only load dataset files that come from a trusted source.
raw = pd.read_pickle('path_to_data/dataset.pkl')

In [None]:
# Hand the helper a copy so `raw` itself cannot be modified by it; the
# remove_emojis and remove_footer flags are both passed as False.
df = helpers.select_text(df=raw.copy(), remove_emojis=False, remove_footer=False)

In [None]:
# Renumber the rows 0..n-1 in place; drop=True discards the old index instead of
# keeping it as an extra column.
df.reset_index(drop=True, inplace=True)

In [None]:
# Preview the first rows of the selected data.
df.head()

In [None]:
import warnings

# Silence warnings only while predicting. catch_warnings restores the previous
# filters on exit, so later cells still surface their own warnings instead of
# having them muted for the rest of the session.
with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    preds = get_predictions(pipeline=classifier, texts=df.text.tolist())

In [None]:
# Attach the gold labels as a plain list, i.e. by position: row i of preds is
# paired with row i of df (the texts were predicted in df order).
preds['true_label'] = df.label.tolist()

In [None]:
# Translate the pipeline's label names into the integer classes 0 and 1.
# Any label not listed in the mapping becomes NaN.
preds['predicted_label'] = preds['predicted_label'].map({'LABEL_0': 0, 'LABEL_1': 1})

### Print classification report

In [None]:
# Text report of the predictions against the gold labels (gold labels first,
# predictions second, as classification_report expects).
print(classification_report(preds['true_label'], preds['predicted_label']))

## Confusion matrix

In [None]:
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix
import matplotlib.pyplot as plt

In [None]:
def plot_confusion_matrix(y_preds, y_true, labels):
    """Display a confusion matrix normalized over the true labels.

    Args:
        y_preds: Predicted class of each sample.
        y_true: True class of each sample.
        labels: Display names for the classes, used as tick labels.
    """
    # normalize="true" divides every row by its total, so each cell is a share
    # of the samples of that true class.
    matrix = confusion_matrix(y_true, y_preds, normalize="true")
    _, axes = plt.subplots(figsize=(6, 6))
    ConfusionMatrixDisplay(confusion_matrix=matrix, display_labels=labels).plot(
        cmap="Blues", values_format=".2f", ax=axes, colorbar=False
    )
    axes.set_title("Normalized confusion matrix")
    plt.show()

In [None]:
# Predictions go first and gold labels second, matching the function's
# (y_preds, y_true, labels) parameter order.
plot_confusion_matrix(preds['predicted_label'], preds['true_label'], ["Negative", "Positive"])

## Probability Distribution

In [None]:
# Flag every row whose prediction equals the gold label. Comparing the two columns
# directly gives the same booleans as a row-by-row apply, but runs vectorized and
# also works on an empty frame.
preds['prediction_correct'] = preds['predicted_label'] == preds['true_label']

In [None]:
# Replace the boolean flag and the integer class with display strings.
# Re-running this cell would map the already-converted strings to NaN.
preds['prediction_correct'] = preds['prediction_correct'].map({False: 'not correct', True: 'correct'})
preds['predicted_label'] = preds['predicted_label'].map({0: 'negative', 1: 'positive'})

In [None]:
# Box plot of the pipeline score, split by whether the prediction was correct
# and colored by the predicted class. Pass points="all" to px.box to also draw
# the individual observations.
fig = px.box(
    preds,
    x="prediction_correct",
    y="score",
    color="predicted_label",
    width=1300,
    height=700,
    labels={
        "prediction_correct": "prediction",
        "predicted_label": "predicted class",
    },
)
fig.update_layout(font_size=20, boxmode='group')
fig.show()

## Sub-Labels and Stance

Not shown: the stance and sub-label annotations are merged back into the data, which adds the columns listed below to `df`.

Columns of df after merge: "label", "text", "actor", "strategy", "goal", "reference", "pandemic reference", "belief", "authenticating", "directive", "rhetorical question"




## Example: Sub-labels

In [None]:
# One row per (actor, strategy, goal, reference) group with its size in 'count',
# plus two counters that start at 0 and are filled in afterwards.
combos = (
    df.groupby(["actor", "strategy", "goal", "reference"])
    .size()
    .reset_index(name='count')
)
combos = combos.assign(count_corr=0, count_incorr=0)

In [None]:
# Count correct and incorrect predictions per sub-label combination with a single
# grouped aggregation instead of re-filtering df (and re-running value_counts)
# for every row of combos.
# NOTE(review): this counts entries equal to True / False, as before. If
# df.prediction_correct carries the 'correct' / 'not correct' strings that
# preds.prediction_correct is mapped to in an earlier cell, both counters stay 0.
# Confirm against the merge step that is not shown.
sub_label_cols = ["actor", "strategy", "goal", "reference"]
outcome = df[sub_label_cols].assign(
    count_corr=df["prediction_correct"].eq(True).astype(int),
    count_incorr=df["prediction_correct"].eq(False).astype(int),
)
per_combo = outcome.groupby(sub_label_cols)[["count_corr", "count_incorr"]].sum()

# Align the aggregated counts with the rows of combos; a combination without any
# matching rows gets 0.
combo_keys = pd.MultiIndex.from_frame(combos[sub_label_cols])
for column in ("count_corr", "count_incorr"):
    combos[column] = per_combo[column].reindex(combo_keys, fill_value=0).to_numpy()

In [None]:
# Share of correct / incorrect predictions within each combination. Dividing whole
# columns produces float columns directly, instead of writing floats cell by cell
# into columns that were initialized with the integer 0.
combos['count_corr_norm'] = combos['count_corr'] / combos['count']
combos['count_incorr_norm'] = combos['count_incorr'] / combos['count']