# Question 5

In [62]:
import numpy as np
import pandas as pd
import nltk
import torch
# Download the 'omw-1.4' NLTK resource before checklist is imported
# (presumably needed by checklist — TODO confirm).
nltk.download('omw-1.4')
import checklist
from copy import deepcopy
from checklist.editor import Editor
from checklist.perturb import Perturb
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report, confusion_matrix
# Seed NumPy's global RNG so random draws repeat across runs
# (assumes the typo perturbation below draws from NumPy's global RNG — TODO confirm).
np.random.seed(42)

[nltk_data] Downloading package omw-1.4 to
[nltk_data]     /Users/nedim.azar/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


In [2]:
# Read the diagnostic-test subset
df = pd.read_csv('data/olid-subset-diagnostic-tests.csv')

# Build the perturbed variant: same rows and columns, but every 'text' value is
# passed through checklist's typo perturbation (second argument 5 = typo count).
df_typos = df.assign(
    text=df['text'].map(lambda message: Perturb.add_typos(message, 5))
)

# Write both variants to disk
df.to_csv('data/no_typos.csv')
df_typos.to_csv('data/typos.csv')

## For the predictions on the typo data, refer to the Colab notebook

### The predictions were generated on Google Colab because the model would not run locally due to GPU and CUDA issues. We load those predictions below.

In [3]:
# Load the model predictions for the original and for the perturbed texts.
# Both CSVs contain a leftover 'Unnamed: 0' column, which is dropped.
predictions = pd.read_csv('predictions/No_Typos_Prediction.csv').drop(['Unnamed: 0'], axis=1)

predictions_typos = pd.read_csv('predictions/typos.csv').drop(['Unnamed: 0'], axis=1)

# The gold labels are taken from `predictions` but are also used to score the
# typo predictions, and later cells index one frame with a mask built from the
# other. Both only make sense if the two files are row-aligned, so fail loudly
# here instead of silently reporting wrong metrics.
assert len(predictions) == len(predictions_typos), \
    "prediction files differ in length"
assert (predictions['labels'].to_numpy() == predictions_typos['labels'].to_numpy()).all(), \
    "gold labels differ between prediction files; rows are not aligned"

y_true = predictions['labels']          # gold labels
y_pred = predictions['Predictions']     # predictions on the original texts
y_pred_typos = predictions_typos['Predictions']  # predictions on the perturbed texts
pos_label = 1  # class treated as "positive" for precision / recall

## Precision Scores

In [4]:
# No Typos: precision for the positive class on the original texts, to 2 decimals
precision_clean = precision_score(y_true, y_pred, pos_label=pos_label)
round(precision_clean, 2)

0.95

In [5]:
# Typos: precision for the positive class on the perturbed texts, to 2 decimals
precision_typos = precision_score(y_true, y_pred_typos, pos_label=pos_label)
round(precision_typos, 2)

0.92

## Recall Scores

In [6]:
# No Typos: recall for the positive class on the original texts, to 2 decimals
recall_clean = recall_score(y_true, y_pred, pos_label=pos_label)
round(recall_clean, 2)

0.7

In [7]:
# Typos: recall for the positive class on the perturbed texts, to 2 decimals
recall_typos = recall_score(y_true, y_pred_typos, pos_label=pos_label)
round(recall_typos, 2)

0.66

## F1 Scores (Macro)

In [8]:
# No Typos
# Macro-averaged F1 on the original texts, rounded to 2 decimals.
# `pos_label` is only honoured for average='binary'; with average='macro' it is
# ignored, so it is deliberately not passed here (the score is unchanged).
round(f1_score(y_true=y_true, y_pred=y_pred, average='macro'), 2)

0.83

In [9]:
# Typos
# Macro-averaged F1 on the perturbed texts, rounded to 2 decimals.
# `pos_label` is only honoured for average='binary'; with average='macro' it is
# ignored, so it is deliberately not passed here (the score is unchanged).
round(f1_score(y_true=y_true, y_pred=y_pred_typos, average='macro'), 2)

0.8

## How many messages were identified correctly in the original dataset, but erroneously after the perturbation?

In [35]:
# Rows the model labelled correctly on the original texts ...
correct_before = predictions['labels'].eq(predictions['Predictions'])
# ... looked up in the perturbed-text predictions (relies on both frames sharing the same index)
masked = predictions_typos[correct_before]
# Of those, count how many are now mislabelled
int(masked['labels'].ne(masked['Predictions']).sum())

5

## Provide 3 examples when the model failed to assign the correct label after perturbation.

In [36]:
# Show the rows of `masked` whose perturbed-text prediction disagrees with the gold label
masked.loc[masked['labels'].ne(masked['Predictions'])]

Unnamed: 0,id,text,labels,Predictions
11,21826,(cr1tikal vocie) msef my sasc eheks,1,0
30,76135,@USER @USER @USER @USER Beats the hlel NJ out ...,1,0
35,13433,#uCckservaitve Traitors Are Wore sThan Fortnti...,1,0
73,58690,#MAGA #KavanaugChonfirmationHearing spostponin...,0,1
95,45518,@USER He is obvioulsy getting ssupended. He is...,0,1


In [53]:
# Full (untruncated) perturbed text of row 95, one of the examples listed above.
# Select the column by name rather than by position so the lookup does not
# silently return a different field if the column order changes.
predictions_typos.loc[95, 'text']

'@USER He is obvioulsy getting ssupended. He is nto an asste for anyone'

## What is the main source of the erroneous predictions produced by the model (main source of errors caused by typos)?

In [64]:
# Confusion matrix for the perturbed-text predictions.
# labels=[1, 0] puts class 1 first; rows are gold labels and columns are
# predictions, so the layout is [[TP, FN], [FP, TN]] with class 1 as positive.
confusion_matrix(y_true, y_pred_typos, labels=[1, 0])

array([[33, 17],
       [ 3, 47]])

In [77]:
# Perturbed-text rows where the prediction disagrees with the gold label
wrong_typos = predictions_typos['labels'].ne(predictions_typos['Predictions'])
errors_typos = predictions_typos[wrong_typos]
# Among the errors: gold label 1 is a false negative, gold label 0 a false positive
print("FN:", int(errors_typos['labels'].eq(1).sum()))
print("FP:", int(errors_typos['labels'].eq(0).sum()))

FN: 17
FP: 3


In [78]:
# Original-text rows where the prediction disagrees with the gold label
wrong_clean = predictions['labels'].ne(predictions['Predictions'])
errors = predictions[wrong_clean]
# Among the errors: gold label 1 is a false negative, gold label 0 a false positive
print("FN:", int(errors['labels'].eq(1).sum()))
print("FP:", int(errors['labels'].eq(0).sum()))

FN: 15
FP: 2
