# Error Analysis: GoEmotions classification

In [None]:
from pathlib import Path
from typing import *

In [None]:
import sys

# The project root is the parent of the notebook's working directory.
# Path.cwd() is a classmethod, so it is called on the class itself rather than
# on a throwaway Path() instance.
PROJ_ROOT = Path.cwd().parent

# Make the root importable (presumably where the project packages imported in
# later cells live). The membership check keeps re-runs of this cell from
# appending duplicate entries to sys.path.
if str(PROJ_ROOT) not in sys.path:
    sys.path.append(str(PROJ_ROOT))

In [None]:
import numpy as np
import pandas as pd
import scipy.stats as stats

In [None]:
import altair as alt

from training.chart import adhoc_theme

# Register the project's chart theme under one name and make it the active theme.
THEME_NAME = "adhoc_theme"
alt.themes.register(THEME_NAME, adhoc_theme)
alt.themes.enable(THEME_NAME)

# Keep the default data transformer, with its row limit set to 30000.
alt.data_transformers.enable("default", max_rows=30000)

## Data preprocessing

In [None]:
from training.preprocessing import Preprocessor

# Build the preprocessor that the following cells read data frames and charts from.
# NOTE(review): the keyword is spelled `with_lemmtatization` (sic) — presumably it
# mirrors the parameter name in training.preprocessing.Preprocessor; confirm
# against that signature before renaming it here.
preprocessor = Preprocessor(with_lemmtatization=False)

In [None]:
# Take a fixed sample of three training rows; random_state=1 keeps it repeatable.
train_sample = preprocessor.df_train.sample(n=3, random_state=1)

# Lift the column-width limit only while rendering, so long cell values are
# shown in full without changing the global pandas display options.
with pd.option_context("display.max_colwidth", None):
    display(train_sample)

In [None]:
# Last expression of the cell: whatever count_emotions() returns is rendered as
# the cell output.
preprocessor.count_emotions()

In [None]:
# Last expression of the cell: the object returned by
# bar_chart_label_proportion() is rendered as the cell output.
preprocessor.bar_chart_label_proportion()

In [None]:
%%time

# Timed by the %%time cell magic at the top of this cell; the returned object is
# rendered as the cell output.
preprocessor.bar_chart_count_docs_by_length()

In [None]:
# Last expression of the cell: the object returned by
# heatmap_label_correlation() is rendered as the cell output.
preprocessor.heatmap_label_correlation()

### Signal words

Here, a signal word is a text token whose presence increases the proportion of a specific label. In other words, it is a token with a high conditional probability `P(label|token)`.

In [None]:
%%time

# Disabled by default because it takes around 3 minutes; uncomment to run.
# preprocessor.histogram_positive_rate()

In [None]:
%%time

# Disabled by default; uncomment to run.
# preprocessor.bar_chart_of_top5_signal_words()

## Evaluation of the model on the dev set

In [None]:
from emo_classifier.classifiers import load_model

# Load the classifier; load_model() is called without arguments.
classifier = load_model()
# Inputs and targets of the dev split, as provided by the preprocessor.
X_dev, Y_dev = preprocessor.get_dev_X_and_Y()
# Model output for the dev inputs — presumably per-label probabilities, given the
# method name `predict_proba`; confirm against the classifier implementation.
Y_hat_dev = classifier.predict_proba(X_dev)

In [None]:
from training.evaluation import PredictionOnDevSetEvaluator

# Bundle the dev targets, the model output and the raw dev texts into one evaluator.
evaluator_dev = PredictionOnDevSetEvaluator(
    Y_true=Y_dev,
    Y_prob=Y_hat_dev,
    X_text=preprocessor.df_dev["text"],
)

# Persist the evaluator's results; the destination is decided inside
# training.evaluation and is not visible from this notebook.
evaluator_dev.save_thresholds_metrics_and_predictions()

In [None]:
# Last expression of the cell: the object returned by
# prediction_bar_chart_by_label() is rendered as the cell output.
evaluator_dev.prediction_bar_chart_by_label()

In [None]:
# Kept alone in its cell so that the value returned by macro_f1_score() is
# rendered as the cell output.
evaluator_dev.macro_f1_score()

In [None]:
# Show the evaluator's best_thresholds table ordered by its "f1_score" column,
# highest value first.
evaluator_dev.best_thresholds.sort_values(by="f1_score", ascending=False)

In [None]:
# Last expression of the cell: the object returned by metrics_scatter_plot() for
# the dev-set evaluator is rendered as the cell output.
evaluator_dev.metrics_scatter_plot()

In [None]:
# Last expression of the cell: the object returned by
# positive_rate_scatter_plot() for the dev-set evaluator is rendered as the
# cell output.
evaluator_dev.positive_rate_scatter_plot()

In [None]:
# Order the evaluator's false positives by label and keep the first 18 rows.
false_positives = evaluator_dev.false_positive_by_label()
fp_preview = false_positives.sort_values(by="label").head(18)

# Lift the column-width limit only while rendering, so long cell values are
# shown in full.
with pd.option_context("display.max_colwidth", None):
    display(fp_preview)

In [None]:
# Order the evaluator's false negatives by label and keep the first 18 rows.
false_negatives = evaluator_dev.false_negative_by_label()
fn_preview = false_negatives.sort_values(by="label").head(18)

# Lift the column-width limit only while rendering, so long cell values are
# shown in full.
with pd.option_context("display.max_colwidth", None):
    display(fn_preview)

## Model evaluation on the test set

In [None]:
from training.evaluation import PredictionOnTestSetEvaluator

# Inputs and targets of the test split, and the model output for them.
X_test, Y_test = preprocessor.get_test_X_and_Y()
Y_hat_test = classifier.predict_proba(X_test)

# The thresholds come from the dev-set evaluator; nothing is tuned on the test set here.
evaluator_test = PredictionOnTestSetEvaluator(
    Y_true=Y_test,
    Y_prob=Y_hat_test,
    thresholds=evaluator_dev.thresholds(),
)

# A notebook only renders the value of a cell's last expression. This call sits
# in the middle of the cell, so its result was silently discarded; display it
# explicitly so the test-set macro F1 is actually shown.
# NOTE(review): assumes macro_f1_score() returns the score rather than printing it — confirm.
display(evaluator_test.macro_f1_score())
evaluator_test.save_test_metrics()

In [None]:
# Last expression of the cell: the object returned by metrics_scatter_plot() for
# the test-set evaluator is rendered as the cell output.
evaluator_test.metrics_scatter_plot()

In [None]:
# Last expression of the cell: the object returned by
# positive_rate_scatter_plot() for the test-set evaluator is rendered as the
# cell output.
evaluator_test.positive_rate_scatter_plot()

## Environment

In [None]:
# Record the environment this notebook was run in, using the watermark IPython
# extension: -v Python/IPython versions, -n date, -m machine info, -p versions
# of the listed packages.
%load_ext watermark
%watermark -v -n -m -p numpy,scipy,sklearn,pandas,matplotlib,seaborn,altair,torch