# Model Ensemble

---

## Imports

In [1]:
import itertools
import json
import os
import pickle
import random
import re
import string

from pathlib import Path

import contractions
import emoji
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from nltk.corpus import stopwords

from tqdm import tqdm

In [2]:
# English stop words from the NLTK stopwords corpus, stored as a set so that
# membership tests are constant-time.
STOP_WORDS = set(stopwords.words("english"))

---

## List Experiments 

In [3]:
# Build one row per experiment folder: its grid parameters, the validation
# macro-F1 and the number of epochs that were actually trained.
parent_folder = Path("../experiments/")
all_subfolder = os.listdir(parent_folder)

experiments = list()
for sub in all_subfolder:
    folder = parent_folder / sub
    # os.listdir also returns plain files; opening "<file>/grid_params.json"
    # raises NotADirectoryError, which the handler below does not catch.
    if not folder.is_dir():
        continue
    try:
        with open(folder / "grid_params.json", "r", encoding="utf-8") as f:
            grid = json.load(f)
        with open(folder / "metrics_validation.json", "r", encoding="utf-8") as f:
            metrics = json.load(f)
    except FileNotFoundError:
        # A folder missing either file cannot be ranked, so it is skipped.
        continue
    history_path = folder / "history.pkl"
    if history_path.exists():
        # NOTE(review): pickle.load can execute arbitrary code; this is only
        # acceptable if the experiment folders are produced locally.
        with open(history_path, "rb") as f:
            history = pickle.load(f)
        actual_epochs = len(history["loss"])
    else:
        # Fallback when no training history was saved.
        # NOTE(review): presumably the configured epoch count - confirm.
        actual_epochs = 10
    grid["code"] = sub
    grid["f1_macro"] = metrics["f1_macro"]
    grid["actual_epoch"] = actual_epochs
    experiments.append(grid)

# Fill hyper-parameters that are absent from some grids with fixed values,
# then rank the experiments by validation macro-F1 (best first).
experiments = (
    pd.DataFrame(experiments)
    .assign(
        trainable=lambda f: f["trainable"].fillna("all"),
        dropout=lambda f: f["dropout"].fillna(0.3),
        label_smoothing=lambda f: f["label_smoothing"].fillna(0),
        scheduler=lambda f: f["scheduler"].fillna(0.2),
    )
    .sort_values(by=["f1_macro"], ascending=False)
)

---

## Code

In [29]:
from sklearn.metrics import f1_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_auc_score
from sklearn.metrics import average_precision_score


# Load the cleaned dataset and derive the sorted list of distinct labels found
# in its "goemotion" column.
dataset = pd.read_parquet("../data/clean_data.parquet")
TARGET_NAMES = sorted(dataset["goemotion"].unique())


def evaluate_model(
    p_set: str,
    proba: pd.DataFrame,
    dataset: pd.DataFrame,
    threshold: float = 0.2,
) -> dict:
    """
    Evaluate multi-label probability predictions on one split of the dataset.

    :param p_set: which prediction set to use (test, validation)
    :param proba: predicted probabilities; rows are looked up by the values of
        the dataset's ``code`` column and columns by the labels in
        ``TARGET_NAMES``
    :param dataset: the complete dataset; its ``set``, ``code``, ``goemotion``
        and ``text`` columns are read
    :param threshold: a label is flagged as predicted when its probability is
        strictly greater than this value (default 0.2)
    :return: model evaluation values, keyed by ``predictions``, ``f1_macro``,
        ``f1_micro``, ``roc_auc``, ``confusion_matrix``,
        ``classification_report``, ``misclassification`` and
        ``misclassification_examples``
    """
    outputs = dict()
    df = dataset.loc[lambda f: f["set"] == p_set]

    # Truth matrix: one row per code, one column per label, holding the number
    # of dataset rows for that pair (presumably 0/1 - confirm that the dataset
    # has no duplicated code/label rows).
    pv = df.pivot_table(
        index=["code"],
        columns="goemotion",
        values="set",
        aggfunc="count",
        fill_value=0,
    )
    y_true = pv[TARGET_NAMES].values

    # Align the probabilities with the truth matrix: same rows and the same
    # column order (TARGET_NAMES), so both arrays match column by column.
    proba = proba.loc[pv.index, TARGET_NAMES]
    predictions = (proba.values > threshold).astype(int)

    # Long-format table with one row per (code, label) pair.
    pred = (
        proba.reset_index()
        .rename(columns={"index": "code"})
        .melt(id_vars=["code"], var_name="goemotion", value_name="proba")
    )
    pred["flag"] = pred["proba"] > threshold
    outputs["predictions"] = pred

    # calculate metrics
    outputs["f1_macro"] = f1_score(y_true, predictions, average="macro")
    outputs["f1_micro"] = f1_score(y_true, predictions, average="micro")
    outputs["roc_auc"] = roc_auc_score(y_true, proba.values, average="macro", multi_class="ovo")
    # NOTE: argmax reduces every row to a single label; a row without any
    # flagged label is counted as column 0 because argmax of all zeros is 0.
    outputs["confusion_matrix"] = confusion_matrix(
        np.argmax(y_true, axis=1), np.argmax(predictions, axis=1)
    )
    outputs["classification_report"] = classification_report(
        y_true, predictions, target_names=TARGET_NAMES
    )

    # Rows of the split whose true label was not flagged: after the left merge
    # with the flagged predictions they have no match, hence a null "flag".
    df = df.merge(pred.loc[pred["flag"]], on=["code", "goemotion"], how="left")
    misclass = df[df["flag"].isnull()]
    outputs["misclassification"] = misclass

    # Up to three example texts per label; sampling is with replacement, so
    # duplicates are dropped and fewer than three texts may remain.
    examples = dict()
    for label in TARGET_NAMES:
        missed = misclass[misclass["goemotion"] == label]
        if missed.shape[0] > 0:
            examples[label] = (
                missed.sample(3, replace=True).drop_duplicates()["text"].to_list()
            )
    outputs["misclassification_examples"] = examples

    return outputs

---

## Load Predictions 

### BERT Model 

In [4]:
# Load the pickled predictions stored in the BERT experiment folder.
experiment = "20230729T214900"
folder = Path("../experiments") / experiment
with (folder / "original_predictions.pkl").open("rb") as f:
    bert_predictions = pickle.load(f)

### RoBERTa Model 

In [5]:
# Load the pickled predictions stored in the RoBERTa experiment folder.
experiment = "20230730T160140"
folder = Path("../experiments") / experiment
with (folder / "original_predictions.pkl").open("rb") as f:
    roberta_predictions = pickle.load(f)

## Combine Models 

In [46]:
# Name of the split whose predictions are selected from each model and whose
# rows of the dataset are used for evaluation.
p_set = "test"

### Get Predictions

In [47]:
# The first three keys of the BERT predictions dict, in insertion order, name
# the base model and the two additional models.
model_keys = list(bert_predictions)
base_model = model_keys[0]
gs_model = model_keys[1]
ap_model = model_keys[2]

In [48]:
# base_val and gs_val are read from the BERT predictions; ap_val is read from
# the RoBERTa predictions using a key taken from the BERT dict.
# NOTE(review): this relies on both dicts sharing the same keys - confirm.
base_val, gs_val = (
    bert_predictions[model_key][p_set] for model_key in (base_model, gs_model)
)
ap_val = roberta_predictions[ap_model][p_set]

### Combine Tables 

In [49]:
# Replace the two grouped columns of the base table with the columns of the
# two other tables, each scaled row-wise by the base value of its grouped
# column.
base_rest = base_val.drop(columns=["grief_sadness", "pride_admiration"])
gs_scaled = gs_val.multiply(base_val["grief_sadness"], axis=0)
ap_scaled = ap_val.multiply(base_val["pride_admiration"], axis=0)
ensemble = pd.concat([base_rest, gs_scaled, ap_scaled], axis=1)

## Evaluate Predictions

In [50]:
# Score the combined probabilities on the selected split; as the last
# expression of the cell, the returned dict is displayed as the cell output.
evaluate_model(p_set, ensemble, dataset)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'predictions':            code   goemotion     proba   flag
 0       eczc44a  admiration  0.035331  False
 1       eczctf2  admiration  0.022908  False
 2       eczcvgx  admiration  0.000733  False
 3       eczd20f  admiration  0.004353  False
 4       eczd32o  admiration  0.006658  False
 ...         ...         ...       ...    ...
 151951  efh9yyp    surprise  0.000099  False
 151952  efha0zn    surprise  0.002135  False
 151953  efhbv2x    surprise  0.000717  False
 151954  efhc56l    surprise  0.000002  False
 151955  efhcnhz    surprise  0.000307  False
 
 [151956 rows x 4 columns],
 'f1_macro': 0.5134078388956397,
 'f1_micro': 0.6025019885747342,
 'roc_auc': 0.9264151262490408,
 'confusion_matrix': array([[395,   5,   1,   4,  10,   4,   0,   4,   0,   3,   1,   0,   0,
           4,   2,  12,   0,   8,  20,   0,  20,   4,   2,   0,   0,   0,
           2,   3],
        [  9, 217,   4,   0,   0,   0,   3,   2,   1,   0,   1,   1,   0,
           2,   0,   4,   0,   0,   0,   0,

---