In [1]:
import json
import pickle
import subprocess
import time

import datasets
import numpy as np
import pandas as pd
import torch
import transformers
from datasets import Dataset
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm
from transformers import (
    AutoModel,
    AutoModelForSequenceClassification,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
    set_seed,
)
from transformers.modeling_outputs import SequenceClassifierOutput
from transformers.trainer_callback import EarlyStoppingCallback

import helper.training as tr

pd.set_option("display.precision", 3)

### Settings

In [2]:
MODEL_NAME = "german-nlp-group/electra-base-german-uncased"
DIR_OUTPT = "./results"
DIR_LOG = "./logs"
DIR_TRAINED_MODEL = "./models/final"
SIZE_VALIDATION_SET = 0.1
SEED = 7

In [3]:
set_seed(SEED)

## Prepare dataset

### Load data

In [4]:
columns_a = ["Text", "type", "sentences"]
emotions = [
    "hf_anger",
    "hf_fear",
    "hf_disgust",
    "hf_sadness",
    "hf_joy",
    "hf_enthusiasm",
    "hf_pride",
    "hf_hope",
]

df_train_validation = pd.read_pickle("./data/labeled_training_data_df.pkl")
df_train_validation = df_train_validation[columns_a + emotions]
df_train_validation = df_train_validation.astype(
    {
        "hf_anger": int,
        "hf_fear": int,
        "hf_disgust": int,
        "hf_sadness": int,
        "hf_joy": int,
        "hf_enthusiasm": int,
        "hf_pride": int,
        "hf_hope": int,
        "type": str,
    }
)
df_train_validation["list"] = df_train_validation.apply(
    lambda x: [
        x["hf_anger"],
        x["hf_fear"],
        x["hf_disgust"],
        x["hf_sadness"],
        x["hf_joy"],
        x["hf_enthusiasm"],
        x["hf_pride"],
        x["hf_hope"],
    ],
    axis=1,
)
df_test = pd.read_pickle("./data/labeled_test_data_df.pkl")
# df_train_validation = df_train_validation.sample(n=1000)
df_test = df_test[columns_a + emotions]
df_test = df_test.astype(
    {
        "hf_anger": int,
        "hf_fear": int,
        "hf_disgust": int,
        "hf_sadness": int,
        "hf_joy": int,
        "hf_enthusiasm": int,
        "hf_pride": int,
        "hf_hope": int,
        "type": str,
    }
)
df_test["list"] = df_test.apply(
    lambda x: [
        x["hf_anger"],
        x["hf_fear"],
        x["hf_disgust"],
        x["hf_sadness"],
        x["hf_joy"],
        x["hf_enthusiasm"],
        x["hf_pride"],
        x["hf_hope"],
    ],
    axis=1,
)

df_train, df_validation = train_test_split(
    df_train_validation, test_size=0.1, random_state=SEED
)

print("Size of training set:\t", len(df_train))
print("Size of validation set:\t", len(df_validation))
print("Size of test set:\t", len(df_test))

Size of training set:	 8017
Size of validation set:	 891
Size of test set:	 990


### Convert to Dataset format

In [5]:
dataset_train = Dataset.from_pandas(df_train)
dataset_validation = Dataset.from_pandas(df_validation)
dataset_test = Dataset.from_pandas(df_test)
dataset_test_fb = dataset_test.filter(lambda x: x["type"] == "fb_sent")
dataset_test_ps = dataset_test.filter(lambda x: x["type"] == "ps_sent")

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




### Tokenize dataset

In [6]:
# load model and tokenizer
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=8)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# preprocess data
field_text = "sentences"
field_label = "list"

dataset_train = Dataset.from_pandas(df_train)
dataset_validation = Dataset.from_pandas(df_validation)
dataset_test = Dataset.from_pandas(df_test)
dataset_test_fb = dataset_test.filter(lambda x: x["type"] == "fb_sent")
dataset_test_ps = dataset_test.filter(lambda x: x["type"] == "ps_sent")

# tokenize data
train_encodings = tokenizer(dataset_train[field_text], truncation=True, padding=True)
val_encodings = tokenizer(dataset_validation[field_text], truncation=True, padding=True)
test_encodings = tokenizer(dataset_test[field_text], truncation=True, padding=True)
test_fb_encodings = tokenizer(
    dataset_test_fb[field_text], truncation=True, padding=True
)
test_ps_encodings = tokenizer(
    dataset_test_ps[field_text], truncation=True, padding=True
)

train_dataset = tr.EmotionDataset(train_encodings, dataset_train[field_label])
val_dataset = tr.EmotionDataset(val_encodings, dataset_validation[field_label])
test_dataset = tr.EmotionDataset(test_encodings, dataset_test[field_label])
test_fb_dataset = tr.EmotionDataset(test_fb_encodings, dataset_test_fb[field_label])
test_ps_dataset = tr.EmotionDataset(test_ps_encodings, dataset_test_ps[field_label])

Some weights of the model checkpoint at german-nlp-group/electra-base-german-uncased were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at german-nlp-group/electra-base-german-uncased and are newly initi

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




## Train model

In [7]:
training_args = TrainingArguments(
    output_dir=DIR_OUTPT,  # output directory
    num_train_epochs=4,  # total # of training epochs
    per_device_train_batch_size=32,  # batch size per device during training
    per_device_eval_batch_size=32,  # batch size for evaluation
    warmup_steps=250,  # number of warmup steps for learning rate scheduler
    weight_decay=0.01,  # strength of weight decay
    logging_dir=DIR_LOG,  # directory for storing logs
    seed=SEED,
    evaluation_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="f1_loss",
    greater_is_better=False,
    run_name=MODEL_NAME,
)

trainer = tr.MultilabelTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=tr.compute_metrics,
)

_ = trainer.train()
trainer.evaluate()

trainer.model.save_pretrained(f"{DIR_TRAINED_MODEL}/{MODEL_NAME}/")

[34m[1mwandb[0m: Currently logged in as: [33mmawic[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.10.29 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade




Epoch,Training Loss,Validation Loss,Accuracy Thresh,F1 Loss,Runtime,Samples Per Second
1,No log,0.341154,0.85303,8.093995,2.0151,491.289
2,0.396700,0.307472,0.86452,6.173054,2.1682,456.61
3,0.396700,0.314565,0.863384,5.354288,2.051,482.702
4,0.229800,0.318964,0.866667,5.469135,2.0121,492.026


## Evaluate model on test set

In [8]:
results_all = trainer.predict(test_dataset)
results_fb = trainer.predict(test_fb_dataset)
results_ps = trainer.predict(test_ps_dataset)

### Complete test set

In [9]:
data = dict({"emotion": emotions})
to_add = {
    "Recall": tr.compute_fine_metrics2(results_all, emotions)["recall"],
    "Precision": tr.compute_fine_metrics2(results_all, emotions)["precision"],
    "F1": tr.compute_fine_metrics2(results_all, emotions)["f1"],
}
df = pd.DataFrame.from_dict(dict(data, **to_add))
df

Unnamed: 0,emotion,Recall,Precision,F1
0,hf_anger,0.821,0.863,0.842
1,hf_fear,0.677,0.612,0.643
2,hf_disgust,0.651,0.622,0.636
3,hf_sadness,0.592,0.607,0.599
4,hf_joy,0.552,0.705,0.62
5,hf_enthusiasm,0.709,0.627,0.665
6,hf_pride,0.576,0.607,0.591
7,hf_hope,0.784,0.677,0.726


### Facebook test set

In [10]:
data = dict({"emotion": emotions})
to_add = {
    "Recall": tr.compute_fine_metrics2(results_fb, emotions)["recall"],
    "Precision": tr.compute_fine_metrics2(results_fb, emotions)["precision"],
    "F1": tr.compute_fine_metrics2(results_fb, emotions)["f1"],
}
df = pd.DataFrame.from_dict(dict(data, **to_add))
df

Unnamed: 0,emotion,Recall,Precision,F1
0,hf_anger,0.825,0.846,0.835
1,hf_fear,0.741,0.64,0.687
2,hf_disgust,0.729,0.652,0.688
3,hf_sadness,0.681,0.658,0.67
4,hf_joy,0.59,0.7,0.641
5,hf_enthusiasm,0.72,0.669,0.693
6,hf_pride,0.582,0.654,0.616
7,hf_hope,0.828,0.741,0.782


### Parliament speech test set

In [11]:
data = dict({"emotion": emotions})
to_add = {
    "Recall": tr.compute_fine_metrics2(results_ps, emotions)["recall"],
    "Precision": tr.compute_fine_metrics2(results_ps, emotions)["precision"],
    "F1": tr.compute_fine_metrics2(results_ps, emotions)["f1"],
}
df = pd.DataFrame.from_dict(dict(data, **to_add))
df

Unnamed: 0,emotion,Recall,Precision,F1
0,hf_anger,0.816,0.882,0.848
1,hf_fear,0.593,0.571,0.582
2,hf_disgust,0.481,0.542,0.51
3,hf_sadness,0.477,0.532,0.503
4,hf_joy,0.5,0.714,0.588
5,hf_enthusiasm,0.693,0.57,0.626
6,hf_pride,0.567,0.551,0.559
7,hf_hope,0.728,0.604,0.66
