# Training BERT Classifier for Moralisation 

The steps in this script are identical to PART 4, but it contains the values for epochs set to 5 to show how the model is overfitting.

In [51]:
# import own functions written in moralisation classifier notebook (NB II) saved to .py
from finalproject_functions import remove_bad_rows

import gzip
import json
import pickle
import random
import sys
import csv
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import ticker
from sklearn import metrics
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, classification_report
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
import torch
from transformers import Trainer, TrainingArguments
from sklearn.metrics import f1_score

from collections import defaultdict

sns.set(style='ticks', font_scale=1.2)
%matplotlib inline
import matplotlib.pyplot as plt

from sklearn.utils import compute_sample_weight

from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification

## Read Required Data

In [52]:
labelled_posts = pd.read_excel("labs_labelled_posts_new.xlsx")
unlabelled_posts = pd.read_csv("unlabelled_posts_new.csv")

## Data Preprocessing: Remove Duplicates & NA's

In [53]:
labelled_posts = remove_bad_rows(labelled_posts, "title")

## Train Test Split

In [54]:
x_list = labelled_posts["title"].values.tolist()

In [55]:
y_list = labelled_posts["moral_label"].values.tolist()

In [56]:
# Train Test Split using the preprocessed comments column and the overall morality label. 
# X_test_f and y_test_f are set aside to test the final model.
X_train, X_test_f, y_train, y_test_f = train_test_split(
    x_list,
    y_list,
    test_size=0.2,
    random_state=99)

# Split the training data again, this time with test size = .25 to achieve a final split of 
# 60 training data; 20 validation data (this is where baseline is tested on); 20 final testing data (best model testing)
X_train_sec, X_val, y_train_sec, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.25,
    random_state=99)

In [57]:
print(f"Training data: {len(X_train_sec)}")
print(f"Validation data: {len(X_val)}")
print(f"Test data: {len(X_test_f)}")

Training data: 588
Validation data: 196
Test data: 197


## Loading the English-language Model

In [58]:
bertmodel = 'bert-base-cased'

device_name = 'cuda'

max_length = 512

save_directory = 'moralisation_model'

## Data Preparation 

In [59]:
tokenizer = AutoTokenizer.from_pretrained(bertmodel)

In [60]:
unique_labels = set(label for label in y_train_sec)
label2id = {label: id for id, label in enumerate(unique_labels)}
id2label = {id: label for label, id in label2id.items()}

In [61]:
#check: 
label2id.keys()

dict_keys([0, 1])

In [62]:
#check: 
id2label.keys()

dict_keys([0, 1])

In [63]:
train_encodings = tokenizer(X_train_sec, truncation=True, padding=True, max_length=max_length)
val_encodings = tokenizer(X_val, truncation=True, padding=True, max_length=max_length)
test_encodings  = tokenizer(X_test_f, truncation=True, padding=True, max_length=max_length)

train_labels_encoded = [label2id[y] for y in y_train_sec]
val_labels_encoded = [label2id[y] for y in y_val]
test_labels_encoded  = [label2id[y] for y in y_test_f]

## Custom Torch Dataset

In [64]:
#Initiate MyDataset Class
class MyDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

In [65]:
#convert the data 
train_dataset = MyDataset(train_encodings, train_labels_encoded)
val_dataset = MyDataset(val_encodings, val_labels_encoded)
test_dataset = MyDataset(test_encodings, test_labels_encoded)

## Pre-Trained Bert Model:

In [66]:
model_m = AutoModelForSequenceClassification.from_pretrained(bertmodel, num_labels=len(id2label)).to(device_name)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

## Fine-Tuning Bert Model: 

In [67]:
def compute_metrics(eval_pred):
    labels = eval_pred.label_ids
    preds = eval_pred.predictions.argmax(-1)
    acc = accuracy_score(labels, preds)
    macro_f1 = f1_score(labels, preds, average='macro', sample_weight=compute_sample_weight('balanced', labels))
    return {'accuracy': acc, 'macro_f1': macro_f1}

In [68]:
metric_name = 'macro_f1'

In [69]:
# Instantiate an object of the TrainingArguments class with the following parameters:
training_args = TrainingArguments(
    
    # Number of training epochs
    num_train_epochs=5, #setting to 3 epochs, it began to overfit- changed to 2 makes it cut off before overfitting. 
                        # Ran with 5 to show where it begins overfitting and plot train/validation loss curve.
    
    # Batch size for training
    per_device_train_batch_size=8,
    
    # Batch size for evaluation
    per_device_eval_batch_size=8,
    
    # Learning rate for optimization
    learning_rate=5e-5,
    
    # Load the best model at the end of training
    load_best_model_at_end=True,
    
    # Metric used for selecting the best model
    metric_for_best_model=metric_name,
    
    # Number of warmup steps for the optimizer
    warmup_steps=0,
    
    # L2 regularization weight decay
    weight_decay=0.01, #increased to avoid overfitting 
    
    # Directory to save the fine-tuned model and configuration files
    output_dir='./results',
    
    # Directory to store logs
    logging_dir='./logs',
    
    # Log results every n steps
    logging_steps=20,
    
    # Strategy for evaluating the model during training
    evaluation_strategy='steps'
)


In [70]:
trainer = Trainer(
    model=model_m,                         # the instantiated Transformers model to be trained
    args=training_args,                  # training arguments, defined above
    train_dataset=train_dataset,         # training dataset
    eval_dataset=val_dataset,           # evaluation dataset (usually a validation set; here we just send our test set)
    compute_metrics=compute_metrics)      # our custom evaluation function 


In [71]:
trainer.train()



Step,Training Loss,Validation Loss,Accuracy,Macro F1
20,0.6104,0.579096,0.729592,0.333333
40,0.5135,0.548197,0.760204,0.463043
60,0.5477,0.507165,0.755102,0.430363
80,0.4778,0.465717,0.790816,0.766985
100,0.351,0.460456,0.806122,0.71568
120,0.3597,0.509234,0.826531,0.69695
140,0.3383,0.681263,0.80102,0.664268
160,0.2381,0.77273,0.806122,0.708116
180,0.142,0.843409,0.816327,0.777667
200,0.184,0.838558,0.831633,0.760963


TrainOutput(global_step=370, training_loss=0.26064979233653157, metrics={'train_runtime': 78.1973, 'train_samples_per_second': 37.597, 'train_steps_per_second': 4.732, 'total_flos': 87628314765600.0, 'train_loss': 0.26064979233653157, 'epoch': 5.0})

## Save fine tuned model:

In [72]:
trainer.save_model(save_directory)

## Testing on Validation Set:

In [73]:
trainer.evaluate()

{'eval_loss': 0.9317600727081299,
 'eval_accuracy': 0.826530612244898,
 'eval_macro_f1': 0.7210294205280927,
 'eval_runtime': 1.5191,
 'eval_samples_per_second': 129.022,
 'eval_steps_per_second': 16.457,
 'epoch': 5.0}

## Evaluate on Test Set: 

In [74]:
predicted_results = trainer.predict(test_dataset)

In [75]:
predicted_results.predictions.shape

(197, 2)

In [76]:
predicted_labels = predicted_results.predictions.argmax(-1) 
predicted_labels = predicted_labels.flatten().tolist()      
predicted_labels = [id2label[l] for l in predicted_labels]  

In [77]:
len(predicted_labels)

197

In [78]:
print(classification_report(y_test_f, 
                           predicted_labels))

              precision    recall  f1-score   support

           0       0.76      0.91      0.83       128
           1       0.75      0.48      0.58        69

    accuracy                           0.76       197
   macro avg       0.76      0.70      0.71       197
weighted avg       0.76      0.76      0.75       197



## Evaluation of Final Model:

In [79]:
# print examples of correct predictions
for _true_label, _predicted_label, _text in random.sample(list(zip(y_test_f, predicted_labels, X_test_f)), 20):
  if _true_label == _predicted_label:
    print('LABEL:', _true_label)
    print('REVIEW TEXT:', _text[:100], '...')
    print()

LABEL: 0
REVIEW TEXT: Canada's immigration website just crashed ...

LABEL: 1
REVIEW TEXT: Media outlets take Trump out of context to suggest he called undocumented immigrants 'animals' ...

LABEL: 0
REVIEW TEXT: Immigration as Economic Warfare ...

LABEL: 0
REVIEW TEXT: Fontana man sent to prison for posing as an immigration officer ...

LABEL: 0
REVIEW TEXT: Trump announces tariffs on Mexico until 'immigration remedied' ...

LABEL: 0
REVIEW TEXT: Trump immigration figure changes famous immigrant quote ...

LABEL: 1
REVIEW TEXT: South Jersey Restaurant Owner Outraged After ‘Don’t Tip Immigrants’ Found Written On Check ...

LABEL: 1
REVIEW TEXT: Police provide an update on the illegal immigrant fugitive wanted in the murder of a California poli ...

LABEL: 1
REVIEW TEXT: NEW JERSEY MUSLIM immigrant charged with scouting locations in major U.S. cities for multiple terror ...

LABEL: 1
REVIEW TEXT: Lawyer: Mollie Tibbetts murder suspect is not an illegal immigrant ...

LABEL: 1
REVIEW TE

In [80]:
# print missclassifications: 
for _true_label, _predicted_label, _text in random.sample(list(zip(y_test_f, predicted_labels, X_test_f)), 80):
  if _true_label != _predicted_label:
    print('TRUE LABEL:', _true_label)
    print('PREDICTED LABEL:', _predicted_label)
    print('REVIEW TEXT:', _text[:100], '...')
    print()

TRUE LABEL: 0
PREDICTED LABEL: 1
REVIEW TEXT: Immigrant Detained After Press Conference ...

TRUE LABEL: 1
PREDICTED LABEL: 0
REVIEW TEXT: A computer engineer who worked under contract for the U.S. Department of Immigration and Customs Enf ...

TRUE LABEL: 1
PREDICTED LABEL: 0
REVIEW TEXT: Veterans Day disgrace: Stop deporting immigrants who served ...

TRUE LABEL: 1
PREDICTED LABEL: 0
REVIEW TEXT: H.R.1044/S386 - Fairness for High-Skilled Immigrants Act of 2019: Altruistic Fair Amendment or Giant ...

TRUE LABEL: 1
PREDICTED LABEL: 0
REVIEW TEXT: Best News: Immigrants Broke in to Moving Car Transporter ...

TRUE LABEL: 1
PREDICTED LABEL: 0
REVIEW TEXT: Trump vows mass immigration arrests, removals of ‘millions of illegal aliens’ starting next week ...

TRUE LABEL: 1
PREDICTED LABEL: 0
REVIEW TEXT: DOCTORS CAUGHT FAKING MEDICAL RECORDS TO HELP IMMIGRANTS GET CITIZENSHIP! ...

TRUE LABEL: 1
PREDICTED LABEL: 0
REVIEW TEXT: US immigration ban: Thousands gather outside airports as anti-Tru