In [None]:
%load_ext autoreload
%autoreload 2

import random
import pickle
import pandas as pd
import torch
import numpy as np
from transformers import BertTokenizerFast, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report, f1_score
from ray import tune
from pathlib import Path
from tqdm.notebook import tqdm

from utils import initialize_seeds
from data_utils import HateDataset, get_results

In [None]:
# Seed the run through the project helper before any stochastic step (splits, training).
# NOTE(review): which libraries get seeded is defined in utils.py — presumably random/numpy/torch; confirm.
initialize_seeds()

In [None]:
# Directory (relative to the notebook's working directory) from which the HateCheck CSV is read.
data_path = Path("./hatecheck-data")

In [None]:
# Load the HateCheck test-suite cases; the first CSV column becomes the DataFrame index.
hatecheck_df = pd.read_csv(data_path/"test_suite_cases.csv", index_col=0)

In [None]:
# Encode the gold labels as integers: hateful -> 1, non-hateful -> 0.
# The result is assigned back to the column instead of calling
# `replace(..., inplace=True)` on the attribute-accessed Series: that chained
# in-place form is deprecated and leaves `hatecheck_df` unchanged under pandas
# Copy-on-Write. Re-running this cell is harmless (0/1 values are left as is).
hatecheck_df["label_gold"] = hatecheck_df["label_gold"].replace({'hateful': 1, 'non-hateful': 0})

## Leave one functionality out

In [None]:
# Distinct functionality labels of the test suite; each one is held out in turn below.
funcs = hatecheck_df["functionality"].unique()
funcs

In [None]:
# Pick the compute device first, then report what was found.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    print(torch.cuda.get_device_name())
    print(torch.cuda.device_count(), 'GPUs')
else:
    print("Oops! No GPU found.")
device

In [None]:
# Load the tokenizer stored in the BERT_davidson2017_weighted/Final model directory.
# This single tokenizer is passed to every preprocess_data_one_out call in the notebook.
tokenizer = BertTokenizerFast.from_pretrained(Path("./hatecheck-experiments/Models/BERT_davidson2017_weighted/Final"))

In [None]:
def preprocess_data_one_out(df, held_out_col, funcs, tokenizer, task_train_df=None):
    """Build train/val/test datasets in which the selected groups never reach training.

    Rows whose ``held_out_col`` value is in ``funcs`` are withheld. The remaining
    rows are split 50/50 (stratified on ``label_gold``) into a training part and
    a hold-out part; the withheld rows are appended to the hold-out part, which
    is then split 50/50 (stratified again) into validation and test.

    Args:
        df: DataFrame with ``test_case``, ``label_gold`` and ``held_out_col`` columns.
        held_out_col: Name of the column that identifies the groups to withhold.
        funcs: Collection of ``held_out_col`` values to keep out of training.
        tokenizer: Callable invoked as ``tokenizer(texts, truncation=True, padding=True)``.
        task_train_df: Optional DataFrame with ``text`` and ``label`` columns whose
            rows are appended after the HateCheck training texts and labels.

    Returns:
        Tuple ``(train_dataset, val_dataset, test_dataset, class_weights,
        df_train, df_val, df_test)``. The datasets are ``HateDataset`` objects,
        ``class_weights`` are the balanced weights computed from the final
        training labels, and the three frames are the HateCheck rows of each
        split (``df_train`` does not include ``task_train_df`` rows).
    """
    held_out_mask = df[held_out_col].isin(funcs)
    df_seen = df[~held_out_mask].copy()
    df_unseen = df[held_out_mask].copy()

    # Half of the seen rows train the model; the other half joins the unseen rows.
    df_train, df_seen_rest = train_test_split(
        df_seen, test_size=0.5, stratify=df_seen.label_gold, random_state=42
    )
    df_valtest = pd.concat([df_seen_rest, df_unseen])
    df_val, df_test = train_test_split(
        df_valtest, test_size=0.5, stratify=df_valtest.label_gold, random_state=42
    )

    frames = {"train": df_train, "val": df_val, "test": df_test}
    texts = {part: frame.test_case.astype("string").tolist() for part, frame in frames.items()}
    labels = {part: frame.label_gold.tolist() for part, frame in frames.items()}

    if task_train_df is not None:
        # Extra task data goes after the HateCheck rows, so the first
        # len(df_train) training examples still line up with df_train.
        texts["train"].extend(task_train_df.text.astype("string").tolist())
        labels["train"].extend(task_train_df.label.tolist())

    class_weights = compute_class_weight(
        'balanced', classes=np.unique(labels["train"]), y=labels["train"]
    )

    encodings = {
        part: tokenizer(texts[part], truncation=True, padding=True)
        for part in ("train", "val", "test")
    }
    datasets = {
        part: HateDataset(encodings[part], labels[part])
        for part in ("train", "val", "test")
    }
    return (
        datasets["train"],
        datasets["val"],
        datasets["test"],
        class_weights,
        df_train,
        df_val,
        df_test,
    )

In [None]:
def train_model(train_config, train_dataset, val_dataset, name):
    """Fine-tune BERT with a class-weighted cross-entropy loss and save the result.

    Args:
        train_config: One of ``"davidson2017"`` or ``"founta2018"`` (start from the
            corresponding previously fine-tuned model) or ``"hateCheck+davidson"``
            (start from ``bert-base-uncased``).
        train_dataset: Dataset used for training.
        val_dataset: Dataset evaluated at the end of every epoch.
        name: Label of the held-out group; used in the directory the model is saved to.

    Raises:
        ValueError: If ``train_config`` is not one of the supported configurations.

    Notes:
        Reads the notebook-level ``class_weights`` (set by the most recent
        ``preprocess_data_one_out`` call) and, for ``"hateCheck+davidson"``,
        the notebook-level ``tokenizer``.
    """
    # Best hyperparameters found during the fixed held-out fine-tuning experiments.
    # Only these settings differ between configurations.
    hyperparams = {
        "davidson2017": {
            "num_train_epochs": 4,
            "per_device_train_batch_size": 32,
            "learning_rate": 2e-5,
        },
        "founta2018": {
            "num_train_epochs": 3,
            "per_device_train_batch_size": 32,
            "learning_rate": 3e-5,
        },
        "hateCheck+davidson": {
            "num_train_epochs": 4,
            "per_device_train_batch_size": 16,
            "learning_rate": 5e-5,
        },
    }
    # Fail early with a clear message instead of an UnboundLocalError further down.
    if train_config not in hyperparams:
        raise ValueError(
            f"Unknown train_config {train_config!r}; expected one of {sorted(hyperparams)}"
        )

    starts_from_base_bert = train_config == "hateCheck+davidson"
    if starts_from_base_bert:
        output_dir = f"./models/leave1out/BERT_{train_config}_weighted_leave1out/checkpoints"
        model_path = f"./models/leave1out/BERT_{train_config}_weighted_leaveOut_{name}/final"
    else:
        output_dir = f"./hatecheck-experiments/Models/leave1out/BERT_{train_config}_weighted_leave1out/checkpoints"
        model_path = f"./hatecheck-experiments/Models/leave1out/BERT_{train_config}_hatecheck_weighted_leaveOut_{name}/Final"

    training_args = TrainingArguments(
        save_steps=2500,
        output_dir=output_dir,           # output directory
        save_strategy="no",
        per_device_eval_batch_size=64,   # batch size for evaluation
        evaluation_strategy='epoch',
        warmup_steps=500,                # number of warmup steps for learning rate scheduler
        weight_decay=0.01,               # strength of weight decay
        seed=123,
        disable_tqdm=True,
        **hyperparams[train_config],     # epochs, train batch size, learning rate
    )

    if starts_from_base_bert:
        model = BertForSequenceClassification.from_pretrained("bert-base-uncased")
        # Match the embedding matrix to the vocabulary of the notebook's tokenizer.
        model.resize_token_embeddings(len(tokenizer))
    else:
        model = BertForSequenceClassification.from_pretrained(
            f"./hatecheck-experiments/Models/BERT_{train_config}_weighted/Final"
        )

    # Build the weight tensor once; it is moved to the logits' device per step.
    loss_weights = torch.FloatTensor(class_weights)

    class WeightedTrainer(Trainer):
        """Trainer whose loss is cross-entropy weighted by ``loss_weights``."""

        def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
            # **kwargs absorbs extra arguments that newer Trainer versions pass
            # (e.g. num_items_in_batch), so the override works across releases.
            labels = inputs.pop("labels")
            outputs = model(**inputs)
            logits = outputs[0]
            # Use the logits' own device rather than a notebook-global one.
            loss_fct = torch.nn.CrossEntropyLoss(weight=loss_weights.to(logits.device))
            loss = loss_fct(logits, labels)
            return (loss, outputs) if return_outputs else loss

    trainer = WeightedTrainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
    )
    trainer.train()
    trainer.save_model(model_path)

In [None]:
def evaluate_model(train_config, train_dataset, val_dataset, test_dataset, name):
    """Load the model saved by ``train_model`` and predict on all three splits.

    Args:
        train_config: ``"davidson2017"``, ``"founta2018"`` or ``"hateCheck+davidson"``;
            selects the directory layout the model was saved under.
        train_dataset: Training dataset to predict on.
        val_dataset: Validation dataset to predict on.
        test_dataset: Test dataset to predict on.
        name: Label of the held-out group; part of the saved model's directory name.

    Returns:
        Tuple ``(train_preds, val_preds, test_preds)``: the second element that
        ``get_results`` returns for each split.

    Raises:
        ValueError: If ``train_config`` is not one of the supported configurations.
    """
    if train_config in ("davidson2017", "founta2018"):
        final_dir = f"./hatecheck-experiments/Models/leave1out/BERT_{train_config}_hatecheck_weighted_leaveOut_{name}/Final"
        model_path = f"./hatecheck-experiments/Models/leave1out/BERT_{train_config}_hatecheck_weighted_leave1out/Test"
    elif train_config == "hateCheck+davidson":
        final_dir = f"./models/leave1out/BERT_{train_config}_weighted_leaveOut_{name}/final"
        model_path = f"./models/leave1out/BERT_{train_config}_weighted_leave1out/Test"
    else:
        # Fail early with a clear message instead of an UnboundLocalError below.
        raise ValueError(
            f"Unknown train_config {train_config!r}; expected 'davidson2017', "
            "'founta2018' or 'hateCheck+davidson'"
        )

    model = BertForSequenceClassification.from_pretrained(final_dir)
    # A plain Trainer is enough here: it is only used to run prediction.
    trainer = Trainer(
        model=model,
        args=TrainingArguments(
            output_dir=model_path,
            per_device_eval_batch_size=64,
        ),
    )

    # get_results is a project helper returning (results, predictions); only the
    # predictions are kept.
    # NOTE(review): it presumably prints the metrics itself — confirm in data_utils.py.
    print("Train set results:")
    _, train_preds = get_results(trainer, train_dataset)
    print("Validation set results:")
    _, val_preds = get_results(trainer, val_dataset)
    print("Test set results:")
    _, test_preds = get_results(trainer, test_dataset)
    return train_preds, val_preds, test_preds

In [None]:
# Leave-one-functionality-out: for every pretrained model and every functionality,
# fine-tune without that functionality, predict on all splits, and store the predictions.
for train_config in ["davidson2017", "founta2018"]:
    for func in funcs:
        print(f"Preprocessing data leaving out {func}")
        # `class_weights` must remain a notebook-level name: train_model reads it as a global.
        (
            train_dataset, val_dataset, test_dataset,
            class_weights, df_train, df_val, df_test,
        ) = preprocess_data_one_out(hatecheck_df, "functionality", [func], tokenizer)

        print(f"Finetuning model pretrained on {train_config}")
        train_model(train_config, train_dataset, val_dataset, func)
        train_preds, val_preds, test_preds = evaluate_model(
            train_config, train_dataset, val_dataset, test_dataset, func
        )

        # Attach predictions and a split tag to each frame before stacking them.
        for frame, preds, split in (
            (df_train, train_preds, "train"),
            (df_val, val_preds, "val"),
            (df_test, test_preds, "test"),
        ):
            frame["preds"] = preds
            frame["split"] = split

        preds_df = pd.concat([df_train, df_val, df_test], axis=0)
        results_file = f'./results/hatecheck/leave1out/results_BERT_{train_config}_weighted_leaveOut_{func}.pkl'
        preds_df[["case_id", "preds", "split"]].to_pickle(results_file)

### Data augmentation

In [None]:
# Load the cleaned binary training sets, indexed by dataset name.
# NOTE(review): read_pickle unpickles arbitrary objects — only load trusted files.
training_data = pd.read_pickle('./hatecheck-experiments/Data/Clean Training Data/training_data_binary.pkl')

# Work on copies so the loaded objects are never modified.
df_raw = {dataset: training_data[dataset].copy() for dataset in training_data}

# Keep the 80% training portion of each dataset (stratified on `label`);
# the remaining 20% is not used in this notebook section.
df_train_target = {}
for dataset, frame in df_raw.items():
    df_train_target[dataset] = train_test_split(
        frame, test_size=0.2, stratify=frame.label, random_state=123
    )[0]

In [None]:
# Leave-one-functionality-out with data augmentation: HateCheck training rows are
# combined with the davidson2017 training portion before fine-tuning.
for train_config in ["hateCheck+davidson"]:
    for func in funcs:
        print(f"Preprocessing data leaving out {func}")
        # `class_weights` must remain a notebook-level name: train_model reads it as a global.
        (
            train_dataset, val_dataset, test_dataset,
            class_weights, df_train, df_val, df_test,
        ) = preprocess_data_one_out(
            hatecheck_df, "functionality", [func], tokenizer, df_train_target["davidson2017"]
        )

        print(f"Finetuning model with configuration {train_config}")
        train_model(train_config, train_dataset, val_dataset, func)
        train_preds, val_preds, test_preds = evaluate_model(
            train_config, train_dataset, val_dataset, test_dataset, func
        )

        # The augmentation rows were appended after the HateCheck rows, so only the
        # first len(df_train) training predictions belong to df_train.
        for frame, preds, split in (
            (df_train, train_preds[:len(df_train)], "train"),
            (df_val, val_preds, "val"),
            (df_test, test_preds, "test"),
        ):
            frame["preds"] = preds
            frame["split"] = split

        preds_df = pd.concat([df_train, df_val, df_test], axis=0)
        results_file = f'./results/hatecheck/leave1out/results_BERT_{train_config}_weighted_leaveOut_{func}.pkl'
        preds_df[["case_id", "preds", "split"]].to_pickle(results_file)

## Leave one identity out

In [None]:
# Distinct target identities (rows without a target identity are ignored).
idents = hatecheck_df["target_ident"].dropna().unique()
idents

In [None]:
# Leave-one-identity-out: for every pretrained model and every target identity,
# fine-tune without that identity, predict on all splits, and store the predictions.
for pretrained_data in ["davidson2017", "founta2018"]:
    for func in idents:
        print(f"Preprocessing data leaving out {func}")
        # `class_weights` must remain a notebook-level name: train_model reads it as a global.
        (
            train_dataset, val_dataset, test_dataset,
            class_weights, df_train, df_val, df_test,
        ) = preprocess_data_one_out(hatecheck_df, "target_ident", [func], tokenizer)

        print(f"Finetuning model pretrained on {pretrained_data}")
        train_model(pretrained_data, train_dataset, val_dataset, func)
        train_preds, val_preds, test_preds = evaluate_model(
            pretrained_data, train_dataset, val_dataset, test_dataset, func
        )

        # Attach predictions and a split tag to each frame before stacking them.
        for frame, preds, split in (
            (df_train, train_preds, "train"),
            (df_val, val_preds, "val"),
            (df_test, test_preds, "test"),
        ):
            frame["preds"] = preds
            frame["split"] = split

        preds_df = pd.concat([df_train, df_val, df_test], axis=0)
        results_file = f'./results/hatecheck/leave1out/results_BERT_{pretrained_data}_weighted_leaveOut_{func}.pkl'
        preds_df[["case_id", "preds", "split"]].to_pickle(results_file)

### Data augmentation

In [None]:
# Leave-one-identity-out with data augmentation: HateCheck training rows are
# combined with the davidson2017 training portion before fine-tuning.
for train_config in ["hateCheck+davidson"]:
    for func in idents:
        print(f"Preprocessing data leaving out {func}")
        # The held-out values are target identities, so the filter column must be
        # "target_ident". Filtering "functionality" by identity names matches no
        # rows, which would leave nothing held out.
        train_dataset, val_dataset, test_dataset, class_weights, df_train, df_val, df_test = preprocess_data_one_out(hatecheck_df, "target_ident", [func], tokenizer, df_train_target["davidson2017"])
        print(f"Finetuning model with configuration {train_config}")
        train_model(train_config, train_dataset, val_dataset, func)
        train_preds, val_preds, test_preds = evaluate_model(train_config, train_dataset, val_dataset, test_dataset, func)
        # The augmentation rows were appended after the HateCheck rows, so only the
        # first len(df_train) training predictions belong to df_train.
        df_train["preds"], df_train["split"] = train_preds[:len(df_train)], "train"
        df_val["preds"], df_val["split"] = val_preds, "val"
        df_test["preds"], df_test["split"] = test_preds, "test"
        preds_df = pd.concat([df_train, df_val, df_test], axis= 0)
        preds_df[["case_id", "preds", "split"]].to_pickle(f'./results/hatecheck/leave1out/results_BERT_{train_config}_weighted_leaveOut_{func}.pkl')

## Leave one cluster out

In [None]:
# Group functionalities into clusters keyed by the text before the first underscore.
clusters = {}
for func in funcs:
    family = func.partition("_")[0]
    if family not in clusters:
        clusters[family] = []
    clusters[family].append(func)

In [None]:
# Display the cluster -> functionalities mapping for inspection.
clusters

In [None]:
# Leave-one-cluster-out: for every pretrained model and every functionality cluster,
# fine-tune without the whole cluster, predict on all splits, and store the predictions.
for pretrained_data in ["davidson2017", "founta2018"]:
    for k, cluster in clusters.items():
        print(f"Preprocessing data leaving out {k}")
        # `class_weights` must remain a notebook-level name: train_model reads it as a global.
        (
            train_dataset, val_dataset, test_dataset,
            class_weights, df_train, df_val, df_test,
        ) = preprocess_data_one_out(hatecheck_df, "functionality", cluster, tokenizer)

        print(f"Finetuning model pretrained on {pretrained_data}")
        train_model(pretrained_data, train_dataset, val_dataset, k)
        train_preds, val_preds, test_preds = evaluate_model(
            pretrained_data, train_dataset, val_dataset, test_dataset, k
        )

        # Attach predictions and a split tag to each frame before stacking them.
        for frame, preds, split in (
            (df_train, train_preds, "train"),
            (df_val, val_preds, "val"),
            (df_test, test_preds, "test"),
        ):
            frame["preds"] = preds
            frame["split"] = split

        preds_df = pd.concat([df_train, df_val, df_test], axis=0)
        results_file = f'./results/hatecheck/leave1out/results_BERT_{pretrained_data}_weighted_leaveOut_{k}.pkl'
        preds_df[["case_id", "preds", "split"]].to_pickle(results_file)

### Data augmentation

In [None]:
# Leave-one-cluster-out with data augmentation: HateCheck training rows are
# combined with the davidson2017 training portion before fine-tuning.
for train_config in ["hateCheck+davidson"]:
    for k, cluster in clusters.items():
        print(f"Preprocessing data leaving out {k}")
        # `class_weights` must remain a notebook-level name: train_model reads it as a global.
        (
            train_dataset, val_dataset, test_dataset,
            class_weights, df_train, df_val, df_test,
        ) = preprocess_data_one_out(
            hatecheck_df, "functionality", cluster, tokenizer, df_train_target["davidson2017"]
        )

        print(f"Finetuning model with configuration {train_config}")
        train_model(train_config, train_dataset, val_dataset, k)
        train_preds, val_preds, test_preds = evaluate_model(
            train_config, train_dataset, val_dataset, test_dataset, k
        )

        # The augmentation rows were appended after the HateCheck rows, so only the
        # first len(df_train) training predictions belong to df_train.
        for frame, preds, split in (
            (df_train, train_preds[:len(df_train)], "train"),
            (df_val, val_preds, "val"),
            (df_test, test_preds, "test"),
        ):
            frame["preds"] = preds
            frame["split"] = split

        preds_df = pd.concat([df_train, df_val, df_test], axis=0)
        results_file = f'./results/hatecheck/leave1out/results_BERT_{train_config}_weighted_leaveOut_{k}.pkl'
        preds_df[["case_id", "preds", "split"]].to_pickle(results_file)