<a href="https://colab.research.google.com/github/mrinaltak/HateSpeechDetection/blob/main/HatEval.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

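# Baseline: subtask_b

Fine-tunes `bert-base-uncased` on the English HatEval 2019 CSV files, using the `TR` column as the label, and reports validation and test performance.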

In [None]:
# Use Google Colab
use_colab = True
try:
    from google.colab import drive
    colab_available = True
except:
    colab_available = False

if use_colab and colab_available:
    drive.mount('/content/drive')

    # cd to the appropriate working directory under my Google Drive
    %cd '/content/drive/My Drive/685'

    !pip install -r requirements.txt

!pip install sentencepiece

from datasets import load_dataset

import sys
import os
import random
import shutil
import copy
import inspect


import numpy as np
import torch
import transformers
import datasets
import sklearn.metrics
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sn
import tqdm

from transformers import BertForSequenceClassification, AdamW, BertConfig, BertTokenizer


In [None]:
# Training below requires a CUDA device; fail fast before loading data or weights.
assert torch.cuda.is_available()

# Seed the Python, NumPy and PyTorch RNGs so that batch shuffling and the
# freshly initialised classification head are repeatable across
# "Restart & Run All".
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

dataset = load_dataset(
    'csv',
    data_files={
        'train': 'hateval2019_en_train.csv',
        'test': 'hateval2019_en_test.csv',
        'valid': 'hateval2019_en_dev.csv',
    },
)

# Get the GPU device name.
device_name = torch.cuda.get_device_name()
n_gpu = torch.cuda.device_count()
print(f"Found device: {device_name}, n_gpu: {n_gpu}")
device = torch.device("cuda")

# Single source of truth for the batch size used by every DataLoader.
batch_size = 8

# Only the training split is reshuffled each epoch; evaluation metrics do not
# depend on example order, so the validation/test loaders keep a stable order.
train_dataloader = torch.utils.data.DataLoader(dataset['train'], shuffle=True, batch_size=batch_size)
val_dataloader = torch.utils.data.DataLoader(dataset['valid'], shuffle=False, batch_size=batch_size)
test_dataloader = torch.utils.data.DataLoader(dataset['test'], shuffle=False, batch_size=batch_size)

model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased", # Use the 12-layer BERT model, with an uncased vocab.
    num_labels = 2, # The number of output labels.
    output_attentions = False, # Whether the model returns attentions weights.
    output_hidden_states = False, # Whether the model returns all hidden-states.
    cache_dir='./bert_cache'
)

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Tell pytorch to run this model on the GPU.
model.cuda()

optimizer = AdamW(model.parameters(),
                  lr = 5e-5, # args.learning_rate - default is 5e-5
                  eps = 1e-8 # args.adam_epsilon  - default is 1e-8
                )
epochs = 5


In [3]:
import numpy as np
from sklearn.metrics import classification_report
# function to get validation accuracy
def get_validation_performance(val_dataloader):
    """Evaluate the global ``model`` on every batch of ``val_dataloader``.

    Prints a scikit-learn classification report (prefixed with
    ``"val accuracy: "``) for the collected predictions and returns the
    overall accuracy. The model is switched to evaluation mode and is left in
    that mode when the function returns.

    Args:
        val_dataloader: iterable of batches; each batch is indexed with
            ``'text'`` (the inputs handed to the tokenizer) and ``'TR'``
            (the labels).

    Returns:
        Number of correct arg-max predictions divided by the number of
        examples that were actually evaluated.

    Raises:
        ValueError: if the dataloader yields no examples.
    """
    # Put the model in evaluation mode
    model.eval()
    y_pred = []
    y_true = []
    total_correct = 0

    for batch in val_dataloader:
        texts = batch['text']
        # len(batch) would count the batch's columns, not its examples.
        if len(texts) == 0:
            continue
        # truncation=True keeps over-long inputs within the model's maximum length.
        inputs = tokenizer(texts, padding='longest', truncation=True, return_tensors="pt").to(device)
        labels = batch['TR']

        # Tell pytorch not to bother with constructing the compute graph during
        # the forward pass, since this is only needed for backprop (training).
        with torch.no_grad():
            # Forward pass, calculate logit predictions.
            logits = model(**inputs).logits

        # Move logits and labels to CPU
        logits = logits.detach().cpu().numpy()
        label_ids = labels.cpu().numpy()

        # Calculate the number of correctly labeled examples in batch
        pred_flat = np.argmax(logits, axis=1).flatten()
        labels_flat = label_ids.flatten()
        total_correct += np.sum(pred_flat == labels_flat)
        y_true.extend(labels_flat)
        y_pred.extend(pred_flat)

    if len(y_true) == 0:
        raise ValueError("val_dataloader yielded no examples to evaluate")

    # Divide by the examples actually seen. len(dataloader) * batch_size
    # over-counts whenever the final batch is smaller than batch_size, which
    # would under-report the accuracy.
    avg_val_accuracy = total_correct / len(y_true)
    report = classification_report(y_true=y_true, y_pred=y_pred)
    print ("val accuracy: ", report)
    return avg_val_accuracy


In [4]:
import random

# Fine-tune the model for `epochs` passes over the training set. After each
# pass, report the summed training loss, evaluate on the validation loader and
# write a checkpoint of the model weights.

# `import tqdm` alone does not load the `tqdm.notebook` submodule; import it
# explicitly instead of relying on another package having done so.
import tqdm.notebook

checkpoint_dir = './trained_model_hateval_baseline_subtask_b/'
# torch.save does not create missing directories. Create it up front so a
# fresh run does not fail after a full epoch of training.
os.makedirs(checkpoint_dir, exist_ok=True)

for epoch_i in range(0, epochs):
    # Perform one full pass over the training set.
    print("")
    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
    print('Training...')
    total_train_loss = 0
    model.train()
    for batch in tqdm.notebook.tqdm(train_dataloader):
        texts = batch['text']
        # len(batch) would count the batch's columns, not its examples.
        if len(texts) == 0:
            continue
        # truncation=True keeps over-long inputs within the model's maximum length.
        inputs = tokenizer(texts, padding='longest', truncation=True, return_tensors="pt").to(device)
        labels = batch['TR'].to(device)
        # Clear the previously calculated gradient
        model.zero_grad()
        # Perform a forward pass (evaluate the model on this training batch).
        outputs = model(**inputs, labels=labels)
        loss = outputs.loss
        total_train_loss += loss.item()
        # Perform a backward pass to calculate the gradients.
        loss.backward()
        # Update parameters and take a step using the computed gradient.
        optimizer.step()
    # ========================================
    #               Validation
    # ========================================
    print(f"Total loss: {total_train_loss}")
    val_acc = get_validation_performance(val_dataloader)
    print(f"Validation accuracy: {val_acc}")
    # One checkpoint per epoch, named by the zero-based epoch index.
    torch.save(model.state_dict(), os.path.join(checkpoint_dir, 'subtask_b_baseline_{}.pt'.format(epoch_i)))

print("")
print("Training complete!")

#get_validation_performance(test_dataloader)


Training...


  0%|          | 0/1125 [00:00<?, ?it/s]

Total loss: 299.29262269684114
val accuracy:                precision    recall  f1-score   support

           0       0.90      0.89      0.90       781
           1       0.63      0.63      0.63       219

    accuracy                           0.84      1000
   macro avg       0.76      0.76      0.76      1000
weighted avg       0.84      0.84      0.84      1000

Validation accuracy: 0.837

Training...


  0%|          | 0/1125 [00:00<?, ?it/s]

Total loss: 231.7446395503357
val accuracy:                precision    recall  f1-score   support

           0       0.96      0.87      0.91       781
           1       0.65      0.85      0.74       219

    accuracy                           0.87      1000
   macro avg       0.80      0.86      0.82      1000
weighted avg       0.89      0.87      0.87      1000

Validation accuracy: 0.867

Training...


  0%|          | 0/1125 [00:00<?, ?it/s]

Total loss: 163.26337245467585
val accuracy:                precision    recall  f1-score   support

           0       0.90      0.93      0.91       781
           1       0.71      0.63      0.67       219

    accuracy                           0.86      1000
   macro avg       0.81      0.78      0.79      1000
weighted avg       0.86      0.86      0.86      1000

Validation accuracy: 0.863

Training...


  0%|          | 0/1125 [00:00<?, ?it/s]

Total loss: 108.95744873471267
val accuracy:                precision    recall  f1-score   support

           0       0.88      0.96      0.92       781
           1       0.77      0.53      0.63       219

    accuracy                           0.86      1000
   macro avg       0.82      0.74      0.77      1000
weighted avg       0.85      0.86      0.85      1000

Validation accuracy: 0.862

Training...


  0%|          | 0/1125 [00:00<?, ?it/s]

Total loss: 67.43394142648322
val accuracy:                precision    recall  f1-score   support

           0       0.86      0.97      0.91       781
           1       0.80      0.43      0.56       219

    accuracy                           0.85      1000
   macro avg       0.83      0.70      0.74      1000
weighted avg       0.85      0.85      0.83      1000

Validation accuracy: 0.852

Training complete!


In [7]:
# Restore the weights stored in the `subtask_b_baseline_4.pt` checkpoint and
# score the test split; the returned accuracy is the cell's displayed value.
final_checkpoint_path = os.path.join('./trained_model_hateval_baseline_subtask_b/', 'subtask_b_baseline_4.pt')
final_state_dict = torch.load(final_checkpoint_path)
model.load_state_dict(final_state_dict)

get_validation_performance(test_dataloader)

val accuracy:                precision    recall  f1-score   support

           0       0.90      0.91      0.91      2471
           1       0.56      0.55      0.55       529

    accuracy                           0.84      3000
   macro avg       0.73      0.73      0.73      3000
weighted avg       0.84      0.84      0.84      3000



0.8436666666666667