In [2]:
import pandas as pd
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import transformers
from sklearn.metrics import accuracy_score,f1_score


<h1>Data Preprocessing + dataloading</h1>

In [3]:
from tokenize_B import tokenize_BERT



# import + preprocess the data
def preprocessing(tuple):     
    # changing labels 0,...,7 to one-hot encoded list
    labels = tuple[3]
    l = []
    for i in range(len(labels)):
        list_class = [0] * 8
        list_class[int(labels[i])] = 1
        l.append(list_class)
        
    new_tuple = (tuple[0], tuple[1], tuple[2], torch.tensor(l))
    return new_tuple

def get_class(output):
    l = []
    for pred in output:
        class_pred = [0] * 8
        idx = np.argmax(pred)
        class_pred[idx] = 1.0
        l.append(class_pred)
    return l

train_data, val_data, test_data = tokenize_BERT()
all_labels = torch.cat((train_data[3], val_data[3], test_data[3]))
WEIGHTS = 1 / (torch.sqrt(torch.unique(all_labels, return_counts = True)[1])).to('cuda')

# Custom the data for our need
class HateSpeechData(Dataset):
    def __init__(self, X):
        self.X = (X[1], X[2])
        self.y = X[3]
        self.id = X[0]
        
    def __getitem__(self, index):
        # get the item out of the tuple
        inputs_id = self.X[0][index]
        attention_mask = self.X[1][index]
        label = self.y[index]
        # create dictionnary
        item = {
            'input_ids':inputs_id,
            'attention_mask':attention_mask,
            'labels':label
        }
        return item
    
    def __len__(self):
        return len(self.X[1])
    

# Dataloader
def data_loader(data,batch_size):
    
    # preprocessing
    data = preprocessing(data)

    # Map style for Dataloader
    dataset = HateSpeechData(data)

    # dataloader
    dataloader_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=0)

    return dataloader_loader
    

<h1>Define Model + Train script + Validation script</h1>

In [17]:
class BERTForFineTuningtWithPooling(torch.nn.Module):
    def __init__(self):
        super(BERTForFineTuningtWithPooling, self).__init__()
        # first layer is the bert
        self.l1 = transformers.BertModel.from_pretrained('bert-base-uncased', output_hidden_states = True)
        # apply a dropout
        self.l2 = torch.nn.Dropout(0.3)
        self.l3 = torch.nn.Linear(768, 8)
    
    def forward(self, ids, mask):
        outputs = self.l1(ids, attention_mask=mask)
        pooled_output = outputs[1]
        output_2 = self.l2(pooled_output)
        output = self.l3(output_2)
        return outputs.hidden_states, output

def loss_fn(outputs, targets):
    # return torch.nn.BCEWithLogitsLoss(pos_weight=WEIGHTS)(outputs, targets)
    return torch.nn.BCEWithLogitsLoss()(outputs, targets) 


def validation(validation_loader, model):

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.to(device)

    fin_targets=[]
    fin_outputs=[]
    running_loss = 0.0

    with torch.no_grad():
        for _, data in enumerate(validation_loader, 0):

            ids = data['input_ids'].to(device, dtype = torch.long)
            mask = data['attention_mask'].to(device, dtype = torch.long)
            targets = data['labels'].to(device, dtype = torch.float)
            
            # forward
            _,output = model.forward(ids, mask)
            # evaluate the loss
            loss = loss_fn(output, targets)

            # adding to list
            fin_targets.extend(targets.cpu().detach().numpy().tolist())
            fin_outputs.extend(torch.sigmoid(output).cpu().detach().numpy().tolist())

            # add the loss to the running loss
            running_loss+=loss.item()

    return fin_outputs, fin_targets, running_loss/len(validation_loader)


def training_model(nb_epochs, train_dataloader, val_dataloader, patience):
    """
    This function trains the model on training data
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = BERTForFineTuningtWithPooling()
    model.to(device)
    optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-5)
    best_val_loss = np.inf
    # keep track of the performances
    summary = []


    for epoch in range(nb_epochs):
            # dict containing the information
        report_epoch = {
                'epoch': epoch,
                'training_loss': 0.0,
                'valid_loss':0.0,
                'valid_accuracy':0.0,
                'valid_f1_macro':0.0
            }
        model.train()
        running_loss = 0.0

        for i, data in enumerate(train_dataloader, 0):

            ids = data['input_ids'].to(device, dtype = torch.long)
            attention_mask = data['attention_mask'].to(device, dtype = torch.long)
            labels = data['labels'].to(device, dtype = torch.float)
            
             # initialize the optimizer
            optimizer.zero_grad()
            #forward inputs
            _, output = model.forward(ids, attention_mask)
            # define the loss
            loss = loss_fn(output, labels)
            # backpropagate
            loss.backward()
            # print("Capturing:", torch.cuda.is_current_stream_capturing())
            optimizer.step()
            # add the loss to the running loss
            running_loss+=loss.item()
            
            print('\rEpoch: {}\tbatch: {}\tLoss =  {:.3f}'.format(epoch, i, loss), end="")

        running_loss = running_loss / len(train_dataloader)
        report_epoch['training_loss'] = running_loss
        print("\n")
        # validation
        model.eval()
        with torch.no_grad():

            outputs, targets, val_loss = validation(validation_loader=val_dataloader, model= model)
            # getting the predominant class
            outputs = get_class(outputs)
            outputs = np.array(outputs)

            report_epoch['valid_accuracy'] = accuracy_score(targets, outputs)
            report_epoch['valid_f1_macro'] = f1_score(targets, outputs, average='macro')
            print(f"Epoch {epoch+1}: train CE loss = {running_loss}", 
                  f"|| Valid: CE loss = {val_loss}   acc = {report_epoch['valid_accuracy']}   macro-F1 = {report_epoch['valid_f1_macro']}")
            

        # early-stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            dict_model = model.state_dict()
            pat = 0
        else:
            pat += 1
            print("pat ", pat)
            if pat == patience:
                print("Early Stopping: Validation Loss did not decrease for", patience, "epochs.")
                break
        
        print("\n")
        # add performances of the epoch to the overall summary
        summary.append(report_epoch)

    torch.save(dict_model, 'new_learning_rate.pt')
    
    return summary

<h1>Load data + Run model</h1>

In [None]:
# batch size is 4 
train_loader = data_loader(train_data, 4)
valid_loader = data_loader(val_data, 4)

# summary get all info about performance
summary = training_model(nb_epochs = 3, train_dataloader = train_loader, val_dataloader = valid_loader, patience = 2)

<h1>Testing the Models and showcase results</h1>

In [10]:
def tester(model_path, testloader, path_to_save_metrics = None):

    # set the device
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # list containing all the targets and outputs
    targets=[]
    outputs=[]

    # load the model
    model = BERTForFineTuningtWithPooling()
    model.load_state_dict(torch.load(model_path))
    model.to(device)
    
    # test model
    model.eval()
    with torch.no_grad():
        for data in testloader:

            ids = data['input_ids'].to(device, dtype = torch.long)
            attention_mask = data['attention_mask'].to(device, dtype = torch.long)
            labels = data['labels'].to(device, dtype = torch.float)

            _, output = model.forward(ids, attention_mask)
            # adding to list
            targets.extend(labels.cpu().detach().numpy().tolist())
            outputs.extend(torch.sigmoid(output).cpu().detach().numpy().tolist())
    
    # get the prediction
    outputs = get_class(outputs)
    outputs = np.array(outputs)
    # generate the metrics we want to showcase knowing : number of samples/class, accuracy per class, macro f1 per class, accuracy on the testset,
    # normalized accuracy ( average of the accuracy on each class), normalized macro f1 ( average of the f1 on each class)
    # building a dict thaht contains all of the info

    list_summary_per_class = [{
            'nb_sample':0,
            'accuracy':0.0,
            'macro_f1':0.0
        },
        {
            'nb_sample':0,
            'accuracy':0.0,
            'macro_f1':0.0
        },
        {
            'nb_sample':0,
            'accuracy':0.0,
            'macro_f1':0.0
        },
        {
            'nb_sample':0,
            'accuracy':0.0,
            'macro_f1':0.0
        },
        {
            'nb_sample':0,
            'accuracy':0.0,
            'macro_f1':0.0
        },
        {
            'nb_sample':0,
            'accuracy':0.0,
            'macro_f1':0.0
        },
        {
            'nb_sample':0,
            'accuracy':0.0,
            'macro_f1':0.0
        },
        {
            'nb_sample':0,
            'accuracy':0.0,
            'macro_f1':0.0
        }]

    nbr_class = len(list_summary_per_class)
    # calculating all the metrics for each class
    for i in range(nbr_class):
        output_class = []
        target_class = []
        # looping over all the targets
        for j in range(len(targets)):
            if targets[j][i]==1:
                list_summary_per_class[i]['nb_sample'] +=1
                output_class.append(outputs[j])
                target_class.append(targets[j])

        output_class = np.array(output_class)
        target_class = np.array(target_class)

        list_summary_per_class[i]['accuracy'] = accuracy_score(target_class, output_class)
        list_summary_per_class[i]['macro_f1'] = f1_score(target_class, output_class, average='macro')

    # calculate the other metrics
    overall_accuracy = accuracy_score(outputs, targets)
    overall_f1_macro = f1_score(outputs, targets, average='macro')

    # calculating the normalized accuracy
    normalized_accuracy = 0.0
    normalized_f1 = 0.0

    for i in range(nbr_class):
        normalized_accuracy += list_summary_per_class[i]['accuracy']
        normalized_f1 += list_summary_per_class[i]['macro_f1']

    normalized_accuracy *= (1/nbr_class)
    normalized_f1 *= (1/nbr_class)

    # adding the result to the list of results
    overall_summary = {
        'nb_sample': len(outputs),
        'accuracy': overall_accuracy,
        'macro_f1': overall_f1_macro
    }
    normalized_summary = {
        'nb_sample': len(outputs),
        'accuracy':normalized_accuracy,
        'macro_f1':normalized_f1
    }

    list_summary_per_class.append(overall_summary)
    list_summary_per_class.append(normalized_summary)

    if path_to_save_metrics:
        # create a dataframe out of all of this
        df = pd.DataFrame(list_summary_per_class)
        df.to_csv(path_to_save_metrics)

    return list_summary_per_class


In [6]:
testloader = data_loader(test_data, 4)

In [14]:
model_path = 'fine_tuned_bert1.pt'
list_summary_per_class = tester(model_path, testloader)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf

In [15]:
list_summary_per_class

[{'nb_sample': 3843, 'accuracy': 0.0, 'macro_f1': 0.0},
 {'nb_sample': 458, 'accuracy': 0.0, 'macro_f1': 0.0},
 {'nb_sample': 210, 'accuracy': 0.0, 'macro_f1': 0.0},
 {'nb_sample': 266,
  'accuracy': 0.03759398496240601,
  'macro_f1': 0.009057971014492752},
 {'nb_sample': 716,
  'accuracy': 0.0824022346368715,
  'macro_f1': 0.01903225806451613},
 {'nb_sample': 337,
  'accuracy': 0.07418397626112759,
  'macro_f1': 0.017265193370165743},
 {'nb_sample': 19, 'accuracy': 0.0, 'macro_f1': 0.0},
 {'nb_sample': 69,
  'accuracy': 0.8405797101449275,
  'macro_f1': 0.11417322834645668},
 {'nb_sample': 5918,
  'accuracy': 0.02568435282189929,
  'macro_f1': 0.020797695528918886},
 {'nb_sample': 5918,
  'accuracy': 0.12934498825066656,
  'macro_f1': 0.019941081349453913}]

list_summary_per_class