In [1]:
# !pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Import Libraries

In [3]:
import numpy as np
import pandas as pd
import transformers
import torch.optim as optim
import torch.nn as nn
import torch
import string
import re

from torch import cuda
from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler
from transformers import BertTokenizer, BertModel, BertConfig
from transformers import AdamW, get_linear_schedule_with_warmup
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score, roc_curve, auc

import matplotlib.pyplot as plt
from tqdm.auto import tqdm

# Read Data

In [5]:
path = '/content/drive/MyDrive/Colab Notebooks/data/toxic/'
device = 'cuda' if cuda.is_available() else 'cpu'

In [6]:
# Load train data
df = pd.read_csv(path+'train.csv')
LABEL_COLUMNS = df.columns.tolist()[2:]

In [7]:
# Load test data
test_df = pd.read_csv(path+'test.csv')
test_labels_df = pd.read_csv(path+'test_labels.csv')
test_df = test_df.merge(test_labels_df, on='id', how='left')
test_label_cols = list(test_df.columns[2:])

test_df = test_df[~test_df[test_label_cols].eq(-1).any(axis=1)] #remove irrelevant rows/comments with -1 values

# Preprocessing

In [8]:
# # Preprocess text using lemmatization and word tokenization
def clean(text):
    # remove emojis
    regrex_pattern = re.compile(pattern = "["
        u"\U0001F600-\U0001F64F"  # emoticons
        u"\U0001F300-\U0001F5FF"  # symbols & pictographs
        u"\U0001F680-\U0001F6FF"  # transport & map symbols
        u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                           "]+", flags = re.UNICODE)
    text = regrex_pattern.sub(r'',text)
    # remove links and hashtags
    text = text.replace('\r', '').replace('\n', ' ').replace('\n', ' ').lower() #remove \n and \r and lowercase
    text = re.sub(r"(?:\@|https?\://)\S+", "", text) #remove links and mentions
    text = re.sub(r'[^\x00-\x7f]',r'', text) #remove non utf8/ascii characters such as '\x9a\x91\x97\x9a\x97'
    banned_list= string.punctuation + 'Ã'+'±'+'ã'+'¼'+'â'+'»'+'§'
    table = str.maketrans('', '', banned_list)
    text = text.translate(table)
    text = " ".join(word.strip() for word in re.split('#(?!(?:hashtag)\b)[\w-]+(?=(?:\s+#[\w-]+)*\s*$)', text)) #remove last hashtags
    text = " ".join(word.strip() for word in re.split('#|_', text)) #remove hashtags symbol from words in the middle of the sentence
    return text

def clean_df(df):
  rows = []
  for idx in tqdm(range(len(df))):
      row = df.iloc[idx].copy()
      
      # first we remove numeric characters and lowercase everything
      # cleaned_headline = re.sub("[^A-Za-z]+", ' ', row['comment_text'].replace('<br />', ' ')).lower()
      cleaned_headline = clean(row['comment_text'])

      if len(cleaned_headline) > 1:
          row['comment_text'] = cleaned_headline
      rows.append(row)
  df = pd.DataFrame(rows)
  return df

In [9]:
Clean train text and write to new file
df = clean_df(df)
df.to_csv('/content/drive/MyDrive/Colab Notebooks/data/toxic/toxic_cleaned.csv', index=False)

In [10]:
Clean test text and write to new file
test_df = clean_df(test_df)
test_df.to_csv('/content/drive/MyDrive/Colab Notebooks/data/toxic/toxic_cleaned_test.csv', index=False)

In [11]:
# Read cleaned data
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/data/toxic/toxic_cleaned.csv')
test_df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/data/toxic/toxic_cleaned_test.csv')

In [12]:
# Split into train and validation sets
train_df, val_df = train_test_split(df, test_size=0.2)
train_df.shape, val_df.shape, test_df.shape #test hs only ids and comment_text

((127656, 8), (31915, 8), (63978, 8))

# Dataset

In [13]:
class CommentDataset(Dataset):
    def __init__(self, df, tokenizer):
        self.df = df
        self.tokenizer = tokenizer
    
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        comment = row['comment_text']
        y = torch.tensor(row[LABEL_COLUMNS]).float()
        
        # bert model will want all of these
        # max_length here is 512
        output_dict = tokenizer(comment, 
                                max_length=200,
                                padding="max_length", 
                                return_tensors="pt",
                                truncation=True,  
                                add_special_tokens=True, # [CLS] & [SEP]
                                return_token_type_ids=False,
                                return_attention_mask=True #attention_mask                                                 
                                )
        
        # had to squeeze it to get it to work
        return (torch.squeeze(output_dict['input_ids']),
                torch.squeeze(output_dict['attention_mask']), y)
    

# Model

In [14]:
# Let's use pretrained BERT
class ToxicModel(nn.Module):
    def __init__(self, model, hidden_size):
        super(ToxicModel, self).__init__()
        
        # freeze the parameters
        for param in model.parameters():
            param.requires_grad = False
        self.bert = model
        
        # size of embedding vector for BERT is 768
        self.linear1 = nn.Linear(768, hidden_size)
        self.linear2 = nn.Linear(hidden_size, 6)
        self.relu = nn.ReLU()
        
    def forward(self, input_ids, attention_mask):
        # get the bert emebedding
        outputs = self.bert(input_ids, attention_mask)
        x = outputs[0][:,0,:]
        # x = outputs[0].mean(axis=1)
        
        x = self.relu(self.linear1(x))
        x = self.linear2(x)
        return torch.squeeze(x)

# # make sure everything is working here...
# input_ids, token_type_ids, attention_mask, y = next(iter(dl_train))
# input_ids = input_ids.cuda()
# token_type_ids =  token_type_ids.cuda()
# attention_mask = attention_mask.cuda()
# y = y.cuda()
# BERT_model = BERT_sarcasm(model, 50).cuda()
# BERT_model(input_ids, token_type_ids, attention_mask).shape

# One Training Pass

In [15]:
# have to alter these functions to make them work
def one_pass(model, dataloader, optimizer, lossFun, backwards=True, print_loss=False):
    
    if backwards == True:
        model.train()
    else:
        model.eval()
        best_valid_loss = float('inf')

    y_hats = []
    ys = []        
    
    total_loss = 0.0
    for input_ids, attention_mask, y in tqdm(dataloader):
        input_ids = input_ids.cuda()
        attention_mask = attention_mask.cuda()
        y = y.cuda()

        y_pred = model(input_ids, attention_mask)
        loss = lossFun(y_pred, y)
        total_loss += loss.item()
        
        if backwards == True:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # deactivate autograd
        with torch.no_grad():    
          y_pred = y_pred.detach().cpu().numpy()
          y = y.detach().cpu().numpy()

          y_hats.append(y_pred)
          ys.append(y)  

    avg_loss = total_loss / len(dataloader)
    
    if print_loss == True:
        print(avg_loss)

    #save the best model
    if backwards == False:    
      if avg_loss < best_valid_loss:
          best_valid_loss = avg_loss
          # save your model parameters and optimizer checkpoint
          print('Saving model...')
          torch.save(model, path + model_name + model_suffix)       

    ys = np.concatenate(ys)
    y_hats = np.concatenate(y_hats)

    y = np.array(ys)
    y_pred = np.array(y_hats>0.5)   

    acc = metrics.accuracy_score(y, y_pred)    
    macro_f1 = metrics.f1_score(y, y_pred, average='macro')
    micro_f1 = metrics.f1_score(y, y_pred, average='micro')
    macro_auc = metrics.roc_auc_score(y, y_pred, average='macro')
    micro_auc = metrics.roc_auc_score(y, y_pred, average='micro')
    classification_report = metrics.classification_report(y, y_pred, 
                                                  target_names=LABEL_COLUMNS, 
                                                  zero_division=0) 

    result = {
        'avg_loss': avg_loss,
        'acc': acc,
        'macro_f1': macro_f1,
        'micro_f1': micro_f1,
        'macro_auc': macro_auc,
        'micro_auc': micro_auc,
        'cr': classification_report
    }
                     
    return result

# Run Encapsulation for Experimentation

In [16]:
def run(model, tokenizer):
  ds_train = CommentDataset(train_df, tokenizer)
  ds_val = CommentDataset(val_df, tokenizer) 

  # had to turn the batch size down to keep my laptop from crashing
  dl_train = DataLoader(ds_train, batch_size=10, shuffle=True)
  dl_val = DataLoader(ds_val, batch_size=10, shuffle=True)

  BERT_model = ToxicModel(model, 100).cuda()
  optimizer = optim.Adam(BERT_model.parameters(), lr = 0.01)
  num_epochs = 5

  for epoch in tqdm(range(num_epochs)):
      print('Epoch: ', epoch)
      
      train_result = one_pass(BERT_model, dl_train, optimizer, lossFun)
      val_result = one_pass(BERT_model, dl_val, optimizer, lossFun, backwards=False)
      print('Train Accuracy: ', train_result['acc'], 'Train AUC: ', train_result['micro_auc'])
      print('Val Accuracy: ', val_result['acc'], 'Val AUC: ', val_result['micro_auc'])

  return train_result, val_result

# BERT

In [18]:
lossFun = nn.BCEWithLogitsLoss()

In [19]:
%%time
model_name = "bert-base-uncased"
model_suffix = '_custom.pt'

tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
model = transformers.AutoModel.from_pretrained(model_name)

train_result_bert, val_result_bert = run(model, tokenizer)
print(train_result_bert)
print(val_result_bert)
print(train_result_bert['cr'])
print(val_result_bert['cr'])

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


  0%|          | 0/5 [00:00<?, ?it/s]

Epoch:  0


  0%|          | 0/12766 [00:00<?, ?it/s]

  0%|          | 0/3192 [00:00<?, ?it/s]

Saving model...
Train Accuracy:  0.9053785172651501 Train AUC:  0.6862450008008315
Val Accuracy:  0.9024596584678051 Val AUC:  0.7134842341023803
Epoch:  1


  0%|          | 0/12766 [00:00<?, ?it/s]

  0%|          | 0/3192 [00:00<?, ?it/s]

Saving model...
Train Accuracy:  0.9054333521338598 Train AUC:  0.6911320329059337
Val Accuracy:  0.9101676327745574 Val AUC:  0.6783304034780534
Epoch:  2


  0%|          | 0/12766 [00:00<?, ?it/s]

  0%|          | 0/3192 [00:00<?, ?it/s]

Saving model...
Train Accuracy:  0.9052766810804036 Train AUC:  0.6848909887408298
Val Accuracy:  0.9048409838633871 Val AUC:  0.6845590295602697
Epoch:  3


  0%|          | 0/12766 [00:00<?, ?it/s]

  0%|          | 0/3192 [00:00<?, ?it/s]

Saving model...
Train Accuracy:  0.9061853731904493 Train AUC:  0.7011148827796245
Val Accuracy:  0.9073789754034153 Val AUC:  0.6778974325872514
Epoch:  4


  0%|          | 0/12766 [00:00<?, ?it/s]

  0%|          | 0/3192 [00:00<?, ?it/s]

Saving model...
Train Accuracy:  0.9061148712163941 Train AUC:  0.7045566017864846
Val Accuracy:  0.9103242989190036 Val AUC:  0.7396258361502056
{'avg_loss': 0.07126087143084024, 'acc': 0.9061148712163941, 'macro_f1': 0.31541905943424325, 'micro_f1': 0.5516603854085187, 'macro_auc': 0.6153102324294107, 'micro_auc': 0.7045566017864846, 'cr': '               precision    recall  f1-score   support\n\n        toxic       0.83      0.54      0.66     12284\n severe_toxic       0.53      0.13      0.21      1284\n      obscene       0.89      0.40      0.56      6793\n       threat       0.00      0.00      0.00       379\n       insult       0.82      0.32      0.46      6355\nidentity_hate       0.09      0.00      0.01      1132\n\n    micro avg       0.83      0.41      0.55     28227\n    macro avg       0.53      0.23      0.32     28227\n weighted avg       0.79      0.41      0.53     28227\n  samples avg       0.05      0.04      0.04     28227\n'}
{'avg_loss': 0.06562626732858762

In [20]:
# Code for loading a saved model
model = torch.load(path+model_name+model_suffix)

In [21]:
ds_test = CommentDataset(test_df, tokenizer)
dl_test = DataLoader(ds_test, batch_size=10, shuffle=True)
test_result_bert = one_pass(model, dl_test, optimizer=None, lossFun=lossFun, backwards=False)
print(test_result_bert)
print(test_result_bert['cr'])

  0%|          | 0/6398 [00:00<?, ?it/s]

Saving model...
{'avg_loss': 0.08575877969197768, 'acc': 0.884757260308231, 'macro_f1': 0.3202236792082854, 'micro_f1': 0.534404327691488, 'macro_auc': 0.6314571328823974, 'micro_auc': 0.7202791821756225, 'cr': '               precision    recall  f1-score   support\n\n        toxic       0.60      0.61      0.61      6090\n severe_toxic       0.33      0.27      0.30       367\n      obscene       0.84      0.40      0.54      3691\n       threat       0.00      0.00      0.00       211\n       insult       0.74      0.35      0.48      3427\nidentity_hate       0.00      0.00      0.00       712\n\n    micro avg       0.66      0.45      0.53     14498\n    macro avg       0.42      0.27      0.32     14498\n weighted avg       0.65      0.45      0.51     14498\n  samples avg       0.06      0.04      0.04     14498\n'}
               precision    recall  f1-score   support

        toxic       0.60      0.61      0.61      6090
 severe_toxic       0.33      0.27      0.30       367

# RoBERTa

In [22]:
%%time
model_name = "roberta-base"
model_suffix = '_custom.pt'

tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
model = transformers.AutoModel.from_pretrained(model_name)

train_result_roberta, val_result_roberta = run(model, tokenizer)
print(train_result_roberta)
print(val_result_roberta)
print(train_result_roberta['cr'])
print(val_result_roberta['cr'])

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.layer_norm.bias', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


  0%|          | 0/5 [00:00<?, ?it/s]

Epoch:  0


  0%|          | 0/12766 [00:00<?, ?it/s]

  0%|          | 0/3192 [00:00<?, ?it/s]

Saving model...
Train Accuracy:  0.9052766810804036 Train AUC:  0.6882587010765023
Val Accuracy:  0.9086949710167633 Val AUC:  0.7102880345057125
Epoch:  1


  0%|          | 0/12766 [00:00<?, ?it/s]

  0%|          | 0/3192 [00:00<?, ?it/s]

Saving model...
Train Accuracy:  0.9072429028012784 Train AUC:  0.7050135820462075
Val Accuracy:  0.9144916183612721 Val AUC:  0.7424744535322004
Epoch:  2


  0%|          | 0/12766 [00:00<?, ?it/s]

  0%|          | 0/3192 [00:00<?, ?it/s]

Saving model...
Train Accuracy:  0.907015729773767 Train AUC:  0.7117116978830056
Val Accuracy:  0.9104182986056714 Val AUC:  0.7051518910920139
Epoch:  3


  0%|          | 0/12766 [00:00<?, ?it/s]

  0%|          | 0/3192 [00:00<?, ?it/s]

Saving model...
Train Accuracy:  0.9081829291220154 Train AUC:  0.7098225787261838
Val Accuracy:  0.9112642957856807 Val AUC:  0.707271981074337
Epoch:  4


  0%|          | 0/12766 [00:00<?, ?it/s]

  0%|          | 0/3192 [00:00<?, ?it/s]

Saving model...
Train Accuracy:  0.9072977376699881 Train AUC:  0.7087062223769328
Val Accuracy:  0.9059376468745104 Val AUC:  0.7307686713031578
{'avg_loss': 0.06739765105410281, 'acc': 0.9072977376699881, 'macro_f1': 0.3206965925206796, 'micro_f1': 0.5600509566161033, 'macro_auc': 0.6197107678102249, 'micro_auc': 0.7087062223769328, 'cr': '               precision    recall  f1-score   support\n\n        toxic       0.86      0.48      0.62     12284\n severe_toxic       0.53      0.09      0.15      1284\n      obscene       0.85      0.48      0.61      6793\n       threat       0.00      0.00      0.00       379\n       insult       0.80      0.40      0.53      6355\nidentity_hate       0.24      0.00      0.01      1132\n\n    micro avg       0.84      0.42      0.56     28227\n    macro avg       0.55      0.24      0.32     28227\n weighted avg       0.79      0.42      0.54     28227\n  samples avg       0.04      0.04      0.04     28227\n'}
{'avg_loss': 0.058384310381334126

In [23]:
# Code for loading a saved model
model = torch.load(path+model_name+model_suffix)

In [24]:
%%time
ds_test = CommentDataset(test_df, tokenizer)
dl_test = DataLoader(ds_test, batch_size=10, shuffle=True)
test_result_roberta = one_pass(model, dl_test, optimizer=None, lossFun=lossFun, backwards=False)
print(test_result_roberta)
print(test_result_roberta['cr'])

  0%|          | 0/6398 [00:00<?, ?it/s]

Saving model...
{'avg_loss': 0.08223213152600853, 'acc': 0.8832723748788646, 'macro_f1': 0.26556843407356373, 'micro_f1': 0.5353742613263297, 'macro_auc': 0.6121656606428323, 'micro_auc': 0.7204313763462974, 'cr': '               precision    recall  f1-score   support\n\n        toxic       0.62      0.63      0.63      6090\n severe_toxic       0.47      0.04      0.07       367\n      obscene       0.69      0.58      0.63      3691\n       threat       0.00      0.00      0.00       211\n       insult       0.87      0.16      0.26      3427\nidentity_hate       0.00      0.00      0.00       712\n\n    micro avg       0.66      0.45      0.54     14498\n    macro avg       0.44      0.23      0.27     14498\n weighted avg       0.66      0.45      0.49     14498\n  samples avg       0.06      0.04      0.05     14498\n'}
               precision    recall  f1-score   support

        toxic       0.62      0.63      0.63      6090
 severe_toxic       0.47      0.04      0.07       

In [25]:
# Addressing imbalance using class weights
y = train_df[LABEL_COLUMNS].values
pos_weight = (y==0).sum(axis=0) / y.sum(axis=0)

pos_weight = torch.Tensor(pos_weight).to(device)
lossFun = nn.BCEWithLogitsLoss(weight = pos_weight)

# BERT with Weights

In [26]:
%%time
model_name = "bert-base-uncased"
model_suffix = '_custom_weights.pt'

tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
model = transformers.AutoModel.from_pretrained(model_name)

train_result_bert2, val_result_bert2 = run(model, tokenizer)
print(train_result_bert2)
print(val_result_bert2)
print(train_result_bert2['cr'])
print(val_result_bert2['cr'])

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


  0%|          | 0/5 [00:00<?, ?it/s]

Epoch:  0


  0%|          | 0/12766 [00:00<?, ?it/s]

  0%|          | 0/3192 [00:00<?, ?it/s]

Saving model...
Train Accuracy:  0.9028561133045059 Train AUC:  0.6804031745422142
Val Accuracy:  0.9096976343412189 Val AUC:  0.7088836222741518
Epoch:  1


  0%|          | 0/12766 [00:00<?, ?it/s]

  0%|          | 0/3192 [00:00<?, ?it/s]

Saving model...
Train Accuracy:  0.9045481606818324 Train AUC:  0.7100942993350614
Val Accuracy:  0.9102929656901144 Val AUC:  0.6736941066769921
Epoch:  2


  0%|          | 0/12766 [00:00<?, ?it/s]

  0%|          | 0/3192 [00:00<?, ?it/s]

Saving model...
Train Accuracy:  0.9064203797706336 Train AUC:  0.7141579184044787
Val Accuracy:  0.9074103086323045 Val AUC:  0.7759086495408506
Epoch:  3


  0%|          | 0/12766 [00:00<?, ?it/s]

  0%|          | 0/3192 [00:00<?, ?it/s]

Saving model...
Train Accuracy:  0.9058406968728457 Train AUC:  0.7170785807451514
Val Accuracy:  0.9091336362212126 Val AUC:  0.6744163862305027
Epoch:  4


  0%|          | 0/12766 [00:00<?, ?it/s]

  0%|          | 0/3192 [00:00<?, ?it/s]

Saving model...
Train Accuracy:  0.9056526916086983 Train AUC:  0.713792308336261
Val Accuracy:  0.9123922920256933 Val AUC:  0.7273291876482152
{'avg_loss': 2.7985853446536884, 'acc': 0.9056526916086983, 'macro_f1': 0.29513356871437174, 'micro_f1': 0.5574752045340281, 'macro_auc': 0.6171367321352116, 'micro_auc': 0.713792308336261, 'cr': '               precision    recall  f1-score   support\n\n        toxic       0.84      0.48      0.61     12284\n severe_toxic       0.00      0.00      0.00      1284\n      obscene       0.76      0.49      0.59      6793\n       threat       0.00      0.00      0.00       379\n       insult       0.72      0.46      0.56      6355\nidentity_hate       0.00      0.00      0.00      1132\n\n    micro avg       0.79      0.43      0.56     28227\n    macro avg       0.39      0.24      0.30     28227\n weighted avg       0.71      0.43      0.54     28227\n  samples avg       0.04      0.04      0.04     28227\n'}
{'avg_loss': 2.6745751070123887, 'a

In [27]:
# Code for loading a saved model
model = torch.load(path+model_name+model_suffix)

In [28]:
ds_test = CommentDataset(test_df, tokenizer)
dl_test = DataLoader(ds_test, batch_size=10, shuffle=True)
test_result_bert2 = one_pass(model, dl_test, optimizer=None, lossFun=lossFun, backwards=False)
print(test_result_bert2)
print(test_result_bert2['cr'])

  0%|          | 0/6398 [00:00<?, ?it/s]

Saving model...
{'avg_loss': 3.05101368570422, 'acc': 0.8981681202913502, 'macro_f1': 0.27517010546897464, 'micro_f1': 0.5244787515207451, 'macro_auc': 0.6129000117743526, 'micro_auc': 0.7114013690874209, 'cr': '               precision    recall  f1-score   support\n\n        toxic       0.68      0.49      0.57      6090\n severe_toxic       0.00      0.00      0.00       367\n      obscene       0.68      0.48      0.56      3691\n       threat       0.00      0.00      0.00       211\n       insult       0.64      0.44      0.52      3427\nidentity_hate       0.00      0.00      0.00       712\n\n    micro avg       0.67      0.43      0.52     14498\n    macro avg       0.33      0.23      0.28     14498\n weighted avg       0.61      0.43      0.51     14498\n  samples avg       0.04      0.04      0.04     14498\n'}
               precision    recall  f1-score   support

        toxic       0.68      0.49      0.57      6090
 severe_toxic       0.00      0.00      0.00       367

# RoBERTa with Weights

In [29]:
%%time
model_name = "roberta-base"
model_suffix = '_custom_weights.pt'

tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
model = transformers.AutoModel.from_pretrained(model_name)

train_result_roberta2, val_result_roberta2 = run(model, tokenizer)
print(train_result_roberta2)
print(val_result_roberta2)
print(train_result_roberta2['cr'])
print(val_result_roberta2['cr'])

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.layer_norm.bias', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


  0%|          | 0/5 [00:00<?, ?it/s]

Epoch:  0


  0%|          | 0/12766 [00:00<?, ?it/s]

  0%|          | 0/3192 [00:00<?, ?it/s]

Saving model...
Train Accuracy:  0.903177288964091 Train AUC:  0.6674602065005428
Val Accuracy:  0.9057183142722858 Val AUC:  0.7486068739777044
Epoch:  1


  0%|          | 0/12766 [00:00<?, ?it/s]

  0%|          | 0/3192 [00:00<?, ?it/s]

Saving model...
Train Accuracy:  0.9046264962085605 Train AUC:  0.6945078406397094
Val Accuracy:  0.9126116246279179 Val AUC:  0.7400147666430955
Epoch:  2


  0%|          | 0/12766 [00:00<?, ?it/s]

  0%|          | 0/3192 [00:00<?, ?it/s]

Saving model...
Train Accuracy:  0.904563827787178 Train AUC:  0.6929588503739776
Val Accuracy:  0.9074729750900831 Val AUC:  0.7039072673979277
Epoch:  3


  0%|          | 0/12766 [00:00<?, ?it/s]

  0%|          | 0/3192 [00:00<?, ?it/s]

Saving model...
Train Accuracy:  0.904524660023814 Train AUC:  0.7027840910254187
Val Accuracy:  0.9115149616167946 Val AUC:  0.7273224786336727
Epoch:  4


  0%|          | 0/12766 [00:00<?, ?it/s]

  0%|          | 0/3192 [00:00<?, ?it/s]

Saving model...
Train Accuracy:  0.9054411856865325 Train AUC:  0.7036304948562908
Val Accuracy:  0.9101989660034466 Val AUC:  0.6991976164446793
{'avg_loss': 2.6444143689768094, 'acc': 0.9054411856865325, 'macro_f1': 0.29476688250677735, 'micro_f1': 0.5461266776548152, 'macro_auc': 0.6112336565685679, 'micro_auc': 0.7036304948562908, 'cr': '               precision    recall  f1-score   support\n\n        toxic       0.84      0.49      0.62     12284\n severe_toxic       0.46      0.03      0.05      1284\n      obscene       0.81      0.45      0.58      6793\n       threat       0.00      0.00      0.00       379\n       insult       0.77      0.39      0.52      6355\nidentity_hate       0.20      0.00      0.00      1132\n\n    micro avg       0.81      0.41      0.55     28227\n    macro avg       0.51      0.23      0.29     28227\n weighted avg       0.76      0.41      0.53     28227\n  samples avg       0.04      0.03      0.04     28227\n'}
{'avg_loss': 2.5562601819754645, 

In [30]:
# Code for loading a saved model
model = torch.load(path+model_name+model_suffix)

In [31]:
%%time
ds_test = CommentDataset(test_df, tokenizer)
dl_test = DataLoader(ds_test, batch_size=10, shuffle=True)
test_result_roberta2 = one_pass(model, dl_test, optimizer=None, lossFun=lossFun, backwards=False)
print(test_result_roberta2)
print(test_result_roberta2['cr'])

  0%|          | 0/6398 [00:00<?, ?it/s]

Saving model...
{'avg_loss': 2.97805666646921, 'acc': 0.8987777048360374, 'macro_f1': 0.25429930327636135, 'micro_f1': 0.4906377456433074, 'macro_auc': 0.5938844070446236, 'micro_auc': 0.6801261432816338, 'cr': '               precision    recall  f1-score   support\n\n        toxic       0.71      0.46      0.56      6090\n severe_toxic       0.00      0.00      0.00       367\n      obscene       0.81      0.37      0.51      3691\n       threat       0.00      0.00      0.00       211\n       insult       0.77      0.32      0.45      3427\nidentity_hate       0.00      0.00      0.00       712\n\n    micro avg       0.75      0.37      0.49     14498\n    macro avg       0.38      0.19      0.25     14498\n weighted avg       0.69      0.37      0.47     14498\n  samples avg       0.04      0.03      0.03     14498\n'}
               precision    recall  f1-score   support

        toxic       0.71      0.46      0.56      6090
 severe_toxic       0.00      0.00      0.00       367