In [None]:
# Mount Google Drive at /content/drive so the data files read below are reachable.
# google.colab is only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import sys
import random
import torch
import torch.nn as nn
import shutil
import numpy as np
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler, random_split
from transformers import BertTokenizer, BertModel, AdamW, get_linear_schedule_with_warmup
import time
import datetime
from sklearn.metrics import classification_report, confusion_matrix, f1_score, accuracy_score

In [None]:
# Pick the compute device once: CUDA when available, CPU otherwise.
use_cuda = torch.cuda.is_available()
device = torch.device('cuda' if use_cuda else 'cpu')
if use_cuda:
    print('Using GPU: ', torch.cuda.current_device())

# Seed every RNG the notebook relies on with the same value.
seed = 42
for seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
    seed_fn(seed)
print(device)

Using GPU:  0
cuda


In [None]:
import pandas as pd
import csv
import string

In [None]:
# Load the OLID training file (tab-separated) from the mounted Drive.
train_data = pd.read_csv('drive/MyDrive/olid-training-v1.0.tsv', delimiter='\t', encoding='utf-8')

# Take explicit copies: clean_tweets() later writes into train_tweets, and
# writing into a slice of train_data is what raises SettingWithCopyWarning.
train_tweets = train_data[['tweet']].copy()            # tweet text
train_task_a_labels = train_data[['subtask_a']].copy()  # subtask_a labels
train_task_b_labels = train_data[['subtask_b']].copy()  # subtask_b labels
train_task_c_labels = train_data[['subtask_c']].copy()  # subtask_c labels

# Rename the label columns by assigning the column index. Mutating
# `columns.values[0]` in place bypasses pandas and is not guaranteed to stick.
train_task_a_labels.columns = ['class_a']
train_task_b_labels.columns = ['class_b']
train_task_c_labels.columns = ['class_c']

print(train_data)


          id                                              tweet subtask_a  \
0      86426  @USER She should ask a few native Americans wh...       OFF   
1      90194  @USER @USER Go home you’re drunk!!! @USER #MAG...       OFF   
2      16820  Amazon is investigating Chinese employees who ...       NOT   
3      62688  @USER Someone should'veTaken" this piece of sh...       OFF   
4      43605  @USER @USER Obama wanted liberals &amp; illega...       NOT   
...      ...                                                ...       ...   
13235  95338  @USER Sometimes I get strong vibes from people...       OFF   
13236  67210  Benidorm ✅  Creamfields ✅  Maga ✅   Not too sh...       NOT   
13237  82921  @USER And why report this garbage.  We don't g...       OFF   
13238  27429                                        @USER Pussy       OFF   
13239  46552  #Spanishrevenge vs. #justice #HumanRights and ...       NOT   

      subtask_b subtask_c  
0           UNT       NaN  
1           TIN    

In [None]:
def clean_tweets(df):
    """Normalise the ``tweet`` column of ``df`` in place.

    Steps, in order: drop ``@USER`` and ``URL`` placeholders, replace ``&amp``
    with ``and``, drop ``&lt`` / ``&gt``, drop digit runs, drop every character
    in ``string.punctuation``, drop non-ASCII characters (emojis, curly
    quotes, ...), and strip surrounding whitespace.

    Only the ``tweet`` column is touched; other columns keep their values and
    dtypes. Values are converted with ``astype(str)`` first, so a missing tweet
    becomes the text ``nan`` (the original emoji step did the same conversion).

    Args:
        df: DataFrame with a ``tweet`` column. Modified in place.

    Returns:
        None.
    """
    tweets = df['tweet'].astype(str)

    # Literal replacements: regex=False is passed explicitly because the
    # default of Series.str.replace differs between pandas versions.
    literal_replacements = (
        ('@USER', ''),   # mentions
        ('URL', ''),     # URL placeholders
        ('&amp', 'and'), # ampersand entity (its trailing ';' goes with punctuation)
        ('&lt', ''),
        ('&gt', ''),
    )
    for old, new in literal_replacements:
        tweets = tweets.str.replace(old, new, regex=False)

    # Digit runs need a real regular expression, so ask for one explicitly.
    tweets = tweets.str.replace(r'\d+', '', regex=True)

    # Punctuation characters are removed literally, never as regex metacharacters.
    for punctuation in string.punctuation:
        tweets = tweets.str.replace(punctuation, '', regex=False)

    # Drop everything outside ASCII (emojis, typographic quotes, ...).
    tweets = tweets.str.encode('ascii', 'ignore').str.decode('ascii')

    df.loc[:, 'tweet'] = tweets.str.strip()

In [None]:
# Clean the training tweets; clean_tweets rewrites the 'tweet' column of the frame it is given.
clean_tweets(train_tweets)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc[:, 'tweet'] = df.tweet.str.replace('@USER', '') #Remove mentions (@USER)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc[:, 'tweet'] = df.tweet.str.replace('URL', '') #Remove URLs
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc[:, 'tweet'] = df.tweet.str.replace('&amp', 'and') #R

In [None]:
# One frame per subtask: cleaned tweets joined with that subtask's labels.
# Subtasks B and C drop the rows that have a missing value.
train_task_a_data = train_tweets.join(train_task_a_labels)
train_task_b_data = train_tweets.join(train_task_b_labels).dropna()
train_task_c_data = train_tweets.join(train_task_c_labels).dropna()

# Wrap every cleaned tweet in single quotes, frame by frame.
for task_frame in (train_task_a_data, train_task_b_data, train_task_c_data):
    task_frame.update(task_frame[['tweet']].applymap('\'{}\''.format))

In [None]:
# Show the subtask-A training frame (quoted tweet text plus class_a label).
print(train_task_a_data)

                                                   tweet class_a
0      'She should ask a few native Americans what th...     OFF
1                      'Go home youre drunk  MAGA Trump'     OFF
2      'Amazon is investigating Chinese employees who...     NOT
3      'Someone shouldveTaken this piece of shit to a...     OFF
4      'Obama wanted liberals and illegals to move in...     NOT
...                                                  ...     ...
13235  'Sometimes I get strong vibes from people and ...     OFF
13236  'Benidorm   Creamfields   Maga    Not too shab...     NOT
13237  'And why report this garbage  We dont give a c...     OFF
13238                                            'Pussy'     OFF
13239  'Spanishrevenge vs justice HumanRights and Fre...     NOT

[13240 rows x 2 columns]


In [None]:
# Each test set comes as a tweet file (TSV) and a label file (CSV whose column
# names are supplied here); the two are joined on 'id'. The 'id' column is kept.

# Subtask A
test_label_a = pd.read_csv('drive/MyDrive/labels-levela.csv', encoding='utf-8',
                           index_col=False, names=['id', 'class_a'])
test_tweet_a = pd.read_csv(
    'drive/MyDrive/testset-levela.tsv', delimiter='\t', encoding='utf-8'
).merge(test_label_a, on='id')

# Subtask B
test_label_b = pd.read_csv('drive/MyDrive/labels-levelb.csv', encoding='utf-8',
                           index_col=False, names=['id', 'class_b'])
test_tweet_b = pd.read_csv(
    'drive/MyDrive/testset-levelb.tsv', delimiter='\t', encoding='utf-8'
).merge(test_label_b, on='id')

# Subtask C
test_label_c = pd.read_csv('drive/MyDrive/labels-levelc.csv', encoding='utf-8',
                           index_col=False, names=['id', 'class_c'])
test_tweet_c = pd.read_csv(
    'drive/MyDrive/testset-levelc.tsv', delimiter='\t', encoding='utf-8'
).merge(test_label_c, on='id')

# Clean each test frame, then wrap its cleaned tweets in single quotes.
for test_frame in (test_tweet_a, test_tweet_b, test_tweet_c):
    clean_tweets(test_frame)
    test_frame.update(test_frame[['tweet']].applymap('\'{}\''.format))

print(test_tweet_a.head())

      id                                              tweet class_a
0  15923  'WhoIsQ WheresTheServer DumpNike DECLASFISA De...     OFF
1  27014  'ConstitutionDay is revered by Conservatives h...     NOT
2  30530  'FOXNews NRA MAGA POTUS TRUMP ndAmendment RNC ...     NOT
3  13876  'Watching Boomer getting the news that she is ...     NOT
4  60133  'NoPasaran Unity demo to oppose the farright i...     OFF


  df.loc[:, 'tweet'] = df.tweet.str.replace('\d+','') #Remove numbers
  df.loc[:, 'tweet'] = df.tweet.str.replace(punctuation, '')


In [None]:
# Show the merged and cleaned subtask-A test frame.
print(test_tweet_a)


        id                                              tweet class_a
0    15923  'WhoIsQ WheresTheServer DumpNike DECLASFISA De...     OFF
1    27014  'ConstitutionDay is revered by Conservatives h...     NOT
2    30530  'FOXNews NRA MAGA POTUS TRUMP ndAmendment RNC ...     NOT
3    13876  'Watching Boomer getting the news that she is ...     NOT
4    60133  'NoPasaran Unity demo to oppose the farright i...     OFF
..     ...                                                ...     ...
855  73439  'DespicableDems lie again about rifles Dem Dis...     OFF
856  25657  'MeetTheSpeakers   will present in our event O...     NOT
857  67018  'people just unfollowed me for talking about m...     OFF
858  50665  'WednesdayWisdom Antifa calls the right fascis...     NOT
859  24583            'Kavanaugh typical liberals  Democrats'     NOT

[860 rows x 3 columns]


In [None]:
# Display the subtask-A training frame as the cell output.
train_task_a_data

Unnamed: 0,tweet,class_a
0,'She should ask a few native Americans what th...,OFF
1,'Go home youre drunk MAGA Trump',OFF
2,'Amazon is investigating Chinese employees who...,NOT
3,'Someone shouldveTaken this piece of shit to a...,OFF
4,'Obama wanted liberals and illegals to move in...,NOT
...,...,...
13235,'Sometimes I get strong vibes from people and ...,OFF
13236,'Benidorm Creamfields Maga Not too shab...,NOT
13237,'And why report this garbage We dont give a c...,OFF
13238,'Pussy',OFF


In [None]:
# Add two indicator columns, both initialised to 0; they are filled in from class_a afterwards.
# assign() returns a new frame each time, so train_task_a_data itself is left unchanged.
df = train_task_a_data.assign(Offensive=0)
train_df = df.assign(NotOffensive=0)
train_df

Unnamed: 0,tweet,class_a,Offensive,NotOffensive
0,'She should ask a few native Americans what th...,OFF,0,0
1,'Go home youre drunk MAGA Trump',OFF,0,0
2,'Amazon is investigating Chinese employees who...,NOT,0,0
3,'Someone shouldveTaken this piece of shit to a...,OFF,0,0
4,'Obama wanted liberals and illegals to move in...,NOT,0,0
...,...,...,...,...
13235,'Sometimes I get strong vibes from people and ...,OFF,0,0
13236,'Benidorm Creamfields Maga Not too shab...,NOT,0,0
13237,'And why report this garbage We dont give a c...,OFF,0,0
13238,'Pussy',OFF,0,0


In [None]:
# Fill the two indicator columns from class_a in one vectorised pass.
# The previous row-by-row `train_df[col][index] = value` form is chained
# assignment: it raises SettingWithCopyWarning and is not guaranteed to write
# into train_df at all.
# Rows whose class_a is 'OFF' get Offensive=1 / NotOffensive=0; every other
# row (including a missing label) gets Offensive=0 / NotOffensive=1.
is_offensive = train_df['class_a'] == 'OFF'
train_df['Offensive'] = is_offensive.astype(int)
train_df['NotOffensive'] = (~is_offensive).astype(int)
train_df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_df['Offensive'][index] = 1
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_df['NotOffensive'][index] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_df['Offensive'][index] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_df['NotOffensive'][index] = 1


Unnamed: 0,tweet,class_a,Offensive,NotOffensive
0,'She should ask a few native Americans what th...,OFF,1,0
1,'Go home youre drunk MAGA Trump',OFF,1,0
2,'Amazon is investigating Chinese employees who...,NOT,0,1
3,'Someone shouldveTaken this piece of shit to a...,OFF,1,0
4,'Obama wanted liberals and illegals to move in...,NOT,0,1
...,...,...,...,...
13235,'Sometimes I get strong vibes from people and ...,OFF,1,0
13236,'Benidorm Creamfields Maga Not too shab...,NOT,0,1
13237,'And why report this garbage We dont give a c...,OFF,1,0
13238,'Pussy',OFF,1,0


In [None]:
# Display train_df again as the cell output.
train_df

Unnamed: 0,tweet,class_a,Offensive,NotOffensive
0,'She should ask a few native Americans what th...,OFF,1,0
1,'Go home youre drunk MAGA Trump',OFF,1,0
2,'Amazon is investigating Chinese employees who...,NOT,0,1
3,'Someone shouldveTaken this piece of shit to a...,OFF,1,0
4,'Obama wanted liberals and illegals to move in...,NOT,0,1
...,...,...,...,...
13235,'Sometimes I get strong vibes from people and ...,OFF,1,0
13236,'Benidorm Creamfields Maga Not too shab...,NOT,0,1
13237,'And why report this garbage We dont give a c...,OFF,1,0
13238,'Pussy',OFF,1,0


In [None]:
# List the column labels; the code below slices label columns by position (from index 2 on).
train_df.columns

Index(['tweet', 'class_a', 'Offensive', 'NotOffensive'], dtype='object')

In [None]:
# Label columns are everything from position 2 onward in train_df.
categories = train_df.columns[2:]

# Sum each label column to get the number of rows flagged with that label.
counts = [(category, train_df[category].sum()) for category in categories]
df_stats = pd.DataFrame(counts, columns=['category', 'number of comments'])
df_stats

Unnamed: 0,category,number of comments
0,Offensive,4400
1,NotOffensive,8840


In [None]:
# Alias for the label column index computed above (same object, not a copy).
target_list = categories

In [None]:
from transformers import AutoModel, AutoTokenizer

In [None]:
# Repeats the transformers install already issued near the top of the notebook.
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
# Install sentencepiece.
# NOTE(review): presumably required by the tokenizer loaded below - confirm.
!pip install sentencepiece

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import sys
import random
import torch
import torch.nn as nn
import shutil
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler, random_split
from transformers import BertTokenizer, BertModel, AdamW, get_linear_schedule_with_warmup
from transformers import AutoModel, AutoTokenizer
import time
import datetime
from sklearn.metrics import classification_report, confusion_matrix, f1_score, accuracy_score

In [None]:
# Load the tokenizer that ships with the 'ai4bharat/indic-bert' checkpoint (downloaded on first use).
tokenizer = AutoTokenizer.from_pretrained('ai4bharat/indic-bert')

In [None]:
def tokenizeWithBert(example):
  """Encode one text with the notebook-level ``tokenizer``.

  The text is encoded with special tokens added, padded or truncated to
  exactly 512 positions, and returned as PyTorch tensors together with its
  attention mask.

  Args:
    example: the text to encode.

  Returns:
    Whatever ``tokenizer.encode_plus`` returns for these options.
  """
  encode_options = dict(
    add_special_tokens=True,      # let the tokenizer add its special tokens
    max_length=512,
    padding='max_length',         # always pad up to max_length
    truncation=True,              # cut anything longer than max_length
    return_attention_mask=True,
    return_tensors='pt',
  )
  return tokenizer.encode_plus(example, **encode_options)

In [None]:
# Smoke-test the helper on a short sample text.
encodings = tokenizeWithBert("Hello world")

In [None]:
# Display the encoding produced for the sample text.
encodings

{'input_ids': tensor([[ 101, 7592, 2088,  102,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,

In [None]:
# Demo: add the [CLS] / [SEP] markers to a sample sentence by hand and show
# how the loaded tokenizer splits the marked-up text.
text = 'New Delhi is the capital of India'
marked_text = f"[CLS] {text} [SEP]"
tokenized_text = tokenizer.tokenize(marked_text)
print(tokenized_text)

['[CLS]', 'new', 'delhi', 'is', 'the', 'capital', 'of', 'india', '[SEP]']


In [None]:
# Encode the same sample sentence through the padded/truncated helper.
encodingsCapital1 = tokenizeWithBert("New Delhi is the capital of India")

In [None]:
# Display the encoding of the sample sentence.
encodingsCapital1

{'input_ids': tensor([[ 101, 2047, 6768, 2003, 1996, 3007, 1997, 2634,  102,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,

In [None]:
def get_dataset(df, tokenizer, mode='train'):
    """Tokenize the tweets in ``df`` and split them into train/validation sets.

    Args:
        df: DataFrame with a ``tweet`` column of strings. Every column from
            position 2 onward is taken as a numeric label column.
        tokenizer: object exposing ``encode_plus``; the notebook passes the
            Hugging Face tokenizer it loaded.
        mode: kept for backward compatibility; it is not used.

    Returns:
        ``(train_dataset, val_dataset)``: an 80/20 ``random_split`` of a
        ``TensorDataset`` of (input ids, attention mask, float32 labels).
    """
    sentences, labels = df['tweet'], df.iloc[:,2:].to_numpy()
    max_length = 300
    in_T = []
    in_T_attn_masks = []
    for sentence in sentences:
        enc_sent_dict = tokenizer.encode_plus(
            sentence[:300],  # character-level cap applied before tokenization
            max_length = max_length,
            add_special_tokens = True,
            # Explicit padding/truncation replaces the deprecated
            # `pad_to_max_length=True`, which also left truncation implicit
            # and made the tokenizer warn on every run.
            padding = 'max_length',
            truncation = True,
            return_attention_mask = True,
            return_tensors = 'pt'
        )
        in_T.append(enc_sent_dict['input_ids'])
        in_T_attn_masks.append(enc_sent_dict['attention_mask'])

    # Each encoding is one row; stack them along the batch dimension.
    in_T = torch.cat(in_T, dim=0)
    in_T_attn_masks = torch.cat(in_T_attn_masks, dim=0)
    labels = torch.tensor(labels, dtype = torch.float32)
    print('Text Input: ' , in_T.shape)
    print('Text Input Attention: ' , in_T_attn_masks.shape)
    print('Labels: ' , labels.shape)

    dataset = TensorDataset(
        in_T,
        in_T_attn_masks,
        labels
    )

    # 80% of the rows go to training, the remainder to validation.
    train_size = int(0.8 * len(dataset))
    val_size = len(dataset) - train_size

    train_dataset, val_dataset = random_split(dataset, [train_size, val_size])
    return train_dataset, val_dataset

In [None]:
# Load the 'ai4bharat/indic-bert' tokenizer (same checkpoint name as the earlier load)
# and build the train/validation datasets from train_df.
tokenizer = AutoTokenizer.from_pretrained('ai4bharat/indic-bert')
train_dataset, val_dataset = get_dataset(
    train_df,
    tokenizer = tokenizer,
    mode = 'train'  # get_dataset accepts this argument but does not read it
)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Text Input:  torch.Size([13240, 300])
Text Input Attention:  torch.Size([13240, 300])
Labels:  torch.Size([13240, 2])


In [None]:
# Mini-batch size shared by both loaders.
batch_size = 8

# Training batches are drawn in random order; validation keeps dataset order.
train_sampler = RandomSampler(train_dataset)
val_sampler = SequentialSampler(val_dataset)

train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=batch_size)
val_dataloader = DataLoader(val_dataset, sampler=val_sampler, batch_size=batch_size)

# Marker that both data loaders have been constructed.
print('Data Ready!!')

Data Ready!!


In [None]:
# Record which transformers release this run used.
import transformers
transformers.__version__

'4.25.1'

In [None]:
import torch.nn.functional as F
from transformers import BertModel

class MultiClassClassifier(nn.Module):
    """Pretrained encoder followed by a two-layer classification head.

    The encoder's token states are mean-pooled over the real (non-padding)
    tokens, passed through Linear -> ReLU -> Dropout -> Linear, and squashed
    with a sigmoid, so each output is an independent probability in [0, 1].

    Args:
        hidden_dim: width of the intermediate feed-forward layer.
        num_labels: number of output units (one probability per label).
    """

    def __init__(self, hidden_dim, num_labels):
        super(MultiClassClassifier, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_labels = num_labels

        self.bertmodel = AutoModel.from_pretrained('ai4bharat/indic-bert')
        # Size the head from the encoder's own config instead of assuming 768.
        self.ffn1 = nn.Linear(self.bertmodel.config.hidden_size, hidden_dim)
        self.dp1 = nn.Dropout()
        self.ffn2 = nn.Linear(hidden_dim, num_labels)

    def forward(self, in_T, in_T_attn_masks):
        """Return per-label probabilities for a batch.

        Args:
            in_T: token id tensor, one row per example.
            in_T_attn_masks: tensor of the same shape, 1 for real tokens and
                0 for padding.

        Returns:
            Tensor with one row per example and ``num_labels`` columns.
        """
        outputs = self.bertmodel(in_T, attention_mask=in_T_attn_masks)
        hidden = outputs.last_hidden_state

        # Masked mean: padded positions must not contribute to the sentence
        # vector. A plain mean over the sequence axis lets padding dominate
        # when inputs are padded to a fixed length.
        mask = in_T_attn_masks.unsqueeze(-1).to(hidden.dtype)
        token_counts = mask.sum(dim=1).clamp(min=1.0)  # guard an all-padding row
        x = (hidden * mask).sum(dim=1) / token_counts

        x = F.relu(self.ffn1(x))
        x = self.dp1(x)
        x = torch.sigmoid(self.ffn2(x))
        return x

In [None]:
# Build the classifier (hidden width 100, 2 output labels) and move it to the selected device.
model = MultiClassClassifier(100, 2).to(device) # 100 hidden dimension, 2 labels
# AdamW optimiser over all model parameters (encoder and head alike).
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5, eps=1e-8) # Adam with weight decay
# Binary cross-entropy on probabilities; the model's forward already applies a sigmoid.
criterion = nn.BCELoss()

Downloading pytorch_model.bin:   0%|          | 0.00/135M [00:00<?, ?B/s]

Some weights of the model checkpoint at ai4bharat/indic-bert were not used when initializing AlbertModel: ['predictions.LayerNorm.weight', 'predictions.dense.weight', 'sop_classifier.classifier.weight', 'predictions.decoder.bias', 'sop_classifier.classifier.bias', 'predictions.bias', 'predictions.dense.bias', 'predictions.decoder.weight', 'predictions.LayerNorm.bias']
- This IS expected if you are initializing AlbertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
#format time
import datetime

def format_time(elapsed):
    """Render a duration given in seconds as a ``datetime.timedelta`` string.

    The value is rounded to the nearest whole second first, so the result has
    no fractional part (for example ``0:00:03``).
    """
    whole_seconds = int(round(elapsed))
    duration = datetime.timedelta(seconds=whole_seconds)
    return str(duration)

In [None]:
#TRAINING and VALIDATION
# Number of passes over the training set.
epochs = 3
# The linear schedule decays the learning rate over every optimiser step of the run, with no warm-up.
total_steps = len(train_dataloader) * epochs
scheduler = get_linear_schedule_with_warmup(optimizer,
                                           num_warmup_steps = 0,
                                           num_training_steps = total_steps)


training_stats = []
# NOTE(review): total_t0 is recorded but not read anywhere in this cell.
total_t0 = time.time()

# NOTE(review): best_val_loss is initialised but never compared or updated in this cell.
best_val_loss = 1e8
# Ground-truth label matrix of the validation split, fetched once up front.
true_labels = val_dataset[:][2].numpy()

for epoch_i in range(0, epochs):

    #############               Training
    print("")
    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
    print('Training...')

    t0 = time.time()

    total_train_loss = 0
    model.train()

    for step, batch in enumerate(train_dataloader):

        # Progress line every 5 batches: running mean loss over the batches seen so far.
        if step % 5 == 0 and not step == 0:
            elapsed = format_time(time.time() - t0)
            print('  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}. Loss: {:.5f}'.format(step, len(train_dataloader), elapsed, total_train_loss/step))

        # Batch layout follows the TensorDataset: (input ids, attention mask, labels).
        b_in_T            = batch[0].to(device)
        b_in_T_attn_masks = batch[1].to(device)
        b_labels          = batch[2].to(device)
        
        model.zero_grad()

        # Named "logits", but the model's forward ends in a sigmoid, so these are probabilities.
        logits = model(b_in_T, b_in_T_attn_masks)
        loss = criterion(logits, b_labels)

        total_train_loss += loss.item()
        loss.backward()

        # Clip the global gradient norm to 1.0 before the optimiser step.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        scheduler.step()

    avg_train_loss = total_train_loss / len(train_dataloader)

    # Measure how long this epoch took.
    training_time = format_time(time.time() - t0)

    print("")
    print("  Average training loss: {0:.2f}".format(avg_train_loss))
    print("  Training epcoh took: {:}".format(training_time))

    ##########               Validation
   
    print("")
    print("Running Validation...")

    t0 = time.time()

    model.eval()

    # NOTE(review): total_eval_accuracy and nb_eval_steps are set here but never used below.
    total_eval_accuracy = 0
    total_eval_loss = 0
    nb_eval_steps = 0

    # Accumulates the model outputs for the whole validation set, two columns per row.
    pred_labels = np.empty((0,2))

    # Evaluate data for one epoch
    for batch in val_dataloader:
        
        b_in_T            = batch[0].to(device)
        b_in_T_attn_masks = batch[1].to(device)
        b_labels          = batch[2].to(device)

        with torch.no_grad():
            logits = model(b_in_T, b_in_T_attn_masks)
            loss = criterion(logits, b_labels)

        # Accumulate the validation loss.
        total_eval_loss += loss.item()

        # Move the model outputs to CPU and append them to the running matrix.
        logits = logits.detach().cpu().numpy()
        pred_labels = np.concatenate((pred_labels, logits), axis=0)


    # Calculate the average loss over all of the batches.
    avg_val_loss = total_eval_loss / len(val_dataloader)

    # Measure how long the validation run took.
    validation_time = format_time(time.time() - t0)

    # Binarise each output column independently at 0.25.
    # NOTE(review): confirm 0.25 is the intended decision threshold.
    pred_labels = np.array([[int(x >= 0.25) for x in pred_labels[:,i]] for i  in range(2)]).transpose()

    print("  Validation Loss: {0:.2f}".format(avg_val_loss))
    print("  Validation took: {:}".format(validation_time))

    # Report accuracy and F1 scores for this validation run, one value per label column.
    for i in range(2):
        print("  Accuracy: {0:.2f}".format(accuracy_score(true_labels[:,i], pred_labels[:,i])))

    for i in range(2):
        print("  Macro F1-score: {0:.2f}".format(f1_score(true_labels[:,i], pred_labels[:,i], average='macro')))

    for i in range(2):
        print("  Weighted F1-score: {0:.2f}".format(f1_score(true_labels[:,i], pred_labels[:,i], average='weighted')))

    print('Classification Report:')
    for i in range(2):
        print(classification_report(true_labels[:,i], pred_labels[:,i]))

    print('Confusion Matrix:')
    for i in range(2):
        print(confusion_matrix(true_labels[:,i], pred_labels[:,i]))

    # Record all statistics from this epoch (metrics are averaged over the two label columns).
    training_stats.append(
        {
            'epoch': epoch_i + 1,
            'training_loss': avg_train_loss,
            'val_loss': avg_val_loss,
            'val_accuracy': np.mean([accuracy_score(true_labels[:,i], pred_labels[:,i]) for i in range(2)]),
            'val_macro_f1': np.mean([f1_score(true_labels[:,i], pred_labels[:,i], average='macro') for i in range(2)]),
            'val_weighted_f1': np.mean([f1_score(true_labels[:,i], pred_labels[:,i], average='weighted') for i in range(2)]),
            'training_time': training_time,
            'val_tim': validation_time
        }
    )

    # Save a checkpoint after every epoch; the file name carries the zero-based epoch index.
    model_path = 'model_state_dict_'+str(epoch_i)+'.pt'
    torch.save(model.state_dict(), model_path)


Training...
  Batch     5  of  1,324.    Elapsed: 0:00:03. Loss: 0.68169
  Batch    10  of  1,324.    Elapsed: 0:00:04. Loss: 0.68726
  Batch    15  of  1,324.    Elapsed: 0:00:06. Loss: 0.68930
  Batch    20  of  1,324.    Elapsed: 0:00:08. Loss: 0.68156
  Batch    25  of  1,324.    Elapsed: 0:00:10. Loss: 0.66997
  Batch    30  of  1,324.    Elapsed: 0:00:12. Loss: 0.66491
  Batch    35  of  1,324.    Elapsed: 0:00:13. Loss: 0.66935
  Batch    40  of  1,324.    Elapsed: 0:00:15. Loss: 0.66525
  Batch    45  of  1,324.    Elapsed: 0:00:17. Loss: 0.66014
  Batch    50  of  1,324.    Elapsed: 0:00:19. Loss: 0.66351
  Batch    55  of  1,324.    Elapsed: 0:00:21. Loss: 0.65660
  Batch    60  of  1,324.    Elapsed: 0:00:23. Loss: 0.65563
  Batch    65  of  1,324.    Elapsed: 0:00:24. Loss: 0.65423
  Batch    70  of  1,324.    Elapsed: 0:00:26. Loss: 0.65225
  Batch    75  of  1,324.    Elapsed: 0:00:28. Loss: 0.65228
  Batch    80  of  1,324.    Elapsed: 0:00:30. Loss: 0.65225
  Batch    

In [None]:
# Save the model's current weights under a fixed name in the working directory.
model_path = 'model_state_dict.pt'
torch.save(model.state_dict(), model_path)

In [None]:
# Mount Google Drive at /content/drive so the following cells can read and write files under it.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Write a second copy of the weights to Google Drive (needs Drive mounted at /content/drive).
modelPathDrive = '/content/drive/MyDrive/Bert.pt'
torch.save(model.state_dict(), modelPathDrive)

In [None]:
# Reload the checkpoint stored on Drive and list the parameter names it contains.
# map_location remaps the stored tensors onto the device chosen at the top of the
# notebook, so a checkpoint written on a GPU runtime still loads on a CPU-only one.
state_dict = torch.load('/content/drive/MyDrive/Bert.pt', map_location=device)
print(state_dict.keys())

odict_keys(['bertmodel.embeddings.position_ids', 'bertmodel.embeddings.word_embeddings.weight', 'bertmodel.embeddings.position_embeddings.weight', 'bertmodel.embeddings.token_type_embeddings.weight', 'bertmodel.embeddings.LayerNorm.weight', 'bertmodel.embeddings.LayerNorm.bias', 'bertmodel.encoder.embedding_hidden_mapping_in.weight', 'bertmodel.encoder.embedding_hidden_mapping_in.bias', 'bertmodel.encoder.albert_layer_groups.0.albert_layers.0.full_layer_layer_norm.weight', 'bertmodel.encoder.albert_layer_groups.0.albert_layers.0.full_layer_layer_norm.bias', 'bertmodel.encoder.albert_layer_groups.0.albert_layers.0.attention.query.weight', 'bertmodel.encoder.albert_layer_groups.0.albert_layers.0.attention.query.bias', 'bertmodel.encoder.albert_layer_groups.0.albert_layers.0.attention.key.weight', 'bertmodel.encoder.albert_layer_groups.0.albert_layers.0.attention.key.bias', 'bertmodel.encoder.albert_layer_groups.0.albert_layers.0.attention.value.weight', 'bertmodel.encoder.albert_layer_gr

In [None]:
# Copy the reloaded weights into the in-memory model; the call's return value is shown as the cell output.
model.load_state_dict(state_dict)

<All keys matched successfully>

In [None]:
# Inspect the model: first its module structure, then the parameter names in its state dict.
print("Our model: \n\n", model, '\n')
print("The state dict keys: \n\n", model.state_dict().keys())

Our model: 

 MultiClassClassifier(
  (bertmodel): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(105879, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,),

In [None]:
# Load the tab-separated Hindi dataset from Drive and display it.
# NOTE(review): this path is relative, unlike the absolute '/content/drive/...' paths used for the
# checkpoint — presumably it relies on the working directory being /content; confirm.
df = pd.read_csv('drive/MyDrive/hindi_dataset.tsv',sep='\t')
df

Unnamed: 0,text_id,text,task_1,task_2,task_3
0,hasoc_hi_5556,"बांग्लादेश की शानदार वापसी, भारत को 314 रन पर ...",NOT,NONE,NONE
1,hasoc_hi_5648,सब रंडी नाच देखने मे व्यस्त जैसे ही कोई #शांती...,HOF,PRFN,UNT
2,hasoc_hi_164,तुम जैसे हरामियों के लिए बस जूतों की कमी है शु...,HOF,PRFN,TIN
3,hasoc_hi_3530,"बीजेपी MLA आकाश विजयवर्गीय जेल से रिहा, जमानत ...",NOT,NONE,NONE
4,hasoc_hi_5206,चमकी बुखार: विधानसभा परिसर में आरजेडी का प्रदर...,NOT,NONE,NONE
...,...,...,...,...,...
4660,hasoc_hi_6606,पाकिस्तान ने हिंदुओं के ख़िलाफ़ बोलने वाले को ...,NOT,NONE,NONE
4661,hasoc_hi_4931,कोहली है #नेहरू नहीं जो अंग्रेजों के तलवे चाटन...,HOF,PRFN,TIN
4662,hasoc_hi_1059,परशुराम? वही जिसने अपनी मां की हत्या की थीं?,NOT,NONE,NONE
4663,hasoc_hi_5429,जिस देश में #कन्हैया_कुमार जैसा पढ़ा लिखा युवा...,HOF,HATE,TIN


In [None]:
# Drop the task_2 and task_3 label columns.
# Rebinding `df` (rather than mutating it with inplace=True) and passing
# errors='ignore' keeps this cell re-runnable: a second execution no longer
# raises KeyError because the columns are already gone.
df = df.drop(columns=['task_2', 'task_3'], errors='ignore')
df

Unnamed: 0,text_id,text,task_1
0,hasoc_hi_5556,"बांग्लादेश की शानदार वापसी, भारत को 314 रन पर ...",NOT
1,hasoc_hi_5648,सब रंडी नाच देखने मे व्यस्त जैसे ही कोई #शांती...,HOF
2,hasoc_hi_164,तुम जैसे हरामियों के लिए बस जूतों की कमी है शु...,HOF
3,hasoc_hi_3530,"बीजेपी MLA आकाश विजयवर्गीय जेल से रिहा, जमानत ...",NOT
4,hasoc_hi_5206,चमकी बुखार: विधानसभा परिसर में आरजेडी का प्रदर...,NOT
...,...,...,...
4660,hasoc_hi_6606,पाकिस्तान ने हिंदुओं के ख़िलाफ़ बोलने वाले को ...,NOT
4661,hasoc_hi_4931,कोहली है #नेहरू नहीं जो अंग्रेजों के तलवे चाटन...,HOF
4662,hasoc_hi_1059,परशुराम? वही जिसने अपनी मां की हत्या की थीं?,NOT
4663,hasoc_hi_5429,जिस देश में #कन्हैया_कुमार जैसा पढ़ा लिखा युवा...,HOF


In [None]:
# Add an 'Offensive' column initialised to 0 for every row (returns a new frame; `df` is left unchanged).
df1 = df.assign(Offensive=0)
df1

Unnamed: 0,text_id,text,task_1,Offensive
0,hasoc_hi_5556,"बांग्लादेश की शानदार वापसी, भारत को 314 रन पर ...",NOT,0
1,hasoc_hi_5648,सब रंडी नाच देखने मे व्यस्त जैसे ही कोई #शांती...,HOF,0
2,hasoc_hi_164,तुम जैसे हरामियों के लिए बस जूतों की कमी है शु...,HOF,0
3,hasoc_hi_3530,"बीजेपी MLA आकाश विजयवर्गीय जेल से रिहा, जमानत ...",NOT,0
4,hasoc_hi_5206,चमकी बुखार: विधानसभा परिसर में आरजेडी का प्रदर...,NOT,0
...,...,...,...,...
4660,hasoc_hi_6606,पाकिस्तान ने हिंदुओं के ख़िलाफ़ बोलने वाले को ...,NOT,0
4661,hasoc_hi_4931,कोहली है #नेहरू नहीं जो अंग्रेजों के तलवे चाटन...,HOF,0
4662,hasoc_hi_1059,परशुराम? वही जिसने अपनी मां की हत्या की थीं?,NOT,0
4663,hasoc_hi_5429,जिस देश में #कन्हैया_कुमार जैसा पढ़ा लिखा युवा...,HOF,0


In [None]:
# Add a 'NotOffensive' column initialised to 0 for every row; the result becomes the working training frame.
train_df = df1.assign(NotOffensive=0)
train_df

Unnamed: 0,text_id,text,task_1,Offensive,NotOffensive
0,hasoc_hi_5556,"बांग्लादेश की शानदार वापसी, भारत को 314 रन पर ...",NOT,0,0
1,hasoc_hi_5648,सब रंडी नाच देखने मे व्यस्त जैसे ही कोई #शांती...,HOF,0,0
2,hasoc_hi_164,तुम जैसे हरामियों के लिए बस जूतों की कमी है शु...,HOF,0,0
3,hasoc_hi_3530,"बीजेपी MLA आकाश विजयवर्गीय जेल से रिहा, जमानत ...",NOT,0,0
4,hasoc_hi_5206,चमकी बुखार: विधानसभा परिसर में आरजेडी का प्रदर...,NOT,0,0
...,...,...,...,...,...
4660,hasoc_hi_6606,पाकिस्तान ने हिंदुओं के ख़िलाफ़ बोलने वाले को ...,NOT,0,0
4661,hasoc_hi_4931,कोहली है #नेहरू नहीं जो अंग्रेजों के तलवे चाटन...,HOF,0,0
4662,hasoc_hi_1059,परशुराम? वही जिसने अपनी मां की हत्या की थीं?,NOT,0,0
4663,hasoc_hi_5429,जिस देश में #कन्हैया_कुमार जैसा पढ़ा लिखा युवा...,HOF,0,0


In [None]:
# Derive the two one-hot label columns from task_1: 'HOF' rows are Offensive,
# every other row is NotOffensive.
# Whole-column assignment replaces the per-row chained indexing
# (train_df[col][index] = ...), which triggers pandas' SettingWithCopyWarning,
# is not guaranteed to write through to the frame, and is slow on large frames.
train_df['Offensive'] = (train_df['task_1'] == 'HOF').astype(int)
train_df['NotOffensive'] = 1 - train_df['Offensive']
train_df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_df['Offensive'][index] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_df['NotOffensive'][index] = 1
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_df['Offensive'][index] = 1
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_df['NotOffensive'][index] = 0


Unnamed: 0,text_id,text,task_1,Offensive,NotOffensive
0,hasoc_hi_5556,"बांग्लादेश की शानदार वापसी, भारत को 314 रन पर ...",NOT,0,1
1,hasoc_hi_5648,सब रंडी नाच देखने मे व्यस्त जैसे ही कोई #शांती...,HOF,1,0
2,hasoc_hi_164,तुम जैसे हरामियों के लिए बस जूतों की कमी है शु...,HOF,1,0
3,hasoc_hi_3530,"बीजेपी MLA आकाश विजयवर्गीय जेल से रिहा, जमानत ...",NOT,0,1
4,hasoc_hi_5206,चमकी बुखार: विधानसभा परिसर में आरजेडी का प्रदर...,NOT,0,1
...,...,...,...,...,...
4660,hasoc_hi_6606,पाकिस्तान ने हिंदुओं के ख़िलाफ़ बोलने वाले को ...,NOT,0,1
4661,hasoc_hi_4931,कोहली है #नेहरू नहीं जो अंग्रेजों के तलवे चाटन...,HOF,1,0
4662,hasoc_hi_1059,परशुराम? वही जिसने अपनी मां की हत्या की थीं?,NOT,0,1
4663,hasoc_hi_5429,जिस देश में #कन्हैया_कुमार जैसा पढ़ा लिखा युवा...,HOF,1,0


In [None]:
# Drop the text_id column.
# Rebinding `train_df` (rather than mutating it with inplace=True) and passing
# errors='ignore' keeps this cell re-runnable: a second execution no longer
# raises KeyError because the column is already gone.
train_df = train_df.drop(columns=['text_id'], errors='ignore')
train_df

Unnamed: 0,text,task_1,Offensive,NotOffensive
0,"बांग्लादेश की शानदार वापसी, भारत को 314 रन पर ...",NOT,0,1
1,सब रंडी नाच देखने मे व्यस्त जैसे ही कोई #शांती...,HOF,1,0
2,तुम जैसे हरामियों के लिए बस जूतों की कमी है शु...,HOF,1,0
3,"बीजेपी MLA आकाश विजयवर्गीय जेल से रिहा, जमानत ...",NOT,0,1
4,चमकी बुखार: विधानसभा परिसर में आरजेडी का प्रदर...,NOT,0,1
...,...,...,...,...
4660,पाकिस्तान ने हिंदुओं के ख़िलाफ़ बोलने वाले को ...,NOT,0,1
4661,कोहली है #नेहरू नहीं जो अंग्रेजों के तलवे चाटन...,HOF,1,0
4662,परशुराम? वही जिसने अपनी मां की हत्या की थीं?,NOT,0,1
4663,जिस देश में #कन्हैया_कुमार जैसा पढ़ा लिखा युवा...,HOF,1,0


In [None]:
# Show the remaining column names of the training frame.
train_df.columns

Index(['text', 'task_1', 'Offensive', 'NotOffensive'], dtype='object')

In [None]:
# Rebind train_df to the frame read from this CSV on Drive; the frame built in the cells above is no longer referenced by this name.
# NOTE(review): judging by the file name this is presumably a back-translation-augmented version of the same data — confirm.
train_df = pd.read_csv('drive/MyDrive/final_hindi_backtranslated.csv')
train_df

Unnamed: 0,text,task_1,Offensive,NotOffensive
0,"बांग्लादेश की शानदार वापसी, भारत को 314 रन पर ...",NOT,0,1
1,सब रंडी नाच देखने मे व्यस्त जैसे ही कोई #शांती...,HOF,1,0
2,तुम जैसे हरामियों के लिए बस जूतों की कमी है शु...,HOF,1,0
3,"बीजेपी MLA आकाश विजयवर्गीय जेल से रिहा, जमानत ...",NOT,0,1
4,चमकी बुखार: विधानसभा परिसर में आरजेडी का प्रदर...,NOT,0,1
...,...,...,...,...
6060,कश्मीर भगवान से है,HOF,1,0
6061,अंग्रेजों के खिलाफ पहला संगठित संघर्ष रानी लक्...,NOT,0,1
6062,ऑटो से मोबाइल तक टेक की 5 बड़ी खबरें जानें ...,NOT,0,1
6063,90 % लोग पहले से ही जानते थे कि भारत मैच हार ज...,HOF,1,0


In [None]:
# Label columns are every column after the first two; sum each one to get the
# number of rows carrying that label, then tabulate the totals.
categories = train_df.columns[2:]
counts = [(category, train_df[category].sum()) for category in categories]
df_stats = pd.DataFrame(counts, columns=['category', 'number of comments'])
df_stats

Unnamed: 0,category,number of comments
0,Offensive,2888
1,NotOffensive,3177


In [None]:
# Alias for the label column names computed above.
target_list = categories

In [None]:
# Load the lower-casing 'bert-base-uncased' tokenizer.
# NOTE(review): the next cell rebinds `tokenizer` to the indic-bert tokenizer, so only one of the two is in effect — confirm which one is intended.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased",do_lower_case=True)

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [None]:
# Rebind `tokenizer` to the 'ai4bharat/indic-bert' tokenizer.
# NOTE(review): AutoTokenizer is not among the names imported in the notebook's main import cell — confirm it is imported somewhere before this cell runs on a fresh kernel.
tokenizer = AutoTokenizer.from_pretrained('ai4bharat/indic-bert')

In [None]:
def tokenizeWithBert(example):
  """Encode one text with the notebook-level ``tokenizer``.

  Special tokens are added, the sequence is padded or truncated to
  512 tokens, and the result of ``encode_plus`` (PyTorch tensors,
  attention mask included) is returned unchanged.
  """
  encode_options = dict(
    add_special_tokens = True,   # tokens CLS, PAD, SEP
    max_length = 512, #MAX_LEN
    padding = 'max_length',
    truncation = True,
    return_attention_mask = True,
    return_tensors = 'pt',
  )
  return tokenizer.encode_plus(example, **encode_options)

In [None]:
def get_dataset(df, tokenizer, mode='train', max_length=300, train_fraction=0.8, seed=None):
    """Tokenize ``df['text']`` and split the encoded data into train/validation sets.

    Args:
        df: DataFrame with a ``text`` column; every column from the third one
            onwards is used as a label column.
        tokenizer: object exposing ``encode_plus`` that returns ``input_ids``
            and ``attention_mask`` tensors (e.g. a Hugging Face tokenizer).
        mode: unused; kept so existing calls keep working.
        max_length: number of tokens every sequence is padded/truncated to.
            Each text is also cut to this many characters before tokenizing,
            as the original implementation did with the literal 300.
        train_fraction: share of the examples placed in the training split.
        seed: optional integer; when given, the split uses its own seeded
            generator so repeated calls return the same partition. When None
            the global torch RNG is used, as before.

    Returns:
        ``(train_dataset, val_dataset)``: two ``random_split`` subsets of a
        ``TensorDataset`` holding (input_ids, attention_mask, labels).
    """
    sentences, labels = df['text'], df.iloc[:, 2:].to_numpy()
    in_T = []
    in_T_attn_masks = []
    for sentence in sentences:
        enc_sent_dict = tokenizer.encode_plus(
            sentence[:max_length],
            max_length=max_length,
            add_special_tokens=True,
            # Explicit padding/truncation replaces the deprecated
            # pad_to_max_length flag and silences the "Truncation was not
            # explicitly activated" warning without changing the encoding.
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt'
        )
        in_T.append(enc_sent_dict['input_ids'])
        in_T_attn_masks.append(enc_sent_dict['attention_mask'])

    # Each encoding has a leading batch dimension of 1; stack them along it.
    in_T = torch.cat(in_T, dim=0)
    in_T_attn_masks = torch.cat(in_T_attn_masks, dim=0)
    labels = torch.tensor(labels, dtype=torch.float32)
    print('Text Input: ' , in_T.shape)
    print('Text Input Attention: ' , in_T_attn_masks.shape)
    print('Labels: ' , labels.shape)

    dataset = TensorDataset(
        in_T,
        in_T_attn_masks,
        labels
    )

    train_size = int(train_fraction * len(dataset))
    val_size = len(dataset) - train_size

    # Only pass a generator when a seed was requested, so the default call
    # keeps drawing from the global RNG exactly as before.
    split_kwargs = {}
    if seed is not None:
        split_kwargs['generator'] = torch.Generator().manual_seed(seed)

    train_dataset, val_dataset = random_split(dataset, [train_size, val_size], **split_kwargs)
    return train_dataset, val_dataset

In [None]:
# Build the train/validation datasets using the 'bert-base-uncased' tokenizer.
# NOTE(review): the next cell rebinds tokenizer, train_dataset and val_dataset using indic-bert,
# so only one of the two cells' results is in effect downstream — confirm which tokenizer matches the model.
# `mode` is accepted by get_dataset but not used inside it.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)
train_dataset, val_dataset = get_dataset(
    train_df,
    tokenizer = tokenizer,
    mode = 'train'
)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Text Input:  torch.Size([6065, 300])
Text Input Attention:  torch.Size([6065, 300])
Labels:  torch.Size([6065, 2])


In [None]:
# Rebuild the train/validation datasets using the 'ai4bharat/indic-bert' tokenizer; this rebinds
# tokenizer, train_dataset and val_dataset. Because get_dataset splits randomly, the partition
# produced here is not necessarily the one produced by any earlier call.
# `mode` is accepted by get_dataset but not used inside it.
tokenizer = AutoTokenizer.from_pretrained('ai4bharat/indic-bert')
train_dataset, val_dataset = get_dataset(
    train_df,
    tokenizer = tokenizer,
    mode = 'train'
)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Text Input:  torch.Size([6065, 300])
Text Input Attention:  torch.Size([6065, 300])
Labels:  torch.Size([6065, 2])


In [None]:
# Wrap both splits in DataLoaders with batches of 8 examples: the training loader
# uses a RandomSampler, the validation loader a SequentialSampler.
batch_size = 8
train_dataloader = DataLoader(
    train_dataset,
    batch_size = batch_size,
    sampler = RandomSampler(train_dataset)
)
val_dataloader = DataLoader(
    val_dataset,
    batch_size = batch_size,
    sampler = SequentialSampler(val_dataset)
)

print('Data Ready!!')

Data Ready!!


In [None]:
#TRAINING and VALIDATION
# Fine-tunes `model` on train_dataloader for `epochs` epochs. After every epoch
# it evaluates on val_dataloader, prints per-label metrics, appends a summary
# dict to training_stats and saves the weights to 'model_state_dict_<epoch>.pt'.
# Relies on model, optimizer, criterion and format_time defined in earlier cells.
epochs = 3   # reduced from 5 because each epoch takes a long time
total_steps = len(train_dataloader) * epochs
# Linear decay of the learning rate to zero over all optimizer steps, no warm-up.
scheduler = get_linear_schedule_with_warmup(optimizer,
                                           num_warmup_steps = 0,
                                           num_training_steps = total_steps)


training_stats = []
total_t0 = time.time()

# NOTE(review): best_val_loss is initialised but never updated or read in this
# cell; a checkpoint is written after every epoch regardless of validation loss.
best_val_loss = 1e8
# Ground-truth labels of the whole validation split, one column per label.
true_labels = val_dataset[:][2].numpy()
label_columns = range(true_labels.shape[1])

for epoch_i in range(0, epochs):

    #############               Training
    print("")
    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
    print('Training...')

    t0 = time.time()

    total_train_loss = 0
    model.train()

    for step, batch in enumerate(train_dataloader):

        # Progress line every 5 batches; the running mean covers the `step`
        # batches processed so far.
        if step % 5 == 0 and not step == 0:
            elapsed = format_time(time.time() - t0)
            print('  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}. Loss: {:.5f}'.format(step, len(train_dataloader), elapsed, total_train_loss/step))

        b_in_T            = batch[0].to(device)
        b_in_T_attn_masks = batch[1].to(device)
        b_labels          = batch[2].to(device)

        # Clear gradients left over from the previous step.
        model.zero_grad()

        logits = model(b_in_T, b_in_T_attn_masks)
        loss = criterion(logits, b_labels)

        total_train_loss += loss.item()
        loss.backward()

        # Clip the global gradient norm to 1.0 before the parameter update.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        scheduler.step()

    avg_train_loss = total_train_loss / len(train_dataloader)

    # Measure how long this epoch took.
    training_time = format_time(time.time() - t0)

    print("")
    print("  Average training loss: {0:.2f}".format(avg_train_loss))
    print("  Training epcoh took: {:}".format(training_time))

    ##########               Validation

    print("")
    print("Running Validation...")

    t0 = time.time()

    model.eval()

    total_eval_loss = 0
    # Per-batch model outputs, concatenated once after the loop instead of
    # re-allocating a growing array on every batch.
    batch_outputs = []

    # Evaluate data for one epoch
    for batch in val_dataloader:

        b_in_T            = batch[0].to(device)
        b_in_T_attn_masks = batch[1].to(device)
        b_labels          = batch[2].to(device)

        with torch.no_grad():
            logits = model(b_in_T, b_in_T_attn_masks)
            loss = criterion(logits, b_labels)

        # Accumulate the validation loss.
        total_eval_loss += loss.item()

        # Move the outputs to CPU for scikit-learn.
        batch_outputs.append(logits.detach().cpu().numpy())

    # Calculate the average loss over all of the batches.
    avg_val_loss = total_eval_loss / len(val_dataloader)

    # Measure how long the validation run took.
    validation_time = format_time(time.time() - t0)

    # Binarise every output column independently at 0.25.
    # NOTE(review): the threshold is applied directly to the model outputs;
    # whether these are probabilities or raw logits depends on the model and
    # criterion defined elsewhere — confirm 0.25 is meant for that scale.
    pred_labels = (np.concatenate(batch_outputs, axis=0) >= 0.25).astype(int)

    print("  Validation Loss: {0:.2f}".format(avg_val_loss))
    print("  Validation took: {:}".format(validation_time))

    # Compute each per-label metric once; reused for printing and for the stats.
    accuracies = [accuracy_score(true_labels[:,i], pred_labels[:,i]) for i in label_columns]
    macro_f1s = [f1_score(true_labels[:,i], pred_labels[:,i], average='macro') for i in label_columns]
    weighted_f1s = [f1_score(true_labels[:,i], pred_labels[:,i], average='weighted') for i in label_columns]

    # Report the final accuracy, f1-score for this validation run.
    for accuracy in accuracies:
        print("  Accuracy: {0:.2f}".format(accuracy))

    for macro_f1 in macro_f1s:
        print("  Macro F1-score: {0:.2f}".format(macro_f1))

    for weighted_f1 in weighted_f1s:
        print("  Weighted F1-score: {0:.2f}".format(weighted_f1))

    print('Classification Report:')
    for i in label_columns:
        print(classification_report(true_labels[:,i], pred_labels[:,i]))

    print('Confusion Matrix:')
    for i in label_columns:
        print(confusion_matrix(true_labels[:,i], pred_labels[:,i]))

    # Record all statistics from this epoch (metrics averaged over the label columns).
    training_stats.append(
        {
            'epoch': epoch_i + 1,
            'training_loss': avg_train_loss,
            'val_loss': avg_val_loss,
            'val_accuracy': np.mean(accuracies),
            'val_macro_f1': np.mean(macro_f1s),
            'val_weighted_f1': np.mean(weighted_f1s),
            'training_time': training_time,
            'val_tim': validation_time
        }
    )

    # Checkpoint the weights after every epoch (file name uses the 0-based epoch index).
    model_path = 'model_state_dict_'+str(epoch_i)+'.pt'
    torch.save(model.state_dict(), model_path)


Training...
  Batch     5  of    607.    Elapsed: 0:00:02. Loss: 1.00029
  Batch    10  of    607.    Elapsed: 0:00:04. Loss: 0.91382
  Batch    15  of    607.    Elapsed: 0:00:06. Loss: 0.90042
  Batch    20  of    607.    Elapsed: 0:00:07. Loss: 0.84139
  Batch    25  of    607.    Elapsed: 0:00:09. Loss: 0.82332
  Batch    30  of    607.    Elapsed: 0:00:11. Loss: 0.79778
  Batch    35  of    607.    Elapsed: 0:00:13. Loss: 0.76302
  Batch    40  of    607.    Elapsed: 0:00:15. Loss: 0.74490
  Batch    45  of    607.    Elapsed: 0:00:17. Loss: 0.72824
  Batch    50  of    607.    Elapsed: 0:00:19. Loss: 0.72678
  Batch    55  of    607.    Elapsed: 0:00:21. Loss: 0.70282
  Batch    60  of    607.    Elapsed: 0:00:23. Loss: 0.69676
  Batch    65  of    607.    Elapsed: 0:00:24. Loss: 0.68650
  Batch    70  of    607.    Elapsed: 0:00:26. Loss: 0.67165
  Batch    75  of    607.    Elapsed: 0:00:28. Loss: 0.66721
  Batch    80  of    607.    Elapsed: 0:00:30. Loss: 0.66550
  Batch    