In [1]:
import torch
from torchtext import data
from torchtext import datasets
import random
import numpy as np

SEED = 1234

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

In [2]:
import pandas as pd
from glob import glob

path_list_train = glob("OneDrive_1_11-1-2020/*/*train*.tsv")
path_list_dev = glob("OneDrive_1_11-1-2020/*/*dev*.tsv")

#print(path_list_train)
#print(path_list_dev)
#print(len(path_list_dev))

i=len(path_list_train)-1

file_train = path_list_train[i]
file_dev = path_list_dev[i]

lang = file_train[-6:-4]

import csv
df_train = pd.read_csv(file_train,sep="\t",encoding='utf-8',quoting=csv.QUOTE_NONE)
df_dev = pd.read_csv(file_dev,sep="\t",encoding='utf-8',quoting=csv.QUOTE_NONE)

df_train = df_train.dropna()
df_dev = df_dev.dropna()

train_sentences = list(df_train['text'].values)
train_labels = list(df_train['label'].values)

dev_sentences = list(df_dev['text'].values)
dev_labels = list(df_dev['label'].values)

In [4]:
from transformers import XLMRobertaTokenizer

tokenizer = XLMRobertaTokenizer.from_pretrained('xlm-roberta-base')

In [5]:
#choose max_length for bert model based on the input length

max_length = 0
list_len=[]
for sentence in train_sentences:
    #print(sentence)
    length = len(tokenizer.tokenize(sentence))
    list_len.append(length)
    
from collections import Counter
Counter(list_len).most_common(100)

[(33, 538),
 (34, 514),
 (32, 512),
 (35, 508),
 (36, 475),
 (37, 470),
 (31, 465),
 (38, 433),
 (39, 391),
 (40, 387),
 (41, 385),
 (30, 384),
 (42, 375),
 (44, 337),
 (43, 335),
 (45, 317),
 (46, 283),
 (29, 267),
 (47, 248),
 (49, 222),
 (48, 220),
 (51, 209),
 (50, 200),
 (52, 199),
 (54, 185),
 (53, 179),
 (28, 163),
 (55, 160),
 (57, 145),
 (56, 141),
 (58, 132),
 (59, 129),
 (61, 128),
 (27, 107),
 (14, 104),
 (21, 98),
 (63, 97),
 (60, 97),
 (20, 95),
 (62, 94),
 (18, 94),
 (17, 93),
 (24, 93),
 (65, 91),
 (12, 90),
 (15, 89),
 (25, 89),
 (26, 89),
 (22, 89),
 (16, 87),
 (19, 84),
 (64, 84),
 (13, 82),
 (23, 79),
 (10, 77),
 (11, 73),
 (69, 70),
 (66, 69),
 (67, 69),
 (73, 60),
 (72, 59),
 (9, 58),
 (68, 51),
 (70, 50),
 (75, 50),
 (74, 50),
 (8, 49),
 (76, 46),
 (78, 44),
 (81, 39),
 (71, 38),
 (79, 36),
 (77, 33),
 (7, 32),
 (82, 30),
 (85, 26),
 (83, 26),
 (80, 26),
 (84, 22),
 (96, 22),
 (87, 22),
 (89, 22),
 (88, 18),
 (90, 18),
 (86, 15),
 (93, 14),
 (91, 13),
 (92, 13),


In [6]:
from sklearn import preprocessing

le = preprocessing.LabelEncoder()
le.fit(train_labels)
encoded_labels = le.transform(train_labels)
encoded_test_labels = le.transform(dev_labels)

In [7]:
def encoder_generator(sentences,labels):
    
    sent_index = []
    input_ids = []
    attention_masks =[]

    for index,sent in enumerate(sentences):
        
        sent_index.append(index)
        
        encoded_dict = tokenizer.encode_plus(sent,
                                             add_special_tokens=True,
                                             max_length=64,
                                             pad_to_max_length=True,
                                             truncation = True,
                                             return_attention_mask=True,
                                             return_tensors='pt')
        input_ids.append(encoded_dict['input_ids'])

        attention_masks.append(encoded_dict['attention_mask'])

    input_ids = torch.cat(input_ids,dim=0)
    attention_masks = torch.cat(attention_masks,dim=0)
    labels = torch.tensor(labels)
    sent_index = torch.tensor(sent_index)

    return sent_index,input_ids,attention_masks,labels

train_sent_index,train_input_ids,train_attention_masks,train_encoded_label_tensors = encoder_generator(train_sentences,encoded_labels)
dev_sent_index,dev_input_ids,dev_attention_masks,dev_encoded_label_tensors = encoder_generator(dev_sentences,encoded_test_labels)
print('Original: ', train_sentences[0])
print('Token IDs:', train_input_ids[0])

Original:  So , the CPU then pick up the data copies the data from the keyboards internal buffer in to the memory right .
Token IDs: tensor([     0,   1061,      6,      4,     70,  86039,   7068,  39580,   1257,
            70,   2053,  71200,      7,     70,   2053,   1295,     70, 149186,
             7,  70796,    373,  18234,     23,     47,     70,  98323,   7108,
             6,      5,      2,      1,      1,      1,      1,      1,      1,
             1,      1,      1,      1,      1,      1,      1,      1,      1,
             1,      1,      1,      1,      1,      1,      1,      1,      1,
             1,      1,      1,      1,      1,      1,      1,      1,      1,
             1])


In [8]:
from torch.utils.data import TensorDataset,random_split

train_dataset = TensorDataset(train_input_ids,train_attention_masks,train_encoded_label_tensors)
dev_dataset = TensorDataset(dev_input_ids,dev_attention_masks,dev_encoded_label_tensors)


print('train data samples is {}'.format(len(train_dataset)))
print("valid data samples is {}".format(len(dev_dataset)))

train data samples is 13580
valid data samples is 1360


In [9]:
from torch.utils.data import DataLoader,RandomSampler,SequentialSampler

bs=4

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

train_data_loader = DataLoader(train_dataset,
                              sampler=RandomSampler(train_dataset),
                              batch_size=bs)
valid_data_loader = DataLoader(dev_dataset,
                              sampler=RandomSampler(dev_dataset),
                              batch_size=bs)

In [10]:
from transformers import XLMRobertaModel

xlm_roberta = XLMRobertaModel.from_pretrained('xlm-roberta-base')
xlm_roberta = xlm_roberta.to(device)

In [11]:
import torch.nn as nn
import torch.nn.functional as F

class CNN(nn.Module):
    def __init__(self, embedding_dim, n_filters, filter_sizes, output_dim, 
                 dropout, pad_idx):
        
        super().__init__()
        
        self.fc_input = nn.Linear(embedding_dim,embedding_dim)
        
        self.conv_0 = nn.Conv1d(in_channels = embedding_dim, 
                                out_channels = n_filters, 
                                kernel_size = filter_sizes[0])
        
        self.conv_1 = nn.Conv1d(in_channels = embedding_dim, 
                                out_channels = n_filters, 
                                kernel_size = filter_sizes[1])
        
        self.conv_2 = nn.Conv1d(in_channels = embedding_dim, 
                                out_channels = n_filters, 
                                kernel_size = filter_sizes[2])
        
        self.fc = nn.Linear(len(filter_sizes) * n_filters, output_dim)
        
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, encoded):
                
        #embedded = [batch size, sent len, emb dim]
        embedded = self.fc_input(encoded)
        #print(embedded.shape)
        
        embedded = embedded.permute(0, 2, 1)
        #print(embedded.shape)
        
        #embedded = [batch size, emb dim, sent len]
        
        conved_0 = F.relu(self.conv_0(embedded))
        conved_1 = F.relu(self.conv_1(embedded))
        conved_2 = F.relu(self.conv_2(embedded))
            
        #conved_n = [batch size, n_filters, sent len - filter_sizes[n] + 1]
        
        pooled_0 = F.max_pool1d(conved_0, conved_0.shape[2]).squeeze(2)
        pooled_1 = F.max_pool1d(conved_1, conved_1.shape[2]).squeeze(2)
        pooled_2 = F.max_pool1d(conved_2, conved_2.shape[2]).squeeze(2)
        
        #pooled_n = [batch size, n_fibatlters]
        
        cat = self.dropout(torch.cat((pooled_0, pooled_1, pooled_2), dim = 1))

        #cat = [batch size, n_filters * len(filter_sizes)]
            
        result =  self.fc(cat)
        
        #print(result.shape)
        
        return result

In [12]:
EMBEDDING_DIM = 768
N_FILTERS = 100
FILTER_SIZES = [3,4,5]
OUTPUT_DIM = len(le.classes_)
DROPOUT = 0.1
PAD_IDX = tokenizer.pad_token_id

cnn = CNN(EMBEDDING_DIM, N_FILTERS, FILTER_SIZES, OUTPUT_DIM, DROPOUT, PAD_IDX)
cnn = cnn.to(device)

In [13]:
import torch.optim as optim

model_prameters = list(xlm_roberta.parameters())+list(cnn.parameters())

optimizer = optim.Adam(model_prameters,lr=2e-5,eps=1e-8)

criterion = nn.CrossEntropyLoss()

criterion = criterion.to(device)

In [14]:
def categorical_accuracy(preds, y):
    """
    Returns accuracy per batch, i.e. if you get 8/10 right, this returns 0.8, NOT 8
    """
    max_preds = preds.argmax(dim = 1, keepdim = True) # get the index of the max probability
    correct = max_preds.squeeze(1).eq(y)
    return correct.sum() / torch.FloatTensor([y.shape[0]])

In [15]:
from tqdm import tqdm

def train():
    
    epoch_loss = 0
    epoch_acc = 0
    
    xlm_roberta.train()
    cnn.train()
    
    for batch in tqdm(train_data_loader):
        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_labels = batch[2].to(device)
        
        optimizer.zero_grad()
        
        embedded = xlm_roberta(b_input_ids,b_input_mask)[0]
        
        predictions = cnn(embedded)
        #print(predictions.shape)
        #print(b_labels.shape)
        
        loss = criterion(predictions, b_labels)
        
        acc = categorical_accuracy(predictions, b_labels)
        #print(acc)
        
        loss.backward()
        
        optimizer.step()
        
        epoch_loss += loss.item()
        epoch_acc += acc.item()
        
    return epoch_loss / len(train_data_loader), epoch_acc / len(train_data_loader)

In [16]:
import numpy as np

def predictions_labels(preds,labels):
    pred = np.argmax(preds,axis=1).flatten()
    label = labels.flatten()
    return pred,label

In [17]:
from sklearn.metrics import classification_report,accuracy_score,f1_score
def eval():
    epoch_loss = 0
    
    total_predictions = []
    total_true = []
    
    all_true_labels = []
    all_pred_labels = []
    
    xlm_roberta.eval()
    cnn.eval()
    
    with torch.no_grad():
    
        for batch in tqdm(valid_data_loader):
            b_input_ids = batch[0].to(device)
            b_input_mask = batch[1].to(device)
            b_labels = batch[2].to(device)

            embedded = xlm_roberta(b_input_ids,b_input_mask)[0]
            predictions = cnn(embedded)
            #print(predictions.shape)
            #print(b_labels.shape)

            loss = criterion(predictions, b_labels)
            epoch_loss += loss.item()
            
            predictions = predictions.detach().cpu().numpy()

            label_ids = b_labels.to('cpu').numpy()
    
            pred,true = predictions_labels(predictions,label_ids)
        
            all_pred_labels.extend(pred)
            all_true_labels.extend(true)

    print(classification_report(all_pred_labels,all_true_labels))
    avg_val_accuracy = accuracy_score(all_pred_labels,all_true_labels)
    macro_f1_score = f1_score(all_pred_labels,all_true_labels,average='macro')
    
    avg_val_loss = epoch_loss/len(valid_data_loader)

    print("accuracy = {0:.2f}".format(avg_val_accuracy))
            
    return avg_val_loss,avg_val_accuracy,macro_f1_score

In [18]:
import time
def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs

In [19]:
epochs = 15

best_macro_f1 = float('0')

for epoch in range(epochs):
    
    start_time = time.time()
    train_loss,train_acc = train()
    valid_loss,valid_acc,macro_f1 = eval()
    end_time = time.time()
    
        
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
        
    if macro_f1 > best_macro_f1:
        best_macro_f1 = macro_f1
        torch.save(xlm_roberta,'xlmr_cnn_model_part1_'+lang+'task2a.pt')
        torch.save(cnn,'xlmr_cnn_model_part2_'+lang+'task2a.pt')
        print("model saved")
    
    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')

100%|██████████████████████████████████████████████████████████████████████████████| 3395/3395 [09:16<00:00,  6.10it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 340/340 [00:06<00:00, 48.65it/s]


              precision    recall  f1-score   support

           0       0.60      0.71      0.65       170
           1       0.58      0.66      0.62       160
           2       0.76      0.71      0.73       192
           3       0.79      0.79      0.79       199
           4       0.72      0.73      0.73       197
           5       0.82      0.74      0.78       221
           6       0.82      0.74      0.78       221

    accuracy                           0.73      1360
   macro avg       0.73      0.73      0.72      1360
weighted avg       0.74      0.73      0.73      1360

accuracy = 0.73


  "type " + obj.__name__ + ". It won't be checked "
  0%|                                                                                 | 1/3395 [00:00<08:48,  6.43it/s]

model saved
Epoch: 01 | Epoch Time: 9m 23s
	Train Loss: 0.847 | Train Acc: 69.41%
	 Val. Loss: 0.860 |  Val. Acc: 72.87%


100%|██████████████████████████████████████████████████████████████████████████████| 3395/3395 [09:15<00:00,  6.11it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 340/340 [00:07<00:00, 47.32it/s]
  0%|                                                                                 | 1/3395 [00:00<08:58,  6.31it/s]

              precision    recall  f1-score   support

           0       0.54      0.76      0.63       143
           1       0.57      0.65      0.61       157
           2       0.19      0.97      0.32        36
           3       0.89      0.57      0.69       314
           4       0.87      0.68      0.76       256
           5       0.85      0.67      0.75       255
           6       0.77      0.77      0.77       199

    accuracy                           0.68      1360
   macro avg       0.67      0.72      0.65      1360
weighted avg       0.77      0.68      0.70      1360

accuracy = 0.68
Epoch: 02 | Epoch Time: 9m 22s
	Train Loss: 0.434 | Train Acc: 85.05%
	 Val. Loss: 1.039 |  Val. Acc: 67.72%


100%|██████████████████████████████████████████████████████████████████████████████| 3395/3395 [09:12<00:00,  6.15it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 340/340 [00:07<00:00, 48.27it/s]


              precision    recall  f1-score   support

           0       0.71      0.80      0.75       178
           1       0.63      0.69      0.66       163
           2       0.83      0.62      0.71       241
           3       0.64      0.90      0.75       142
           4       0.88      0.76      0.82       229
           5       0.82      0.80      0.81       206
           6       0.80      0.80      0.80       201

    accuracy                           0.76      1360
   macro avg       0.76      0.77      0.76      1360
weighted avg       0.77      0.76      0.76      1360

accuracy = 0.76


  0%|                                                                                 | 1/3395 [00:00<09:28,  5.97it/s]

model saved
Epoch: 03 | Epoch Time: 9m 19s
	Train Loss: 0.265 | Train Acc: 90.98%
	 Val. Loss: 0.880 |  Val. Acc: 75.88%


100%|██████████████████████████████████████████████████████████████████████████████| 3395/3395 [09:04<00:00,  6.23it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 340/340 [00:06<00:00, 50.38it/s]
  0%|                                                                                 | 1/3395 [00:00<09:45,  5.79it/s]

              precision    recall  f1-score   support

           0       0.73      0.69      0.71       213
           1       0.73      0.54      0.62       244
           2       0.68      0.72      0.70       172
           3       0.72      0.80      0.76       181
           4       0.66      0.86      0.75       153
           5       0.73      0.77      0.75       190
           6       0.84      0.81      0.83       207

    accuracy                           0.73      1360
   macro avg       0.73      0.74      0.73      1360
weighted avg       0.73      0.73      0.73      1360

accuracy = 0.73
Epoch: 04 | Epoch Time: 9m 11s
	Train Loss: 0.200 | Train Acc: 93.42%
	 Val. Loss: 0.958 |  Val. Acc: 73.09%


100%|██████████████████████████████████████████████████████████████████████████████| 3395/3395 [09:16<00:00,  6.10it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 340/340 [00:07<00:00, 48.29it/s]
  0%|                                                                                 | 1/3395 [00:00<08:54,  6.34it/s]

              precision    recall  f1-score   support

           0       0.71      0.76      0.74       188
           1       0.54      0.62      0.58       157
           2       0.58      0.78      0.67       134
           3       0.80      0.75      0.77       213
           4       0.67      0.84      0.75       159
           5       0.83      0.66      0.74       252
           6       0.89      0.69      0.77       257

    accuracy                           0.72      1360
   macro avg       0.72      0.73      0.72      1360
weighted avg       0.74      0.72      0.73      1360

accuracy = 0.72
Epoch: 05 | Epoch Time: 9m 23s
	Train Loss: 0.436 | Train Acc: 84.21%
	 Val. Loss: 0.979 |  Val. Acc: 72.28%


100%|██████████████████████████████████████████████████████████████████████████████| 3395/3395 [09:12<00:00,  6.14it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 340/340 [00:06<00:00, 48.72it/s]
  0%|                                                                                 | 1/3395 [00:00<08:54,  6.35it/s]

              precision    recall  f1-score   support

           0       0.64      0.74      0.69       173
           1       0.70      0.61      0.65       208
           2       0.68      0.67      0.67       182
           3       0.79      0.73      0.76       214
           4       0.76      0.81      0.78       187
           5       0.84      0.79      0.81       214
           6       0.80      0.88      0.84       182

    accuracy                           0.74      1360
   macro avg       0.74      0.75      0.74      1360
weighted avg       0.75      0.74      0.74      1360

accuracy = 0.74
Epoch: 06 | Epoch Time: 9m 19s
	Train Loss: 0.156 | Train Acc: 94.86%
	 Val. Loss: 1.036 |  Val. Acc: 74.41%


100%|██████████████████████████████████████████████████████████████████████████████| 3395/3395 [09:15<00:00,  6.12it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 340/340 [00:06<00:00, 49.41it/s]
  0%|                                                                                 | 1/3395 [00:00<09:08,  6.19it/s]

              precision    recall  f1-score   support

           0       0.78      0.62      0.69       250
           1       0.59      0.65      0.62       162
           2       0.72      0.64      0.67       203
           3       0.70      0.76      0.73       184
           4       0.80      0.81      0.80       197
           5       0.75      0.82      0.78       183
           6       0.75      0.83      0.79       181

    accuracy                           0.73      1360
   macro avg       0.73      0.73      0.73      1360
weighted avg       0.73      0.73      0.73      1360

accuracy = 0.73
Epoch: 07 | Epoch Time: 9m 21s
	Train Loss: 0.114 | Train Acc: 96.11%
	 Val. Loss: 1.117 |  Val. Acc: 72.79%


100%|██████████████████████████████████████████████████████████████████████████████| 3395/3395 [09:16<00:00,  6.10it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 340/340 [00:07<00:00, 45.74it/s]
  0%|                                                                                 | 1/3395 [00:00<09:52,  5.73it/s]

              precision    recall  f1-score   support

           0       0.56      0.91      0.69       122
           1       0.58      0.66      0.62       159
           2       0.80      0.71      0.75       204
           3       0.74      0.87      0.80       170
           4       0.84      0.79      0.82       214
           5       0.93      0.64      0.75       291
           6       0.83      0.83      0.83       200

    accuracy                           0.76      1360
   macro avg       0.75      0.77      0.75      1360
weighted avg       0.78      0.76      0.76      1360

accuracy = 0.76
Epoch: 08 | Epoch Time: 9m 23s
	Train Loss: 0.082 | Train Acc: 97.32%
	 Val. Loss: 1.224 |  Val. Acc: 75.59%


100%|██████████████████████████████████████████████████████████████████████████████| 3395/3395 [09:12<00:00,  6.15it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 340/340 [00:06<00:00, 49.53it/s]


              precision    recall  f1-score   support

           0       0.65      0.83      0.72       156
           1       0.65      0.67      0.66       175
           2       0.83      0.66      0.74       226
           3       0.77      0.83      0.80       186
           4       0.80      0.78      0.79       204
           5       0.81      0.75      0.78       214
           6       0.81      0.82      0.82       199

    accuracy                           0.76      1360
   macro avg       0.76      0.76      0.76      1360
weighted avg       0.77      0.76      0.76      1360

accuracy = 0.76


  0%|                                                                                 | 1/3395 [00:00<09:01,  6.27it/s]

model saved
Epoch: 09 | Epoch Time: 9m 19s
	Train Loss: 0.102 | Train Acc: 96.76%
	 Val. Loss: 0.942 |  Val. Acc: 76.03%


100%|██████████████████████████████████████████████████████████████████████████████| 3395/3395 [09:20<00:00,  6.05it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 340/340 [00:06<00:00, 49.26it/s]
  0%|                                                                                 | 1/3395 [00:00<08:51,  6.39it/s]

              precision    recall  f1-score   support

           0       0.68      0.80      0.73       171
           1       0.60      0.65      0.62       166
           2       0.75      0.66      0.70       206
           3       0.78      0.79      0.78       197
           4       0.81      0.84      0.82       191
           5       0.83      0.70      0.76       237
           6       0.80      0.83      0.82       192

    accuracy                           0.75      1360
   macro avg       0.75      0.75      0.75      1360
weighted avg       0.76      0.75      0.75      1360

accuracy = 0.75
Epoch: 10 | Epoch Time: 9m 27s
	Train Loss: 0.111 | Train Acc: 96.37%
	 Val. Loss: 1.193 |  Val. Acc: 75.15%


100%|██████████████████████████████████████████████████████████████████████████████| 3395/3395 [09:08<00:00,  6.19it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 340/340 [00:07<00:00, 47.90it/s]
  0%|                                                                                 | 1/3395 [00:00<09:11,  6.15it/s]

              precision    recall  f1-score   support

           0       0.59      0.78      0.68       152
           1       0.64      0.63      0.64       185
           2       0.72      0.73      0.72       176
           3       0.77      0.79      0.78       195
           4       0.70      0.85      0.77       165
           5       0.82      0.68      0.75       242
           6       0.88      0.71      0.79       245

    accuracy                           0.73      1360
   macro avg       0.73      0.74      0.73      1360
weighted avg       0.75      0.73      0.74      1360

accuracy = 0.73
Epoch: 11 | Epoch Time: 9m 15s
	Train Loss: 0.080 | Train Acc: 97.41%
	 Val. Loss: 1.152 |  Val. Acc: 73.46%


100%|██████████████████████████████████████████████████████████████████████████████| 3395/3395 [09:06<00:00,  6.21it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 340/340 [00:06<00:00, 49.55it/s]
  0%|                                                                                 | 1/3395 [00:00<09:22,  6.04it/s]

              precision    recall  f1-score   support

           0       0.69      0.79      0.73       173
           1       0.66      0.64      0.65       186
           2       0.62      0.67      0.64       168
           3       0.81      0.73      0.77       224
           4       0.73      0.83      0.78       177
           5       0.81      0.70      0.75       232
           6       0.81      0.81      0.81       200

    accuracy                           0.74      1360
   macro avg       0.73      0.74      0.73      1360
weighted avg       0.74      0.74      0.74      1360

accuracy = 0.74
Epoch: 12 | Epoch Time: 9m 13s
	Train Loss: 0.084 | Train Acc: 97.22%
	 Val. Loss: 1.162 |  Val. Acc: 73.68%


100%|██████████████████████████████████████████████████████████████████████████████| 3395/3395 [09:11<00:00,  6.15it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 340/340 [00:06<00:00, 49.76it/s]
  0%|                                                                                 | 1/3395 [00:00<08:58,  6.31it/s]

              precision    recall  f1-score   support

           0       0.62      0.82      0.70       152
           1       0.76      0.55      0.64       249
           2       0.74      0.70      0.72       190
           3       0.82      0.81      0.82       202
           4       0.79      0.81      0.80       195
           5       0.81      0.81      0.81       201
           6       0.76      0.88      0.81       171

    accuracy                           0.76      1360
   macro avg       0.76      0.77      0.76      1360
weighted avg       0.76      0.76      0.75      1360

accuracy = 0.76
Epoch: 13 | Epoch Time: 9m 18s
	Train Loss: 0.062 | Train Acc: 97.99%
	 Val. Loss: 1.404 |  Val. Acc: 75.66%


100%|██████████████████████████████████████████████████████████████████████████████| 3395/3395 [09:11<00:00,  6.15it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 340/340 [00:07<00:00, 48.36it/s]
  0%|                                                                                 | 1/3395 [00:00<09:35,  5.90it/s]

              precision    recall  f1-score   support

           0       0.63      0.80      0.71       157
           1       0.63      0.66      0.65       173
           2       0.60      0.79      0.68       137
           3       0.82      0.75      0.79       220
           4       0.73      0.89      0.80       164
           5       0.85      0.72      0.78       235
           6       0.91      0.66      0.77       274

    accuracy                           0.74      1360
   macro avg       0.74      0.75      0.74      1360
weighted avg       0.77      0.74      0.75      1360

accuracy = 0.74
Epoch: 14 | Epoch Time: 9m 19s
	Train Loss: 0.063 | Train Acc: 98.03%
	 Val. Loss: 1.338 |  Val. Acc: 74.34%


100%|██████████████████████████████████████████████████████████████████████████████| 3395/3395 [09:12<00:00,  6.15it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 340/340 [00:06<00:00, 49.18it/s]

              precision    recall  f1-score   support

           0       0.69      0.79      0.74       175
           1       0.66      0.63      0.65       188
           2       0.68      0.71      0.70       173
           3       0.73      0.78      0.76       189
           4       0.90      0.72      0.80       251
           5       0.79      0.83      0.81       191
           6       0.82      0.85      0.83       193

    accuracy                           0.76      1360
   macro avg       0.75      0.76      0.75      1360
weighted avg       0.76      0.76      0.76      1360

accuracy = 0.76
Epoch: 15 | Epoch Time: 9m 19s
	Train Loss: 0.079 | Train Acc: 97.60%
	 Val. Loss: 1.234 |  Val. Acc: 75.66%





In [20]:
del xlm_roberta
del cnn

In [21]:
import gc
gc.collect()

710