In [1]:
# # This Python 3 environment comes with many helpful analytics libraries installed
# # It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# # For example, here's several helpful packages to load

# import numpy as np # linear algebra
# import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# # Input data files are available in the read-only "../input/" directory
# # For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# # You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# # You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.29.2-py3-none-any.whl (7.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.1/7.1 MB[0m [31m61.9 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.14.1 (from transformers)
  Downloading huggingface_hub-0.14.1-py3-none-any.whl (224 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.5/224.5 kB[0m [31m30.5 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m126.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.14.1 tokenizers-0.13.3 transformers-4.29.2


In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [16]:
from typing import Dict, List
from transformers import AutoTokenizer, AutoModel
import numpy as np
import pandas as pd
import torch
from sklearn.metrics import f1_score, accuracy_score, classification_report
from torch import nn
from torch.utils.data import DataLoader
from tqdm.auto import tqdm
import torch.nn.functional as F
import sklearn
import sys
import random
import math
import os
import argparse
import logging
from sklearn.utils import class_weight
from torch.cuda.amp import autocast, GradScaler
from transformers import BertModel
from torch.cuda.amp import autocast, GradScaler
from transformers import get_linear_schedule_with_warmup, AdamW
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from torch.utils.data import Dataset, DataLoader

logging.basicConfig(
    format="%(asctime)s : %(levelname)s : %(message)s", level=logging.INFO
)
logger = logging.getLogger(__name__)

def seed_everything(seed_value=42):
    os.environ["PYTHONHASHSEED"] = str(seed_value)
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    torch.cuda.manual_seed_all(seed_value)
    return seed_value


class DataPreparation(Dataset):
    
    def __init__(self, tokenizer, data, intelligence='verb', max_length=None):
        
        self.tokenizer = tokenizer
        self.data = data
        self.intell = intelligence
        
        if max_length == None:
            max_length_counted = data["text"].str.split(' ').str.len().max(axis=0)
            self.max_length = max_length_counted if max_length_counted < 512 else 512
        else:
            self.max_length = max_length


    def __len__(self):
        return len(self.data)


    def tokenize(self, text):

        tokens = self.tokenizer(
            text,
            add_special_tokens=True,
            max_length=self.max_length,
            return_token_type_ids=False,
            padding="max_length",
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt')

        return tokens

     
    def __getitem__(self, index):
        
        source_text = self.data['text'].iloc[index]
        source = self.tokenize(source_text)

        source_ids = source["input_ids"].squeeze()
        source_mask = source["attention_mask"].squeeze()

        label = self.data[self.intell].iloc[index]
        return {
            "source_ids": source_ids.to(dtype=torch.long),
            "source_mask": source_mask.to(dtype=torch.long),
            "labels":  label, 
        }


class RNN_Block(nn.Module):
    
    def __init__(self, input_size=768, hidden_size=512, rnn='LSTM', biderectional=True):
        super().__init__()

        rnn_type = {"GRU": nn.GRU, "LSTM": nn.LSTM, "RNN": nn.RNN}
        self.rnn = rnn_type[rnn](input_size=input_size,
                            hidden_size=hidden_size,
                            num_layers=1,
                            batch_first=True,
                            bidirectional=biderectional)


    def forward(self, input_ids):
        
        rnn_output = self.rnn(input_ids)
        return rnn_output


class CNN_Block(nn.Module):
    
    def __init__(self, input_size=1024, out_size=64, kernel_size=3, stride=1, padding=1):
        super().__init__()

        self.conv_2 = nn.Conv1d(in_channels=input_size, out_channels=out_size,
                                kernel_size=2, stride=stride, padding=padding)
        self.conv_3 = nn.Conv1d(in_channels=input_size, out_channels=out_size,
                                kernel_size=3, stride=stride, padding=padding)
        self.conv_5 = nn.Conv1d(in_channels=input_size, out_channels=out_size,
                                kernel_size=5, stride=stride, padding=padding+1)
        self.relu = nn.ReLU()


    def forward(self, sequence_input):

        conv_input = sequence_input.permute(0, 2, 1) # batch_size, hidden_size, sequence_length
        cnn_output2 = self.conv_2(conv_input)
        cnn_output2 = self.relu(cnn_output2)
        cnn_output2 = F.max_pool1d(cnn_output2, kernel_size=cnn_output2.shape[2])
        cnn_output3 = self.conv_3(conv_input)
        cnn_output3 = self.relu(cnn_output3)
        cnn_output3 = F.max_pool1d(cnn_output3, kernel_size=cnn_output3.shape[2])
        cnn_output5 = self.conv_5(conv_input)
        cnn_output5 = self.relu(cnn_output5)
        cnn_output5 = F.max_pool1d(cnn_output5, kernel_size=cnn_output5.shape[2])
        cnn_output = torch.cat([cnn_output2.squeeze(dim=2), cnn_output3.squeeze(dim=2), cnn_output5.squeeze(dim=2)], dim=1)

        return cnn_output


class BIGRU_BILSTM_CNN(nn.Module):
    
    def __init__(self, out_features, hidden_size=512, hidden_size_lin=128,
                 p_spatial_dropout=0.5, out_chanels_cnn=64, kernel_size_cnn=3, 
                 stride_cnn=1, padding_cnn=1, 
                 rnn_type='LSTM', biderectional=True, pre_trained='DeepPavlov/rubert-base-cased-sentence'):
        super(BIGRU_BILSTM_CNN, self).__init__()

        self.bert = AutoModel.from_pretrained(pre_trained)
        self.layer_norm = nn.LayerNorm(32)
        self.dropout = nn.Dropout(0.5)
        self.relu = nn.ReLU()
        self.p_spatial_dropout = p_spatial_dropout
        self.bigru_block = RNN_Block(input_size=self.bert.config.hidden_size, hidden_size=hidden_size, rnn='GRU', biderectional=True) 
        self.bilstm_block = RNN_Block(input_size=self.bert.config.hidden_size, hidden_size=hidden_size, rnn='LSTM', biderectional=True) 
        
        if biderectional == True:
            self.cnn_block = CNN_Block(input_size=hidden_size*2, out_size=out_chanels_cnn,
                                      kernel_size=kernel_size_cnn, stride=stride_cnn, padding=padding_cnn)
        else:
            self.cnn_block = CNN_Block(input_size=hidden_size, out_size=out_chanels_cnn,
                                      kernel_size=kernel_size_cnn, stride=stride_cnn, padding=padding_cnn)

        self.linear_modules = nn.ModuleList([torch.nn.Linear(out_chanels_cnn*3*2, 32),
                                          torch.nn.Linear(32, 32),
                                          torch.nn.Linear(32, out_features)])
                                          
        for param in self.bert.parameters():
            param.requires_grad = False
    
    def forward(self, input_ids, attention_mask):


        encoded_layers = self.bert(input_ids=input_ids,attention_mask=attention_mask, output_hidden_states=True)
        x = encoded_layers['last_hidden_state']#.permute(1, 0, 2)
#         # spatial dropout

        embeddings = x.unsqueeze(2)    # (N, T, 1, K)
        embeddings = embeddings.permute(0, 3, 2, 1)  # (N, K, 1, T)
        embeddings = F.dropout2d(embeddings, self.p_spatial_dropout)  # (N, K, 1, T), some features are masked
        embeddings = embeddings.permute(0, 3, 2, 1)  # (N, T, 1, K)
        x = embeddings.squeeze(2)  # (N, T, K)
        bigru,_ = self.bigru_block(x) #(N,L,H in) - batch size, sequence length, input size
        
        bilstm, hidden_bilstm = self.bilstm_block(x)
        bilstm_cnn = self.cnn_block(bilstm)
        bigru_cnn = self.cnn_block(bigru)
        x = torch.cat([bigru_cnn, bilstm_cnn], dim=1)

        h = []
        
        for lin in self.linear_modules[:-1]:
            x = lin(x)
            h.append(x)
            x = self.layer_norm(x)
            x = self.relu(x)
            x = self.dropout(x)
        
        x = self.relu(h[-1] + h[-2])
        x = self.linear_modules[-1](x)
        return x


def train(model, data_loader, device, optimizer, criterion, n_epoch):

    logger.info('Epoch #{}\n'.format(n_epoch+1))

    train_losses = []
    train_labels = []
    train_predictions = []

    progress_bar = tqdm(total=math.ceil(len(data_loader.dataset)/data_loader.batch_size), desc='Epoch {}'.format(n_epoch + 1))

    model.train()

    for _, data in enumerate(data_loader, 0):


        input_ids = data["source_ids"].to(device)
        attention_mask = data["source_mask"].to(device)
        labels = data['labels'].to(device)

        optimizer.zero_grad()

        pred = model(input_ids=input_ids, attention_mask=attention_mask)
        loss = criterion(pred, labels)
              
        loss.backward()      
        optimizer.step()
        
        lr_list.append(optimizer.state_dict()['param_groups'][0]['lr'])
        scheduler.step()
        
        predict = torch.log_softmax(pred, dim=1).argmax(dim=1)
        
        train_losses.append(loss.item())
        train_labels.extend(labels.cpu().detach().numpy())
        train_predictions.extend(predict.cpu().detach().numpy())

        progress_bar.set_postfix(loss=np.mean(train_losses))
        progress_bar.update(1)
    
    progress_bar.update(1)
    progress_bar.close()
    
    print('\n\nMean Loss after epoch #{0} - {1}'.format(str(n_epoch + 1), np.mean(train_losses)))
    print('F1 score after epoch #{0} on train - {1}\n'.format(str(n_epoch + 1), f1_score(train_labels, train_predictions, average='macro')))
    print('Accuracy score after epoch #{0} on train - {1}\n'.format(str(n_epoch + 1), accuracy_score(train_labels, train_predictions)))

    print(classification_report(train_labels, train_predictions))
    return train_labels, train_predictions


def validating(model, data_loader, device, criterion, n_epoch):

    val_losses, val_labels, val_predictions = [], [], []

    progress_bar = tqdm(total=math.ceil(len(data_loader.dataset)/data_loader.batch_size),
                        desc='Epoch {}'.format(n_epoch + 1))

    model.eval()

    for _, data in enumerate(data_loader, 0):
          
        input_ids = data["source_ids"].to(device)
        attention_mask = data["source_mask"].to(device)
        labels = data['labels'].to(device)

        with torch.no_grad():
            pred = model(input_ids, attention_mask)

            loss = criterion(pred, labels)
          
            predict = torch.log_softmax(pred, dim=1).argmax(dim=1)

        val_losses.append(loss.item())
        val_labels.extend(labels.cpu().detach().numpy())
        val_predictions.extend(predict.cpu().detach().numpy())

        progress_bar.set_postfix(loss=np.mean(val_losses))
        progress_bar.update(1)

    progress_bar.update(1)
    progress_bar.close()
    
    
    valid_stats.append(
        {
            'Val Loss': np.mean(val_losses)
        }
    )

    print('\n\nMean Loss after epoch #{0} - {1}'.format(str(n_epoch + 1), np.mean(val_losses)))
    print('F1 score after epoch #{0} on validation - {1}\n'.format(str(n_epoch + 1), f1_score(val_labels, val_predictions, average='macro')))
    print('Accuracy score after epoch #{0} on validation - {1}\n'.format(str(n_epoch + 1), accuracy_score(val_labels, val_predictions)))
    
    print(classification_report(val_labels, val_predictions))
    return valid_stats


def test(data_loader, device, id_name):
    
    model.load_state_dict(torch.load(f'results/model_with_cnn_justcnn_{id_name}.pth'))
    
    test_labels, test_predictions = [], []

    model.eval()

    for _, data in enumerate(data_loader, 0):
          
        input_ids = data["source_ids"].to(device)
        attention_mask = data["source_mask"].to(device)
        labels = data['labels'].to(device)

        with torch.no_grad():
            pred = model(input_ids, attention_mask)
          
        predict = torch.log_softmax(pred, dim=1).argmax(dim=1)

        test_labels.extend(labels.cpu().detach().numpy())
        test_predictions.extend(predict.cpu().detach().numpy())

    print('F1 macro score on test - {0}\n'.format(f1_score(test_labels, test_predictions, average='macro')))
    print('F1 score on test - {0}\n'.format(f1_score(test_labels, test_predictions, average='weighted')))
    print('Accuracy score on test - {0}\n'.format(accuracy_score(test_labels, test_predictions)))
    
    print(classification_report(test_labels, test_predictions))


def evaluate(model, train_dataset, val_dataset, device, epochs, target_value, weights):
    
    model = model.to(device)
    lr = 2e-4 
    optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss(weight=weights,reduction='mean').to(device)
    global lr_list
    lr_list = []
    global scheduler
    total_steps = len(train_dataset) * epochs
    scheduler = get_linear_schedule_with_warmup(optimizer,
                                                num_warmup_steps=int(total_steps * 0.1),
                                                num_training_steps=total_steps)
    global valid_stats
    valid_stats = []
    best_valid_loss = float('inf')

    for epoch in range(epochs):
        # train
        try:
            train(model, train_dataset, device, optimizer, criterion,  epoch)
            # # validate
            validating(model, val_dataset, device, criterion, epoch)

            if valid_stats[epoch]['Val Loss'] < best_valid_loss:
                best_valid_loss = valid_stats[epoch]['Val Loss']

                name_to_save = f'model_with_cnn_nolayernorm_{target_value}'
                if os.path.isfile('results/'+name_to_save+'.pth'):
                    os.remove('results/'+name_to_save+'.pth')
                    torch.save(model.state_dict(), 'results/'+name_to_save+'.pth')
                else:
                    if not os.path.isdir('results'):
                        os.mkdir('results')
                    torch.save(model.state_dict(), 'results/'+name_to_save+'.pth')
        except KeyboardInterrupt:
            break

In [17]:
%cd  /content/drive/MyDrive/вышка/диплом

/content/drive/MyDrive/вышка/диплом


In [24]:
path_to_data = 'dataset_all_nlp_features_target_classes_no_naive (1).csv'
target_value = 'verb'
path_to_model = 'DeepPavlov/rubert-base-cased'
epochs = 20
seed_value = 42

_ = seed_everything(seed_value)
LR = 2e-5
maxlength = 120
minlength = 2
bsize = 8

dataset = pd.read_csv(path_to_data, sep='\t')

dataset = dataset[dataset.question_id != '129_Чтение текста - видео']

if target_value == 'raven':
            dataset = dataset[dataset["raven"] > 0]
if target_value == 'verb':
            dataset = dataset[dataset["verb"] > 0]
        
tokenizer = AutoTokenizer.from_pretrained(path_to_model)

intelligence = target_value+'_classes'

dataset = dataset[dataset['N_words'] > minlength]

dataset[intelligence] = dataset[intelligence].astype(int)

train_data, extra_data = train_test_split(dataset, test_size=0.22,
                                        stratify=dataset[intelligence],
                                        random_state=seed_value)

vaild_data, test_data = train_test_split(extra_data, test_size=0.45,
                                        stratify=extra_data[intelligence],
                                        random_state=seed_value)


train_dataset_data = DataPreparation(
            tokenizer=tokenizer,
            data = train_data,
            intelligence = intelligence,
            max_length = maxlength
        )

val_dataset_data = DataPreparation(
            tokenizer=tokenizer,
            data = vaild_data,
            intelligence = intelligence,
            max_length = maxlength
        )

test_dataset_data = DataPreparation(
            tokenizer=tokenizer,
            data = test_data,
            intelligence = intelligence,
            max_length = maxlength
        )

weights = class_weight.compute_class_weight(class_weight='balanced',classes=np.unique(dataset[intelligence]), y=dataset[intelligence].to_numpy())

wights_tensor = torch.tensor(weights,dtype=torch.float)

train_dataset = DataLoader(train_dataset_data, batch_size=bsize, drop_last=True, shuffle=True)
val_dataset = DataLoader(val_dataset_data, batch_size=bsize)
test_dataset = DataLoader(test_dataset_data, batch_size=bsize)
        
model = BIGRU_BILSTM_CNN(pre_trained=path_to_model, out_features=len(dataset[intelligence].unique()))


device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
evaluate(model=model, train_dataset=train_dataset, val_dataset=val_dataset, device=device, epochs=epochs, target_value=target_value, weights=wights_tensor)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataset[intelligence] = dataset[intelligence].astype(int)
Some weights of the model checkpoint at DeepPavlov/rubert-base-cased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT 

Epoch 1:   0%|          | 0/632 [00:00<?, ?it/s]



Mean Loss after epoch #1 - 1.6078167687127405
F1 score after epoch #1 on train - 0.19500166531004215

Accuracy score after epoch #1 on train - 0.2014659270998415

              precision    recall  f1-score   support

           0       0.24      0.32      0.27       946
           1       0.15      0.29      0.20       771
           2       0.27      0.12      0.16      1282
           3       0.25      0.07      0.11      1258
           4       0.18      0.33      0.24       791

    accuracy                           0.20      5048
   macro avg       0.22      0.22      0.20      5048
weighted avg       0.23      0.20      0.19      5048



Epoch 1:   0%|          | 0/98 [00:00<?, ?it/s]



Mean Loss after epoch #1 - 1.5610994806095047
F1 score after epoch #1 on validation - 0.21272485769993293

Accuracy score after epoch #1 on validation - 0.2413793103448276

              precision    recall  f1-score   support

           0       0.31      0.41      0.35       147
           1       0.18      0.38      0.24       120
           2       0.22      0.01      0.02       198
           3       0.28      0.07      0.11       195
           4       0.25      0.55      0.34       123

    accuracy                           0.24       783
   macro avg       0.25      0.28      0.21       783
weighted avg       0.25      0.24      0.19       783



Epoch 2:   0%|          | 0/632 [00:00<?, ?it/s]



Mean Loss after epoch #2 - 1.5569132617460755
F1 score after epoch #2 on train - 0.2509142319761419

Accuracy score after epoch #2 on train - 0.26822503961965133

              precision    recall  f1-score   support

           0       0.31      0.54      0.39       947
           1       0.16      0.15      0.16       771
           2       0.30      0.13      0.18      1280
           3       0.31      0.14      0.20      1259
           4       0.25      0.47      0.32       791

    accuracy                           0.27      5048
   macro avg       0.27      0.29      0.25      5048
weighted avg       0.28      0.27      0.24      5048



Epoch 2:   0%|          | 0/98 [00:00<?, ?it/s]



Mean Loss after epoch #2 - 1.5629974190069704
F1 score after epoch #2 on validation - 0.21941109290425764

Accuracy score after epoch #2 on validation - 0.26947637292464877

              precision    recall  f1-score   support

           0       0.31      0.53      0.39       147
           1       0.12      0.04      0.06       120
           2       0.20      0.01      0.02       198
           3       0.29      0.25      0.27       195
           4       0.25      0.63      0.36       123

    accuracy                           0.27       783
   macro avg       0.23      0.29      0.22       783
weighted avg       0.24      0.27      0.21       783



Epoch 3:   0%|          | 0/632 [00:00<?, ?it/s]



Mean Loss after epoch #3 - 1.5443560412115227
F1 score after epoch #3 on train - 0.25484461458090885

Accuracy score after epoch #3 on train - 0.2731774960380349

              precision    recall  f1-score   support

           0       0.32      0.61      0.42       947
           1       0.17      0.15      0.16       771
           2       0.30      0.13      0.18      1280
           3       0.28      0.19      0.22      1259
           4       0.25      0.37      0.30       791

    accuracy                           0.27      5048
   macro avg       0.26      0.29      0.25      5048
weighted avg       0.27      0.27      0.25      5048



Epoch 3:   0%|          | 0/98 [00:00<?, ?it/s]



Mean Loss after epoch #3 - 1.5351694749326121
F1 score after epoch #3 on validation - 0.22960761500009158

Accuracy score after epoch #3 on validation - 0.2707535121328225

              precision    recall  f1-score   support

           0       0.30      0.63      0.40       147
           1       0.20      0.23      0.21       120
           2       0.31      0.07      0.12       198
           3       0.28      0.03      0.05       195
           4       0.27      0.59      0.37       123

    accuracy                           0.27       783
   macro avg       0.27      0.31      0.23       783
weighted avg       0.28      0.27      0.21       783



Epoch 4:   0%|          | 0/632 [00:00<?, ?it/s]



Mean Loss after epoch #4 - 1.5326063241898162
F1 score after epoch #4 on train - 0.26321730778883873

Accuracy score after epoch #4 on train - 0.28387480190174325

              precision    recall  f1-score   support

           0       0.32      0.63      0.42       947
           1       0.18      0.16      0.17       770
           2       0.29      0.11      0.16      1279
           3       0.32      0.21      0.25      1260
           4       0.26      0.39      0.31       792

    accuracy                           0.28      5048
   macro avg       0.27      0.30      0.26      5048
weighted avg       0.28      0.28      0.26      5048



Epoch 4:   0%|          | 0/98 [00:00<?, ?it/s]



Mean Loss after epoch #4 - 1.532336281270397
F1 score after epoch #4 on validation - 0.24171235420959544

Accuracy score after epoch #4 on validation - 0.28991060025542786

              precision    recall  f1-score   support

           0       0.31      0.63      0.41       147
           1       0.11      0.02      0.03       120
           2       0.35      0.10      0.15       198
           3       0.31      0.22      0.26       195
           4       0.26      0.58      0.36       123

    accuracy                           0.29       783
   macro avg       0.27      0.31      0.24       783
weighted avg       0.28      0.29      0.24       783



Epoch 5:   0%|          | 0/632 [00:00<?, ?it/s]



Mean Loss after epoch #5 - 1.5191309259357242
F1 score after epoch #5 on train - 0.27954986245899116

Accuracy score after epoch #5 on train - 0.2985340729001585

              precision    recall  f1-score   support

           0       0.32      0.62      0.42       947
           1       0.22      0.14      0.17       770
           2       0.33      0.16      0.22      1281
           3       0.31      0.21      0.25      1258
           4       0.27      0.44      0.33       792

    accuracy                           0.30      5048
   macro avg       0.29      0.31      0.28      5048
weighted avg       0.30      0.30      0.28      5048



Epoch 5:   0%|          | 0/98 [00:00<?, ?it/s]



Mean Loss after epoch #5 - 1.5312615645175078
F1 score after epoch #5 on validation - 0.24904384857782133

Accuracy score after epoch #5 on validation - 0.3065134099616858

              precision    recall  f1-score   support

           0       0.32      0.59      0.41       147
           1       0.27      0.03      0.05       120
           2       0.31      0.40      0.35       198
           3       0.23      0.03      0.05       195
           4       0.30      0.52      0.38       123

    accuracy                           0.31       783
   macro avg       0.29      0.31      0.25       783
weighted avg       0.28      0.31      0.25       783



Epoch 6:   0%|          | 0/632 [00:00<?, ?it/s]



Mean Loss after epoch #6 - 1.51254515432518
F1 score after epoch #6 on train - 0.2807712751289072

Accuracy score after epoch #6 on train - 0.29754358161648176

              precision    recall  f1-score   support

           0       0.34      0.62      0.44       947
           1       0.20      0.15      0.17       770
           2       0.30      0.18      0.22      1281
           3       0.31      0.21      0.25      1258
           4       0.27      0.40      0.32       792

    accuracy                           0.30      5048
   macro avg       0.28      0.31      0.28      5048
weighted avg       0.29      0.30      0.28      5048



Epoch 6:   0%|          | 0/98 [00:00<?, ?it/s]



Mean Loss after epoch #6 - 1.5534778626597658
F1 score after epoch #6 on validation - 0.25925489658207435

Accuracy score after epoch #6 on validation - 0.2835249042145594

              precision    recall  f1-score   support

           0       0.37      0.28      0.32       147
           1       0.25      0.16      0.19       120
           2       0.27      0.09      0.14       198
           3       0.28      0.54      0.37       195
           4       0.26      0.31      0.28       123

    accuracy                           0.28       783
   macro avg       0.28      0.28      0.26       783
weighted avg       0.28      0.28      0.26       783



Epoch 7:   0%|          | 0/632 [00:00<?, ?it/s]



Mean Loss after epoch #7 - 1.494991640281375
F1 score after epoch #7 on train - 0.30321256434387156

Accuracy score after epoch #7 on train - 0.3167591125198098

              precision    recall  f1-score   support

           0       0.33      0.62      0.43       946
           1       0.22      0.19      0.21       770
           2       0.34      0.20      0.25      1281
           3       0.35      0.22      0.27      1259
           4       0.31      0.42      0.36       792

    accuracy                           0.32      5048
   macro avg       0.31      0.33      0.30      5048
weighted avg       0.32      0.32      0.30      5048



Epoch 7:   0%|          | 0/98 [00:00<?, ?it/s]



Mean Loss after epoch #7 - 1.5097660702102038
F1 score after epoch #7 on validation - 0.3133891852547979

Accuracy score after epoch #7 on validation - 0.3448275862068966

              precision    recall  f1-score   support

           0       0.34      0.57      0.42       147
           1       0.33      0.05      0.09       120
           2       0.33      0.33      0.33       198
           3       0.35      0.35      0.35       195
           4       0.38      0.37      0.37       123

    accuracy                           0.34       783
   macro avg       0.35      0.33      0.31       783
weighted avg       0.34      0.34      0.32       783



Epoch 8:   0%|          | 0/632 [00:00<?, ?it/s]



Mean Loss after epoch #8 - 1.4909596768120768
F1 score after epoch #8 on train - 0.3131665363477697

Accuracy score after epoch #8 on train - 0.32408874801901744

              precision    recall  f1-score   support

           0       0.35      0.61      0.45       945
           1       0.26      0.23      0.24       771
           2       0.33      0.18      0.24      1282
           3       0.34      0.25      0.29      1258
           4       0.30      0.41      0.35       792

    accuracy                           0.32      5048
   macro avg       0.32      0.34      0.31      5048
weighted avg       0.32      0.32      0.31      5048



Epoch 8:   0%|          | 0/98 [00:00<?, ?it/s]



Mean Loss after epoch #8 - 1.5276515921767877
F1 score after epoch #8 on validation - 0.2625273609271941

Accuracy score after epoch #8 on validation - 0.27458492975734355

              precision    recall  f1-score   support

           0       0.33      0.49      0.40       147
           1       0.19      0.23      0.21       120
           2       0.32      0.17      0.22       198
           3       0.24      0.10      0.14       195
           4       0.26      0.51      0.34       123

    accuracy                           0.27       783
   macro avg       0.27      0.30      0.26       783
weighted avg       0.28      0.27      0.25       783



Epoch 9:   0%|          | 0/632 [00:00<?, ?it/s]



Mean Loss after epoch #9 - 1.4722605903061747
F1 score after epoch #9 on train - 0.3137231037358714

Accuracy score after epoch #9 on train - 0.32408874801901744

              precision    recall  f1-score   support

           0       0.36      0.58      0.44       947
           1       0.22      0.21      0.21       770
           2       0.34      0.21      0.26      1282
           3       0.35      0.23      0.28      1259
           4       0.31      0.46      0.37       790

    accuracy                           0.32      5048
   macro avg       0.32      0.34      0.31      5048
weighted avg       0.32      0.32      0.31      5048



Epoch 9:   0%|          | 0/98 [00:00<?, ?it/s]



Mean Loss after epoch #9 - 1.5326693617567724
F1 score after epoch #9 on validation - 0.27172626165511493

Accuracy score after epoch #9 on validation - 0.3001277139208174

              precision    recall  f1-score   support

           0       0.33      0.65      0.44       147
           1       0.20      0.15      0.17       120
           2       0.36      0.19      0.25       198
           3       0.29      0.09      0.13       195
           4       0.28      0.54      0.37       123

    accuracy                           0.30       783
   macro avg       0.29      0.32      0.27       783
weighted avg       0.30      0.30      0.26       783



Epoch 10:   0%|          | 0/632 [00:00<?, ?it/s]



Mean Loss after epoch #10 - 1.4510347695811614
F1 score after epoch #10 on train - 0.33837398609602987

Accuracy score after epoch #10 on train - 0.347068145800317

              precision    recall  f1-score   support

           0       0.39      0.62      0.48       946
           1       0.26      0.26      0.26       771
           2       0.35      0.21      0.26      1282
           3       0.38      0.27      0.31      1259
           4       0.33      0.46      0.38       790

    accuracy                           0.35      5048
   macro avg       0.34      0.36      0.34      5048
weighted avg       0.35      0.35      0.33      5048



Epoch 10:   0%|          | 0/98 [00:00<?, ?it/s]



Mean Loss after epoch #10 - 1.5451486816211624
F1 score after epoch #10 on validation - 0.2814089251217719

Accuracy score after epoch #10 on validation - 0.3243933588761175

              precision    recall  f1-score   support

           0       0.33      0.60      0.42       147
           1       0.30      0.06      0.10       120
           2       0.32      0.29      0.31       198
           3       0.33      0.42      0.37       195
           4       0.29      0.16      0.21       123

    accuracy                           0.32       783
   macro avg       0.32      0.31      0.28       783
weighted avg       0.32      0.32      0.30       783



Epoch 11:   0%|          | 0/632 [00:00<?, ?it/s]



Mean Loss after epoch #11 - 1.442519616211651
F1 score after epoch #11 on train - 0.3483930490312443

Accuracy score after epoch #11 on train - 0.3579635499207607

              precision    recall  f1-score   support

           0       0.39      0.62      0.48       946
           1       0.26      0.25      0.25       771
           2       0.37      0.24      0.29      1281
           3       0.39      0.32      0.35      1259
           4       0.34      0.41      0.37       791

    accuracy                           0.36      5048
   macro avg       0.35      0.37      0.35      5048
weighted avg       0.36      0.36      0.35      5048



Epoch 11:   0%|          | 0/98 [00:00<?, ?it/s]



Mean Loss after epoch #11 - 1.5161852994743659
F1 score after epoch #11 on validation - 0.3042435140014924

Accuracy score after epoch #11 on validation - 0.31417624521072796

              precision    recall  f1-score   support

           0       0.34      0.62      0.44       147
           1       0.20      0.23      0.21       120
           2       0.32      0.22      0.26       198
           3       0.36      0.18      0.24       195
           4       0.35      0.39      0.37       123

    accuracy                           0.31       783
   macro avg       0.31      0.33      0.30       783
weighted avg       0.32      0.31      0.30       783



Epoch 12:   0%|          | 0/632 [00:00<?, ?it/s]



Mean Loss after epoch #12 - 1.4207508852100221
F1 score after epoch #12 on train - 0.3633710612826042

Accuracy score after epoch #12 on train - 0.3706418383518225

              precision    recall  f1-score   support

           0       0.40      0.63      0.49       947
           1       0.26      0.29      0.28       771
           2       0.40      0.26      0.31      1282
           3       0.41      0.30      0.35      1257
           4       0.36      0.43      0.39       791

    accuracy                           0.37      5048
   macro avg       0.37      0.38      0.36      5048
weighted avg       0.37      0.37      0.36      5048



Epoch 12:   0%|          | 0/98 [00:00<?, ?it/s]



Mean Loss after epoch #12 - 1.5304092101904812
F1 score after epoch #12 on validation - 0.3279499222930571

Accuracy score after epoch #12 on validation - 0.3448275862068966

              precision    recall  f1-score   support

           0       0.38      0.50      0.43       147
           1       0.25      0.18      0.21       120
           2       0.34      0.45      0.39       198
           3       0.39      0.16      0.23       195
           4       0.34      0.43      0.38       123

    accuracy                           0.34       783
   macro avg       0.34      0.35      0.33       783
weighted avg       0.35      0.34      0.33       783



Epoch 13:   0%|          | 0/632 [00:00<?, ?it/s]



Mean Loss after epoch #13 - 1.4073758249048574
F1 score after epoch #13 on train - 0.36618289731990056

Accuracy score after epoch #13 on train - 0.3712361331220285

              precision    recall  f1-score   support

           0       0.41      0.61      0.49       947
           1       0.28      0.31      0.29       770
           2       0.39      0.29      0.33      1280
           3       0.40      0.28      0.33      1259
           4       0.35      0.45      0.39       792

    accuracy                           0.37      5048
   macro avg       0.37      0.38      0.37      5048
weighted avg       0.37      0.37      0.36      5048



Epoch 13:   0%|          | 0/98 [00:00<?, ?it/s]



Mean Loss after epoch #13 - 1.514310067405506
F1 score after epoch #13 on validation - 0.3182744302097546

Accuracy score after epoch #13 on validation - 0.3282247765006386

              precision    recall  f1-score   support

           0       0.37      0.55      0.44       147
           1       0.20      0.20      0.20       120
           2       0.34      0.29      0.32       198
           3       0.37      0.19      0.25       195
           4       0.33      0.46      0.38       123

    accuracy                           0.33       783
   macro avg       0.32      0.34      0.32       783
weighted avg       0.33      0.33      0.32       783



Epoch 14:   0%|          | 0/632 [00:00<?, ?it/s]



Mean Loss after epoch #14 - 1.3826816536356266
F1 score after epoch #14 on train - 0.3809000954597936

Accuracy score after epoch #14 on train - 0.38549920760697304

              precision    recall  f1-score   support

           0       0.42      0.61      0.49       947
           1       0.29      0.31      0.30       771
           2       0.39      0.30      0.34      1280
           3       0.43      0.30      0.35      1260
           4       0.38      0.46      0.41       790

    accuracy                           0.39      5048
   macro avg       0.38      0.40      0.38      5048
weighted avg       0.39      0.39      0.38      5048



Epoch 14:   0%|          | 0/98 [00:00<?, ?it/s]



Mean Loss after epoch #14 - 1.5814499173845564
F1 score after epoch #14 on validation - 0.26558572542509634

Accuracy score after epoch #14 on validation - 0.2720306513409962

              precision    recall  f1-score   support

           0       0.34      0.50      0.40       147
           1       0.18      0.22      0.20       120
           2       0.27      0.16      0.20       198
           3       0.29      0.23      0.26       195
           4       0.25      0.29      0.27       123

    accuracy                           0.27       783
   macro avg       0.27      0.28      0.27       783
weighted avg       0.27      0.27      0.26       783



Epoch 15:   0%|          | 0/632 [00:00<?, ?it/s]



Mean Loss after epoch #15 - 1.3686724921036069
F1 score after epoch #15 on train - 0.38663215548598573

Accuracy score after epoch #15 on train - 0.3920364500792393

              precision    recall  f1-score   support

           0       0.43      0.64      0.51       944
           1       0.31      0.33      0.32       771
           2       0.41      0.28      0.34      1281
           3       0.43      0.31      0.36      1260
           4       0.36      0.48      0.41       792

    accuracy                           0.39      5048
   macro avg       0.39      0.41      0.39      5048
weighted avg       0.40      0.39      0.38      5048



Epoch 15:   0%|          | 0/98 [00:00<?, ?it/s]



Mean Loss after epoch #15 - 1.548145513145291
F1 score after epoch #15 on validation - 0.308655004948189

Accuracy score after epoch #15 on validation - 0.31545338441890164

              precision    recall  f1-score   support

           0       0.37      0.47      0.41       147
           1       0.19      0.22      0.20       120
           2       0.32      0.30      0.31       198
           3       0.37      0.16      0.23       195
           4       0.32      0.49      0.39       123

    accuracy                           0.32       783
   macro avg       0.31      0.33      0.31       783
weighted avg       0.32      0.32      0.31       783



Epoch 16:   0%|          | 0/632 [00:00<?, ?it/s]



Mean Loss after epoch #16 - 1.3459881542223946
F1 score after epoch #16 on train - 0.41601070982528654

Accuracy score after epoch #16 on train - 0.42056259904912835

              precision    recall  f1-score   support

           0       0.45      0.65      0.53       947
           1       0.31      0.36      0.33       770
           2       0.43      0.31      0.36      1281
           3       0.50      0.35      0.42      1259
           4       0.40      0.49      0.44       791

    accuracy                           0.42      5048
   macro avg       0.42      0.43      0.42      5048
weighted avg       0.43      0.42      0.41      5048



Epoch 16:   0%|          | 0/98 [00:00<?, ?it/s]



Mean Loss after epoch #16 - 1.5457238737417727
F1 score after epoch #16 on validation - 0.30181716989607

Accuracy score after epoch #16 on validation - 0.30395913154533843

              precision    recall  f1-score   support

           0       0.35      0.48      0.40       147
           1       0.20      0.29      0.24       120
           2       0.32      0.22      0.26       198
           3       0.33      0.28      0.30       195
           4       0.33      0.28      0.31       123

    accuracy                           0.30       783
   macro avg       0.31      0.31      0.30       783
weighted avg       0.31      0.30      0.30       783



Epoch 17:   0%|          | 0/632 [00:00<?, ?it/s]



Mean Loss after epoch #17 - 1.3153104053813192
F1 score after epoch #17 on train - 0.42925750996276657

Accuracy score after epoch #17 on train - 0.43225039619651345

              precision    recall  f1-score   support

           0       0.47      0.63      0.54       946
           1       0.34      0.40      0.37       771
           2       0.45      0.35      0.39      1282
           3       0.48      0.37      0.42      1258
           4       0.40      0.48      0.43       791

    accuracy                           0.43      5048
   macro avg       0.43      0.44      0.43      5048
weighted avg       0.44      0.43      0.43      5048



Epoch 17:   0%|          | 0/98 [00:00<?, ?it/s]



Mean Loss after epoch #17 - 1.6166832276753016
F1 score after epoch #17 on validation - 0.31389613254551235

Accuracy score after epoch #17 on validation - 0.31928480204342274

              precision    recall  f1-score   support

           0       0.32      0.52      0.40       147
           1       0.23      0.23      0.23       120
           2       0.36      0.29      0.32       198
           3       0.32      0.24      0.27       195
           4       0.36      0.34      0.35       123

    accuracy                           0.32       783
   macro avg       0.32      0.32      0.31       783
weighted avg       0.32      0.32      0.31       783



Epoch 18:   0%|          | 0/632 [00:00<?, ?it/s]



Mean Loss after epoch #18 - 1.3069158705591966
F1 score after epoch #18 on train - 0.4381731604545462

Accuracy score after epoch #18 on train - 0.4405705229793978

              precision    recall  f1-score   support

           0       0.48      0.65      0.55       947
           1       0.36      0.41      0.38       770
           2       0.45      0.33      0.38      1282
           3       0.48      0.37      0.42      1259
           4       0.42      0.50      0.46       790

    accuracy                           0.44      5048
   macro avg       0.44      0.45      0.44      5048
weighted avg       0.44      0.44      0.43      5048



Epoch 18:   0%|          | 0/98 [00:00<?, ?it/s]



Mean Loss after epoch #18 - 1.5727407214592914
F1 score after epoch #18 on validation - 0.292483062824607

Accuracy score after epoch #18 on validation - 0.29757343550446996

              precision    recall  f1-score   support

           0       0.37      0.44      0.40       147
           1       0.20      0.20      0.20       120
           2       0.32      0.30      0.31       198
           3       0.30      0.19      0.24       195
           4       0.26      0.38      0.31       123

    accuracy                           0.30       783
   macro avg       0.29      0.30      0.29       783
weighted avg       0.30      0.30      0.29       783



Epoch 19:   0%|          | 0/632 [00:00<?, ?it/s]



Mean Loss after epoch #19 - 1.2920036952582479
F1 score after epoch #19 on train - 0.4370227312015687

Accuracy score after epoch #19 on train - 0.4401743264659271

              precision    recall  f1-score   support

           0       0.48      0.64      0.55       946
           1       0.35      0.39      0.37       771
           2       0.46      0.35      0.40      1281
           3       0.49      0.36      0.42      1259
           4       0.40      0.52      0.45       791

    accuracy                           0.44      5048
   macro avg       0.44      0.45      0.44      5048
weighted avg       0.45      0.44      0.44      5048



Epoch 19:   0%|          | 0/98 [00:00<?, ?it/s]



Mean Loss after epoch #19 - 1.57714920141259
F1 score after epoch #19 on validation - 0.3071399528156998

Accuracy score after epoch #19 on validation - 0.3090676883780332

              precision    recall  f1-score   support

           0       0.37      0.45      0.41       147
           1       0.19      0.25      0.22       120
           2       0.34      0.28      0.31       198
           3       0.32      0.25      0.28       195
           4       0.31      0.33      0.32       123

    accuracy                           0.31       783
   macro avg       0.31      0.31      0.31       783
weighted avg       0.31      0.31      0.31       783



Epoch 20:   0%|          | 0/632 [00:00<?, ?it/s]



Mean Loss after epoch #20 - 1.2651397160796092
F1 score after epoch #20 on train - 0.45024384365382203

Accuracy score after epoch #20 on train - 0.45106973058637084

              precision    recall  f1-score   support

           0       0.49      0.64      0.56       946
           1       0.37      0.45      0.40       770
           2       0.48      0.35      0.40      1282
           3       0.48      0.36      0.41      1259
           4       0.43      0.53      0.47       791

    accuracy                           0.45      5048
   macro avg       0.45      0.47      0.45      5048
weighted avg       0.46      0.45      0.45      5048



Epoch 20:   0%|          | 0/98 [00:00<?, ?it/s]



Mean Loss after epoch #20 - 1.5703312414033073
F1 score after epoch #20 on validation - 0.30717084635582265

Accuracy score after epoch #20 on validation - 0.3116219667943806

              precision    recall  f1-score   support

           0       0.37      0.47      0.41       147
           1       0.19      0.20      0.20       120
           2       0.36      0.30      0.33       198
           3       0.28      0.23      0.25       195
           4       0.32      0.37      0.34       123

    accuracy                           0.31       783
   macro avg       0.30      0.32      0.31       783
weighted avg       0.31      0.31      0.31       783



In [25]:
def test(data_loader, device, id_name):
    # model = model.to(device)
#     model = BertBaseline_ResNet(model_name=path_to_model, out_features=len(dataset[intelligence].unique()))
    
    # model = BIGRU_BILSTM_CNN(pre_trained=path_to_model, out_features=len(dataset[intelligence].unique()))

    model.load_state_dict(torch.load(f'results/model_with_cnn_nolayernorm_{id_name}.pth'))
    
    # model = model.to(device)
    test_labels, test_predictions = [], []

    model.eval()

    for _, data in enumerate(data_loader, 0):
          
        input_ids = data["source_ids"].to(device)
        attention_mask = data["source_mask"].to(device)
        labels = data['labels'].to(device)

        with torch.no_grad():
            pred = model(input_ids, attention_mask)
          
        predict = torch.log_softmax(pred, dim=1).argmax(dim=1)

        test_labels.extend(labels.cpu().detach().numpy())
        test_predictions.extend(predict.cpu().detach().numpy())

    print('F1 macro score on test - {0}\n'.format(f1_score(test_labels, test_predictions, average='macro')))
    print('F1 score on test - {0}\n'.format(f1_score(test_labels, test_predictions, average='weighted')))
    print('Accuracy score on test - {0}\n'.format(accuracy_score(test_labels, test_predictions)))
    
    print(classification_report(test_labels, test_predictions))

In [26]:
device

device(type='cuda')

In [30]:
test(train_dataset, device, target_value) 

F1 macro score on test - 0.3171624387302704

F1 score on test - 0.3297144777208272

Accuracy score on test - 0.35439778129952454

              precision    recall  f1-score   support

           0       0.37      0.62      0.46       945
           1       0.25      0.04      0.08       770
           2       0.34      0.35      0.35      1282
           3       0.36      0.37      0.36      1259
           4       0.36      0.32      0.34       792

    accuracy                           0.35      5048
   macro avg       0.34      0.34      0.32      5048
weighted avg       0.34      0.35      0.33      5048



In [None]:
test(val_dataset, device, target_value) 

In [32]:
test(val_dataset, device, target_value) 

F1 macro score on test - 0.30953606944260775

F1 score on test - 0.31903002309898026

Accuracy score on test - 0.3371647509578544

              precision    recall  f1-score   support

           0       0.35      0.56      0.43       147
           1       0.43      0.08      0.14       120
           2       0.34      0.37      0.35       198
           3       0.32      0.32      0.32       195
           4       0.32      0.28      0.30       123

    accuracy                           0.34       783
   macro avg       0.35      0.32      0.31       783
weighted avg       0.35      0.34      0.32       783



In [31]:
test(test_dataset, device, target_value) 

F1 macro score on test - 0.29015688430170494

F1 score on test - 0.30260773571799193

Accuracy score on test - 0.32710280373831774

              precision    recall  f1-score   support

           0       0.37      0.59      0.46       120
           1       0.31      0.04      0.07        98
           2       0.30      0.33      0.31       163
           3       0.32      0.35      0.34       160
           4       0.29      0.26      0.27       101

    accuracy                           0.33       642
   macro avg       0.32      0.31      0.29       642
weighted avg       0.32      0.33      0.30       642

