In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

onehot_encoder = OneHotEncoder(sparse=False)
data = pd.read_csv('bert-base-uncased_train_embeddings_with_span_diff-sum.csv')['label'].values.reshape(-1, 1)
onehot_encoder.fit(data)

class CustomDataset(Dataset):
    def __init__(self, csv_file, onehot_encoder):
        self.data = pd.read_csv(csv_file)
        # self.embeddings = [torch.load(data.iloc[i]['path_to_embeddings']).cpu() for i in range(len(data))]
        self.labels = onehot_encoder.transform(self.data['label'].values.reshape(-1, 1))

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        file_path = self.data.iloc[idx]['path_to_embeddings']
        label = self.labels[idx]

        # Load embedding from .pt file
        embedding = torch.load(file_path)

        # return embedding, torch.FloatTensor(label_onehot)
        return embedding, torch.FloatTensor(label)
    
# Paths to train and test CSV files
train_csv_path = 'bert-base-uncased_train_embeddings_with_span_raw.csv'
test_csv_path = 'bert-base-uncased_test_embeddings_with_span_raw.csv'

# Create instances of CustomDataset for train and test
train_dataset = CustomDataset(train_csv_path, onehot_encoder)
test_dataset = CustomDataset(test_csv_path, onehot_encoder)

# Define batch sizes
train_batch_size = 1
test_batch_size = 1

# Create DataLoaders for train and test datasets
train_data_loader = DataLoader(train_dataset, batch_size=train_batch_size, shuffle=True)
test_data_loader = DataLoader(test_dataset, batch_size=test_batch_size, shuffle=False)



In [2]:
train_dataset[0][0].shape

torch.Size([1, 1536])

In [3]:
import torch.nn.functional as F
import torch.nn as nn

class CustomDeepClassifier(nn.Module):
    def __init__(self, input_dim=1536, num_classes=20, num_layers=20, hidden_dim=1024):
        super(CustomDeepClassifier, self).__init__()

        layers = []
        layers.append(nn.Linear(input_dim, hidden_dim))
        layers.append(nn.ReLU())
        layers.append(nn.BatchNorm1d(1))

        for _ in range(num_layers - 2):  # Subtract 2 for the initial and final layers
            layers.append(nn.Linear(hidden_dim, hidden_dim))
            layers.append(nn.ReLU())
            layers.append(nn.BatchNorm1d(1))

        layers.append(nn.Linear(hidden_dim, num_classes))

        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        x = self.layers(x)
        x = F.softmax(x, dim=0)
        return x

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import precision_recall_fscore_support
import matplotlib.pyplot as plt
from tqdm import tqdm

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def train_validate_model(model, train_loader, val_loader, num_epochs=10, learning_rate=0.1, stepslr=10, gamma=0.9):
    model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate)
    scheduler = optim.lr_scheduler.StepLR(optimizer, stepslr, gamma=gamma)

    train_losses = []
    val_losses = []
    precision_scores = []
    recall_scores = []
    f1_scores = []

    for epoch in tqdm(range(num_epochs), total=num_epochs):
        model.train()
        running_train_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs.float())
            # print(outputs.shape, labels.shape)
            # print(outputs.squeeze(1), labels)
            loss = criterion(outputs.squeeze(1), labels)
            loss.backward()
            optimizer.step()
            running_train_loss += loss.item()
        
        # print(running_train_loss, len(train_loader))
        train_loss = running_train_loss / len(train_loader)
        train_losses.append(train_loss)

        if epoch % 10 == 0:
            model.eval()
            running_val_loss = 0.0
            all_preds = []
            all_labels = []
            with torch.no_grad():
                for inputs, labels in val_loader:
                    inputs, labels = inputs.to(device), labels.to(device)
                    outputs = model(inputs.float())
                    loss = criterion(outputs.squeeze(1), labels)
                    running_val_loss += loss.item()

                    # print(outputs.shape, labels.shape)
                    # print(outputs, labels)
                    predicted = torch.argmax(outputs.squeeze(1), 1)
                    labels = torch.argmax(labels, 1)
                    # print(predicted.shape, labels.shape)
                    # print(predicted, labels)
                    # break
                    all_preds.extend(predicted.cpu().numpy())
                    all_labels.extend(labels.cpu().numpy())

            val_loss = running_val_loss / len(val_loader)
            val_losses.append(val_loss)

            precision, recall, f1, _ = precision_recall_fscore_support(all_labels, all_preds, average='macro')
            precision_scores.append(precision)
            recall_scores.append(recall)
            f1_scores.append(f1)

            scheduler.step()

            print(f"Epoch [{epoch + 1}/{num_epochs}] "
                f"Train Loss: {train_loss:.4f} "
                f"Val Loss: {val_loss:.4f} "
                f"Precision: {precision:.4f} "
                f"Recall: {recall:.4f} "
                f"F1 Score: {f1:.4f}")

    return train_losses, val_losses, precision_scores, recall_scores, f1_scores


model = CustomDeepClassifier(input_dim=train_dataset[0][0].shape[1], num_classes=20)
train_losses, val_losses, precision_scores, recall_scores, f1_scores = train_validate_model(model, train_data_loader, test_data_loader, num_epochs=300, learning_rate=0.00005, stepslr=50, gamma=0.9)

---

In [9]:
import pandas as pd
import torch
from tqdm import tqdm

def endpoint(input_csv, output_csv, output_folder, mode):
    # Загрузка таблицы CSV
    df = pd.read_csv(input_csv)

    for index, row in tqdm(df.iterrows(), total=len(list(df.iterrows()))):
        path_to_embeddings = row['path_to_embeddings']

        # Загрузка тензора с эмбеддингами
        embeddings_tensor = torch.load(path_to_embeddings)

        # Получение первого и последнего эмбеддингов
        first_embedding = embeddings_tensor[:, 0, :]
        last_embedding = embeddings_tensor[:, -1, :]

        # Конкатенация эмбеддингов
        concatenated_embedding = torch.cat((first_embedding, last_embedding), dim=1)

        # Формирование пути для сохранения нового эмбеддинга
        new_embedding_path = f"{output_folder}/embedding_{mode}_{index}.pt"

        # Сохранение нового эмбеддинга в формате .pt
        torch.save(concatenated_embedding, new_embedding_path)

        # Запись пути до сохраненного эмбеддинга в dataframe
        df.at[index, 'path_to_embeddings'] = new_embedding_path

    # Сохранение обновленного dataframe в CSV файл
    df.to_csv(output_csv, index=False)

# Пример вызова функции
endpoint('bert-base-uncased_train_embeddings_with_span_raw.csv', 'bert-base-uncased_train_endpoint.csv', 'training/bert-base-uncased/endpoint', 'train')
endpoint('bert-base-uncased_test_embeddings_with_span_raw.csv', 'bert-base-uncased_test_endpoint.csv', 'training/bert-base-uncased/endpoint', 'test')
endpoint('roberta-base_train_embeddings_with_span_raw.csv', 'bert-base-uncased_train_endpoint.csv', 'training/roberta-base/endpoint', 'train')
endpoint('roberta-base_test_embeddings_with_span_raw.csv', 'bert-base-uncased_test_endpoint.csv', 'training/roberta-base/endpoint', 'test')
endpoint('xlnet-base-cased_train_embeddings_with_span_raw.csv', 'bert-base-uncased_train_endpoint.csv', 'training/xlnet-base-cased/endpoint', 'train')
endpoint('xlnet-base-cased_test_embeddings_with_span_raw.csv', 'bert-base-uncased_test_endpoint.csv', 'training/xlnet-base-cased/endpoint', 'test')
endpoint('SpanBERT/spanbert-base-cased_train_embeddings_with_span_raw.csv', 'bert-base-uncased_train_endpoint.csv', 'training/spanbert-base-cased/endpoint', 'train')
endpoint('SpanBERT/spanbert-base-cased_test_embeddings_with_span_raw.csv', 'bert-base-uncased_test_endpoint.csv', 'training/spanbert-base-cased/endpoint', 'test')

100%|██████████| 14528/14528 [17:04<00:00, 14.18it/s] 
100%|██████████| 3280/3280 [03:46<00:00, 14.51it/s]
100%|██████████| 14528/14528 [25:30<00:00,  9.49it/s] 
100%|██████████| 3280/3280 [03:53<00:00, 14.02it/s]
100%|██████████| 14528/14528 [22:48<00:00, 10.62it/s] 
100%|██████████| 3280/3280 [03:42<00:00, 14.77it/s]
100%|██████████| 14528/14528 [22:56<00:00, 10.55it/s] 
100%|██████████| 3280/3280 [04:15<00:00, 12.82it/s]


In [10]:
import pandas as pd
import torch
from tqdm import tqdm

def diffsum(input_csv, output_csv, output_folder, mode):
    # Загрузка таблицы CSV
    df = pd.read_csv(input_csv)

    for index, row in tqdm(df.iterrows(), total=len(list(df.iterrows()))):
        path_to_embeddings = row['path_to_embeddings']

        # Загрузка тензора с эмбеддингами
        embeddings_tensor = torch.load(path_to_embeddings)

        # Получение первого и последнего эмбеддингов
        first_embedding = embeddings_tensor[:, 0, :]
        last_embedding = embeddings_tensor[:, -1, :]

        # Конкатенация эмбеддингов
        concatenated_embedding = torch.cat((first_embedding + last_embedding, first_embedding - last_embedding), dim=1)

        # Формирование пути для сохранения нового эмбеддинга
        new_embedding_path = f"{output_folder}/embedding_{mode}_{index}.pt"

        # Сохранение нового эмбеддинга в формате .pt
        torch.save(concatenated_embedding, new_embedding_path)

        # Запись пути до сохраненного эмбеддинга в dataframe
        df.at[index, 'path_to_embeddings'] = new_embedding_path

    # Сохранение обновленного dataframe в CSV файл
    df.to_csv(output_csv, index=False)

# Пример вызова функции
diffsum('bert-base-uncased_train_embeddings_with_span_raw.csv', 'bert-base-uncased_train_diff-sumt.csv', 'training/bert-base-uncased/diff-sum', 'train')
diffsum('bert-base-uncased_test_embeddings_with_span_raw.csv', 'bert-base-uncased_test_diff-sum.csv', 'training/bert-base-uncased/diff-sum', 'test')
diffsum('roberta-base_train_embeddings_with_span_raw.csv', 'bert-base-uncased_train_diff-sum.csv', 'training/roberta-base/diff-sum', 'train')
diffsum('roberta-base_test_embeddings_with_span_raw.csv', 'bert-base-uncased_test_diff-sum.csv', 'training/roberta-base/diff-sum', 'test')
diffsum('xlnet-base-cased_train_embeddings_with_span_raw.csv', 'bert-base-uncased_train_diff-sum.csv', 'training/xlnet-base-cased/diff-sum', 'train')
diffsum('xlnet-base-cased_test_embeddings_with_span_raw.csv', 'bert-base-uncased_test_diff-sum.csv', 'training/xlnet-base-cased/diff-sum', 'test')
diffsum('SpanBERT/spanbert-base-cased_train_embeddings_with_span_raw.csv', 'bert-base-uncased_train_diff-sum.csv', 'training/spanbert-base-cased/diff-sum', 'train')
diffsum('SpanBERT/spanbert-base-cased_test_embeddings_with_span_raw.csv', 'bert-base-uncased_test_diff-sum.csv', 'training/spanbert-base-cased/diff-sum', 'test')

100%|██████████| 14528/14528 [21:35<00:00, 11.21it/s]
100%|██████████| 3280/3280 [04:07<00:00, 13.27it/s]
100%|██████████| 14528/14528 [27:57<00:00,  8.66it/s] 
100%|██████████| 3280/3280 [04:55<00:00, 11.12it/s]
100%|██████████| 14528/14528 [22:24<00:00, 10.80it/s] 
100%|██████████| 3280/3280 [04:07<00:00, 13.23it/s]
100%|██████████| 14528/14528 [22:50<00:00, 10.60it/s] 
100%|██████████| 3280/3280 [07:24<00:00,  7.38it/s]


In [12]:
import pandas as pd
import torch
from tqdm import tqdm

def coherent(input_csv, output_csv, output_folder, mode):
    # Загрузка таблицы CSV
    df = pd.read_csv(input_csv)

    for index, row in tqdm(df.iterrows(), total=len(list(df.iterrows()))):
        path_to_embeddings = row['path_to_embeddings']

        # Загрузка тензора с эмбеддингами
        embeddings_tensor = torch.load(path_to_embeddings)

        # Получение первого и последнего эмбеддингов
        first_embedding = embeddings_tensor[:, 0, :]
        last_embedding = embeddings_tensor[:, -1, :]

        # 0.46875

        n = int(round(first_embedding.shape[-1] * 0.46875))
        e1 = first_embedding[:, :n]
        e2 = last_embedding[:, :n]
        e3 = first_embedding[:, n:]
        e4 = last_embedding[:, n:]

        # Конкатенация эмбеддингов
        concatenated_embedding = torch.cat((e1, e2, torch.mul(e3, e4)), dim=1)

        # Формирование пути для сохранения нового эмбеддинга
        new_embedding_path = f"{output_folder}/embedding_{mode}_{index}.pt"

        # Сохранение нового эмбеддинга в формате .pt
        torch.save(concatenated_embedding, new_embedding_path)

        # Запись пути до сохраненного эмбеддинга в dataframe
        df.at[index, 'path_to_embeddings'] = new_embedding_path

    # Сохранение обновленного dataframe в CSV файл
    df.to_csv(output_csv, index=False)

# Пример вызова функции
coherent('bert-base-uncased_train_embeddings_with_span_raw.csv', 'bert-base-uncased_train_coherent.csv', 'training/bert-base-uncased/coherent', 'train')
coherent('bert-base-uncased_test_embeddings_with_span_raw.csv', 'bert-base-uncased_test_coherent.csv', 'training/bert-base-uncased/coherent', 'test')
coherent('roberta-base_train_embeddings_with_span_raw.csv', 'bert-base-uncased_train_coherent.csv', 'training/roberta-base/coherent', 'train')
coherent('roberta-base_test_embeddings_with_span_raw.csv', 'bert-base-uncased_test_coherent.csv', 'training/roberta-base/coherent', 'test')
coherent('xlnet-base-cased_train_embeddings_with_span_raw.csv', 'bert-base-uncased_train_coherent.csv', 'training/xlnet-base-cased/coherent', 'train')
coherent('xlnet-base-cased_test_embeddings_with_span_raw.csv', 'bert-base-uncased_test_coherent.csv', 'training/xlnet-base-cased/coherent', 'test')
coherent('SpanBERT/spanbert-base-cased_train_embeddings_with_span_raw.csv', 'bert-base-uncased_train_coherent.csv', 'training/spanbert-base-cased/coherent', 'train')
coherent('SpanBERT/spanbert-base-cased_test_embeddings_with_span_raw.csv', 'bert-base-uncased_test_coherent.csv', 'training/spanbert-base-cased/coherent', 'test')

100%|██████████| 14528/14528 [21:32<00:00, 11.24it/s]
100%|██████████| 3280/3280 [04:16<00:00, 12.79it/s]
100%|██████████| 14528/14528 [22:50<00:00, 10.60it/s] 
100%|██████████| 3280/3280 [04:11<00:00, 13.03it/s]
100%|██████████| 14528/14528 [21:54<00:00, 11.05it/s] 
100%|██████████| 3280/3280 [04:05<00:00, 13.36it/s]
100%|██████████| 14528/14528 [24:09<00:00, 10.03it/s] 
100%|██████████| 3280/3280 [04:11<00:00, 13.03it/s]


In [14]:
import pandas as pd
import torch
from tqdm import tqdm

def maxpool(input_csv, output_csv, output_folder, mode):
    # Загрузка таблицы CSV
    df = pd.read_csv(input_csv)

    for index, row in tqdm(df.iterrows(), total=len(list(df.iterrows()))):
        path_to_embeddings = row['path_to_embeddings']

        # Загрузка тензора с эмбеддингами
        embeddings_tensor = torch.load(path_to_embeddings)

        # Конкатенация эмбеддингов
        concatenated_embedding = torch.max(embeddings_tensor, dim=1)[0]

        # Формирование пути для сохранения нового эмбеддинга
        new_embedding_path = f"{output_folder}/embedding_{mode}_{index}.pt"

        # Сохранение нового эмбеддинга в формате .pt
        torch.save(concatenated_embedding, new_embedding_path)

        # Запись пути до сохраненного эмбеддинга в dataframe
        df.at[index, 'path_to_embeddings'] = new_embedding_path

    # Сохранение обновленного dataframe в CSV файл
    df.to_csv(output_csv, index=False)

# Пример вызова функции
maxpool('bert-base-uncased_train_embeddings_with_span_raw.csv', 'bert-base-uncased_train_maxpooling.csv', 'training/bert-base-uncased/maxpooling', 'train')
maxpool('bert-base-uncased_test_embeddings_with_span_raw.csv', 'bert-base-uncased_test_maxpooling.csv', 'training/bert-base-uncased/maxpooling', 'test')
maxpool('roberta-base_train_embeddings_with_span_raw.csv', 'bert-base-uncased_train_maxpooling.csv', 'training/roberta-base/maxpooling', 'train')
maxpool('roberta-base_test_embeddings_with_span_raw.csv', 'bert-base-uncased_test_maxpooling.csv', 'training/roberta-base/maxpooling', 'test')
maxpool('xlnet-base-cased_train_embeddings_with_span_raw.csv', 'bert-base-uncased_train_maxpooling.csv', 'training/xlnet-base-cased/maxpooling', 'train')
maxpool('xlnet-base-cased_test_embeddings_with_span_raw.csv', 'bert-base-uncased_test_maxpooling.csv', 'training/xlnet-base-cased/maxpooling', 'test')
maxpool('SpanBERT/spanbert-base-cased_train_embeddings_with_span_raw.csv', 'bert-base-uncased_train_maxpooling.csv', 'training/spanbert-base-cased/maxpooling', 'train')
maxpool('SpanBERT/spanbert-base-cased_test_embeddings_with_span_raw.csv', 'bert-base-uncased_test_maxpooling.csv', 'training/spanbert-base-cased/maxpooling', 'test')

100%|██████████| 14528/14528 [23:35<00:00, 10.27it/s] 
100%|██████████| 3280/3280 [04:10<00:00, 13.09it/s]
100%|██████████| 14528/14528 [24:52<00:00,  9.74it/s] 
100%|██████████| 3280/3280 [04:24<00:00, 12.40it/s]
100%|██████████| 14528/14528 [22:21<00:00, 10.83it/s] 
100%|██████████| 3280/3280 [04:10<00:00, 13.07it/s]
100%|██████████| 14528/14528 [21:51<00:00, 11.08it/s] 
100%|██████████| 3280/3280 [04:13<00:00, 12.94it/s]


In [15]:
import pandas as pd
import torch
from tqdm import tqdm

def avgpool(input_csv, output_csv, output_folder, mode):
    # Загрузка таблицы CSV
    df = pd.read_csv(input_csv)

    for index, row in tqdm(df.iterrows(), total=len(list(df.iterrows()))):
        path_to_embeddings = row['path_to_embeddings']

        # Загрузка тензора с эмбеддингами
        embeddings_tensor = torch.load(path_to_embeddings)

        # Конкатенация эмбеддингов
        concatenated_embedding = torch.mean(embeddings_tensor, dim=1)

        # Формирование пути для сохранения нового эмбеддинга
        new_embedding_path = f"{output_folder}/embedding_{mode}_{index}.pt"

        # Сохранение нового эмбеддинга в формате .pt
        torch.save(concatenated_embedding, new_embedding_path)

        # Запись пути до сохраненного эмбеддинга в dataframe
        df.at[index, 'path_to_embeddings'] = new_embedding_path

    # Сохранение обновленного dataframe в CSV файл
    df.to_csv(output_csv, index=False)

# Пример вызова функции
avgpool('bert-base-uncased_train_embeddings_with_span_raw.csv', 'bert-base-uncased_train_avgpooling.csv', 'training/bert-base-uncased/avgpooling', 'train')
avgpool('bert-base-uncased_test_embeddings_with_span_raw.csv', 'bert-base-uncased_test_avgpooling.csv', 'training/bert-base-uncased/avgpooling', 'test')
avgpool('roberta-base_train_embeddings_with_span_raw.csv', 'bert-base-uncased_train_avgpooling.csv', 'training/roberta-base/avgpooling', 'train')
avgpool('roberta-base_test_embeddings_with_span_raw.csv', 'bert-base-uncased_test_avgpooling.csv', 'training/roberta-base/avgpooling', 'test')
avgpool('xlnet-base-cased_train_embeddings_with_span_raw.csv', 'bert-base-uncased_train_avgpooling.csv', 'training/xlnet-base-cased/avgpooling', 'train')
avgpool('xlnet-base-cased_test_embeddings_with_span_raw.csv', 'bert-base-uncased_test_avgpooling.csv', 'training/xlnet-base-cased/avgpooling', 'test')
avgpool('SpanBERT/spanbert-base-cased_train_embeddings_with_span_raw.csv', 'bert-base-uncased_train_avgpooling.csv', 'training/spanbert-base-cased/avgpooling', 'train')
avgpool('SpanBERT/spanbert-base-cased_test_embeddings_with_span_raw.csv', 'bert-base-uncased_test_avgpooling.csv', 'training/spanbert-base-cased/avgpooling', 'test')

100%|██████████| 14528/14528 [21:23<00:00, 11.32it/s] 
100%|██████████| 3280/3280 [04:06<00:00, 13.30it/s]
100%|██████████| 14528/14528 [21:33<00:00, 11.24it/s] 
100%|██████████| 3280/3280 [04:05<00:00, 13.37it/s]
100%|██████████| 14528/14528 [21:30<00:00, 11.26it/s]
100%|██████████| 3280/3280 [04:10<00:00, 13.08it/s]
100%|██████████| 14528/14528 [24:59<00:00,  9.69it/s] 
100%|██████████| 3280/3280 [05:00<00:00, 10.92it/s]


---

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import precision_recall_fscore_support, classification_report
from tqdm import tqdm
import pickle

import warnings
warnings.filterwarnings('ignore')

class CustomDataset(Dataset):
    def __init__(self, csv_file, onehot_encoder):
        self.data = pd.read_csv(csv_file)
        self.labels = onehot_encoder.transform(self.data['label'].values.reshape(-1, 1))

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        file_path = self.data.iloc[idx]['path_to_embeddings']
        label = self.labels[idx]

        # Load embedding from .pt file
        embedding = torch.load(file_path)

        # return embedding, torch.FloatTensor(label_onehot)
        return embedding, torch.FloatTensor(label)


class CustomDeepClassifier(nn.Module):
    def __init__(self, input_dim=1536, num_classes=20, num_layers=3, hidden_dim=1024):
        super(CustomDeepClassifier, self).__init__()

        layers = []
        layers.append(nn.Linear(input_dim, hidden_dim))
        layers.append(nn.BatchNorm1d(1))
        layers.append(nn.ReLU())

        for _ in range(num_layers - 2):  # Subtract 2 for the initial and final layers
            layers.append(nn.Linear(hidden_dim, hidden_dim))
            layers.append(nn.BatchNorm1d(1))
            layers.append(nn.ReLU())

        layers.append(nn.Linear(hidden_dim, num_classes))

        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        x = self.layers(x)
        x = F.softmax(x, dim=0)
        return x

class CustomAttention(nn.Module):
    def __init__(self, input_dim, hidden_dim, proj_dim=256, num_layers=3, num_classes=20):
        super(CustomAttention, self).__init__()

        self.projection = nn.Linear(input_dim, proj_dim)

        self.attention_params = nn.Linear(proj_dim, 1)

        layers = []
        layers.append(nn.Linear(proj_dim, hidden_dim))
        layers.append(nn.ReLU())
        layers.append(nn.BatchNorm1d(1))
        for _ in range(num_layers - 2):
            layers.append(nn.Linear(hidden_dim, hidden_dim))
            layers.append(nn.ReLU())
            layers.append(nn.BatchNorm1d(1))
        layers.append(nn.Linear(hidden_dim, num_classes))
        self.layers = nn.Sequential(*layers)
        
    def forward(self, embeddings):
        embeddings = self.projection(embeddings)

        attn_logits = self.attention_params(embeddings)  #  + attention_mask
        # print(attn_logits)
        attention_wts = nn.functional.softmax(attn_logits, dim=2)
        # print(attention_wts.shape, embeddings.shape)

        attention_term = torch.sum(attention_wts * embeddings, dim=-1)  #  * attention_mask
        # print(attention_term.shape)

        output = self.layers(attention_term)
        # print(output.shape)
        
        output = F.softmax(output, dim=0)
        # print(output.shape)
        
        return output

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def train_validate_model(model, train_loader, val_loader, method, model_name, num_epochs=10, learning_rate=0.1, stepslr=10, gamma=0.9):
    model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate)
    scheduler = optim.lr_scheduler.StepLR(optimizer, stepslr, gamma=gamma)

    train_losses = []
    val_losses = []
    precision_scores = []
    recall_scores = []
    f1_scores = []

    for epoch in tqdm(range(num_epochs), total=num_epochs):
        model.train()
        running_train_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs.float())
            # print(outputs.shape, labels.shape)
            # print(outputs.squeeze(1), labels)
            loss = criterion(outputs.squeeze(1), labels)
            loss.backward()
            optimizer.step()
            running_train_loss += loss.item()
        
        # print(running_train_loss, len(train_loader))
        train_loss = running_train_loss / len(train_loader)
        train_losses.append(train_loss)

        if (epoch + 1) % 300 == 0:
            model.eval()
            running_val_loss = 0.0
            all_preds = []
            all_labels = []
            with torch.no_grad():
                for inputs, labels in val_loader:
                    inputs, labels = inputs.to(device), labels.to(device)
                    outputs = model(inputs.float())
                    loss = criterion(outputs.squeeze(1), labels)
                    running_val_loss += loss.item()

                    # print(outputs.shape, labels.shape)
                    # print(outputs, labels)
                    predicted = torch.argmax(outputs.squeeze(1), 1)
                    labels = torch.argmax(labels, 1)
                    # print(predicted.shape, labels.shape)
                    # print(predicted, labels)
                    # break
                    all_preds.extend(predicted.cpu().numpy())
                    all_labels.extend(labels.cpu().numpy())

            val_loss = running_val_loss / len(val_loader)
            val_losses.append(val_loss)

            precision, recall, f1, _ = precision_recall_fscore_support(all_labels, all_preds, average='micro')
            precision_scores.append(precision)
            recall_scores.append(recall)
            f1_scores.append(f1)

            scheduler.step()

            print(f"Epoch [{epoch + 1}/{num_epochs}] "
                f"Train Loss: {train_loss:.4f} "
                f"Val Loss: {val_loss:.4f} "
                f"Precision: {precision:.4f} "
                f"Recall: {recall:.4f} "
                f"F1 Score: {f1:.4f}")
            
            print(classification_report(all_labels, all_preds))
            with open('metrics.txt', 'a') as file:
                file.write(method + ' ' + model_name)
                file.write(f'{precision}, {recall}, {f1}\n')
                file.write(classification_report(all_labels, all_preds) + '\n')

    return train_losses, val_losses, precision_scores, recall_scores, f1_scores

In [8]:
def save_object(obj, filename):
    with open(filename, 'wb') as outp:
        pickle.dump(obj, outp, pickle.HIGHEST_PROTOCOL)

for method in ['coherent', 'diff-sum', 'endpoint', 'avgpooling', 'maxpooling']:
    for model_name in ['bert-base-uncased', 'roberta-base', 'xlnet-base-cased', 'spanbert-base-cased']:
        print('=============================================================\n', method, model_name)
        onehot_encoder = OneHotEncoder(sparse=False)
        data = pd.read_csv(f'{model_name}_train_{method}.csv')['label'].values.reshape(-1, 1)
        onehot_encoder.fit(data)
            
        # Paths to train and test CSV files
        train_csv_path = f'{model_name}_train_{method}.csv'
        test_csv_path = f'{model_name}_test_{method}.csv'

        # Create instances of CustomDataset for train and test
        train_dataset = CustomDataset(train_csv_path, onehot_encoder)
        test_dataset = CustomDataset(test_csv_path, onehot_encoder)

        # Define batch sizes
        train_batch_size = 128
        test_batch_size = 128

        # Create DataLoaders for train and test datasets
        train_data_loader = DataLoader(train_dataset, batch_size=train_batch_size, shuffle=True)
        test_data_loader = DataLoader(test_dataset, batch_size=test_batch_size, shuffle=False)

        model = CustomDeepClassifier(input_dim=train_dataset[0][0].shape[1], num_classes=20)
        train_losses, val_losses, precision_scores, recall_scores, f1_scores = train_validate_model(model,
                                                                                                    train_data_loader,
                                                                                                    test_data_loader,
                                                                                                    method,
                                                                                                    model_name,
                                                                                                    num_epochs=300,
                                                                                                    learning_rate=3e-4,
                                                                                                    stepslr=20,
                                                                                                    gamma=0.9)
        
        save_object((train_losses, val_losses, precision_scores, recall_scores, f1_scores), f'{model_name}_{method}_data_and_metrics.pkl')

 coherent bert-base-uncased


100%|██████████| 300/300 [31:50<00:00,  6.37s/it]


Epoch [300/300] Train Loss: 2.9032 Val Loss: 2.9881 Precision: 0.1302 Recall: 0.1302 F1 Score: 0.1302
              precision    recall  f1-score   support

           0       0.02      0.14      0.03        21
           1       0.05      0.04      0.04       133
           2       0.01      0.14      0.01         7
           3       0.02      0.11      0.03        35
           4       0.02      0.10      0.03        20
           5       0.00      0.04      0.01        26
           6       0.17      0.06      0.09       188
           7       0.09      0.07      0.08       121
           8       0.04      0.16      0.06        61
           9       0.23      0.14      0.17        95
          10       0.00      0.00      0.00         4
          11       0.64      0.22      0.32       569
          12       0.56      0.15      0.24       296
          13       0.72      0.12      0.21      1479
          14       0.00      0.00      0.00        13
          15       0.01      0.06

100%|██████████| 300/300 [31:51<00:00,  6.37s/it]


Epoch [300/300] Train Loss: 2.9385 Val Loss: 2.9952 Precision: 0.0713 Recall: 0.0713 F1 Score: 0.0713
              precision    recall  f1-score   support

           0       0.02      0.10      0.03        21
           1       0.23      0.08      0.11       133
           2       0.00      0.14      0.01         7
           3       0.01      0.06      0.02        35
           4       0.00      0.00      0.00        20
           5       0.01      0.04      0.01        26
           6       0.24      0.07      0.11       188
           7       0.02      0.02      0.02       121
           8       0.01      0.03      0.02        61
           9       0.02      0.02      0.02        95
          10       0.00      0.00      0.00         4
          11       0.11      0.02      0.04       569
          12       0.11      0.03      0.05       296
          13       0.42      0.11      0.18      1479
          14       0.01      0.08      0.01        13
          15       0.00      0.00

100%|██████████| 300/300 [31:59<00:00,  6.40s/it]


Epoch [300/300] Train Loss: 2.9352 Val Loss: 2.9899 Precision: 0.0646 Recall: 0.0646 F1 Score: 0.0646
              precision    recall  f1-score   support

           0       0.02      0.05      0.02        21
           1       0.08      0.04      0.05       133
           2       0.00      0.00      0.00         7
           3       0.00      0.03      0.00        35
           4       0.00      0.00      0.00        20
           5       0.04      0.12      0.06        26
           6       0.12      0.03      0.05       188
           7       0.06      0.03      0.04       121
           8       0.06      0.08      0.07        61
           9       0.17      0.06      0.09        95
          10       0.00      0.00      0.00         4
          11       0.33      0.08      0.12       569
          12       0.28      0.07      0.11       296
          13       0.52      0.06      0.11      1479
          14       0.00      0.23      0.01        13
          15       0.00      0.00

100%|██████████| 300/300 [32:55<00:00,  6.58s/it]  


Epoch [300/300] Train Loss: 2.9052 Val Loss: 2.9892 Precision: 0.0924 Recall: 0.0924 F1 Score: 0.0924
              precision    recall  f1-score   support

           0       0.01      0.05      0.01        21
           1       0.03      0.02      0.02       133
           2       0.00      0.00      0.00         7
           3       0.02      0.09      0.03        35
           4       0.03      0.20      0.05        20
           5       0.01      0.04      0.01        26
           6       0.17      0.07      0.10       188
           7       0.07      0.07      0.07       121
           8       0.02      0.07      0.03        61
           9       0.15      0.13      0.14        95
          10       0.00      0.25      0.01         4
          11       0.51      0.12      0.20       569
          12       0.40      0.12      0.19       296
          13       0.65      0.09      0.16      1479
          14       0.01      0.08      0.01        13
          15       0.00      0.00

100%|██████████| 300/300 [31:28<00:00,  6.29s/it]


Epoch [300/300] Train Loss: 2.9048 Val Loss: 2.9842 Precision: 0.1643 Recall: 0.1643 F1 Score: 0.1643
              precision    recall  f1-score   support

           0       0.04      0.19      0.06        21
           1       0.10      0.08      0.09       133
           2       0.00      0.00      0.00         7
           3       0.01      0.11      0.03        35
           4       0.00      0.00      0.00        20
           5       0.02      0.08      0.03        26
           6       0.22      0.11      0.14       188
           7       0.18      0.11      0.13       121
           8       0.08      0.21      0.12        61
           9       0.21      0.15      0.17        95
          10       0.01      0.25      0.01         4
          11       0.66      0.23      0.34       569
          12       0.51      0.15      0.23       296
          13       0.72      0.17      0.28      1479
          14       0.01      0.15      0.02        13
          15       0.01      0.18

100%|██████████| 300/300 [35:27<00:00,  7.09s/it]  


Epoch [300/300] Train Loss: 2.9389 Val Loss: 2.9920 Precision: 0.0381 Recall: 0.0381 F1 Score: 0.0381
              precision    recall  f1-score   support

           0       0.10      0.24      0.14        21
           1       0.09      0.04      0.05       133
           2       0.00      0.00      0.00         7
           3       0.02      0.14      0.04        35
           4       0.01      0.05      0.01        20
           5       0.01      0.04      0.02        26
           6       0.34      0.12      0.17       188
           7       0.03      0.03      0.03       121
           8       0.04      0.07      0.05        61
           9       0.03      0.03      0.03        95
          10       0.00      0.00      0.00         4
          11       0.13      0.02      0.04       569
          12       0.15      0.04      0.06       296
          13       0.31      0.02      0.04      1479
          14       0.00      0.08      0.01        13
          15       0.01      0.18

100%|██████████| 300/300 [35:26<00:00,  7.09s/it]  


Epoch [300/300] Train Loss: 2.9095 Val Loss: 2.9862 Precision: 0.1445 Recall: 0.1445 F1 Score: 0.1445
              precision    recall  f1-score   support

           0       0.04      0.10      0.06        21
           1       0.13      0.05      0.07       133
           2       0.00      0.00      0.00         7
           3       0.01      0.09      0.02        35
           4       0.01      0.05      0.02        20
           5       0.00      0.00      0.00        26
           6       0.22      0.05      0.09       188
           7       0.11      0.06      0.08       121
           8       0.00      0.00      0.00        61
           9       0.28      0.12      0.16        95
          10       0.00      0.25      0.01         4
          11       0.70      0.20      0.31       569
          12       0.50      0.12      0.19       296
          13       0.68      0.18      0.29      1479
          14       0.00      0.08      0.01        13
          15       0.01      0.06

100%|██████████| 300/300 [33:15<00:00,  6.65s/it]  


Epoch [300/300] Train Loss: 2.9044 Val Loss: 2.9885 Precision: 0.0939 Recall: 0.0939 F1 Score: 0.0939
              precision    recall  f1-score   support

           0       0.01      0.10      0.02        21
           1       0.08      0.06      0.07       133
           2       0.01      0.29      0.02         7
           3       0.02      0.11      0.03        35
           4       0.00      0.00      0.00        20
           5       0.01      0.04      0.01        26
           6       0.14      0.07      0.09       188
           7       0.07      0.07      0.07       121
           8       0.03      0.08      0.05        61
           9       0.15      0.14      0.14        95
          10       0.00      0.00      0.00         4
          11       0.44      0.11      0.18       569
          12       0.31      0.08      0.12       296
          13       0.67      0.10      0.17      1479
          14       0.01      0.23      0.03        13
          15       0.01      0.18

100%|██████████| 300/300 [36:02<00:00,  7.21s/it]  


Epoch [300/300] Train Loss: 2.9051 Val Loss: 2.9831 Precision: 0.1439 Recall: 0.1439 F1 Score: 0.1439
              precision    recall  f1-score   support

           0       0.04      0.24      0.06        21
           1       0.09      0.05      0.07       133
           2       0.00      0.14      0.01         7
           3       0.02      0.09      0.03        35
           4       0.04      0.20      0.06        20
           5       0.01      0.04      0.01        26
           6       0.22      0.10      0.14       188
           7       0.13      0.07      0.10       121
           8       0.04      0.11      0.06        61
           9       0.18      0.14      0.15        95
          10       0.01      0.50      0.02         4
          11       0.72      0.23      0.34       569
          12       0.46      0.13      0.21       296
          13       0.68      0.14      0.23      1479
          14       0.01      0.15      0.02        13
          15       0.01      0.12

100%|██████████| 300/300 [35:57<00:00,  7.19s/it]  


Epoch [300/300] Train Loss: 2.9379 Val Loss: 2.9910 Precision: 0.0390 Recall: 0.0390 F1 Score: 0.0390
              precision    recall  f1-score   support

           0       0.01      0.05      0.02        21
           1       0.06      0.06      0.06       133
           2       0.00      0.00      0.00         7
           3       0.00      0.03      0.01        35
           4       0.01      0.10      0.02        20
           5       0.01      0.08      0.02        26
           6       0.26      0.12      0.16       188
           7       0.06      0.05      0.05       121
           8       0.04      0.03      0.04        61
           9       0.05      0.04      0.05        95
          10       0.00      0.00      0.00         4
          11       0.09      0.02      0.04       569
          12       0.16      0.03      0.06       296
          13       0.39      0.03      0.05      1479
          14       0.00      0.08      0.00        13
          15       0.01      0.12

100%|██████████| 300/300 [35:47<00:00,  7.16s/it]  


Epoch [300/300] Train Loss: 2.9126 Val Loss: 2.9857 Precision: 0.1287 Recall: 0.1287 F1 Score: 0.1287
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        21
           1       0.14      0.06      0.08       133
           2       0.00      0.00      0.00         7
           3       0.01      0.17      0.03        35
           4       0.00      0.00      0.00        20
           5       0.02      0.04      0.02        26
           6       0.16      0.04      0.07       188
           7       0.12      0.07      0.09       121
           8       0.06      0.11      0.08        61
           9       0.23      0.17      0.19        95
          10       0.00      0.25      0.01         4
          11       0.69      0.18      0.28       569
          12       0.47      0.13      0.20       296
          13       0.70      0.14      0.23      1479
          14       0.00      0.15      0.01        13
          15       0.02      0.24

100%|██████████| 300/300 [34:22<00:00,  6.87s/it]  


Epoch [300/300] Train Loss: 2.9049 Val Loss: 2.9907 Precision: 0.0899 Recall: 0.0899 F1 Score: 0.0899
              precision    recall  f1-score   support

           0       0.01      0.10      0.02        21
           1       0.05      0.04      0.04       133
           2       0.00      0.14      0.01         7
           3       0.02      0.11      0.04        35
           4       0.01      0.10      0.02        20
           5       0.00      0.00      0.00        26
           6       0.16      0.08      0.11       188
           7       0.05      0.06      0.06       121
           8       0.02      0.07      0.03        61
           9       0.09      0.11      0.10        95
          10       0.00      0.25      0.01         4
          11       0.54      0.12      0.19       569
          12       0.36      0.13      0.19       296
          13       0.65      0.09      0.15      1479
          14       0.01      0.23      0.02        13
          15       0.00      0.06

100%|██████████| 300/300 [35:20<00:00,  7.07s/it]  


Epoch [300/300] Train Loss: 2.9130 Val Loss: 2.9849 Precision: 0.1311 Recall: 0.1311 F1 Score: 0.1311
              precision    recall  f1-score   support

           0       0.07      0.14      0.09        21
           1       0.11      0.03      0.05       133
           2       0.01      0.14      0.01         7
           3       0.02      0.09      0.03        35
           4       0.04      0.10      0.06        20
           5       0.02      0.08      0.03        26
           6       0.28      0.07      0.11       188
           7       0.14      0.04      0.06       121
           8       0.06      0.03      0.04        61
           9       0.22      0.11      0.14        95
          10       0.01      0.25      0.02         4
          11       0.57      0.25      0.34       569
          12       0.37      0.06      0.11       296
          13       0.63      0.13      0.22      1479
          14       0.01      0.15      0.01        13
          15       0.01      0.12

100%|██████████| 300/300 [35:37<00:00,  7.13s/it]  


Epoch [300/300] Train Loss: 2.9414 Val Loss: 2.9911 Precision: 0.1015 Recall: 0.1015 F1 Score: 0.1015
              precision    recall  f1-score   support

           0       0.01      0.05      0.02        21
           1       0.10      0.04      0.05       133
           2       0.00      0.00      0.00         7
           3       0.01      0.09      0.02        35
           4       0.00      0.00      0.00        20
           5       0.01      0.04      0.02        26
           6       0.27      0.13      0.18       188
           7       0.06      0.10      0.07       121
           8       0.04      0.03      0.04        61
           9       0.07      0.05      0.06        95
          10       0.00      0.00      0.00         4
          11       0.09      0.02      0.04       569
          12       0.12      0.03      0.05       296
          13       0.56      0.16      0.25      1479
          14       0.01      0.08      0.01        13
          15       0.00      0.06

100%|██████████| 300/300 [37:00<00:00,  7.40s/it]  


Epoch [300/300] Train Loss: 2.9152 Val Loss: 2.9868 Precision: 0.1396 Recall: 0.1396 F1 Score: 0.1396
              precision    recall  f1-score   support

           0       0.02      0.05      0.03        21
           1       0.17      0.08      0.11       133
           2       0.01      0.14      0.02         7
           3       0.01      0.09      0.01        35
           4       0.04      0.10      0.06        20
           5       0.00      0.00      0.00        26
           6       0.19      0.06      0.09       188
           7       0.13      0.06      0.08       121
           8       0.04      0.03      0.04        61
           9       0.24      0.12      0.16        95
          10       0.02      0.50      0.03         4
          11       0.60      0.23      0.33       569
          12       0.29      0.05      0.09       296
          13       0.59      0.15      0.24      1479
          14       0.00      0.00      0.00        13
          15       0.01      0.12

100%|██████████| 300/300 [35:10<00:00,  7.04s/it]  


Epoch [300/300] Train Loss: 2.9120 Val Loss: 2.9893 Precision: 0.0659 Recall: 0.0659 F1 Score: 0.0659
              precision    recall  f1-score   support

           0       0.02      0.05      0.02        21
           1       0.02      0.01      0.01       133
           2       0.00      0.00      0.00         7
           3       0.00      0.06      0.01        35
           4       0.04      0.10      0.06        20
           5       0.01      0.12      0.03        26
           6       0.22      0.09      0.12       188
           7       0.05      0.02      0.03       121
           8       0.04      0.05      0.04        61
           9       0.11      0.07      0.09        95
          10       0.01      0.25      0.02         4
          11       0.32      0.03      0.06       569
          12       0.36      0.07      0.11       296
          13       0.51      0.08      0.14      1479
          14       0.00      0.00      0.00        13
          15       0.01      0.12

100%|██████████| 300/300 [37:27<00:00,  7.49s/it]  


Epoch [300/300] Train Loss: 2.9170 Val Loss: 2.9871 Precision: 0.0832 Recall: 0.0832 F1 Score: 0.0832
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        21
           1       0.06      0.02      0.02       133
           2       0.00      0.00      0.00         7
           3       0.01      0.20      0.03        35
           4       0.03      0.05      0.03        20
           5       0.05      0.19      0.08        26
           6       0.22      0.04      0.07       188
           7       0.14      0.07      0.09       121
           8       0.09      0.08      0.09        61
           9       0.22      0.12      0.15        95
          10       0.01      0.25      0.02         4
          11       0.35      0.18      0.24       569
          12       0.40      0.06      0.11       296
          13       0.47      0.05      0.09      1479
          14       0.00      0.08      0.00        13
          15       0.00      0.00

100%|██████████| 300/300 [37:27<00:00,  7.49s/it]  


Epoch [300/300] Train Loss: 2.9460 Val Loss: 2.9923 Precision: 0.0985 Recall: 0.0985 F1 Score: 0.0985
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        21
           1       0.04      0.02      0.02       133
           2       0.02      0.14      0.04         7
           3       0.00      0.00      0.00        35
           4       0.02      0.05      0.03        20
           5       0.03      0.04      0.03        26
           6       0.33      0.07      0.12       188
           7       0.06      0.35      0.11       121
           8       0.06      0.03      0.04        61
           9       0.11      0.04      0.06        95
          10       0.00      0.00      0.00         4
          11       0.18      0.03      0.05       569
          12       0.07      0.03      0.04       296
          13       0.44      0.14      0.21      1479
          14       0.00      0.00      0.00        13
          15       0.00      0.00

100%|██████████| 300/300 [38:25<00:00,  7.68s/it]  


Epoch [300/300] Train Loss: 2.9192 Val Loss: 2.9857 Precision: 0.1018 Recall: 0.1018 F1 Score: 0.1018
              precision    recall  f1-score   support

           0       0.02      0.05      0.03        21
           1       0.10      0.03      0.05       133
           2       0.00      0.00      0.00         7
           3       0.02      0.20      0.04        35
           4       0.00      0.00      0.00        20
           5       0.01      0.04      0.02        26
           6       0.23      0.05      0.08       188
           7       0.08      0.02      0.04       121
           8       0.08      0.08      0.08        61
           9       0.26      0.13      0.17        95
          10       0.02      0.50      0.05         4
          11       0.55      0.14      0.23       569
          12       0.38      0.06      0.11       296
          13       0.52      0.10      0.17      1479
          14       0.01      0.31      0.02        13
          15       0.01      0.06

100%|██████████| 300/300 [35:33<00:00,  7.11s/it]  

Epoch [300/300] Train Loss: 2.9197 Val Loss: 2.9888 Precision: 0.0625 Recall: 0.0625 F1 Score: 0.0625
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        21
           1       0.06      0.02      0.02       133
           2       0.00      0.00      0.00         7
           3       0.01      0.06      0.01        35
           4       0.06      0.15      0.08        20
           5       0.02      0.19      0.04        26
           6       0.19      0.03      0.05       188
           7       0.09      0.03      0.05       121
           8       0.04      0.03      0.04        61
           9       0.18      0.08      0.11        95
          10       0.00      0.00      0.00         4
          11       0.39      0.07      0.11       569
          12       0.38      0.07      0.11       296
          13       0.43      0.06      0.11      1479
          14       0.00      0.15      0.01        13
          15       0.01      0.12


