In [1]:
from metrics.accuracy_metrics import AccuracyMetrics
from metrics.similarity_metrics import CosineSimilarity
from tqdm import tqdm
import torch

In [2]:
from loaders.elco_dataloader import get_loaders
# Build the train/test loaders from the ELCo CSV. get_loaders returns a pair that
# is unpacked here; both names are consumed by the trainer cells further down.
# NOTE(review): batch_size=1 is presumably required by ELCoMTrainer, which
# averages the embeddings of a single sample's sentences — confirm before raising it.
train_loader, test_loader = get_loaders("data/ELCo.csv", batch_size=1, shuffle=True, num_workers=0)

In [12]:
import torch.nn as nn
from sentence_transformers import SentenceTransformer


class TeacherModel():
    """Callable wrapper around the 'bert-base-nli-mean-tokens' SentenceTransformer.

    ``teacher(x)`` is shorthand for ``teacher.encode(x)``.
    """

    def __init__(self):
        checkpoint = 'bert-base-nli-mean-tokens'
        self.model = SentenceTransformer(checkpoint)

    def encode(self, x):
        """Embed ``x`` with the wrapped model, requesting a tensor result."""
        embeddings = self.model.encode(x, convert_to_tensor=True)
        return embeddings

    def __call__(self, x):
        """Make the instance callable by delegating to ``encode``."""
        return self.encode(x)

class StudentModel():
    """Callable wrapper around a SentenceTransformer built from 'xlm-roberta-base'.

    ``student(x)`` is shorthand for ``student.forward(x)``.
    """

    def __init__(self):
        checkpoint = 'xlm-roberta-base'
        self.model = SentenceTransformer(checkpoint)

    def forward(self, x):
        """Embed ``x`` with the wrapped model, requesting a tensor result."""
        embeddings = self.model.encode(x, convert_to_tensor=True)
        return embeddings

    def __call__(self, x):
        """Make the instance callable by delegating to ``forward``."""
        return self.forward(x)

In [13]:
from torch.autograd import Variable
from itertools import chain

class ELCoMTrainer:
    """Trains a student encoder against a teacher encoder and evaluates ranking accuracy.

    The loss used in ``train`` combines two MSE terms (student vs. teacher) with a
    cosine-similarity term between the two student embeddings. ``test`` ranks
    candidate sentence groups by similarity to a target embedding and scores the
    ranking with the injected ``accuracy_metric``.
    """

    def __init__(self, teacher_model, student_model, optim, lr, accuracy_metric):
        """
        Args:
            teacher_model: callable mapping text input to an embedding tensor.
            student_model: callable mapping text input to an embedding tensor; must
                expose ``.model._first_module()`` whose parameters are optimised.
            optim: optimizer class, invoked as ``optim(params, lr=lr)``.
            lr: learning rate forwarded to ``optim``.
            accuracy_metric: callable receiving a list of ``(score, rank)`` tuples
                sorted by descending score and returning a number.
        """
        self.teacher = teacher_model
        self.student = student_model
        # Only the student's parameters are handed to an optimizer; the teacher
        # is never updated by this class.
        self.student_optimizer = optim(self.student.model._first_module().parameters(), lr=lr)
        self.accuracy_metric = accuracy_metric
        self.optim = optim
        self.mse = nn.MSELoss()
        self.cosine = nn.CosineSimilarity()
        self.similarity = CosineSimilarity()
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    def train(self, train_loader, epochs):
        """Run ``epochs`` passes over ``train_loader`` and print the mean loss per epoch.

        Args:
            train_loader: sized iterable yielding ``(x, y)`` pairs that both models
                can embed.
            epochs: number of passes over ``train_loader``.
        """
        for epoch in range(epochs):
            # Reset per epoch so the printed value is this epoch's mean loss rather
            # than a running total over all previous epochs.
            epoch_loss = 0.0
            for x, y in tqdm(train_loader):
                # NOTE(review): wrapping the encoder outputs in Variable(...) appears
                # to create fresh leaf tensors, so backward() would not reach the
                # student's parameters and the optimizer step would leave the weights
                # unchanged. Confirm, and switch to a differentiable forward pass if
                # the student is actually meant to learn.
                teacher_en = Variable(self.teacher(x), requires_grad=True).mean(axis=0).unsqueeze(0)
                student_en = Variable(self.student(x), requires_grad=True).mean(axis=0).unsqueeze(0)
                student_em = Variable(self.student(y), requires_grad=True)
                # The parentheses make the cosine term part of the sum; previously it
                # sat on its own line as a standalone expression and was discarded.
                # NOTE(review): ``.item()`` turns the term into a plain float (no
                # gradient) and adding similarity increases the loss when the two
                # student embeddings agree — confirm the intended sign.
                loss = (
                    self.mse(student_en, teacher_en)
                    + self.mse(student_em, teacher_en)
                    + self.cosine(student_en, student_em).item()
                )
                self.student_optimizer.zero_grad()
                loss.backward()
                self.student_optimizer.step()
                epoch_loss += loss.item()
            print(f"Epoch {epoch + 1} loss: {epoch_loss / len(train_loader)}")
        print("Training finished")

    def test(self, test_loader):
        """Return the mean ``accuracy_metric`` value over ``test_loader``.

        Each item is an ``(x, y)`` pair where ``x`` is a mapping from a rank label
        to nested sentence collections and ``y[0]`` is the target text. Every
        candidate group is embedded with the student, mean-pooled, and scored
        against the target embedding; the candidates are then sorted by descending
        score and passed to ``accuracy_metric``.
        """
        accuracy = 0
        for x, y in tqdm(test_loader):
            y = y[0]
            rank_scores = []
            y_score = self.student(y)
            for rank, sentences in x.items():
                # Flatten one nesting level so the student receives a flat list.
                sentences = list(chain(*sentences))
                embeddings = self.student(sentences).mean(axis=0)  # simple mean pooling
                rank_scores.append((self.similarity(embeddings, y_score).cpu().detach().numpy()[0], rank))
            sorted_rank_scores = sorted(rank_scores, key=lambda pair: pair[0], reverse=True)
            accuracy += self.accuracy_metric(sorted_rank_scores)
        return accuracy / len(test_loader)

In [14]:
from torch.optim import Adam
# Instantiate both encoders once; later cells re-use these objects through the trainer.
teacher_model = TeacherModel()
student_model = StudentModel()
lr = 1e-3  # learning rate passed to the optimizer class inside ELCoMTrainer
# Adam is passed as a class; ELCoMTrainer constructs the optimizer itself.
# This trainer scores rankings with top-k accuracy.
trainer = ELCoMTrainer(teacher_model, student_model, Adam, lr, AccuracyMetrics().top_k_accuracy)
# The training call below is commented out, so as far as this cell shows the
# trainer is created without running any training step.
# trainer.train(train_loader, epochs=5)

No sentence-transformers model found with name C:\Users\Shirshajit/.cache\torch\sentence_transformers\xlm-roberta-base. Creating a new one with MEAN pooling.
Some weights of the model checkpoint at C:\Users\Shirshajit/.cache\torch\sentence_transformers\xlm-roberta-base were not used when initializing XLMRobertaModel: ['lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight']
- This IS expected if you are initializing XLMRobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [17]:

class AccuracyMetrics:
    """Ranking metrics computed from ``(score, rank)`` entries sorted by descending score.

    List of metrics:
    - Accuracy
    - Precision/Recall and F1 + Confusion Matrix
    - ROC/AUC Curve
    - Mean Reciprocal Rank (MRR)
    - Precision@k
    - Hit Rate

    Only ``accuracy`` and ``top_k_accuracy`` are implemented; the remaining
    methods are placeholders that store and return ``None``. Every computed value
    is also cached in ``self.metrics`` under the metric's name.
    """

    def __init__(self, k=5):
        """
        Args:
            k: number of leading entries inspected by ``top_k_accuracy``.
        """
        self.metrics = {}
        self.k = k

    def accuracy(self, rank_scores):
        """Fraction of entries whose rank label equals their position in the list.

        Args:
            rank_scores: sequence of entries where ``entry[1]`` is the rank label.

        Returns:
            A float in ``[0, 1]``; ``0.0`` for an empty sequence (avoids dividing
            by zero).
        """
        total = len(rank_scores)
        if total == 0:
            self.metrics['accuracy'] = 0.0
            return self.metrics['accuracy']
        correct = sum(1 for position, entry in enumerate(rank_scores) if entry[1] == position)
        self.metrics['accuracy'] = correct / total
        return self.metrics['accuracy']

    def top_k_accuracy(self, rank_scores):
        """Return 1 if rank label ``0`` appears among the first ``k`` entries, else 0.

        Args:
            rank_scores: sequence of entries where ``entry[1]`` is the rank label.

        Slicing is used instead of indexing ``range(k)`` so that inputs with fewer
        than ``k`` entries are handled instead of raising ``IndexError``.
        """
        top_ranks = [entry[1] for entry in rank_scores[:self.k]]
        correct = 1 if 0 in top_ranks else 0
        if not correct:
            # Misses are printed so that the offending ranking can be inspected.
            print(rank_scores)
        self.metrics['top_k_accuracy'] = correct
        return self.metrics['top_k_accuracy']

    def precision_recall_f1(self, rank_scores, y):
        """Placeholder: stores and returns ``(None, None, None)``."""
        # TODO: Add functionality
        self.metrics['precision'] = None
        self.metrics['recall'] = None
        self.metrics['f1'] = None
        return self.metrics['precision'], self.metrics['recall'], self.metrics['f1']

    def roc_auc(self, rank_scores, y):
        """Placeholder: stores and returns ``None``."""
        # TODO: Add functionality
        self.metrics['roc_auc'] = None
        return self.metrics['roc_auc']

    def mrr(self, rank_scores, y):
        """Placeholder: stores and returns ``None``."""
        # TODO: Add functionality
        self.metrics['mrr'] = None
        return self.metrics['mrr']

    def precision_at_k(self, rank_scores, y):
        """Placeholder: stores and returns ``None``."""
        # TODO: Add functionality
        self.metrics['precision_at_k'] = None
        return self.metrics['precision_at_k']

    def hit_rate(self, rank_scores, y):
        """Placeholder: stores and returns ``None``."""
        # TODO: Add functionality
        self.metrics['hit_rate'] = None
        return self.metrics['hit_rate']

    def confusion_matrix(self, rank_scores, y):
        """Placeholder: stores and returns ``None``."""
        # TODO: Add functionality
        self.metrics['confusion_matrix'] = None
        return self.metrics['confusion_matrix']

    def plot_roc_auc(self, rank_scores, y):
        """Placeholder: does nothing yet."""
        # TODO: Add plot
        pass

    def plot_confusion_matrix(self, rank_scores, y):
        """Placeholder: does nothing yet."""
        # TODO: Add plot
        pass

    # Helper methods for metrics
    def get(self, name):
        """Return the cached value for ``name``; raises ``KeyError`` if never computed."""
        return self.metrics[name]

    def get_all(self):
        """Return the dict of all cached metric values."""
        return self.metrics

    def __str__(self):
        return str(self.metrics)

    def __repr__(self):
        return str(self.metrics)

    def __iter__(self):
        """Iterate over the names of the cached metrics."""
        return iter(self.metrics)


In [8]:
# Build the evaluation trainer from the model objects created earlier instead of
# reading them back off the previous `trainer`, so this cell does not depend on
# which trainer happened to be bound last and can be re-run safely.
trainer = ELCoMTrainer(teacher_model, student_model, Adam, lr, AccuracyMetrics().accuracy)
trainer.test(test_loader)

100%|██████████| 42/42 [03:31<00:00,  5.03s/it]


0.16052532123960694

In [18]:
# Build the evaluation trainer from the model objects created earlier instead of
# reading them back off the previous `trainer`, so this cell does not depend on
# which trainer happened to be bound last and can be re-run safely.
trainer = ELCoMTrainer(teacher_model, student_model, Adam, lr, AccuracyMetrics().top_k_accuracy)
trainer.test(test_loader)

  2%|▏         | 1/42 [00:12<08:52, 12.99s/it]

[(0.9922796, 3), (0.9904179, 5), (0.9883725, 2), (0.98767537, 7), (0.9876372, 4), (0.98716235, 6), (0.9867712, 0), (0.98652285, 1)]


  5%|▍         | 2/42 [00:19<06:02,  9.07s/it]

[(0.99708664, 2), (0.99395066, 1), (0.9934078, 7), (0.99210835, 6), (0.99186295, 5), (0.99183077, 3), (0.99168026, 0), (0.98931265, 4)]


  7%|▋         | 3/42 [00:26<05:24,  8.31s/it]

[(0.99513686, 3), (0.99513036, 1), (0.9941717, 5), (0.99331677, 4), (0.99235207, 2), (0.9911839, 0), (0.98942757, 6)]


 19%|█▉        | 8/42 [01:22<05:38,  9.95s/it]

[(0.993364, 4), (0.9925506, 5), (0.99181455, 6), (0.99124706, 7), (0.9910674, 3), (0.99018216, 2), (0.9897621, 0), (0.98676693, 1)]


 26%|██▌       | 11/42 [01:48<04:57,  9.60s/it]

[(0.99566334, 5), (0.9936409, 3), (0.99351627, 2), (0.99329317, 6), (0.9931414, 7), (0.99244803, 1), (0.991856, 0), (0.9887193, 4)]


 29%|██▊       | 12/42 [02:05<05:51, 11.73s/it]

[(0.9945805, 4), (0.99353296, 3), (0.9935099, 1), (0.9931449, 5), (0.99299675, 2), (0.991795, 6), (0.99168444, 0), (0.99102175, 7)]


 31%|███       | 13/42 [02:12<04:55, 10.19s/it]

[(0.99487, 1), (0.99487, 4), (0.9942554, 3), (0.9941044, 5), (0.9932482, 2), (0.99180984, 0), (0.990287, 6)]


 33%|███▎      | 14/42 [02:22<04:47, 10.27s/it]

[(0.9949983, 5), (0.9948519, 7), (0.99388844, 1), (0.99317145, 3), (0.9914884, 4), (0.99012774, 2), (0.9897674, 0), (0.9889223, 6)]


 36%|███▌      | 15/42 [02:29<04:09,  9.25s/it]

[(0.9939599, 1), (0.9912823, 6), (0.99116606, 7), (0.9904037, 4), (0.99009246, 2), (0.9885861, 0), (0.98723, 5), (0.98566747, 3)]


 40%|████      | 17/42 [03:11<06:07, 14.68s/it]

[(0.9959698, 4), (0.9957489, 1), (0.99561065, 2), (0.9949715, 3), (0.99428093, 5), (0.99390644, 6), (0.9925594, 7), (0.9922165, 0)]


 43%|████▎     | 18/42 [03:15<04:38, 11.58s/it]

[(0.99590987, 2), (0.99590045, 4), (0.99590045, 7), (0.99584424, 6), (0.99524206, 5), (0.9950286, 3), (0.991818, 0), (0.99035674, 1)]


 57%|█████▋    | 24/42 [04:47<03:48, 12.68s/it]

[(0.99679416, 3), (0.9966049, 1), (0.9938611, 6), (0.9921573, 7), (0.9920352, 5), (0.99186695, 4), (0.9912572, 0), (0.99077845, 2)]


 62%|██████▏   | 26/42 [05:03<02:42, 10.15s/it]

[(0.99551004, 5), (0.99238306, 6), (0.9915354, 4), (0.9913707, 1), (0.9911169, 3), (0.99032307, 0), (0.9898963, 2)]


 64%|██████▍   | 27/42 [05:11<02:21,  9.45s/it]

[(0.9956853, 8), (0.99390376, 6), (0.9938203, 1), (0.9937954, 3), (0.9931139, 4), (0.9914742, 0), (0.98963654, 7), (0.98926586, 5), (0.9888343, 2)]


 67%|██████▋   | 28/42 [05:16<01:52,  8.01s/it]

[(0.99814683, 1), (0.9978796, 3), (0.99658734, 4), (0.9965856, 6), (0.9965856, 7), (0.9962928, 0), (0.99601245, 5), (0.9955719, 2)]


 69%|██████▉   | 29/42 [05:27<01:56,  8.96s/it]

[(0.9960184, 2), (0.99594176, 4), (0.9956916, 5), (0.99564385, 3), (0.9950621, 7), (0.9938423, 1), (0.99239606, 6), (0.9917474, 0)]


 71%|███████▏  | 30/42 [05:32<01:32,  7.74s/it]

[(0.9969924, 1), (0.9958421, 4), (0.9954656, 5), (0.99476504, 7), (0.9945866, 3), (0.9945175, 6), (0.99413663, 0), (0.99262714, 2)]


 76%|███████▌  | 32/42 [05:47<01:15,  7.53s/it]

[(0.9961684, 1), (0.99510026, 2), (0.99510026, 6), (0.9950396, 4), (0.9943682, 3), (0.99410516, 5), (0.99081147, 0), (0.99079764, 7)]


 81%|████████  | 34/42 [06:04<01:01,  7.72s/it]

[(0.9966995, 4), (0.99655324, 7), (0.9963093, 1), (0.9962782, 6), (0.99614483, 2), (0.9957789, 3), (0.9957364, 0), (0.99567735, 5)]


 88%|████████▊ | 37/42 [06:36<00:43,  8.65s/it]

[(0.9974639, 7), (0.99740857, 1), (0.9969399, 3), (0.99642503, 4), (0.99536353, 2), (0.99419457, 6), (0.993499, 0), (0.993499, 5)]


 90%|█████████ | 38/42 [06:46<00:36,  9.20s/it]

[(0.996256, 5), (0.994724, 4), (0.99377245, 2), (0.99347925, 3), (0.99321663, 6), (0.99314266, 1), (0.9913159, 7), (0.9898502, 0)]


 95%|█████████▌| 40/42 [07:02<00:17,  8.52s/it]

[(0.9960773, 5), (0.99567574, 3), (0.99559814, 2), (0.995588, 1), (0.9952556, 7), (0.99512124, 4), (0.99483, 0), (0.9940717, 8), (0.9913796, 6)]


100%|██████████| 42/42 [07:16<00:00, 10.38s/it]

[(0.99667317, 4), (0.99476063, 2), (0.9936729, 3), (0.9931971, 7), (0.99262595, 6), (0.9909542, 1), (0.9909542, 5), (0.9876985, 0)]





0.4523809523809524

In [None]:
import torch.nn as nn

class LSTM(nn.Module):
    """Embedding -> stacked LSTM -> linear head applied to the last time step.

    Args:
        input_size: number of rows in the embedding table.
        embedding_size: width of each embedding vector (LSTM input size).
        hidden_size: LSTM hidden state width.
        num_layers: number of stacked LSTM layers.
        num_classes: output width of the final linear layer.

    The module moves itself to CUDA when available, otherwise stays on CPU.
    ``__call__`` is deliberately not overridden: ``nn.Module.__call__`` already
    dispatches to ``forward`` and additionally runs registered hooks.
    """

    def __init__(self, input_size, embedding_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(embedding_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)
        self.embedding = nn.Embedding(input_size, embedding_size)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.to(self.device)

    def forward(self, x):
        """Map index tensor ``x`` (indexed as ``[batch, time]``) to ``[batch, num_classes]``."""
        # Follow the parameters' actual device so CPU inputs work after the module
        # has been moved to the GPU (no-op when already co-located).
        x = x.to(self.embedding.weight.device)
        out = self.embedding(x)
        out, _ = self.lstm(out)
        # batch_first=True: keep only the output of the final time step.
        out = self.fc(out[:, -1, :])
        return out

class GRU(nn.Module):
    """Embedding -> stacked GRU -> linear head applied to the last time step.

    Args:
        input_size: number of rows in the embedding table.
        embedding_size: width of each embedding vector (GRU input size).
        hidden_size: GRU hidden state width.
        num_layers: number of stacked GRU layers.
        num_classes: output width of the final linear layer.

    The module moves itself to CUDA when available, otherwise stays on CPU.
    ``__call__`` is deliberately not overridden: ``nn.Module.__call__`` already
    dispatches to ``forward`` and additionally runs registered hooks.
    """

    def __init__(self, input_size, embedding_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.gru = nn.GRU(embedding_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)
        self.embedding = nn.Embedding(input_size, embedding_size)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.to(self.device)

    def forward(self, x):
        """Map index tensor ``x`` (indexed as ``[batch, time]``) to ``[batch, num_classes]``."""
        # Follow the parameters' actual device so CPU inputs work after the module
        # has been moved to the GPU (no-op when already co-located).
        x = x.to(self.embedding.weight.device)
        out = self.embedding(x)
        out, _ = self.gru(out)
        # batch_first=True: keep only the output of the final time step.
        out = self.fc(out[:, -1, :])
        return out
    
class RNN(nn.Module):
    """Embedding -> stacked vanilla RNN -> linear head applied to the last time step.

    Args:
        input_size: number of rows in the embedding table.
        embedding_size: width of each embedding vector (RNN input size).
        hidden_size: RNN hidden state width.
        num_layers: number of stacked RNN layers.
        num_classes: output width of the final linear layer.

    The module moves itself to CUDA when available, otherwise stays on CPU.
    ``__call__`` is deliberately not overridden: ``nn.Module.__call__`` already
    dispatches to ``forward`` and additionally runs registered hooks.
    """

    def __init__(self, input_size, embedding_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(embedding_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)
        self.embedding = nn.Embedding(input_size, embedding_size)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.to(self.device)

    def forward(self, x):
        """Map index tensor ``x`` (indexed as ``[batch, time]``) to ``[batch, num_classes]``."""
        # Follow the parameters' actual device so CPU inputs work after the module
        # has been moved to the GPU (no-op when already co-located).
        x = x.to(self.embedding.weight.device)
        out = self.embedding(x)
        out, _ = self.rnn(out)
        # batch_first=True: keep only the output of the final time step.
        out = self.fc(out[:, -1, :])
        return out


In [None]:
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator

# Module-level tokenizer shared by yield_tokens below; 'basic_english' selects
# torchtext's built-in English tokenizer.
tokenizer = get_tokenizer('basic_english')


def yield_tokens(data_iter):
    """Lazily yield the tokenized second item of every 2-item record in ``data_iter``.

    The first item of each record is ignored. Tokenization uses the module-level
    ``tokenizer``.
    """
    yield from (tokenizer(text) for _, text in data_iter)
    
# Build the vocabulary from the tokenized training texts, reserving "<unk>" as a
# special token.
# NOTE(review): `train_iter` is not defined in any cell visible here — confirm it
# is created before this cell runs on a fresh kernel.
# NOTE(review): no default index is set in this cell; presumably
# vocab.set_default_index(vocab["<unk>"]) is needed before looking up unseen
# tokens — verify.
vocab = build_vocab_from_iterator(yield_tokens(train_iter), specials=["<unk>"])