In [1]:
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.optim as optim
import random
from transformers import BertTokenizer, BertModel
import json
import numpy as np
from tqdm import tqdm
import pickle
from sklearn.metrics import f1_score
import matplotlib.pyplot as plt
torch.manual_seed(1)

  from .autonotebook import tqdm as notebook_tqdm


<torch._C.Generator at 0x2add41d3810>

In [2]:
device = "cpu"

In [3]:
# #check if cuda is available
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# print(device)

In [4]:
def prepare_sequence(seq, to_ix):
    idxs = [to_ix[w] for w in seq]
    return torch.tensor(idxs, dtype=torch.long)

#### Data Loading

In [5]:
train_data = json.load(open('../Dataset/BIO_Tagged/ATE_train.json', 'r'))
test_data = json.load(open('../Dataset/BIO_Tagged/ATE_test.json', 'r'))
val_data = json.load(open('../Dataset/BIO_Tagged/ATE_val.json', 'r'))

In [6]:
word_to_idx = pickle.load(open('../Utils/word_to_idx.pkl', 'rb'))

In [7]:
tag_to_ix = pickle.load(open('../Utils/tag_to_ix.pkl', 'rb'))

#### RNN model

In [8]:
class RNN_model(nn.Module):

    def __init__(self, vocab_size, embedding_dim, hidden_dim, target_size, embedding_mat, start_tag, end_tag, tag_to_ix, device='cpu'):
        super(RNN_model, self).__init__()
        self.hidden_dim = hidden_dim
        self.word_embeddings = nn.Embedding.from_pretrained(torch.FloatTensor(embedding_mat)).to(device)
        self.rnn = nn.RNN(embedding_dim, hidden_dim).to(device)
        self.hidden2tag = nn.Linear(hidden_dim, target_size).to(device)
        self.start_tag = start_tag
        self.end_tag = end_tag
        self.tag_to_ix = tag_to_ix
        self.target_size = target_size
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.vocab_size = vocab_size

    def forward(self, sentence):
        embeds = self.word_embeddings(sentence)
        rnn_out, _ = self.rnn(embeds.view(len(sentence), 1, -1))
        tag_space = self.hidden2tag(rnn_out.view(len(sentence), -1))
        tag_scores = nn.functional.log_softmax(tag_space, dim=1)
        return tag_scores


#### LSTM Model

In [9]:
class LSTM_model(nn.Module):

    def __init__(self, vocab_size, embedding_dim, hidden_dim, target_size, embedding_mat, start_tag, end_tag, tag_to_ix, device='cpu'):
        super(LSTM_model, self).__init__()
        self.hidden_dim = hidden_dim
        self.word_embeddings = nn.Embedding.from_pretrained(torch.FloatTensor(embedding_mat)).to(device)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim).to(device)
        self.hidden2tag = nn.Linear(hidden_dim, target_size).to(device)
        self.start_tag = start_tag
        self.end_tag = end_tag
        self.tag_to_ix = tag_to_ix
        self.target_size = target_size
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.vocab_size = vocab_size

    def forward(self, sentence):
        embeds = self.word_embeddings(sentence)
        lstm_out, _ = self.lstm(embeds.view(len(sentence), 1, -1))
        tag_space = self.hidden2tag(lstm_out.view(len(sentence), -1))
        tag_scores = nn.functional.log_softmax(tag_space, dim=1)
        return tag_scores

#### GRU model

In [10]:
class GRU_model(nn.Module):

    def __init__(self, vocab_size, embedding_dim, hidden_dim, target_size, embedding_mat, start_tag, end_tag, tag_to_ix, device='cpu'):
        super(GRU_model, self).__init__()
        self.hidden_dim = hidden_dim
        self.word_embeddings = nn.Embedding.from_pretrained(torch.FloatTensor(embedding_mat)).to(device)
        self.gru = nn.GRU(embedding_dim, hidden_dim).to(device)
        self.hidden2tag = nn.Linear(hidden_dim, target_size).to(device)
        self.start_tag = start_tag
        self.end_tag = end_tag
        self.tag_to_ix = tag_to_ix
        self.target_size = target_size
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.vocab_size = vocab_size

    def forward(self, sentence):
        embeds = self.word_embeddings(sentence)
        gru_out, _ = self.gru(embeds.view(len(sentence), 1, -1))
        tag_space = self.hidden2tag(gru_out.view(len(sentence), -1))
        tag_scores = nn.functional.log_softmax(tag_space, dim=1)
        return tag_scores

#### Embedding mats

In [11]:
bert_embedding_mat = pickle.load(open('../Extracted Word Embeddings/bert_embedding_mat.pkl', 'rb'))
word2vec_embedding_mat = pickle.load(open('../Extracted Word Embeddings/word2vec_embedding_mat.pkl', 'rb'))
glove_embedding_mat = pickle.load(open('../Extracted Word Embeddings/glove_embedding_mat.pkl', 'rb'))

#### Glove + RNN 

In [12]:
rnn_model = RNN_model(len(word_to_idx), 300, 256, len(tag_to_ix), glove_embedding_mat, tag_to_ix['START_TAG'], tag_to_ix['END_TAG'], tag_to_ix, device)
loss_function = nn.NLLLoss()
optimizer = optim.SGD(rnn_model.parameters(), lr=0.01)

epochs = 10
train_loss = []
val_loss = []
train_f1 = []
val_f1 = []


for epoch in range(epochs):
    print("Epoch: ", epoch)
    preds = []
    actuals = []
    train_loss_temp = 0
    val_loss_temp = 0
    rnn_model.train()
    for case in tqdm(train_data):
        sentence = prepare_sequence(train_data[case]['text'].split(' '), word_to_idx)
        targets = prepare_sequence(train_data[case]['labels'], tag_to_ix)
        rnn_model.zero_grad()
        tag_scores = rnn_model(sentence)
        loss = loss_function(tag_scores, targets)
        loss.backward()
        optimizer.step()
        train_loss_temp += loss.item()
        preds.extend(torch.argmax(tag_scores, dim=1).detach().numpy().tolist())
        actuals.extend(targets.detach().numpy().tolist())
    train_f1_temp = f1_score(actuals, preds, average='macro')
    train_loss.append(train_loss_temp/len(train_data))
    train_f1.append(train_f1_temp)
    
    with torch.no_grad():
        rnn_model.eval()
        preds = []
        actuals = []
        for case in tqdm(val_data):
            sentence = prepare_sequence(val_data[case]['text'].split(' '), word_to_idx)
            targets = prepare_sequence(val_data[case]['labels'], tag_to_ix)
            tag_scores = rnn_model(sentence)
            loss = loss_function(tag_scores, targets)
            val_loss_temp += loss.item()
            preds.extend(torch.argmax(tag_scores, dim=1).detach().numpy().tolist())
            actuals.extend(targets.detach().numpy().tolist())
        val_f1_temp = f1_score(actuals, preds, average='macro')
        val_loss.append(val_loss_temp/len(val_data))
        val_f1.append(val_f1_temp)
    print(f'Train Loss: {train_loss[-1]}, Val Loss: {val_loss[-1]}, Train F1: {train_f1[-1]}, Val F1: {val_f1[-1]}')

Epoch:  0


100%|██████████| 906/906 [00:04<00:00, 198.24it/s]
100%|██████████| 219/219 [00:00<00:00, 954.03it/s] 


Train Loss: 0.5906751319996305, Val Loss: 0.48600818461751283, Train F1: 0.24792421500113568, Val F1: 0.4936049709652359
Epoch:  1


100%|██████████| 906/906 [00:04<00:00, 190.52it/s]
100%|██████████| 219/219 [00:00<00:00, 1280.75it/s]


Train Loss: 0.4745467263243056, Val Loss: 0.4407622191255495, Train F1: 0.5159481994997834, Val F1: 0.5871878967190712
Epoch:  2


100%|██████████| 906/906 [00:05<00:00, 180.74it/s]
100%|██████████| 219/219 [00:00<00:00, 1074.65it/s]


Train Loss: 0.436975707552883, Val Loss: 0.42309416020841906, Train F1: 0.5771188530310494, Val F1: 0.6123415450966992
Epoch:  3


100%|██████████| 906/906 [00:04<00:00, 222.60it/s]
100%|██████████| 219/219 [00:00<00:00, 1108.04it/s]


Train Loss: 0.4137342054947849, Val Loss: 0.4148335832439057, Train F1: 0.6144161160651196, Val F1: 0.624637177538558
Epoch:  4


100%|██████████| 906/906 [00:04<00:00, 221.03it/s]
100%|██████████| 219/219 [00:00<00:00, 787.95it/s]


Train Loss: 0.3965740699452609, Val Loss: 0.41056347464861936, Train F1: 0.6410232287690835, Val F1: 0.6385879376026248
Epoch:  5


100%|██████████| 906/906 [00:04<00:00, 220.48it/s]
100%|██████████| 219/219 [00:00<00:00, 742.66it/s]


Train Loss: 0.3829378330870377, Val Loss: 0.40833529379201805, Train F1: 0.6590469017991157, Val F1: 0.6427277125883862
Epoch:  6


100%|██████████| 906/906 [00:04<00:00, 197.85it/s]
100%|██████████| 219/219 [00:00<00:00, 438.80it/s]


Train Loss: 0.3715327756960463, Val Loss: 0.4072454960412903, Train F1: 0.6722689250356608, Val F1: 0.6476423403885582
Epoch:  7


100%|██████████| 906/906 [00:05<00:00, 168.18it/s]
100%|██████████| 219/219 [00:00<00:00, 913.68it/s]


Train Loss: 0.36156520150420945, Val Loss: 0.40679565660621475, Train F1: 0.6840069156304057, Val F1: 0.6536894304246933
Epoch:  8


100%|██████████| 906/906 [00:04<00:00, 202.18it/s]
100%|██████████| 219/219 [00:00<00:00, 864.15it/s]


Train Loss: 0.3525366788693019, Val Loss: 0.406747530771581, Train F1: 0.6943506197311087, Val F1: 0.6561248612243575
Epoch:  9


100%|██████████| 906/906 [00:04<00:00, 186.30it/s]
100%|██████████| 219/219 [00:00<00:00, 683.80it/s]

Train Loss: 0.3441290736494475, Val Loss: 0.40703572426416557, Train F1: 0.7053258764128988, Val F1: 0.6564892657316198





In [13]:
torch.save(rnn_model, 'Non Trainable Embeddings/Glove+RNN/model.pt')
torch.save(rnn_model, '../../Deliverables/Task 2/Saved Models/t2_RNN_Glove.pt')
pickle.dump(train_loss, open('Non Trainable Embeddings/Glove+RNN/train_loss.pkl', 'wb'))
pickle.dump(val_loss, open('Non Trainable Embeddings/Glove+RNN/val_loss.pkl', 'wb'))
pickle.dump(train_f1, open('Non Trainable Embeddings/Glove+RNN/train_f1.pkl', 'wb'))
pickle.dump(val_f1, open('Non Trainable Embeddings/Glove+RNN/val_f1.pkl', 'wb'))

#### Word2vec + RNN

In [14]:
rnn_model = RNN_model(len(word_to_idx), 300, 256, len(tag_to_ix), word2vec_embedding_mat, tag_to_ix['START_TAG'], tag_to_ix['END_TAG'], tag_to_ix, device)
loss_function = nn.NLLLoss()
optimizer = optim.SGD(rnn_model.parameters(), lr=0.01)

epochs = 10
train_loss = []
val_loss = []
train_f1 = []
val_f1 = []

for epoch in range(epochs):
    print("Epoch: ", epoch)
    preds = []
    actuals = []
    train_loss_temp = 0
    val_loss_temp = 0
    rnn_model.train()
    for case in tqdm(train_data):
        sentence = prepare_sequence(train_data[case]['text'].split(' '), word_to_idx)
        targets = prepare_sequence(train_data[case]['labels'], tag_to_ix)
        rnn_model.zero_grad()
        tag_scores = rnn_model(sentence)
        loss = loss_function(tag_scores, targets)
        loss.backward()
        optimizer.step()
        train_loss_temp += loss.item()
        preds.extend(torch.argmax(tag_scores, dim=1).detach().numpy().tolist())
        actuals.extend(targets.detach().numpy().tolist())
    train_f1_temp = f1_score(actuals, preds, average='macro')
    train_loss.append(train_loss_temp/len(train_data))
    train_f1.append(train_f1_temp)
    
    with torch.no_grad():
        rnn_model.eval()
        preds = []
        actuals = []
        for case in tqdm(val_data):
            sentence = prepare_sequence(val_data[case]['text'].split(' '), word_to_idx)
            targets = prepare_sequence(val_data[case]['labels'], tag_to_ix)
            tag_scores = rnn_model(sentence)
            loss = loss_function(tag_scores, targets)
            val_loss_temp += loss.item()
            preds.extend(torch.argmax(tag_scores, dim=1).detach().numpy().tolist())
            actuals.extend(targets.detach().numpy().tolist())
        val_f1_temp = f1_score(actuals, preds, average='macro')
        val_loss.append(val_loss_temp/len(val_data))
        val_f1.append(val_f1_temp)
    print(f'Train Loss: {train_loss[-1]}, Val Loss: {val_loss[-1]}, Train F1: {train_f1[-1]}, Val F1: {val_f1[-1]}')

Epoch:  0


  1%|▏         | 12/906 [00:00<00:07, 119.09it/s]

100%|██████████| 906/906 [00:08<00:00, 104.03it/s]
100%|██████████| 219/219 [00:00<00:00, 682.58it/s]


Train Loss: 0.6518684866412586, Val Loss: 0.5325831061901023, Train F1: 0.19861567460388913, Val F1: 0.41638778399959125
Epoch:  1


100%|██████████| 906/906 [00:06<00:00, 131.48it/s]
100%|██████████| 219/219 [00:00<00:00, 652.28it/s]


Train Loss: 0.49532461168026554, Val Loss: 0.43930739069093855, Train F1: 0.452669848606394, Val F1: 0.5175740333635072
Epoch:  2


100%|██████████| 906/906 [00:06<00:00, 148.56it/s]
100%|██████████| 219/219 [00:00<00:00, 542.58it/s]


Train Loss: 0.4309496403266789, Val Loss: 0.40018840290640045, Train F1: 0.5444306655742012, Val F1: 0.6159484980146271
Epoch:  3


100%|██████████| 906/906 [00:07<00:00, 117.16it/s]
100%|██████████| 219/219 [00:00<00:00, 353.10it/s]


Train Loss: 0.3988724973866876, Val Loss: 0.3824650713808188, Train F1: 0.608791918149919, Val F1: 0.6503085926021616
Epoch:  4


100%|██████████| 906/906 [00:09<00:00, 97.58it/s] 
100%|██████████| 219/219 [00:00<00:00, 224.00it/s]


Train Loss: 0.3787854417385513, Val Loss: 0.3729464514989015, Train F1: 0.6400835434181352, Val F1: 0.6667882869727687
Epoch:  5


100%|██████████| 906/906 [00:08<00:00, 112.12it/s]
100%|██████████| 219/219 [00:00<00:00, 479.85it/s]


Train Loss: 0.3638512512738868, Val Loss: 0.36728311791063445, Train F1: 0.6583324549794562, Val F1: 0.675230393169353
Epoch:  6


100%|██████████| 906/906 [00:05<00:00, 158.90it/s]
100%|██████████| 219/219 [00:00<00:00, 974.10it/s] 


Train Loss: 0.35170825776822917, Val Loss: 0.36390301910097195, Train F1: 0.6744333863491455, Val F1: 0.6811537881245284
Epoch:  7


100%|██████████| 906/906 [00:04<00:00, 214.81it/s]
100%|██████████| 219/219 [00:00<00:00, 1111.80it/s]


Train Loss: 0.34138174529781534, Val Loss: 0.36205828336139795, Train F1: 0.6879242306632719, Val F1: 0.6802942154697792
Epoch:  8


100%|██████████| 906/906 [00:05<00:00, 152.63it/s]
100%|██████████| 219/219 [00:00<00:00, 1027.20it/s]


Train Loss: 0.3323538691035696, Val Loss: 0.3613403914283672, Train F1: 0.7002416810095701, Val F1: 0.6863871510307821
Epoch:  9


100%|██████████| 906/906 [00:05<00:00, 154.06it/s]
100%|██████████| 219/219 [00:00<00:00, 1000.80it/s]

Train Loss: 0.3242399464700593, Val Loss: 0.3615543705381487, Train F1: 0.7078857935966575, Val F1: 0.6902368501743693





In [15]:
torch.save(rnn_model, 'Non Trainable Embeddings/Word2vec+RNN/model.pt')
torch.save(rnn_model, '../../Deliverables/Task 2/Saved Models/t2_RNN_Word2Vec.pt')
pickle.dump(train_loss, open('Non Trainable Embeddings/Word2vec+RNN/train_loss.pkl', 'wb'))
pickle.dump(val_loss, open('Non Trainable Embeddings/Word2vec+RNN/val_loss.pkl', 'wb'))
pickle.dump(train_f1, open('Non Trainable Embeddings/Word2vec+RNN/train_f1.pkl', 'wb'))
pickle.dump(val_f1, open('Non Trainable Embeddings/Word2vec+RNN/val_f1.pkl', 'wb'))

#### Bert + RNN

In [16]:
rnn_model = RNN_model(len(word_to_idx), 768, 512, len(tag_to_ix), bert_embedding_mat, tag_to_ix['START_TAG'], tag_to_ix['END_TAG'], tag_to_ix, device)
loss_function = nn.NLLLoss()
optimizer = optim.SGD(rnn_model.parameters(), lr=0.01)

epochs = 10
train_loss = []
val_loss = []
train_f1 = []
val_f1 = []

for epoch in range(epochs):
    print("Epoch: ", epoch)
    preds = []
    actuals = []
    train_loss_temp = 0
    val_loss_temp = 0
    rnn_model.train()
    for case in tqdm(train_data):
        sentence = prepare_sequence(train_data[case]['text'].split(' '), word_to_idx)
        targets = prepare_sequence(train_data[case]['labels'], tag_to_ix)
        rnn_model.zero_grad()
        tag_scores = rnn_model(sentence)
        loss = loss_function(tag_scores, targets)
        loss.backward()
        optimizer.step()
        train_loss_temp += loss.item()
        preds.extend(torch.argmax(tag_scores, dim=1).detach().numpy().tolist())
        actuals.extend(targets.detach().numpy().tolist())
    train_f1_temp = f1_score(actuals, preds, average='macro')
    train_loss.append(train_loss_temp/len(train_data))
    train_f1.append(train_f1_temp)
    
    with torch.no_grad():
        rnn_model.eval()
        preds = []
        actuals = []
        for case in tqdm(val_data):
            sentence = prepare_sequence(val_data[case]['text'].split(' '), word_to_idx)
            targets = prepare_sequence(val_data[case]['labels'], tag_to_ix)
            tag_scores = rnn_model(sentence)
            loss = loss_function(tag_scores, targets)
            val_loss_temp += loss.item()
            preds.extend(torch.argmax(tag_scores, dim=1).detach().numpy().tolist())
            actuals.extend(targets.detach().numpy().tolist())
        val_f1_temp = f1_score(actuals, preds, average='macro')
        val_loss.append(val_loss_temp/len(val_data))
        val_f1.append(val_f1_temp)
    print(f'Train Loss: {train_loss[-1]}, Val Loss: {val_loss[-1]}, Train F1: {train_f1[-1]}, Val F1: {val_f1[-1]}')

Epoch:  0


  0%|          | 0/906 [00:00<?, ?it/s]

100%|██████████| 906/906 [00:08<00:00, 111.15it/s]
100%|██████████| 219/219 [00:00<00:00, 487.34it/s]


Train Loss: 0.5115846748531654, Val Loss: 0.4185662794875228, Train F1: 0.5117648202263461, Val F1: 0.6008823407211678
Epoch:  1


100%|██████████| 906/906 [00:09<00:00, 96.82it/s] 
100%|██████████| 219/219 [00:00<00:00, 515.95it/s]


Train Loss: 0.41822173813189367, Val Loss: 0.3837210721076896, Train F1: 0.6175650847875512, Val F1: 0.6424363513573539
Epoch:  2


100%|██████████| 906/906 [00:08<00:00, 109.69it/s]
100%|██████████| 219/219 [00:00<00:00, 744.09it/s]


Train Loss: 0.3862682700519004, Val Loss: 0.36664632456079466, Train F1: 0.6535374801888602, Val F1: 0.680609009576704
Epoch:  3


100%|██████████| 906/906 [00:06<00:00, 131.70it/s]
100%|██████████| 219/219 [00:00<00:00, 587.04it/s]


Train Loss: 0.36434132257110496, Val Loss: 0.3568663612756555, Train F1: 0.6751026246309463, Val F1: 0.6834288317269515
Epoch:  4


100%|██████████| 906/906 [00:07<00:00, 128.89it/s]
100%|██████████| 219/219 [00:00<00:00, 655.30it/s]


Train Loss: 0.3467783506663627, Val Loss: 0.3509362359195267, Train F1: 0.6908299900868101, Val F1: 0.6872757093584769
Epoch:  5


100%|██████████| 906/906 [00:08<00:00, 112.69it/s]
100%|██████████| 219/219 [00:00<00:00, 632.33it/s]


Train Loss: 0.33156428097543733, Val Loss: 0.34741868276029964, Train F1: 0.7015191680757655, Val F1: 0.6896188751891584
Epoch:  6


100%|██████████| 906/906 [00:07<00:00, 125.56it/s]
100%|██████████| 219/219 [00:00<00:00, 702.12it/s]


Train Loss: 0.3177414035083311, Val Loss: 0.3456251445358202, Train F1: 0.7146268246536095, Val F1: 0.6967714249165936
Epoch:  7


100%|██████████| 906/906 [00:08<00:00, 106.49it/s]
100%|██████████| 219/219 [00:00<00:00, 305.99it/s]


Train Loss: 0.3047435293775915, Val Loss: 0.3452359177172184, Train F1: 0.7284251571421084, Val F1: 0.6933911384251172
Epoch:  8


100%|██████████| 906/906 [00:07<00:00, 120.47it/s]
100%|██████████| 219/219 [00:00<00:00, 691.34it/s]


Train Loss: 0.292218828347674, Val Loss: 0.3461604512479479, Train F1: 0.7390190525020355, Val F1: 0.6932770945008352
Epoch:  9


100%|██████████| 906/906 [00:07<00:00, 113.68it/s]
100%|██████████| 219/219 [00:00<00:00, 459.05it/s]


Train Loss: 0.27994560464501184, Val Loss: 0.34846479016181814, Train F1: 0.7498621914033691, Val F1: 0.690814526254795


In [17]:
torch.save(rnn_model, 'Non Trainable Embeddings/Bert+RNN/model.pt')
torch.save(rnn_model, '../../Deliverables/Task 2/Saved Models/t2_RNN_Bert.pt')
pickle.dump(train_loss, open('Non Trainable Embeddings/Bert+RNN/train_loss.pkl', 'wb'))
pickle.dump(val_loss, open('Non Trainable Embeddings/Bert+RNN/val_loss.pkl', 'wb'))
pickle.dump(train_f1, open('Non Trainable Embeddings/Bert+RNN/train_f1.pkl', 'wb'))
pickle.dump(val_f1, open('Non Trainable Embeddings/Bert+RNN/val_f1.pkl', 'wb'))

#### Glove + GRU

In [18]:
gru_model = GRU_model(len(word_to_idx), 300, 256, len(tag_to_ix), glove_embedding_mat, tag_to_ix['START_TAG'], tag_to_ix['END_TAG'], tag_to_ix, device)
loss_function = nn.NLLLoss()
optimizer = optim.SGD(gru_model.parameters(), lr=0.01)

epochs = 10
train_loss = []
val_loss = []
train_f1 = []
val_f1 = []

for epoch in range(epochs):
    print("Epoch: ", epoch)
    preds = []
    actuals = []
    train_loss_temp = 0
    val_loss_temp = 0
    rnn_model.train()
    for case in tqdm(train_data):
        sentence = prepare_sequence(train_data[case]['text'].split(' '), word_to_idx)
        targets = prepare_sequence(train_data[case]['labels'], tag_to_ix)
        rnn_model.zero_grad()
        tag_scores = rnn_model(sentence)
        loss = loss_function(tag_scores, targets)
        loss.backward()
        optimizer.step()
        train_loss_temp += loss.item()
        preds.extend(torch.argmax(tag_scores, dim=1).detach().numpy().tolist())
        actuals.extend(targets.detach().numpy().tolist())
    train_f1_temp = f1_score(actuals, preds, average='macro')
    train_loss.append(train_loss_temp/len(train_data))
    train_f1.append(train_f1_temp)
    
    with torch.no_grad():
        rnn_model.eval()
        preds = []
        actuals = []
        for case in tqdm(val_data):
            sentence = prepare_sequence(val_data[case]['text'].split(' '), word_to_idx)
            targets = prepare_sequence(val_data[case]['labels'], tag_to_ix)
            tag_scores = rnn_model(sentence)
            loss = loss_function(tag_scores, targets)
            val_loss_temp += loss.item()
            preds.extend(torch.argmax(tag_scores, dim=1).detach().numpy().tolist())
            actuals.extend(targets.detach().numpy().tolist())
        val_f1_temp = f1_score(actuals, preds, average='macro')
        val_loss.append(val_loss_temp/len(val_data))
        val_f1.append(val_f1_temp)
    print(f'Train Loss: {train_loss[-1]}, Val Loss: {val_loss[-1]}, Train F1: {train_f1[-1]}, Val F1: {val_f1[-1]}')

Epoch:  0


  0%|          | 0/906 [00:00<?, ?it/s]

100%|██████████| 906/906 [00:10<00:00, 82.67it/s] 
100%|██████████| 219/219 [00:00<00:00, 490.23it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  1


100%|██████████| 906/906 [00:07<00:00, 120.72it/s]
100%|██████████| 219/219 [00:00<00:00, 558.69it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  2


100%|██████████| 906/906 [00:10<00:00, 90.30it/s] 
100%|██████████| 219/219 [00:00<00:00, 326.60it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  3


100%|██████████| 906/906 [00:09<00:00, 91.25it/s] 
100%|██████████| 219/219 [00:00<00:00, 510.48it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  4


100%|██████████| 906/906 [00:09<00:00, 100.05it/s]
100%|██████████| 219/219 [00:00<00:00, 616.48it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  5


100%|██████████| 906/906 [00:07<00:00, 126.35it/s]
100%|██████████| 219/219 [00:00<00:00, 634.30it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  6


100%|██████████| 906/906 [00:06<00:00, 135.10it/s]
100%|██████████| 219/219 [00:00<00:00, 626.18it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  7


100%|██████████| 906/906 [00:09<00:00, 98.95it/s] 
100%|██████████| 219/219 [00:00<00:00, 464.51it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  8


100%|██████████| 906/906 [00:09<00:00, 93.32it/s] 
100%|██████████| 219/219 [00:00<00:00, 309.11it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  9


100%|██████████| 906/906 [00:12<00:00, 69.96it/s]
100%|██████████| 219/219 [00:00<00:00, 259.51it/s]

Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795





In [19]:
torch.save(gru_model, 'Non Trainable Embeddings/Glove+GRU/model.pt')
torch.save(gru_model, '../../Deliverables/Task 2/Saved Models/t2_GRU_Glove.pt')
pickle.dump(train_loss, open('Non Trainable Embeddings/Glove+GRU/train_loss.pkl', 'wb'))
pickle.dump(val_loss, open('Non Trainable Embeddings/Glove+GRU/val_loss.pkl', 'wb'))
pickle.dump(train_f1, open('Non Trainable Embeddings/Glove+GRU/train_f1.pkl', 'wb'))
pickle.dump(val_f1, open('Non Trainable Embeddings/Glove+GRU/val_f1.pkl', 'wb'))

#### Word2vec + GRU

In [20]:
gru_model = GRU_model(len(word_to_idx), 300, 256, len(tag_to_ix), word2vec_embedding_mat, tag_to_ix['START_TAG'], tag_to_ix['END_TAG'], tag_to_ix, device)
loss_function = nn.NLLLoss()
optimizer = optim.SGD(gru_model.parameters(), lr=0.01)

epochs = 10
train_loss = []
val_loss = []
train_f1 = []
val_f1 = []

for epoch in range(epochs):
    print("Epoch: ", epoch)
    preds = []
    actuals = []
    train_loss_temp = 0
    val_loss_temp = 0
    rnn_model.train()
    for case in tqdm(train_data):
        sentence = prepare_sequence(train_data[case]['text'].split(' '), word_to_idx)
        targets = prepare_sequence(train_data[case]['labels'], tag_to_ix)
        rnn_model.zero_grad()
        tag_scores = rnn_model(sentence)
        loss = loss_function(tag_scores, targets)
        loss.backward()
        optimizer.step()
        train_loss_temp += loss.item()
        preds.extend(torch.argmax(tag_scores, dim=1).detach().numpy().tolist())
        actuals.extend(targets.detach().numpy().tolist())
    train_f1_temp = f1_score(actuals, preds, average='macro')
    train_loss.append(train_loss_temp/len(train_data))
    train_f1.append(train_f1_temp)
    
    with torch.no_grad():
        rnn_model.eval()
        preds = []
        actuals = []
        for case in tqdm(val_data):
            sentence = prepare_sequence(val_data[case]['text'].split(' '), word_to_idx)
            targets = prepare_sequence(val_data[case]['labels'], tag_to_ix)
            tag_scores = rnn_model(sentence)
            loss = loss_function(tag_scores, targets)
            val_loss_temp += loss.item()
            preds.extend(torch.argmax(tag_scores, dim=1).detach().numpy().tolist())
            actuals.extend(targets.detach().numpy().tolist())
        val_f1_temp = f1_score(actuals, preds, average='macro')
        val_loss.append(val_loss_temp/len(val_data))
        val_f1.append(val_f1_temp)
    print(f'Train Loss: {train_loss[-1]}, Val Loss: {val_loss[-1]}, Train F1: {train_f1[-1]}, Val F1: {val_f1[-1]}')

Epoch:  0


  1%|          | 6/906 [00:00<00:17, 52.46it/s]

100%|██████████| 906/906 [00:08<00:00, 109.15it/s]
100%|██████████| 219/219 [00:00<00:00, 791.19it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  1


100%|██████████| 906/906 [00:06<00:00, 132.37it/s]
100%|██████████| 219/219 [00:00<00:00, 395.91it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  2


100%|██████████| 906/906 [00:07<00:00, 124.07it/s]
100%|██████████| 219/219 [00:00<00:00, 659.90it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  3


100%|██████████| 906/906 [00:06<00:00, 146.05it/s]
100%|██████████| 219/219 [00:00<00:00, 663.08it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  4


100%|██████████| 906/906 [00:06<00:00, 131.21it/s]
100%|██████████| 219/219 [00:00<00:00, 626.01it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  5


100%|██████████| 906/906 [00:09<00:00, 97.62it/s] 
100%|██████████| 219/219 [00:00<00:00, 604.31it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  6


100%|██████████| 906/906 [00:09<00:00, 95.10it/s] 
100%|██████████| 219/219 [00:00<00:00, 466.13it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  7


100%|██████████| 906/906 [00:09<00:00, 96.33it/s] 
100%|██████████| 219/219 [00:00<00:00, 539.02it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  8


100%|██████████| 906/906 [00:07<00:00, 122.57it/s]
100%|██████████| 219/219 [00:00<00:00, 610.90it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  9


100%|██████████| 906/906 [00:09<00:00, 94.83it/s] 
100%|██████████| 219/219 [00:00<00:00, 456.71it/s]

Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795





In [21]:
torch.save(gru_model, 'Non Trainable Embeddings/Word2vec+GRU/model.pt')
torch.save(gru_model, '../../Deliverables/Task 2/Saved Models/t2_GRU_Word2Vec.pt')
pickle.dump(train_loss, open('Non Trainable Embeddings/Word2vec+GRU/train_loss.pkl', 'wb'))
pickle.dump(val_loss, open('Non Trainable Embeddings/Word2vec+GRU/val_loss.pkl', 'wb'))
pickle.dump(train_f1, open('Non Trainable Embeddings/Word2vec+GRU/train_f1.pkl', 'wb'))
pickle.dump(val_f1, open('Non Trainable Embeddings/Word2vec+GRU/val_f1.pkl', 'wb'))

#### Bert + GRU

In [22]:
gru_model = GRU_model(len(word_to_idx), 768, 512, len(tag_to_ix), bert_embedding_mat, tag_to_ix['START_TAG'], tag_to_ix['END_TAG'], tag_to_ix, device)
loss_function = nn.NLLLoss()
optimizer = optim.SGD(gru_model.parameters(), lr=0.01)

epochs = 10
train_loss = []
val_loss = []
train_f1 = []
val_f1 = []

for epoch in range(epochs):
    print("Epoch: ", epoch)
    preds = []
    actuals = []
    train_loss_temp = 0
    val_loss_temp = 0
    rnn_model.train()
    for case in tqdm(train_data):
        sentence = prepare_sequence(train_data[case]['text'].split(' '), word_to_idx)
        targets = prepare_sequence(train_data[case]['labels'], tag_to_ix)
        rnn_model.zero_grad()
        tag_scores = rnn_model(sentence)
        loss = loss_function(tag_scores, targets)
        loss.backward()
        optimizer.step()
        train_loss_temp += loss.item()
        preds.extend(torch.argmax(tag_scores, dim=1).detach().numpy().tolist())
        actuals.extend(targets.detach().numpy().tolist())
    train_f1_temp = f1_score(actuals, preds, average='macro')
    train_loss.append(train_loss_temp/len(train_data))
    train_f1.append(train_f1_temp)
    
    with torch.no_grad():
        rnn_model.eval()
        preds = []
        actuals = []
        for case in tqdm(val_data):
            sentence = prepare_sequence(val_data[case]['text'].split(' '), word_to_idx)
            targets = prepare_sequence(val_data[case]['labels'], tag_to_ix)
            tag_scores = rnn_model(sentence)
            loss = loss_function(tag_scores, targets)
            val_loss_temp += loss.item()
            preds.extend(torch.argmax(tag_scores, dim=1).detach().numpy().tolist())
            actuals.extend(targets.detach().numpy().tolist())
        val_f1_temp = f1_score(actuals, preds, average='macro')
        val_loss.append(val_loss_temp/len(val_data))
        val_f1.append(val_f1_temp)
    print(f'Train Loss: {train_loss[-1]}, Val Loss: {val_loss[-1]}, Train F1: {train_f1[-1]}, Val F1: {val_f1[-1]}')

Epoch:  0


100%|██████████| 906/906 [00:10<00:00, 85.05it/s] 
100%|██████████| 219/219 [00:00<00:00, 445.61it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  1


100%|██████████| 906/906 [00:07<00:00, 116.86it/s]
100%|██████████| 219/219 [00:00<00:00, 561.95it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  2


100%|██████████| 906/906 [00:07<00:00, 115.45it/s]
100%|██████████| 219/219 [00:00<00:00, 582.05it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  3


100%|██████████| 906/906 [00:09<00:00, 92.66it/s] 
100%|██████████| 219/219 [00:00<00:00, 445.40it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  4


100%|██████████| 906/906 [00:08<00:00, 106.84it/s]
100%|██████████| 219/219 [00:00<00:00, 469.27it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  5


100%|██████████| 906/906 [00:07<00:00, 116.05it/s]
100%|██████████| 219/219 [00:00<00:00, 493.01it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  6


100%|██████████| 906/906 [00:09<00:00, 94.71it/s] 
100%|██████████| 219/219 [00:00<00:00, 568.35it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  7


100%|██████████| 906/906 [00:07<00:00, 128.71it/s]
100%|██████████| 219/219 [00:00<00:00, 595.86it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  8


100%|██████████| 906/906 [00:07<00:00, 122.22it/s]
100%|██████████| 219/219 [00:00<00:00, 594.98it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  9


100%|██████████| 906/906 [00:08<00:00, 110.21it/s]
100%|██████████| 219/219 [00:00<00:00, 296.18it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795


In [23]:
torch.save(gru_model, 'Non Trainable Embeddings/Bert+GRU/model.pt')
torch.save(gru_model, '../../Deliverables/Task 2/Saved Models/t2_GRU_Bert.pt')
pickle.dump(train_loss, open('Non Trainable Embeddings/Bert+GRU/train_loss.pkl', 'wb'))
pickle.dump(val_loss, open('Non Trainable Embeddings/Bert+GRU/val_loss.pkl', 'wb'))
pickle.dump(train_f1, open('Non Trainable Embeddings/Bert+GRU/train_f1.pkl', 'wb'))
pickle.dump(val_f1, open('Non Trainable Embeddings/Bert+GRU/val_f1.pkl', 'wb'))

#### Glove + LSTM

In [24]:
lstm_model = LSTM_model(len(word_to_idx), 300, 256, len(tag_to_ix), glove_embedding_mat, tag_to_ix['START_TAG'], tag_to_ix['END_TAG'], tag_to_ix, device)
loss_function = nn.NLLLoss()
optimizer = optim.SGD(lstm_model.parameters(), lr=0.01)

epochs = 10
train_loss = []
val_loss = []
train_f1 = []
val_f1 = []

for epoch in range(epochs):
    print("Epoch: ", epoch)
    preds = []
    actuals = []
    train_loss_temp = 0
    val_loss_temp = 0
    rnn_model.train()
    for case in tqdm(train_data):
        sentence = prepare_sequence(train_data[case]['text'].split(' '), word_to_idx)
        targets = prepare_sequence(train_data[case]['labels'], tag_to_ix)
        rnn_model.zero_grad()
        tag_scores = rnn_model(sentence)
        loss = loss_function(tag_scores, targets)
        loss.backward()
        optimizer.step()
        train_loss_temp += loss.item()
        preds.extend(torch.argmax(tag_scores, dim=1).detach().numpy().tolist())
        actuals.extend(targets.detach().numpy().tolist())
    train_f1_temp = f1_score(actuals, preds, average='macro')
    train_loss.append(train_loss_temp/len(train_data))
    train_f1.append(train_f1_temp)
    
    with torch.no_grad():
        rnn_model.eval()
        preds = []
        actuals = []
        for case in tqdm(val_data):
            sentence = prepare_sequence(val_data[case]['text'].split(' '), word_to_idx)
            targets = prepare_sequence(val_data[case]['labels'], tag_to_ix)
            tag_scores = rnn_model(sentence)
            loss = loss_function(tag_scores, targets)
            val_loss_temp += loss.item()
            preds.extend(torch.argmax(tag_scores, dim=1).detach().numpy().tolist())
            actuals.extend(targets.detach().numpy().tolist())
        val_f1_temp = f1_score(actuals, preds, average='macro')
        val_loss.append(val_loss_temp/len(val_data))
        val_f1.append(val_f1_temp)
    print(f'Train Loss: {train_loss[-1]}, Val Loss: {val_loss[-1]}, Train F1: {train_f1[-1]}, Val F1: {val_f1[-1]}')

Epoch:  0


  2%|▏         | 14/906 [00:00<00:15, 58.81it/s]

100%|██████████| 906/906 [00:09<00:00, 91.77it/s] 
100%|██████████| 219/219 [00:00<00:00, 621.45it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  1


100%|██████████| 906/906 [00:09<00:00, 98.60it/s] 
100%|██████████| 219/219 [00:00<00:00, 547.95it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  2


100%|██████████| 906/906 [00:10<00:00, 84.30it/s] 
100%|██████████| 219/219 [00:00<00:00, 375.44it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  3


100%|██████████| 906/906 [00:10<00:00, 89.64it/s] 
100%|██████████| 219/219 [00:00<00:00, 530.05it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  4


100%|██████████| 906/906 [00:09<00:00, 98.75it/s] 
100%|██████████| 219/219 [00:00<00:00, 340.71it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  5


100%|██████████| 906/906 [00:10<00:00, 85.99it/s] 
100%|██████████| 219/219 [00:00<00:00, 630.51it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  6


100%|██████████| 906/906 [00:07<00:00, 119.99it/s]
100%|██████████| 219/219 [00:00<00:00, 419.86it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  7


100%|██████████| 906/906 [00:09<00:00, 100.03it/s]
100%|██████████| 219/219 [00:00<00:00, 668.22it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  8


100%|██████████| 906/906 [00:07<00:00, 125.21it/s]
100%|██████████| 219/219 [00:00<00:00, 548.56it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  9


100%|██████████| 906/906 [00:09<00:00, 96.87it/s] 
100%|██████████| 219/219 [00:00<00:00, 659.07it/s]

Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795





In [25]:
torch.save(lstm_model, 'Non Trainable Embeddings/Glove+LSTM/model.pt')
torch.save(lstm_model, '../../Deliverables/Task 2/Saved Models/t2_LSTM_Glove.pt')
pickle.dump(train_loss, open('Non Trainable Embeddings/Glove+LSTM/train_loss.pkl', 'wb'))
pickle.dump(val_loss, open('Non Trainable Embeddings/Glove+LSTM/val_loss.pkl', 'wb'))
pickle.dump(train_f1, open('Non Trainable Embeddings/Glove+LSTM/train_f1.pkl', 'wb'))
pickle.dump(val_f1, open('Non Trainable Embeddings/Glove+LSTM/val_f1.pkl', 'wb'))

#### Word2vec + LSTM

In [26]:
lstm_model = LSTM_model(len(word_to_idx), 300, 256, len(tag_to_ix), word2vec_embedding_mat, tag_to_ix['START_TAG'], tag_to_ix['END_TAG'], tag_to_ix, device)
loss_function = nn.NLLLoss()
optimizer = optim.SGD(lstm_model.parameters(), lr=0.01)

epochs = 10
train_loss = []
val_loss = []
train_f1 = []
val_f1 = []

for epoch in range(epochs):
    print("Epoch: ", epoch)
    preds = []
    actuals = []
    train_loss_temp = 0
    val_loss_temp = 0
    rnn_model.train()
    for case in tqdm(train_data):
        sentence = prepare_sequence(train_data[case]['text'].split(' '), word_to_idx)
        targets = prepare_sequence(train_data[case]['labels'], tag_to_ix)
        rnn_model.zero_grad()
        tag_scores = rnn_model(sentence)
        loss = loss_function(tag_scores, targets)
        loss.backward()
        optimizer.step()
        train_loss_temp += loss.item()
        preds.extend(torch.argmax(tag_scores, dim=1).detach().numpy().tolist())
        actuals.extend(targets.detach().numpy().tolist())
    train_f1_temp = f1_score(actuals, preds, average='macro')
    train_loss.append(train_loss_temp/len(train_data))
    train_f1.append(train_f1_temp)
    
    with torch.no_grad():
        rnn_model.eval()
        preds = []
        actuals = []
        for case in tqdm(val_data):
            sentence = prepare_sequence(val_data[case]['text'].split(' '), word_to_idx)
            targets = prepare_sequence(val_data[case]['labels'], tag_to_ix)
            tag_scores = rnn_model(sentence)
            loss = loss_function(tag_scores, targets)
            val_loss_temp += loss.item()
            preds.extend(torch.argmax(tag_scores, dim=1).detach().numpy().tolist())
            actuals.extend(targets.detach().numpy().tolist())
        val_f1_temp = f1_score(actuals, preds, average='macro')
        val_loss.append(val_loss_temp/len(val_data))
        val_f1.append(val_f1_temp)
    print(f'Train Loss: {train_loss[-1]}, Val Loss: {val_loss[-1]}, Train F1: {train_f1[-1]}, Val F1: {val_f1[-1]}')

Epoch:  0


100%|██████████| 906/906 [00:09<00:00, 95.43it/s] 
100%|██████████| 219/219 [00:00<00:00, 603.92it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  1


100%|██████████| 906/906 [00:07<00:00, 118.98it/s]
100%|██████████| 219/219 [00:00<00:00, 675.37it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  2


100%|██████████| 906/906 [00:12<00:00, 72.06it/s] 
100%|██████████| 219/219 [00:00<00:00, 484.55it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  3


100%|██████████| 906/906 [00:09<00:00, 99.48it/s] 
100%|██████████| 219/219 [00:00<00:00, 503.50it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  4


100%|██████████| 906/906 [00:08<00:00, 104.17it/s]
100%|██████████| 219/219 [00:00<00:00, 574.76it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  5


100%|██████████| 906/906 [00:08<00:00, 105.12it/s]
100%|██████████| 219/219 [00:00<00:00, 600.71it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  6


100%|██████████| 906/906 [00:09<00:00, 95.61it/s] 
100%|██████████| 219/219 [00:00<00:00, 304.23it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  7


100%|██████████| 906/906 [00:08<00:00, 104.86it/s]
100%|██████████| 219/219 [00:00<00:00, 538.60it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  8


100%|██████████| 906/906 [00:09<00:00, 100.23it/s]
100%|██████████| 219/219 [00:00<00:00, 606.86it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  9


100%|██████████| 906/906 [00:10<00:00, 87.12it/s] 
100%|██████████| 219/219 [00:00<00:00, 300.78it/s]

Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795





In [27]:
torch.save(lstm_model, 'Non Trainable Embeddings/Word2vec+LSTM/model.pt')
torch.save(lstm_model, '../../Deliverables/Task 2/Saved Models/t2_LSTM_Word2Vec.pt')
pickle.dump(train_loss, open('Non Trainable Embeddings/Word2vec+LSTM/train_loss.pkl', 'wb'))
pickle.dump(val_loss, open('Non Trainable Embeddings/Word2vec+LSTM/val_loss.pkl', 'wb'))
pickle.dump(train_f1, open('Non Trainable Embeddings/Word2vec+LSTM/train_f1.pkl', 'wb'))
pickle.dump(val_f1, open('Non Trainable Embeddings/Word2vec+LSTM/val_f1.pkl', 'wb'))

#### Bert + LSTM

In [28]:
lstm_model = LSTM_model(len(word_to_idx), 768, 512, len(tag_to_ix), bert_embedding_mat, tag_to_ix['START_TAG'], tag_to_ix['END_TAG'], tag_to_ix, device)
loss_function = nn.NLLLoss()
optimizer = optim.SGD(lstm_model.parameters(), lr=0.01)

epochs = 10
train_loss = []
val_loss = []
train_f1 = []
val_f1 = []

for epoch in range(epochs):
    print("Epoch: ", epoch)
    preds = []
    actuals = []
    train_loss_temp = 0
    val_loss_temp = 0
    rnn_model.train()
    for case in tqdm(train_data):
        sentence = prepare_sequence(train_data[case]['text'].split(' '), word_to_idx)
        targets = prepare_sequence(train_data[case]['labels'], tag_to_ix)
        rnn_model.zero_grad()
        tag_scores = rnn_model(sentence)
        loss = loss_function(tag_scores, targets)
        loss.backward()
        optimizer.step()
        train_loss_temp += loss.item()
        preds.extend(torch.argmax(tag_scores, dim=1).detach().numpy().tolist())
        actuals.extend(targets.detach().numpy().tolist())
    train_f1_temp = f1_score(actuals, preds, average='macro')
    train_loss.append(train_loss_temp/len(train_data))
    train_f1.append(train_f1_temp)
    
    with torch.no_grad():
        rnn_model.eval()
        preds = []
        actuals = []
        for case in tqdm(val_data):
            sentence = prepare_sequence(val_data[case]['text'].split(' '), word_to_idx)
            targets = prepare_sequence(val_data[case]['labels'], tag_to_ix)
            tag_scores = rnn_model(sentence)
            loss = loss_function(tag_scores, targets)
            val_loss_temp += loss.item()
            preds.extend(torch.argmax(tag_scores, dim=1).detach().numpy().tolist())
            actuals.extend(targets.detach().numpy().tolist())
        val_f1_temp = f1_score(actuals, preds, average='macro')
        val_loss.append(val_loss_temp/len(val_data))
        val_f1.append(val_f1_temp)
    print(f'Train Loss: {train_loss[-1]}, Val Loss: {val_loss[-1]}, Train F1: {train_f1[-1]}, Val F1: {val_f1[-1]}')

Epoch:  0


  0%|          | 0/906 [00:00<?, ?it/s]

100%|██████████| 906/906 [00:08<00:00, 103.63it/s]
100%|██████████| 219/219 [00:00<00:00, 626.40it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  1


100%|██████████| 906/906 [00:09<00:00, 99.30it/s] 
100%|██████████| 219/219 [00:00<00:00, 549.40it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  2


100%|██████████| 906/906 [00:08<00:00, 111.60it/s]
100%|██████████| 219/219 [00:00<00:00, 614.06it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  3


100%|██████████| 906/906 [00:10<00:00, 89.74it/s] 
100%|██████████| 219/219 [00:00<00:00, 390.57it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  4


100%|██████████| 906/906 [00:10<00:00, 83.08it/s] 
100%|██████████| 219/219 [00:00<00:00, 403.07it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  5


100%|██████████| 906/906 [00:10<00:00, 86.90it/s] 
100%|██████████| 219/219 [00:00<00:00, 395.25it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  6


100%|██████████| 906/906 [00:12<00:00, 74.97it/s] 
100%|██████████| 219/219 [00:00<00:00, 309.76it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  7


100%|██████████| 906/906 [00:12<00:00, 71.19it/s]
100%|██████████| 219/219 [00:00<00:00, 393.75it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  8


100%|██████████| 906/906 [00:10<00:00, 84.13it/s] 
100%|██████████| 219/219 [00:00<00:00, 420.00it/s]


Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795
Epoch:  9


100%|██████████| 906/906 [00:14<00:00, 63.88it/s]
100%|██████████| 219/219 [00:00<00:00, 309.05it/s]

Train Loss: 0.2489375187614501, Val Loss: 0.34846479016181814, Train F1: 0.7814219835995798, Val F1: 0.690814526254795





In [29]:
torch.save(lstm_model, 'Non Trainable Embeddings/Bert+LSTM/model.pt')
torch.save(lstm_model, '../../Deliverables/Task 2/Saved Models/t2_LSTM_Bert.pt')
pickle.dump(train_loss, open('Non Trainable Embeddings/Bert+LSTM/train_loss.pkl', 'wb'))
pickle.dump(val_loss, open('Non Trainable Embeddings/Bert+LSTM/val_loss.pkl', 'wb'))
pickle.dump(train_f1, open('Non Trainable Embeddings/Bert+LSTM/train_f1.pkl', 'wb'))
pickle.dump(val_f1, open('Non Trainable Embeddings/Bert+LSTM/val_f1.pkl', 'wb'))