In [1]:
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.optim as optim
import random
from transformers import BertTokenizer, BertModel
import json
import numpy as np
from tqdm import tqdm
import pickle
from sklearn.metrics import f1_score
import matplotlib.pyplot as plt
torch.manual_seed(1)

  from .autonotebook import tqdm as notebook_tqdm


<torch._C.Generator at 0x1b5e8c53810>

In [2]:
# Device handed to every model constructor below; hard-coded to the CPU.
device = "cpu"

In [3]:
# # Check whether CUDA is available (uncomment this cell to select the GPU when present)
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# print(device)

In [4]:
def prepare_sequence(seq, to_ix):
    """Map each item of ``seq`` to its index in ``to_ix`` and pack the result in a tensor.

    Args:
        seq: Iterable of keys (tokens or tag names) to look up.
        to_ix: Mapping from key to integer index.

    Returns:
        A 1-D ``torch.long`` tensor holding one index per item of ``seq``.

    Raises:
        KeyError: If an item of ``seq`` is missing from ``to_ix``.
    """
    indices = []
    for token in seq:
        indices.append(to_ix[token])
    return torch.tensor(indices, dtype=torch.long)

#### Data Loading

In [5]:
# Load the BIO-tagged train / test / validation splits.
# Context managers close each file as soon as it has been parsed instead of
# leaving the handle open until garbage collection.
with open('../Dataset/BIO_Tagged/ATE_train.json', 'r') as train_file:
    train_data = json.load(train_file)
with open('../Dataset/BIO_Tagged/ATE_test.json', 'r') as test_file:
    test_data = json.load(test_file)
with open('../Dataset/BIO_Tagged/ATE_val.json', 'r') as val_file:
    val_data = json.load(val_file)

In [6]:
# Token -> vocabulary index mapping.
# NOTE: pickle can execute arbitrary code on load; only load files you produced yourself.
with open('../Utils/word_to_idx.pkl', 'rb') as word_to_idx_file:
    word_to_idx = pickle.load(word_to_idx_file)

In [7]:
# Tag name -> class index mapping (also holds the 'START_TAG' / 'END_TAG' entries used below).
# NOTE: pickle can execute arbitrary code on load; only load files you produced yourself.
with open('../Utils/tag_to_ix.pkl', 'rb') as tag_to_ix_file:
    tag_to_ix = pickle.load(tag_to_ix_file)

#### RNN model

In [8]:
class RNN_model(nn.Module):
    """Per-token tagger: pretrained embeddings -> ``nn.RNN`` -> linear layer -> log-softmax.

    Args:
        vocab_size: Vocabulary size; stored on the instance only.
        embedding_dim: Input feature size given to the recurrent layer.
        hidden_dim: Hidden state size of the recurrent layer.
        target_size: Number of output tag classes.
        embedding_mat: Pretrained embedding matrix, converted with ``torch.FloatTensor``
            and loaded through ``nn.Embedding.from_pretrained`` with its default settings.
        start_tag: Stored on the instance only.
        end_tag: Stored on the instance only.
        tag_to_ix: Tag mapping; stored on the instance only.
        device: Device each layer is moved to.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, target_size, embedding_mat, start_tag, end_tag, tag_to_ix, device='cpu'):
        super().__init__()

        # Bookkeeping attributes; none of them is read by forward().
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.target_size = target_size
        self.start_tag = start_tag
        self.end_tag = end_tag
        self.tag_to_ix = tag_to_ix

        # Layers are built in the order embedding -> recurrent -> linear so that
        # random initialisation under a fixed seed stays the same.
        pretrained = torch.FloatTensor(embedding_mat)
        self.word_embeddings = nn.Embedding.from_pretrained(pretrained).to(device)
        self.rnn = nn.RNN(embedding_dim, hidden_dim).to(device)
        self.hidden2tag = nn.Linear(hidden_dim, target_size).to(device)

    def forward(self, sentence):
        """Return log-probabilities over tags, one row per token of ``sentence``."""
        n_tokens = len(sentence)
        embedded = self.word_embeddings(sentence)
        # The recurrent layer is fed a (n_tokens, 1, features) tensor: one sentence per call.
        hidden_states, _ = self.rnn(embedded.view(n_tokens, 1, -1))
        logits = self.hidden2tag(hidden_states.view(n_tokens, -1))
        return torch.log_softmax(logits, dim=1)


#### LSTM Model

In [9]:
class LSTM_model(nn.Module):
    """Per-token tagger: pretrained embeddings -> ``nn.LSTM`` -> linear layer -> log-softmax.

    Args:
        vocab_size: Vocabulary size; stored on the instance only.
        embedding_dim: Input feature size given to the LSTM.
        hidden_dim: Hidden state size of the LSTM.
        target_size: Number of output tag classes.
        embedding_mat: Pretrained embedding matrix, converted with ``torch.FloatTensor``
            and loaded through ``nn.Embedding.from_pretrained`` with its default settings.
        start_tag: Stored on the instance only.
        end_tag: Stored on the instance only.
        tag_to_ix: Tag mapping; stored on the instance only.
        device: Device each layer is moved to.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, target_size, embedding_mat, start_tag, end_tag, tag_to_ix, device='cpu'):
        super().__init__()

        # Bookkeeping attributes; none of them is read by forward().
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.target_size = target_size
        self.start_tag = start_tag
        self.end_tag = end_tag
        self.tag_to_ix = tag_to_ix

        # Layers are built in the order embedding -> recurrent -> linear so that
        # random initialisation under a fixed seed stays the same.
        pretrained = torch.FloatTensor(embedding_mat)
        self.word_embeddings = nn.Embedding.from_pretrained(pretrained).to(device)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim).to(device)
        self.hidden2tag = nn.Linear(hidden_dim, target_size).to(device)

    def forward(self, sentence):
        """Return log-probabilities over tags, one row per token of ``sentence``."""
        n_tokens = len(sentence)
        embedded = self.word_embeddings(sentence)
        # The LSTM is fed a (n_tokens, 1, features) tensor: one sentence per call.
        hidden_states, _ = self.lstm(embedded.view(n_tokens, 1, -1))
        logits = self.hidden2tag(hidden_states.view(n_tokens, -1))
        return torch.log_softmax(logits, dim=1)

#### GRU model

In [10]:
class GRU_model(nn.Module):
    """Per-token tagger: pretrained embeddings -> ``nn.GRU`` -> linear layer -> log-softmax.

    Args:
        vocab_size: Vocabulary size; stored on the instance only.
        embedding_dim: Input feature size given to the GRU.
        hidden_dim: Hidden state size of the GRU.
        target_size: Number of output tag classes.
        embedding_mat: Pretrained embedding matrix, converted with ``torch.FloatTensor``
            and loaded through ``nn.Embedding.from_pretrained`` with its default settings.
        start_tag: Stored on the instance only.
        end_tag: Stored on the instance only.
        tag_to_ix: Tag mapping; stored on the instance only.
        device: Device each layer is moved to.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, target_size, embedding_mat, start_tag, end_tag, tag_to_ix, device='cpu'):
        super().__init__()

        # Bookkeeping attributes; none of them is read by forward().
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.target_size = target_size
        self.start_tag = start_tag
        self.end_tag = end_tag
        self.tag_to_ix = tag_to_ix

        # Layers are built in the order embedding -> recurrent -> linear so that
        # random initialisation under a fixed seed stays the same.
        pretrained = torch.FloatTensor(embedding_mat)
        self.word_embeddings = nn.Embedding.from_pretrained(pretrained).to(device)
        self.gru = nn.GRU(embedding_dim, hidden_dim).to(device)
        self.hidden2tag = nn.Linear(hidden_dim, target_size).to(device)

    def forward(self, sentence):
        """Return log-probabilities over tags, one row per token of ``sentence``."""
        n_tokens = len(sentence)
        embedded = self.word_embeddings(sentence)
        # The GRU is fed a (n_tokens, 1, features) tensor: one sentence per call.
        hidden_states, _ = self.gru(embedded.view(n_tokens, 1, -1))
        logits = self.hidden2tag(hidden_states.view(n_tokens, -1))
        return torch.log_softmax(logits, dim=1)

#### Embedding matrices

In [11]:
# Pre-extracted embedding matrices, one per embedding family.
# Context managers close each file right after it is unpickled.
# NOTE: pickle can execute arbitrary code on load; only load files you produced yourself.
with open('../Extracted Word Embeddings/legal_bert_embedding_mat.pkl', 'rb') as bert_file:
    bert_embedding_mat = pickle.load(bert_file)
with open('../Extracted Word Embeddings/word2vec_embedding_mat.pkl', 'rb') as word2vec_file:
    word2vec_embedding_mat = pickle.load(word2vec_file)
with open('../Extracted Word Embeddings/glove_embedding_mat.pkl', 'rb') as glove_file:
    glove_embedding_mat = pickle.load(glove_file)

#### GloVe + RNN

In [12]:
# GloVe embedding matrix + RNN tagger (embedding_dim=300, hidden_dim=256).
rnn_model = RNN_model(len(word_to_idx), 300, 256, len(tag_to_ix), glove_embedding_mat, tag_to_ix['START_TAG'], tag_to_ix['END_TAG'], tag_to_ix, device)
loss_function = nn.NLLLoss()
optimizer = optim.SGD(rnn_model.parameters(), lr=0.01)

epochs = 10
# Per-epoch histories; every entry is the mean over all cases of the split.
train_loss = []
val_loss = []
train_f1 = []
val_f1 = []

for epoch in range(epochs):
    print("Epoch: ", epoch)
    train_loss_temp = 0
    val_loss_temp = 0
    train_f1_temp = 0
    val_f1_temp = 0

    # ---- training pass: one optimizer step per case ----
    rnn_model.train()
    for case in tqdm(train_data):
        # Inputs must sit on the same device as the model, otherwise a non-CPU
        # `device` raises a device-mismatch error in the forward pass.
        sentence = prepare_sequence(train_data[case]['text'].split(' '), word_to_idx).to(device)
        targets = prepare_sequence(train_data[case]['labels'], tag_to_ix).to(device)
        rnn_model.zero_grad()
        tag_scores = rnn_model(sentence)
        loss = loss_function(tag_scores, targets)
        loss.backward()
        optimizer.step()
        train_loss_temp += loss.item()
        # f1_score works on NumPy arrays, so copy tensors back to the CPU first.
        predictions = torch.argmax(tag_scores, dim=1)
        train_f1_temp += f1_score(targets.detach().cpu().numpy(), predictions.detach().cpu().numpy(), average='macro')
    train_loss.append(train_loss_temp/len(train_data))
    train_f1.append(train_f1_temp/len(train_data))

    # ---- validation pass: no gradient tracking, no parameter updates ----
    rnn_model.eval()
    with torch.no_grad():
        for case in tqdm(val_data):
            sentence = prepare_sequence(val_data[case]['text'].split(' '), word_to_idx).to(device)
            targets = prepare_sequence(val_data[case]['labels'], tag_to_ix).to(device)
            tag_scores = rnn_model(sentence)
            loss = loss_function(tag_scores, targets)
            val_loss_temp += loss.item()
            predictions = torch.argmax(tag_scores, dim=1)
            val_f1_temp += f1_score(targets.detach().cpu().numpy(), predictions.detach().cpu().numpy(), average='macro')
    val_loss.append(val_loss_temp/len(val_data))
    val_f1.append(val_f1_temp/len(val_data))

    print(f'Train Loss: {train_loss[-1]}, Val Loss: {val_loss[-1]}, Train F1: {train_f1[-1]}, Val F1: {val_f1[-1]}')

Epoch:  0


100%|██████████| 906/906 [00:04<00:00, 188.60it/s]
100%|██████████| 219/219 [00:00<00:00, 641.54it/s]


Train Loss: 0.5968134191882268, Val Loss: 0.4906317757008827, Train F1: 0.459286623513282, Val F1: 0.5663856508567454
Epoch:  1


100%|██████████| 906/906 [00:05<00:00, 176.17it/s]
100%|██████████| 219/219 [00:00<00:00, 527.77it/s]


Train Loss: 0.47993985639173725, Val Loss: 0.44160774785634044, Train F1: 0.5662979265034384, Val F1: 0.6233874019219469
Epoch:  2


100%|██████████| 906/906 [00:05<00:00, 174.16it/s]
100%|██████████| 219/219 [00:00<00:00, 538.81it/s]


Train Loss: 0.4413757815743256, Val Loss: 0.4215341919020975, Train F1: 0.6023781967599309, Val F1: 0.6364297677431544
Epoch:  3


100%|██████████| 906/906 [00:05<00:00, 177.19it/s]
100%|██████████| 219/219 [00:00<00:00, 547.33it/s]


Train Loss: 0.4173726557895837, Val Loss: 0.41181905111765754, Train F1: 0.6290902141263539, Val F1: 0.6435473665837765
Epoch:  4


100%|██████████| 906/906 [00:04<00:00, 189.86it/s]
100%|██████████| 219/219 [00:00<00:00, 615.41it/s]


Train Loss: 0.3997564797698794, Val Loss: 0.40676956460492253, Train F1: 0.6489653533198935, Val F1: 0.6418748309728011
Epoch:  5


100%|██████████| 906/906 [00:05<00:00, 166.47it/s]
100%|██████████| 219/219 [00:00<00:00, 496.81it/s]


Train Loss: 0.38580086319882084, Val Loss: 0.40432043985961236, Train F1: 0.6615917726602287, Val F1: 0.6477432574435013
Epoch:  6


100%|██████████| 906/906 [00:05<00:00, 173.36it/s]
100%|██████████| 219/219 [00:00<00:00, 399.84it/s]


Train Loss: 0.37409793594996005, Val Loss: 0.40345122039182, Train F1: 0.67467678933486, Val F1: 0.656578393443616
Epoch:  7


100%|██████████| 906/906 [00:05<00:00, 164.57it/s]
100%|██████████| 219/219 [00:00<00:00, 603.16it/s]


Train Loss: 0.363856319527226, Val Loss: 0.4034735304920096, Train F1: 0.6849642588034722, Val F1: 0.6588760977371073
Epoch:  8


100%|██████████| 906/906 [00:04<00:00, 216.33it/s]
100%|██████████| 219/219 [00:00<00:00, 614.58it/s]


Train Loss: 0.3546119403447668, Val Loss: 0.40396422222596867, Train F1: 0.6928735673691673, Val F1: 0.6623555267227851
Epoch:  9


100%|██████████| 906/906 [00:05<00:00, 177.93it/s]
100%|██████████| 219/219 [00:00<00:00, 391.28it/s]

Train Loss: 0.34607392498528455, Val Loss: 0.40475360673244143, Train F1: 0.6998949622485837, Val F1: 0.6630402886840052





In [13]:
# Persist the trained GloVe + RNN model object and its per-epoch curves.
torch.save(rnn_model, 'Non Trainable Embeddings/Glove+RNN/model.pt')
# Context managers make sure each pickle file is flushed and closed.
with open('Non Trainable Embeddings/Glove+RNN/train_loss.pkl', 'wb') as history_file:
    pickle.dump(train_loss, history_file)
with open('Non Trainable Embeddings/Glove+RNN/val_loss.pkl', 'wb') as history_file:
    pickle.dump(val_loss, history_file)
with open('Non Trainable Embeddings/Glove+RNN/train_f1.pkl', 'wb') as history_file:
    pickle.dump(train_f1, history_file)
with open('Non Trainable Embeddings/Glove+RNN/val_f1.pkl', 'wb') as history_file:
    pickle.dump(val_f1, history_file)

#### Word2vec + RNN

In [15]:
# Word2vec embedding matrix + RNN tagger (embedding_dim=300, hidden_dim=256).
rnn_model = RNN_model(len(word_to_idx), 300, 256, len(tag_to_ix), word2vec_embedding_mat, tag_to_ix['START_TAG'], tag_to_ix['END_TAG'], tag_to_ix, device)
loss_function = nn.NLLLoss()
optimizer = optim.SGD(rnn_model.parameters(), lr=0.01)

epochs = 10
# Per-epoch histories; every entry is the mean over all cases of the split.
train_loss = []
val_loss = []
train_f1 = []
val_f1 = []

for epoch in range(epochs):
    print("Epoch: ", epoch)
    train_loss_temp = 0
    val_loss_temp = 0
    train_f1_temp = 0
    val_f1_temp = 0

    # ---- training pass: one optimizer step per case ----
    rnn_model.train()
    for case in tqdm(train_data):
        # Inputs must sit on the same device as the model, otherwise a non-CPU
        # `device` raises a device-mismatch error in the forward pass.
        sentence = prepare_sequence(train_data[case]['text'].split(' '), word_to_idx).to(device)
        targets = prepare_sequence(train_data[case]['labels'], tag_to_ix).to(device)
        rnn_model.zero_grad()
        tag_scores = rnn_model(sentence)
        loss = loss_function(tag_scores, targets)
        loss.backward()
        optimizer.step()
        train_loss_temp += loss.item()
        # f1_score works on NumPy arrays, so copy tensors back to the CPU first.
        predictions = torch.argmax(tag_scores, dim=1)
        train_f1_temp += f1_score(targets.detach().cpu().numpy(), predictions.detach().cpu().numpy(), average='macro')
    train_loss.append(train_loss_temp/len(train_data))
    train_f1.append(train_f1_temp/len(train_data))

    # ---- validation pass: no gradient tracking, no parameter updates ----
    rnn_model.eval()
    with torch.no_grad():
        for case in tqdm(val_data):
            sentence = prepare_sequence(val_data[case]['text'].split(' '), word_to_idx).to(device)
            targets = prepare_sequence(val_data[case]['labels'], tag_to_ix).to(device)
            tag_scores = rnn_model(sentence)
            loss = loss_function(tag_scores, targets)
            val_loss_temp += loss.item()
            predictions = torch.argmax(tag_scores, dim=1)
            val_f1_temp += f1_score(targets.detach().cpu().numpy(), predictions.detach().cpu().numpy(), average='macro')
    val_loss.append(val_loss_temp/len(val_data))
    val_f1.append(val_f1_temp/len(val_data))

    print(f'Train Loss: {train_loss[-1]}, Val Loss: {val_loss[-1]}, Train F1: {train_f1[-1]}, Val F1: {val_f1[-1]}')

Epoch:  0


  0%|          | 0/906 [00:00<?, ?it/s]

100%|██████████| 906/906 [00:04<00:00, 203.56it/s]
100%|██████████| 219/219 [00:00<00:00, 466.24it/s]


Train Loss: 0.6535127756019302, Val Loss: 0.5358878160448379, Train F1: 0.38059835866048025, Val F1: 0.4678420440307875
Epoch:  1


100%|██████████| 906/906 [00:04<00:00, 181.92it/s]
100%|██████████| 219/219 [00:00<00:00, 478.74it/s]


Train Loss: 0.4984212756716126, Val Loss: 0.44232855669167487, Train F1: 0.5195272489184937, Val F1: 0.5812711647647449
Epoch:  2


100%|██████████| 906/906 [00:04<00:00, 199.21it/s]
100%|██████████| 219/219 [00:00<00:00, 417.89it/s]


Train Loss: 0.43399601949432304, Val Loss: 0.40195366855897857, Train F1: 0.6063180209195727, Val F1: 0.6456353777059476
Epoch:  3


100%|██████████| 906/906 [00:04<00:00, 186.68it/s]
100%|██████████| 219/219 [00:00<00:00, 493.13it/s]


Train Loss: 0.4020476406789642, Val Loss: 0.38321965364696775, Train F1: 0.6473607425230786, Val F1: 0.6749788417410171
Epoch:  4


100%|██████████| 906/906 [00:04<00:00, 193.82it/s]
100%|██████████| 219/219 [00:00<00:00, 390.60it/s]


Train Loss: 0.3822060127436259, Val Loss: 0.3727722850088115, Train F1: 0.6657423564716967, Val F1: 0.6902428977765166
Epoch:  5


100%|██████████| 906/906 [00:05<00:00, 171.27it/s]
100%|██████████| 219/219 [00:00<00:00, 318.57it/s]


Train Loss: 0.3673849542631416, Val Loss: 0.3661038413916004, Train F1: 0.6774983027510253, Val F1: 0.6983145257049538
Epoch:  6


100%|██████████| 906/906 [00:06<00:00, 150.04it/s]
100%|██████████| 219/219 [00:00<00:00, 567.74it/s]


Train Loss: 0.35521191141052094, Val Loss: 0.36174247713257734, Train F1: 0.6860951980741811, Val F1: 0.7001032640530731
Epoch:  7


100%|██████████| 906/906 [00:05<00:00, 171.26it/s]
100%|██████████| 219/219 [00:00<00:00, 609.40it/s]


Train Loss: 0.3447088405744853, Val Loss: 0.35916402391647095, Train F1: 0.6980137923857486, Val F1: 0.7010041891924879
Epoch:  8


100%|██████████| 906/906 [00:05<00:00, 167.20it/s]
100%|██████████| 219/219 [00:00<00:00, 425.41it/s]


Train Loss: 0.3354455349806523, Val Loss: 0.3579895687906165, Train F1: 0.7069530224102186, Val F1: 0.7000983633459458
Epoch:  9


100%|██████████| 906/906 [00:06<00:00, 144.96it/s]
100%|██████████| 219/219 [00:00<00:00, 410.26it/s]

Train Loss: 0.3271152251636482, Val Loss: 0.35792350151563346, Train F1: 0.715636726258825, Val F1: 0.7013391906011901





In [16]:
# Persist the trained Word2vec + RNN model object and its per-epoch curves.
torch.save(rnn_model, 'Non Trainable Embeddings/Word2vec+RNN/model.pt')
# Context managers make sure each pickle file is flushed and closed.
with open('Non Trainable Embeddings/Word2vec+RNN/train_loss.pkl', 'wb') as history_file:
    pickle.dump(train_loss, history_file)
with open('Non Trainable Embeddings/Word2vec+RNN/val_loss.pkl', 'wb') as history_file:
    pickle.dump(val_loss, history_file)
with open('Non Trainable Embeddings/Word2vec+RNN/train_f1.pkl', 'wb') as history_file:
    pickle.dump(train_f1, history_file)
with open('Non Trainable Embeddings/Word2vec+RNN/val_f1.pkl', 'wb') as history_file:
    pickle.dump(val_f1, history_file)

#### BERT + RNN

In [17]:
# Legal-BERT embedding matrix + RNN tagger (embedding_dim=768, hidden_dim=512).
rnn_model = RNN_model(len(word_to_idx), 768, 512, len(tag_to_ix), bert_embedding_mat, tag_to_ix['START_TAG'], tag_to_ix['END_TAG'], tag_to_ix, device)
loss_function = nn.NLLLoss()
optimizer = optim.SGD(rnn_model.parameters(), lr=0.01)

epochs = 10
# Per-epoch histories; every entry is the mean over all cases of the split.
train_loss = []
val_loss = []
train_f1 = []
val_f1 = []

for epoch in range(epochs):
    print("Epoch: ", epoch)
    train_loss_temp = 0
    val_loss_temp = 0
    train_f1_temp = 0
    val_f1_temp = 0

    # ---- training pass: one optimizer step per case ----
    rnn_model.train()
    for case in tqdm(train_data):
        # Inputs must sit on the same device as the model, otherwise a non-CPU
        # `device` raises a device-mismatch error in the forward pass.
        sentence = prepare_sequence(train_data[case]['text'].split(' '), word_to_idx).to(device)
        targets = prepare_sequence(train_data[case]['labels'], tag_to_ix).to(device)
        rnn_model.zero_grad()
        tag_scores = rnn_model(sentence)
        loss = loss_function(tag_scores, targets)
        loss.backward()
        optimizer.step()
        train_loss_temp += loss.item()
        # f1_score works on NumPy arrays, so copy tensors back to the CPU first.
        predictions = torch.argmax(tag_scores, dim=1)
        train_f1_temp += f1_score(targets.detach().cpu().numpy(), predictions.detach().cpu().numpy(), average='macro')
    train_loss.append(train_loss_temp/len(train_data))
    train_f1.append(train_f1_temp/len(train_data))

    # ---- validation pass: no gradient tracking, no parameter updates ----
    rnn_model.eval()
    with torch.no_grad():
        for case in tqdm(val_data):
            sentence = prepare_sequence(val_data[case]['text'].split(' '), word_to_idx).to(device)
            targets = prepare_sequence(val_data[case]['labels'], tag_to_ix).to(device)
            tag_scores = rnn_model(sentence)
            loss = loss_function(tag_scores, targets)
            val_loss_temp += loss.item()
            predictions = torch.argmax(tag_scores, dim=1)
            val_f1_temp += f1_score(targets.detach().cpu().numpy(), predictions.detach().cpu().numpy(), average='macro')
    val_loss.append(val_loss_temp/len(val_data))
    val_f1.append(val_f1_temp/len(val_data))

    print(f'Train Loss: {train_loss[-1]}, Val Loss: {val_loss[-1]}, Train F1: {train_f1[-1]}, Val F1: {val_f1[-1]}')

Epoch:  0


  1%|          | 9/906 [00:00<00:10, 83.66it/s]

100%|██████████| 906/906 [00:09<00:00, 99.79it/s] 
100%|██████████| 219/219 [00:00<00:00, 492.30it/s]


Train Loss: 0.6838453699717458, Val Loss: 0.6475972284192908, Train F1: 0.4108339289631352, Val F1: 0.4209117892116244
Epoch:  1


100%|██████████| 906/906 [00:06<00:00, 135.41it/s]
100%|██████████| 219/219 [00:00<00:00, 449.31it/s]


Train Loss: 0.6562118282085223, Val Loss: 0.6390599423347543, Train F1: 0.42972467535969533, Val F1: 0.4358104310487969
Epoch:  2


100%|██████████| 906/906 [00:07<00:00, 128.16it/s]
100%|██████████| 219/219 [00:00<00:00, 263.92it/s]


Train Loss: 0.646315835580789, Val Loss: 0.6327123050003836, Train F1: 0.4332680885477291, Val F1: 0.4386494484842911
Epoch:  3


100%|██████████| 906/906 [00:11<00:00, 75.87it/s] 
100%|██████████| 219/219 [00:00<00:00, 400.05it/s]


Train Loss: 0.6386674604405367, Val Loss: 0.6274173443448053, Train F1: 0.44198056079720877, Val F1: 0.4428039814250003
Epoch:  4


100%|██████████| 906/906 [00:09<00:00, 93.40it/s] 
100%|██████████| 219/219 [00:00<00:00, 275.44it/s]


Train Loss: 0.6321836748627111, Val Loss: 0.623216449806135, Train F1: 0.4501051910519494, Val F1: 0.4415889452939511
Epoch:  5


100%|██████████| 906/906 [00:08<00:00, 105.45it/s]
100%|██████████| 219/219 [00:00<00:00, 396.55it/s]


Train Loss: 0.6270107110308496, Val Loss: 0.6198079869083074, Train F1: 0.4537786033371692, Val F1: 0.44520201966899364
Epoch:  6


100%|██████████| 906/906 [00:07<00:00, 117.32it/s]
100%|██████████| 219/219 [00:00<00:00, 368.93it/s]


Train Loss: 0.6225836438519539, Val Loss: 0.6168556880188859, Train F1: 0.45844530894838115, Val F1: 0.44639177981914774
Epoch:  7


100%|██████████| 906/906 [00:09<00:00, 96.79it/s] 
100%|██████████| 219/219 [00:00<00:00, 403.80it/s]


Train Loss: 0.6184745836369776, Val Loss: 0.614347759583225, Train F1: 0.46164803585873876, Val F1: 0.449163102256526
Epoch:  8


100%|██████████| 906/906 [00:10<00:00, 88.69it/s] 
100%|██████████| 219/219 [00:00<00:00, 290.69it/s]


Train Loss: 0.6145513336159799, Val Loss: 0.6122919484360577, Train F1: 0.46372080395479465, Val F1: 0.4560875929647532
Epoch:  9


100%|██████████| 906/906 [00:07<00:00, 114.01it/s]
100%|██████████| 219/219 [00:00<00:00, 430.22it/s]

Train Loss: 0.6107513507865122, Val Loss: 0.6106750203049891, Train F1: 0.4659139108601753, Val F1: 0.4580197127836402





In [18]:
# Persist the trained BERT + RNN model object and its per-epoch curves.
torch.save(rnn_model, 'Non Trainable Embeddings/Bert+RNN/model.pt')
# Context managers make sure each pickle file is flushed and closed.
with open('Non Trainable Embeddings/Bert+RNN/train_loss.pkl', 'wb') as history_file:
    pickle.dump(train_loss, history_file)
with open('Non Trainable Embeddings/Bert+RNN/val_loss.pkl', 'wb') as history_file:
    pickle.dump(val_loss, history_file)
with open('Non Trainable Embeddings/Bert+RNN/train_f1.pkl', 'wb') as history_file:
    pickle.dump(train_f1, history_file)
with open('Non Trainable Embeddings/Bert+RNN/val_f1.pkl', 'wb') as history_file:
    pickle.dump(val_f1, history_file)

#### GloVe + GRU

In [19]:
# GloVe embedding matrix + GRU tagger (embedding_dim=300, hidden_dim=256).
gru_model = GRU_model(len(word_to_idx), 300, 256, len(tag_to_ix), glove_embedding_mat, tag_to_ix['START_TAG'], tag_to_ix['END_TAG'], tag_to_ix, device)
loss_function = nn.NLLLoss()
optimizer = optim.SGD(gru_model.parameters(), lr=0.01)

epochs = 10
# Per-epoch histories; every entry is the mean over all cases of the split.
train_loss = []
val_loss = []
train_f1 = []
val_f1 = []

for epoch in range(epochs):
    print("Epoch: ", epoch)
    train_loss_temp = 0
    val_loss_temp = 0
    train_f1_temp = 0
    val_f1_temp = 0

    # ---- training pass: one optimizer step per case ----
    gru_model.train()
    for case in tqdm(train_data):
        # Inputs must sit on the same device as the model, otherwise a non-CPU
        # `device` raises a device-mismatch error in the forward pass.
        sentence = prepare_sequence(train_data[case]['text'].split(' '), word_to_idx).to(device)
        targets = prepare_sequence(train_data[case]['labels'], tag_to_ix).to(device)
        gru_model.zero_grad()
        tag_scores = gru_model(sentence)
        loss = loss_function(tag_scores, targets)
        loss.backward()
        optimizer.step()
        train_loss_temp += loss.item()
        # f1_score works on NumPy arrays, so copy tensors back to the CPU first.
        predictions = torch.argmax(tag_scores, dim=1)
        train_f1_temp += f1_score(targets.detach().cpu().numpy(), predictions.detach().cpu().numpy(), average='macro')
    train_loss.append(train_loss_temp/len(train_data))
    train_f1.append(train_f1_temp/len(train_data))

    # ---- validation pass: no gradient tracking, no parameter updates ----
    gru_model.eval()
    with torch.no_grad():
        for case in tqdm(val_data):
            sentence = prepare_sequence(val_data[case]['text'].split(' '), word_to_idx).to(device)
            targets = prepare_sequence(val_data[case]['labels'], tag_to_ix).to(device)
            tag_scores = gru_model(sentence)
            loss = loss_function(tag_scores, targets)
            val_loss_temp += loss.item()
            predictions = torch.argmax(tag_scores, dim=1)
            val_f1_temp += f1_score(targets.detach().cpu().numpy(), predictions.detach().cpu().numpy(), average='macro')
    val_loss.append(val_loss_temp/len(val_data))
    val_f1.append(val_f1_temp/len(val_data))

    print(f'Train Loss: {train_loss[-1]}, Val Loss: {val_loss[-1]}, Train F1: {train_f1[-1]}, Val F1: {val_f1[-1]}')

Epoch:  0


  0%|          | 0/906 [00:00<?, ?it/s]

100%|██████████| 906/906 [00:13<00:00, 69.24it/s]
100%|██████████| 219/219 [00:00<00:00, 265.88it/s]


Train Loss: 0.6877748227079973, Val Loss: 0.5866534595620142, Train F1: 0.36627919093451694, Val F1: 0.4462616602840087
Epoch:  1


100%|██████████| 906/906 [00:12<00:00, 75.13it/s] 
100%|██████████| 219/219 [00:00<00:00, 359.10it/s]


Train Loss: 0.5595951171604213, Val Loss: 0.5214242952871541, Train F1: 0.47824325652397687, Val F1: 0.5374964711655497
Epoch:  2


100%|██████████| 906/906 [00:09<00:00, 95.90it/s] 
100%|██████████| 219/219 [00:00<00:00, 358.49it/s]


Train Loss: 0.5112265820633497, Val Loss: 0.4852672877240943, Train F1: 0.5406394252122311, Val F1: 0.571110503522886
Epoch:  3


100%|██████████| 906/906 [00:09<00:00, 96.42it/s] 
100%|██████████| 219/219 [00:00<00:00, 361.45it/s]


Train Loss: 0.4820197305043802, Val Loss: 0.46180405518780016, Train F1: 0.5629418499258454, Val F1: 0.5853689802532933
Epoch:  4


100%|██████████| 906/906 [00:09<00:00, 95.85it/s] 
100%|██████████| 219/219 [00:00<00:00, 326.27it/s]


Train Loss: 0.4610925462368308, Val Loss: 0.44524542422599445, Train F1: 0.5828699844212507, Val F1: 0.6010898120793638
Epoch:  5


100%|██████████| 906/906 [00:09<00:00, 96.10it/s] 
100%|██████████| 219/219 [00:00<00:00, 354.27it/s]


Train Loss: 0.4448357619032691, Val Loss: 0.4330631505748997, Train F1: 0.5939519682322784, Val F1: 0.6138142255490178
Epoch:  6


100%|██████████| 906/906 [00:09<00:00, 94.56it/s] 
100%|██████████| 219/219 [00:00<00:00, 339.49it/s]


Train Loss: 0.43144713103705423, Val Loss: 0.42373843045403425, Train F1: 0.6082213189483494, Val F1: 0.633375810713661
Epoch:  7


100%|██████████| 906/906 [00:09<00:00, 95.13it/s] 
100%|██████████| 219/219 [00:00<00:00, 325.63it/s]


Train Loss: 0.4199479476571346, Val Loss: 0.4163357388891586, Train F1: 0.62394911283692, Val F1: 0.6382639654823741
Epoch:  8


100%|██████████| 906/906 [00:09<00:00, 92.42it/s] 
100%|██████████| 219/219 [00:00<00:00, 351.58it/s]


Train Loss: 0.4098619041063928, Val Loss: 0.41023150481045517, Train F1: 0.6353051317016546, Val F1: 0.6432222298997439
Epoch:  9


100%|██████████| 906/906 [00:09<00:00, 91.27it/s] 
100%|██████████| 219/219 [00:00<00:00, 283.26it/s]

Train Loss: 0.40089639766392593, Val Loss: 0.4049899061656978, Train F1: 0.6436106278633325, Val F1: 0.639819831002705





In [20]:
# Persist the trained GloVe + GRU model object and its per-epoch curves.
torch.save(gru_model, 'Non Trainable Embeddings/Glove+GRU/model.pt')
# Context managers make sure each pickle file is flushed and closed.
with open('Non Trainable Embeddings/Glove+GRU/train_loss.pkl', 'wb') as history_file:
    pickle.dump(train_loss, history_file)
with open('Non Trainable Embeddings/Glove+GRU/val_loss.pkl', 'wb') as history_file:
    pickle.dump(val_loss, history_file)
with open('Non Trainable Embeddings/Glove+GRU/train_f1.pkl', 'wb') as history_file:
    pickle.dump(train_f1, history_file)
with open('Non Trainable Embeddings/Glove+GRU/val_f1.pkl', 'wb') as history_file:
    pickle.dump(val_f1, history_file)

#### Word2vec + GRU

In [21]:
# Word2vec embedding matrix + GRU tagger (embedding_dim=300, hidden_dim=256).
gru_model = GRU_model(len(word_to_idx), 300, 256, len(tag_to_ix), word2vec_embedding_mat, tag_to_ix['START_TAG'], tag_to_ix['END_TAG'], tag_to_ix, device)
loss_function = nn.NLLLoss()
optimizer = optim.SGD(gru_model.parameters(), lr=0.01)

epochs = 10
# Per-epoch histories; every entry is the mean over all cases of the split.
train_loss = []
val_loss = []
train_f1 = []
val_f1 = []

for epoch in range(epochs):
    print("Epoch: ", epoch)
    train_loss_temp = 0
    val_loss_temp = 0
    train_f1_temp = 0
    val_f1_temp = 0

    # ---- training pass: one optimizer step per case ----
    gru_model.train()
    for case in tqdm(train_data):
        # Inputs must sit on the same device as the model, otherwise a non-CPU
        # `device` raises a device-mismatch error in the forward pass.
        sentence = prepare_sequence(train_data[case]['text'].split(' '), word_to_idx).to(device)
        targets = prepare_sequence(train_data[case]['labels'], tag_to_ix).to(device)
        gru_model.zero_grad()
        tag_scores = gru_model(sentence)
        loss = loss_function(tag_scores, targets)
        loss.backward()
        optimizer.step()
        train_loss_temp += loss.item()
        # f1_score works on NumPy arrays, so copy tensors back to the CPU first.
        predictions = torch.argmax(tag_scores, dim=1)
        train_f1_temp += f1_score(targets.detach().cpu().numpy(), predictions.detach().cpu().numpy(), average='macro')
    train_loss.append(train_loss_temp/len(train_data))
    train_f1.append(train_f1_temp/len(train_data))

    # ---- validation pass: no gradient tracking, no parameter updates ----
    gru_model.eval()
    with torch.no_grad():
        for case in tqdm(val_data):
            sentence = prepare_sequence(val_data[case]['text'].split(' '), word_to_idx).to(device)
            targets = prepare_sequence(val_data[case]['labels'], tag_to_ix).to(device)
            tag_scores = gru_model(sentence)
            loss = loss_function(tag_scores, targets)
            val_loss_temp += loss.item()
            predictions = torch.argmax(tag_scores, dim=1)
            val_f1_temp += f1_score(targets.detach().cpu().numpy(), predictions.detach().cpu().numpy(), average='macro')
    val_loss.append(val_loss_temp/len(val_data))
    val_f1.append(val_f1_temp/len(val_data))

    print(f'Train Loss: {train_loss[-1]}, Val Loss: {val_loss[-1]}, Train F1: {train_f1[-1]}, Val F1: {val_f1[-1]}')

Epoch:  0


  0%|          | 0/906 [00:00<?, ?it/s]

100%|██████████| 906/906 [00:10<00:00, 86.91it/s] 
100%|██████████| 219/219 [00:00<00:00, 339.64it/s]


Train Loss: 0.7604094644058619, Val Loss: 0.6435012930332253, Train F1: 0.35151441913565007, Val F1: 0.3531173110138638
Epoch:  1


100%|██████████| 906/906 [00:09<00:00, 93.42it/s] 
100%|██████████| 219/219 [00:00<00:00, 318.17it/s]


Train Loss: 0.6128206780019975, Val Loss: 0.5698079982576849, Train F1: 0.36366325820668155, Val F1: 0.4066808421913643
Epoch:  2


100%|██████████| 906/906 [00:09<00:00, 92.73it/s] 
100%|██████████| 219/219 [00:00<00:00, 342.40it/s]


Train Loss: 0.5495131920373466, Val Loss: 0.5154017844156588, Train F1: 0.43907330125220295, Val F1: 0.512430921428327
Epoch:  3


100%|██████████| 906/906 [00:09<00:00, 90.88it/s] 
100%|██████████| 219/219 [00:00<00:00, 336.21it/s]


Train Loss: 0.5031745871302834, Val Loss: 0.4776720395099083, Train F1: 0.5183870917049738, Val F1: 0.5631630225125325
Epoch:  4


100%|██████████| 906/906 [00:10<00:00, 89.84it/s] 
100%|██████████| 219/219 [00:00<00:00, 334.72it/s]


Train Loss: 0.471383687972233, Val Loss: 0.45215561135446647, Train F1: 0.5562230864249489, Val F1: 0.5967591743697546
Epoch:  5


100%|██████████| 906/906 [00:09<00:00, 91.54it/s] 
100%|██████████| 219/219 [00:00<00:00, 331.53it/s]


Train Loss: 0.44956634886085856, Val Loss: 0.4342395785736711, Train F1: 0.5807132910379856, Val F1: 0.608949143674809
Epoch:  6


100%|██████████| 906/906 [00:12<00:00, 75.04it/s]
100%|██████████| 219/219 [00:00<00:00, 294.77it/s]


Train Loss: 0.4337269016613497, Val Loss: 0.4209292972604978, Train F1: 0.5998319687435444, Val F1: 0.6145040918152864
Epoch:  7


100%|██████████| 906/906 [00:10<00:00, 87.58it/s] 
100%|██████████| 219/219 [00:00<00:00, 322.56it/s]


Train Loss: 0.42143394626167174, Val Loss: 0.41049077161098724, Train F1: 0.6088956180109768, Val F1: 0.6260205462626923
Epoch:  8


100%|██████████| 906/906 [00:10<00:00, 88.75it/s]
100%|██████████| 219/219 [00:00<00:00, 324.99it/s]


Train Loss: 0.41131051442283667, Val Loss: 0.4019248680505034, Train F1: 0.6190169581061779, Val F1: 0.6440093282600707
Epoch:  9


100%|██████████| 906/906 [00:13<00:00, 68.41it/s]
100%|██████████| 219/219 [00:00<00:00, 229.34it/s]

Train Loss: 0.40258057483741255, Val Loss: 0.3946731783891922, Train F1: 0.6280850830387626, Val F1: 0.6592141532923255





In [22]:
# Persist the trained Word2vec + GRU model object and its per-epoch curves.
torch.save(gru_model, 'Non Trainable Embeddings/Word2vec+GRU/model.pt')
# Context managers make sure each pickle file is flushed and closed.
with open('Non Trainable Embeddings/Word2vec+GRU/train_loss.pkl', 'wb') as history_file:
    pickle.dump(train_loss, history_file)
with open('Non Trainable Embeddings/Word2vec+GRU/val_loss.pkl', 'wb') as history_file:
    pickle.dump(val_loss, history_file)
with open('Non Trainable Embeddings/Word2vec+GRU/train_f1.pkl', 'wb') as history_file:
    pickle.dump(train_f1, history_file)
with open('Non Trainable Embeddings/Word2vec+GRU/val_f1.pkl', 'wb') as history_file:
    pickle.dump(val_f1, history_file)

#### BERT + GRU

In [23]:
# Legal-BERT embedding matrix + GRU tagger (embedding_dim=768, hidden_dim=512).
gru_model = GRU_model(len(word_to_idx), 768, 512, len(tag_to_ix), bert_embedding_mat, tag_to_ix['START_TAG'], tag_to_ix['END_TAG'], tag_to_ix, device)
loss_function = nn.NLLLoss()
optimizer = optim.SGD(gru_model.parameters(), lr=0.01)

epochs = 10
# Per-epoch histories; every entry is the mean over all cases of the split.
train_loss = []
val_loss = []
train_f1 = []
val_f1 = []

for epoch in range(epochs):
    print("Epoch: ", epoch)
    train_loss_temp = 0
    val_loss_temp = 0
    train_f1_temp = 0
    val_f1_temp = 0

    # ---- training pass: one optimizer step per case ----
    gru_model.train()
    for case in tqdm(train_data):
        # Inputs must sit on the same device as the model, otherwise a non-CPU
        # `device` raises a device-mismatch error in the forward pass.
        sentence = prepare_sequence(train_data[case]['text'].split(' '), word_to_idx).to(device)
        targets = prepare_sequence(train_data[case]['labels'], tag_to_ix).to(device)
        gru_model.zero_grad()
        tag_scores = gru_model(sentence)
        loss = loss_function(tag_scores, targets)
        loss.backward()
        optimizer.step()
        train_loss_temp += loss.item()
        # f1_score works on NumPy arrays, so copy tensors back to the CPU first.
        predictions = torch.argmax(tag_scores, dim=1)
        train_f1_temp += f1_score(targets.detach().cpu().numpy(), predictions.detach().cpu().numpy(), average='macro')
    train_loss.append(train_loss_temp/len(train_data))
    train_f1.append(train_f1_temp/len(train_data))

    # ---- validation pass: no gradient tracking, no parameter updates ----
    gru_model.eval()
    with torch.no_grad():
        for case in tqdm(val_data):
            sentence = prepare_sequence(val_data[case]['text'].split(' '), word_to_idx).to(device)
            targets = prepare_sequence(val_data[case]['labels'], tag_to_ix).to(device)
            tag_scores = gru_model(sentence)
            loss = loss_function(tag_scores, targets)
            val_loss_temp += loss.item()
            predictions = torch.argmax(tag_scores, dim=1)
            val_f1_temp += f1_score(targets.detach().cpu().numpy(), predictions.detach().cpu().numpy(), average='macro')
    val_loss.append(val_loss_temp/len(val_data))
    val_f1.append(val_f1_temp/len(val_data))

    print(f'Train Loss: {train_loss[-1]}, Val Loss: {val_loss[-1]}, Train F1: {train_f1[-1]}, Val F1: {val_f1[-1]}')

Epoch:  0


  0%|          | 0/906 [00:00<?, ?it/s]

100%|██████████| 906/906 [00:31<00:00, 28.90it/s]
100%|██████████| 219/219 [00:01<00:00, 174.76it/s]


Train Loss: 0.6987751507206469, Val Loss: 0.6678536323379708, Train F1: 0.3748922840792369, Val F1: 0.3927246398718795
Epoch:  1


100%|██████████| 906/906 [00:36<00:00, 25.00it/s]
100%|██████████| 219/219 [00:01<00:00, 156.65it/s]


Train Loss: 0.6707709766513201, Val Loss: 0.6562257428147477, Train F1: 0.40512667020145765, Val F1: 0.4167879518205866
Epoch:  2


100%|██████████| 906/906 [00:42<00:00, 21.44it/s]
100%|██████████| 219/219 [00:01<00:00, 156.94it/s]


Train Loss: 0.6604047863195274, Val Loss: 0.6494642232106701, Train F1: 0.41932009923783226, Val F1: 0.42319201072140555
Epoch:  3


100%|██████████| 906/906 [00:39<00:00, 23.10it/s]
100%|██████████| 219/219 [00:01<00:00, 145.62it/s]


Train Loss: 0.6537817921695067, Val Loss: 0.6451395852108525, Train F1: 0.42852710994737475, Val F1: 0.4284831768955887
Epoch:  4


100%|██████████| 906/906 [00:37<00:00, 24.21it/s]
100%|██████████| 219/219 [00:01<00:00, 149.91it/s]


Train Loss: 0.6486948361624418, Val Loss: 0.6417684333237339, Train F1: 0.43496116121895867, Val F1: 0.4337298812627581
Epoch:  5


100%|██████████| 906/906 [00:37<00:00, 24.40it/s]
100%|██████████| 219/219 [00:01<00:00, 173.12it/s]


Train Loss: 0.6442992698409437, Val Loss: 0.6389145513647767, Train F1: 0.43914932005353957, Val F1: 0.4344141799235426
Epoch:  6


100%|██████████| 906/906 [00:37<00:00, 23.95it/s]
100%|██████████| 219/219 [00:01<00:00, 165.28it/s]


Train Loss: 0.6403215026223896, Val Loss: 0.636435652160209, Train F1: 0.44300508971183966, Val F1: 0.4365404359397132
Epoch:  7


100%|██████████| 906/906 [00:38<00:00, 23.53it/s]
100%|██████████| 219/219 [00:01<00:00, 162.21it/s]


Train Loss: 0.6366377842610509, Val Loss: 0.6342574647844654, Train F1: 0.44616485362484753, Val F1: 0.44079209455464113
Epoch:  8


100%|██████████| 906/906 [00:38<00:00, 23.81it/s]
100%|██████████| 219/219 [00:01<00:00, 160.54it/s]


Train Loss: 0.6331809836770789, Val Loss: 0.632328130066667, Train F1: 0.4495940085047949, Val F1: 0.4442398268555838
Epoch:  9


100%|██████████| 906/906 [00:39<00:00, 23.01it/s]
100%|██████████| 219/219 [00:01<00:00, 152.92it/s]

Train Loss: 0.6299121094657886, Val Loss: 0.6306091001316837, Train F1: 0.45096639102456526, Val F1: 0.44672981845427473





In [24]:
# Persist the trained GRU model together with the per-epoch metric histories
# collected by the training loop.
torch.save(gru_model, 'Non Trainable Embeddings/Bert+GRU/model.pt')
# Each pickle file is opened in a context manager so the handle is flushed
# and closed deterministically instead of being left to the garbage collector.
with open('Non Trainable Embeddings/Bert+GRU/train_loss.pkl', 'wb') as metric_file:
    pickle.dump(train_loss, metric_file)
with open('Non Trainable Embeddings/Bert+GRU/val_loss.pkl', 'wb') as metric_file:
    pickle.dump(val_loss, metric_file)
with open('Non Trainable Embeddings/Bert+GRU/train_f1.pkl', 'wb') as metric_file:
    pickle.dump(train_f1, metric_file)
with open('Non Trainable Embeddings/Bert+GRU/val_f1.pkl', 'wb') as metric_file:
    pickle.dump(val_f1, metric_file)

#### Glove + LSTM

In [25]:
# Train an LSTM tagger on top of the GloVe embedding matrix.
# NOTE(review): the positional arguments 300 and 256 are presumably
# embedding_dim and hidden_dim, mirroring RNN_model's signature — confirm.
lstm_model = LSTM_model(len(word_to_idx), 300, 256, len(tag_to_ix), glove_embedding_mat, tag_to_ix['START_TAG'], tag_to_ix['END_TAG'], tag_to_ix, device)
# NOTE(review): NLLLoss expects log-probabilities; presumably the model's
# forward pass ends in a log_softmax — confirm.
loss_function = nn.NLLLoss()
optimizer = optim.SGD(lstm_model.parameters(), lr=0.01)

epochs = 10
# Per-epoch histories (one entry appended per epoch); saved by the next cell.
train_loss = []
val_loss = []
train_f1 = []
val_f1 = []

for epoch in range(epochs):
    print("Epoch: ", epoch)
    # Running sums over the sentences of the current epoch.
    train_loss_temp = 0
    val_loss_temp = 0
    train_f1_temp = 0
    val_f1_temp = 0

    # ---- training pass: one optimizer step per sentence ----
    lstm_model.train()
    for case in tqdm(train_data):
        # Inputs are moved to the device the model was built on so the cell
        # keeps working when `device` is switched away from the CPU.
        sentence = prepare_sequence(train_data[case]['text'].split(' '), word_to_idx).to(device)
        targets = prepare_sequence(train_data[case]['labels'], tag_to_ix).to(device)
        lstm_model.zero_grad()
        tag_scores = lstm_model(sentence)
        loss = loss_function(tag_scores, targets)
        loss.backward()
        optimizer.step()
        train_loss_temp += loss.item()
        # Macro F1 is computed per sentence and averaged over sentences below.
        # .cpu() is needed before .numpy() for tensors on an accelerator.
        train_f1_temp += f1_score(targets.detach().cpu().numpy(), torch.argmax(tag_scores, dim=1).detach().cpu().numpy(), average='macro')
    train_loss.append(train_loss_temp/len(train_data))
    train_f1.append(train_f1_temp/len(train_data))

    # ---- validation pass: no gradient tracking, no parameter updates ----
    with torch.no_grad():
        lstm_model.eval()
        for case in tqdm(val_data):
            sentence = prepare_sequence(val_data[case]['text'].split(' '), word_to_idx).to(device)
            targets = prepare_sequence(val_data[case]['labels'], tag_to_ix).to(device)
            tag_scores = lstm_model(sentence)
            loss = loss_function(tag_scores, targets)
            val_loss_temp += loss.item()
            val_f1_temp += f1_score(targets.detach().cpu().numpy(), torch.argmax(tag_scores, dim=1).detach().cpu().numpy(), average='macro')
    val_loss.append(val_loss_temp/len(val_data))
    val_f1.append(val_f1_temp/len(val_data))

    print(f'Train Loss: {train_loss[-1]}, Val Loss: {val_loss[-1]}, Train F1: {train_f1[-1]}, Val F1: {val_f1[-1]}')

Epoch:  0


100%|██████████| 906/906 [00:06<00:00, 133.89it/s]
100%|██████████| 219/219 [00:00<00:00, 342.00it/s]


Train Loss: 0.7816405328562192, Val Loss: 0.6583317537285965, Train F1: 0.3515741671625853, Val F1: 0.3531173110138638
Epoch:  1


100%|██████████| 906/906 [00:05<00:00, 151.06it/s]
100%|██████████| 219/219 [00:00<00:00, 400.92it/s]


Train Loss: 0.6441842318568008, Val Loss: 0.604199711182346, Train F1: 0.35803346825563187, Val F1: 0.38346339543386804
Epoch:  2


100%|██████████| 906/906 [00:06<00:00, 139.51it/s]
100%|██████████| 219/219 [00:00<00:00, 366.09it/s]


Train Loss: 0.5941185467027408, Val Loss: 0.5583752292340205, Train F1: 0.4065685712718787, Val F1: 0.48281831330461805
Epoch:  3


100%|██████████| 906/906 [00:06<00:00, 146.53it/s]
100%|██████████| 219/219 [00:00<00:00, 345.22it/s]


Train Loss: 0.5537682570533511, Val Loss: 0.5235781236870648, Train F1: 0.48463369581230736, Val F1: 0.5307228574394807
Epoch:  4


100%|██████████| 906/906 [00:06<00:00, 135.24it/s]
100%|██████████| 219/219 [00:00<00:00, 342.80it/s]


Train Loss: 0.523562083131825, Val Loss: 0.4971816104977098, Train F1: 0.5248353214608321, Val F1: 0.5583468355845068
Epoch:  5


100%|██████████| 906/906 [00:06<00:00, 139.96it/s]
100%|██████████| 219/219 [00:00<00:00, 346.78it/s]


Train Loss: 0.5004034005352204, Val Loss: 0.47629274489128426, Train F1: 0.55027184182903, Val F1: 0.5700771985616385
Epoch:  6


100%|██████████| 906/906 [00:06<00:00, 140.85it/s]
100%|██████████| 219/219 [00:00<00:00, 348.47it/s]


Train Loss: 0.4818193700912904, Val Loss: 0.4593843416264068, Train F1: 0.566907656149681, Val F1: 0.589930165461253
Epoch:  7


100%|██████████| 906/906 [00:06<00:00, 131.28it/s]
100%|██████████| 219/219 [00:00<00:00, 340.41it/s]


Train Loss: 0.4664415038020142, Val Loss: 0.4456977086932692, Train F1: 0.5790176834997439, Val F1: 0.5950062855061207
Epoch:  8


100%|██████████| 906/906 [00:06<00:00, 143.93it/s]
100%|██████████| 219/219 [00:00<00:00, 364.73it/s]


Train Loss: 0.45341039362628727, Val Loss: 0.4345080314297654, Train F1: 0.5877747024987416, Val F1: 0.6027366631881801
Epoch:  9


100%|██████████| 906/906 [00:06<00:00, 150.63it/s]
100%|██████████| 219/219 [00:00<00:00, 372.02it/s]

Train Loss: 0.4419508746292681, Val Loss: 0.4251132215879279, Train F1: 0.5954390516435106, Val F1: 0.6057877371685894





In [26]:
# Persist the GloVe + LSTM model together with the per-epoch metric histories
# collected by the training loop above.
torch.save(lstm_model, 'Non Trainable Embeddings/Glove+LSTM/model.pt')
# Each pickle file is opened in a context manager so the handle is flushed
# and closed deterministically instead of being left to the garbage collector.
with open('Non Trainable Embeddings/Glove+LSTM/train_loss.pkl', 'wb') as metric_file:
    pickle.dump(train_loss, metric_file)
with open('Non Trainable Embeddings/Glove+LSTM/val_loss.pkl', 'wb') as metric_file:
    pickle.dump(val_loss, metric_file)
with open('Non Trainable Embeddings/Glove+LSTM/train_f1.pkl', 'wb') as metric_file:
    pickle.dump(train_f1, metric_file)
with open('Non Trainable Embeddings/Glove+LSTM/val_f1.pkl', 'wb') as metric_file:
    pickle.dump(val_f1, metric_file)

#### Word2vec + LSTM

In [27]:
# Train an LSTM tagger on top of the Word2vec embedding matrix.
# NOTE(review): the positional arguments 300 and 256 are presumably
# embedding_dim and hidden_dim, mirroring RNN_model's signature — confirm.
lstm_model = LSTM_model(len(word_to_idx), 300, 256, len(tag_to_ix), word2vec_embedding_mat, tag_to_ix['START_TAG'], tag_to_ix['END_TAG'], tag_to_ix, device)
# NOTE(review): NLLLoss expects log-probabilities; presumably the model's
# forward pass ends in a log_softmax — confirm.
loss_function = nn.NLLLoss()
optimizer = optim.SGD(lstm_model.parameters(), lr=0.01)

epochs = 10
# Per-epoch histories (one entry appended per epoch); saved by the next cell.
train_loss = []
val_loss = []
train_f1 = []
val_f1 = []

for epoch in range(epochs):
    print("Epoch: ", epoch)
    # Running sums over the sentences of the current epoch.
    train_loss_temp = 0
    val_loss_temp = 0
    train_f1_temp = 0
    val_f1_temp = 0

    # ---- training pass: one optimizer step per sentence ----
    lstm_model.train()
    for case in tqdm(train_data):
        # Inputs are moved to the device the model was built on so the cell
        # keeps working when `device` is switched away from the CPU.
        sentence = prepare_sequence(train_data[case]['text'].split(' '), word_to_idx).to(device)
        targets = prepare_sequence(train_data[case]['labels'], tag_to_ix).to(device)
        lstm_model.zero_grad()
        tag_scores = lstm_model(sentence)
        loss = loss_function(tag_scores, targets)
        loss.backward()
        optimizer.step()
        train_loss_temp += loss.item()
        # Macro F1 is computed per sentence and averaged over sentences below.
        # .cpu() is needed before .numpy() for tensors on an accelerator.
        train_f1_temp += f1_score(targets.detach().cpu().numpy(), torch.argmax(tag_scores, dim=1).detach().cpu().numpy(), average='macro')
    train_loss.append(train_loss_temp/len(train_data))
    train_f1.append(train_f1_temp/len(train_data))

    # ---- validation pass: no gradient tracking, no parameter updates ----
    with torch.no_grad():
        lstm_model.eval()
        for case in tqdm(val_data):
            sentence = prepare_sequence(val_data[case]['text'].split(' '), word_to_idx).to(device)
            targets = prepare_sequence(val_data[case]['labels'], tag_to_ix).to(device)
            tag_scores = lstm_model(sentence)
            loss = loss_function(tag_scores, targets)
            val_loss_temp += loss.item()
            val_f1_temp += f1_score(targets.detach().cpu().numpy(), torch.argmax(tag_scores, dim=1).detach().cpu().numpy(), average='macro')
    val_loss.append(val_loss_temp/len(val_data))
    val_f1.append(val_f1_temp/len(val_data))

    print(f'Train Loss: {train_loss[-1]}, Val Loss: {val_loss[-1]}, Train F1: {train_f1[-1]}, Val F1: {val_f1[-1]}')

Epoch:  0


100%|██████████| 906/906 [00:06<00:00, 138.14it/s]
100%|██████████| 219/219 [00:00<00:00, 288.10it/s]


Train Loss: 0.8449351207310001, Val Loss: 0.6961075300223207, Train F1: 0.3483793784747095, Val F1: 0.3531173110138638
Epoch:  1


100%|██████████| 906/906 [00:07<00:00, 127.08it/s]
100%|██████████| 219/219 [00:00<00:00, 355.27it/s]


Train Loss: 0.6822504956793312, Val Loss: 0.6535659399751115, Train F1: 0.35221363021064966, Val F1: 0.3531173110138638
Epoch:  2


100%|██████████| 906/906 [00:06<00:00, 134.11it/s]
100%|██████████| 219/219 [00:00<00:00, 344.20it/s]


Train Loss: 0.6484705856106928, Val Loss: 0.6231789111274563, Train F1: 0.35221363021064966, Val F1: 0.3531173110138638
Epoch:  3


100%|██████████| 906/906 [00:09<00:00, 99.78it/s] 
100%|██████████| 219/219 [00:00<00:00, 274.21it/s]


Train Loss: 0.6191497126030869, Val Loss: 0.5933153915078673, Train F1: 0.3536845551176199, Val F1: 0.35800240050874527
Epoch:  4


100%|██████████| 906/906 [00:06<00:00, 132.94it/s]
100%|██████████| 219/219 [00:00<00:00, 310.89it/s]


Train Loss: 0.5893256704078341, Val Loss: 0.5631597685759471, Train F1: 0.3676505491866325, Val F1: 0.3944398364779217
Epoch:  5


100%|██████████| 906/906 [00:07<00:00, 128.07it/s]
100%|██████████| 219/219 [00:00<00:00, 348.28it/s]


Train Loss: 0.5596519062423022, Val Loss: 0.5345916339115465, Train F1: 0.41465366858600916, Val F1: 0.4676783862661805
Epoch:  6


100%|██████████| 906/906 [00:07<00:00, 123.25it/s]
100%|██████████| 219/219 [00:00<00:00, 347.73it/s]


Train Loss: 0.5323463338067463, Val Loss: 0.5095369756630023, Train F1: 0.4651418506933484, Val F1: 0.5190232596165744
Epoch:  7


100%|██████████| 906/906 [00:06<00:00, 132.44it/s]
100%|██████████| 219/219 [00:00<00:00, 331.70it/s]


Train Loss: 0.5089066948980159, Val Loss: 0.48845615146094806, Train F1: 0.5090672007879095, Val F1: 0.5473228506226737
Epoch:  8


100%|██████████| 906/906 [00:06<00:00, 130.94it/s]
100%|██████████| 219/219 [00:00<00:00, 339.19it/s]


Train Loss: 0.4892592430937106, Val Loss: 0.47070595696908696, Train F1: 0.5413926229152894, Val F1: 0.5652154331093707
Epoch:  9


100%|██████████| 906/906 [00:06<00:00, 129.49it/s]
100%|██████████| 219/219 [00:00<00:00, 375.02it/s]


Train Loss: 0.472696417412221, Val Loss: 0.4556077920818982, Train F1: 0.557499713473176, Val F1: 0.5850595774745727


In [28]:
# Persist the Word2vec + LSTM model together with the per-epoch metric
# histories collected by the training loop above.
torch.save(lstm_model, 'Non Trainable Embeddings/Word2vec+LSTM/model.pt')
# Each pickle file is opened in a context manager so the handle is flushed
# and closed deterministically instead of being left to the garbage collector.
with open('Non Trainable Embeddings/Word2vec+LSTM/train_loss.pkl', 'wb') as metric_file:
    pickle.dump(train_loss, metric_file)
with open('Non Trainable Embeddings/Word2vec+LSTM/val_loss.pkl', 'wb') as metric_file:
    pickle.dump(val_loss, metric_file)
with open('Non Trainable Embeddings/Word2vec+LSTM/train_f1.pkl', 'wb') as metric_file:
    pickle.dump(train_f1, metric_file)
with open('Non Trainable Embeddings/Word2vec+LSTM/val_f1.pkl', 'wb') as metric_file:
    pickle.dump(val_f1, metric_file)

#### Bert + LSTM

In [29]:
# Train an LSTM tagger on top of the BERT embedding matrix.
# NOTE(review): the positional arguments 768 and 512 are presumably
# embedding_dim and hidden_dim, mirroring RNN_model's signature — confirm.
lstm_model = LSTM_model(len(word_to_idx), 768, 512, len(tag_to_ix), bert_embedding_mat, tag_to_ix['START_TAG'], tag_to_ix['END_TAG'], tag_to_ix, device)
# NOTE(review): NLLLoss expects log-probabilities; presumably the model's
# forward pass ends in a log_softmax — confirm.
loss_function = nn.NLLLoss()
optimizer = optim.SGD(lstm_model.parameters(), lr=0.01)

epochs = 10
# Per-epoch histories (one entry appended per epoch); saved by the next cell.
train_loss = []
val_loss = []
train_f1 = []
val_f1 = []

for epoch in range(epochs):
    print("Epoch: ", epoch)
    # Running sums over the sentences of the current epoch.
    train_loss_temp = 0
    val_loss_temp = 0
    train_f1_temp = 0
    val_f1_temp = 0

    # ---- training pass: one optimizer step per sentence ----
    lstm_model.train()
    for case in tqdm(train_data):
        # Inputs are moved to the device the model was built on so the cell
        # keeps working when `device` is switched away from the CPU.
        sentence = prepare_sequence(train_data[case]['text'].split(' '), word_to_idx).to(device)
        targets = prepare_sequence(train_data[case]['labels'], tag_to_ix).to(device)
        lstm_model.zero_grad()
        tag_scores = lstm_model(sentence)
        loss = loss_function(tag_scores, targets)
        loss.backward()
        optimizer.step()
        train_loss_temp += loss.item()
        # Macro F1 is computed per sentence and averaged over sentences below.
        # .cpu() is needed before .numpy() for tensors on an accelerator.
        train_f1_temp += f1_score(targets.detach().cpu().numpy(), torch.argmax(tag_scores, dim=1).detach().cpu().numpy(), average='macro')
    train_loss.append(train_loss_temp/len(train_data))
    train_f1.append(train_f1_temp/len(train_data))

    # ---- validation pass: no gradient tracking, no parameter updates ----
    with torch.no_grad():
        lstm_model.eval()
        for case in tqdm(val_data):
            sentence = prepare_sequence(val_data[case]['text'].split(' '), word_to_idx).to(device)
            targets = prepare_sequence(val_data[case]['labels'], tag_to_ix).to(device)
            tag_scores = lstm_model(sentence)
            loss = loss_function(tag_scores, targets)
            val_loss_temp += loss.item()
            val_f1_temp += f1_score(targets.detach().cpu().numpy(), torch.argmax(tag_scores, dim=1).detach().cpu().numpy(), average='macro')
    val_loss.append(val_loss_temp/len(val_data))
    val_f1.append(val_f1_temp/len(val_data))

    print(f'Train Loss: {train_loss[-1]}, Val Loss: {val_loss[-1]}, Train F1: {train_f1[-1]}, Val F1: {val_f1[-1]}')

Epoch:  0


100%|██████████| 906/906 [00:27<00:00, 33.53it/s]
100%|██████████| 219/219 [00:01<00:00, 157.20it/s]


Train Loss: 0.7167522716963002, Val Loss: 0.676812830717052, Train F1: 0.3547878682102667, Val F1: 0.35788249528469185
Epoch:  1


100%|██████████| 906/906 [00:25<00:00, 35.01it/s]
100%|██████████| 219/219 [00:01<00:00, 127.54it/s]


Train Loss: 0.6783066020073744, Val Loss: 0.6663340824107601, Train F1: 0.36790737678011937, Val F1: 0.38888443691028063
Epoch:  2


100%|██████████| 906/906 [00:31<00:00, 28.82it/s]
100%|██████████| 219/219 [00:02<00:00, 91.62it/s]


Train Loss: 0.6687125607371067, Val Loss: 0.6593067778027766, Train F1: 0.38656198439304984, Val F1: 0.42570941145959823
Epoch:  3


100%|██████████| 906/906 [00:33<00:00, 27.29it/s]
100%|██████████| 219/219 [00:02<00:00, 89.04it/s]


Train Loss: 0.6615877430104788, Val Loss: 0.6532567226451281, Train F1: 0.407689665085332, Val F1: 0.4316405581707851
Epoch:  4


100%|██████████| 906/906 [00:30<00:00, 29.36it/s]
100%|██████████| 219/219 [00:01<00:00, 169.18it/s]


Train Loss: 0.6563322812516168, Val Loss: 0.648992141904352, Train F1: 0.4196121755728854, Val F1: 0.43117207740887525
Epoch:  5


100%|██████████| 906/906 [00:22<00:00, 39.87it/s]
100%|██████████| 219/219 [00:02<00:00, 93.21it/s]


Train Loss: 0.652534528250989, Val Loss: 0.6458674903329649, Train F1: 0.425348784515098, Val F1: 0.4277023179828389
Epoch:  6


100%|██████████| 906/906 [00:24<00:00, 37.37it/s]
100%|██████████| 219/219 [00:01<00:00, 151.96it/s]


Train Loss: 0.6493790270082208, Val Loss: 0.6433128713200625, Train F1: 0.42886445526165556, Val F1: 0.4280994155913633
Epoch:  7


100%|██████████| 906/906 [00:26<00:00, 33.94it/s]
100%|██████████| 219/219 [00:02<00:00, 101.24it/s]


Train Loss: 0.6465524618100646, Val Loss: 0.6410244481476475, Train F1: 0.43197066002060525, Val F1: 0.4353229748345722
Epoch:  8


100%|██████████| 906/906 [00:27<00:00, 33.03it/s]
100%|██████████| 219/219 [00:01<00:00, 140.58it/s]


Train Loss: 0.6439385161330106, Val Loss: 0.6388820456043226, Train F1: 0.433782664336745, Val F1: 0.4356017746607418
Epoch:  9


100%|██████████| 906/906 [00:24<00:00, 36.80it/s]
100%|██████████| 219/219 [00:01<00:00, 152.38it/s]

Train Loss: 0.6414778806910609, Val Loss: 0.6368587898881468, Train F1: 0.4376682314077051, Val F1: 0.4383686839631542





In [30]:
# Persist the BERT + LSTM model together with the per-epoch metric histories
# collected by the training loop above.
torch.save(lstm_model, 'Non Trainable Embeddings/Bert+LSTM/model.pt')
# Each pickle file is opened in a context manager so the handle is flushed
# and closed deterministically instead of being left to the garbage collector.
with open('Non Trainable Embeddings/Bert+LSTM/train_loss.pkl', 'wb') as metric_file:
    pickle.dump(train_loss, metric_file)
with open('Non Trainable Embeddings/Bert+LSTM/val_loss.pkl', 'wb') as metric_file:
    pickle.dump(val_loss, metric_file)
with open('Non Trainable Embeddings/Bert+LSTM/train_f1.pkl', 'wb') as metric_file:
    pickle.dump(train_f1, metric_file)
with open('Non Trainable Embeddings/Bert+LSTM/val_f1.pkl', 'wb') as metric_file:
    pickle.dump(val_f1, metric_file)