In [1]:
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.optim as optim
import random
from transformers import BertTokenizer, BertModel
import json
import numpy as np
from tqdm import tqdm
import pickle
from sklearn.metrics import f1_score
import matplotlib.pyplot as plt
# Seed every random number generator this notebook imports (Python's `random`,
# NumPy and PyTorch) so results are repeatable after a kernel restart.
random.seed(1)
np.random.seed(1)
torch.manual_seed(1)  # kept last so the cell still displays the torch Generator

<torch._C.Generator at 0x23c7a6f0c30>

In [2]:
# Device string handed to the model constructors below. The training loops call
# `.numpy()` directly on tensors, so they assume everything stays on the CPU.
device = "cpu"

In [3]:
# #check if cuda is available
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# print(device)

In [4]:
def prepare_sequence(seq, to_ix):
    """Look up every item of ``seq`` in ``to_ix`` and pack the ids into a tensor.

    Args:
        seq: iterable of keys (tokens or tag names).
        to_ix: mapping from key to integer index.

    Returns:
        A 1-D ``torch.long`` tensor holding the indices in input order.

    Raises:
        KeyError: if an item of ``seq`` is missing from ``to_ix``.
    """
    indices = []
    for token in seq:
        indices.append(to_ix[token])
    return torch.tensor(indices, dtype=torch.long)

#### Data Loading

In [5]:
# Load the train / test / validation NER splits. Each file is opened in a
# context manager so its handle is closed as soon as the JSON has been parsed.
with open('../Dataset/NER_train.json', 'r') as train_file:
    train_data = json.load(train_file)
with open('../Dataset/NER_test.json', 'r') as test_file:
    test_data = json.load(test_file)
with open('../Dataset/NER_val.json', 'r') as val_file:
    val_data = json.load(val_file)

In [6]:
# Word -> vocabulary index mapping (used with prepare_sequence on the sentence text).
# The context manager closes the file handle once the pickle has been read.
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
with open('../Utils/word_to_idx.pkl', 'rb') as word_to_idx_file:
    word_to_idx = pickle.load(word_to_idx_file)

In [7]:
# Tag string -> tag index mapping (used with prepare_sequence on the label lists).
# The context manager closes the file handle once the pickle has been read.
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
with open('../Utils/tag_to_ix.pkl', 'rb') as tag_to_ix_file:
    tag_to_ix = pickle.load(tag_to_ix_file)

In [15]:
# Inverse of tag_to_ix: tag index -> tag string.
ix_to_tag = dict(zip(tag_to_ix.values(), tag_to_ix.keys()))

In [8]:
# Entity types: every B_/I_ tag name with its two-character prefix stripped.
# Sorted so the list order is the same on every run (iteration order of a set
# of strings is not stable across interpreter sessions).
labels = sorted({tag[2:] for tag in tag_to_ix if tag.startswith(('B_', 'I_'))})

In [16]:
def bio_tags_to_entities(tag_sequence):
    """Convert a sequence of BIO tags into a list of entity spans.

    Tags are interpreted in the ``B_<type>`` / ``I_<type>`` scheme; every other
    tag is treated as "outside".

    Args:
        tag_sequence: iterable of tag strings, one per token.

    Returns:
        A list of ``[entity_type, start, end]`` lists in order of appearance,
        where ``start`` and ``end`` are token indices and ``end`` is inclusive.

    Notes:
        * An ``I_`` tag that does not continue an open entity of the same type
          starts a new entity, so malformed sequences still yield spans.
        * A tag without a ``B_``/``I_`` prefix (``O``, but also markers such as
          ``START_TAG``) closes the open entity and never produces a span.
    """
    entities = []
    current_entity = None

    for i, tag in enumerate(tag_sequence):
        # Continuation of the open entity: just move its (inclusive) end index.
        if tag.startswith('I_') and current_entity and current_entity[0] == tag[2:]:
            current_entity[2] = i
            continue

        # Every other tag terminates whatever entity was open.
        if current_entity:
            entities.append(current_entity)
            current_entity = None

        # B_ always opens a new entity; a stray or mismatched I_ does too.
        if tag.startswith(('B_', 'I_')):
            current_entity = [tag[2:], i, i]  # entity type, start, end

    if current_entity:
        entities.append(current_entity)

    return entities

def strict_f1(actual, pred, labels):
    """Macro-averaged strict entity F1 for a single tagged sequence.

    An entity counts as correct only if its type, start and end all match a
    gold entity exactly.

    Args:
        actual: iterable of gold tag indices (keys of the module-level ``ix_to_tag``).
        pred: iterable of predicted tag indices, aligned with ``actual``.
        labels: entity types to average over.

    Returns:
        The unweighted mean of the per-label F1 scores, or ``0.0`` when
        ``labels`` is empty. A label with no gold and no predicted entity in
        this sequence contributes an F1 of 0.
    """
    # Guard first so an empty label list cannot trigger a division by zero.
    if len(labels) == 0:
        return 0.0

    # Map indices back to BIO tag strings.
    actual_tags = [ix_to_tag[i] for i in actual]
    pred_tags = [ix_to_tag[i] for i in pred]

    # Sets of hashable (type, start, end) tuples make matching an O(1) lookup.
    # Spans from one sequence have distinct start indices, so no entity is lost.
    actual_entities = {tuple(e) for e in bio_tags_to_entities(actual_tags)}
    pred_entities = {tuple(e) for e in bio_tags_to_entities(pred_tags)}

    f1_scores = []
    for label in labels:
        actual_for_label = {e for e in actual_entities if e[0] == label}
        pred_for_label = {e for e in pred_entities if e[0] == label}

        label_tp = len(pred_for_label & actual_for_label)
        label_fp = len(pred_for_label - actual_for_label)
        label_fn = len(actual_for_label - pred_for_label)

        precision = label_tp / (label_tp + label_fp) if label_tp + label_fp > 0 else 0
        recall = label_tp / (label_tp + label_fn) if label_tp + label_fn > 0 else 0
        f1 = 2 * precision * recall / (precision + recall) if precision + recall > 0 else 0
        f1_scores.append(f1)

    macro_f1 = sum(f1_scores) / len(f1_scores)
    return macro_f1


#### RNN model

In [18]:
class RNN_model(nn.Module):
    """Token tagger: pretrained embeddings -> ``nn.RNN`` -> linear layer -> log-softmax.

    NOTE(review): the embedding is built with ``freeze=False`` (weights are
    updated during training), whereas ``LSTM_model`` and ``GRU_model`` use the
    frozen default — confirm this difference is intended.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, target_size, embedding_mat, start_tag, end_tag, tag_to_ix, device='cpu'):
        """Build the layers and record the configuration.

        Args:
            vocab_size: stored on the instance; not used to size any layer.
            embedding_dim: input feature size of the recurrent layer.
            hidden_dim: hidden state size of the recurrent layer.
            target_size: number of output tags.
            embedding_mat: pretrained embedding weights, converted with ``torch.FloatTensor``.
            start_tag, end_tag, tag_to_ix: stored on the instance as-is.
            device: device every sub-module is moved to.
        """
        super().__init__()

        # Layers, created in the order embedding -> recurrent -> projection.
        pretrained_weights = torch.FloatTensor(embedding_mat)
        self.word_embeddings = nn.Embedding.from_pretrained(pretrained_weights, freeze=False).to(device)
        self.rnn = nn.RNN(embedding_dim, hidden_dim).to(device)
        self.hidden2tag = nn.Linear(hidden_dim, target_size).to(device)

        # Plain configuration attributes.
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.target_size = target_size
        self.start_tag = start_tag
        self.end_tag = end_tag
        self.tag_to_ix = tag_to_ix

    def forward(self, sentence):
        """Return per-token log-probabilities of shape ``(len(sentence), target_size)``.

        ``sentence`` is a 1-D tensor of word indices; it is run as a batch of one.
        """
        seq_len = len(sentence)
        embedded = self.word_embeddings(sentence).view(seq_len, 1, -1)
        hidden_states, _ = self.rnn(embedded)
        logits = self.hidden2tag(hidden_states.view(seq_len, -1))
        return nn.functional.log_softmax(logits, dim=1)


#### LSTM Model

In [11]:
class LSTM_model(nn.Module):
    """Token tagger: pretrained embeddings -> ``nn.LSTM`` -> linear layer -> log-softmax.

    The embedding is created with ``from_pretrained`` defaults, i.e. its
    weights are frozen.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, target_size, embedding_mat, start_tag, end_tag, tag_to_ix, device='cpu'):
        """Build the layers and record the configuration.

        Args:
            vocab_size: stored on the instance; not used to size any layer.
            embedding_dim: input feature size of the LSTM.
            hidden_dim: hidden state size of the LSTM.
            target_size: number of output tags.
            embedding_mat: pretrained embedding weights, converted with ``torch.FloatTensor``.
            start_tag, end_tag, tag_to_ix: stored on the instance as-is.
            device: device every sub-module is moved to.
        """
        super().__init__()

        # Layers, created in the order embedding -> recurrent -> projection.
        pretrained_weights = torch.FloatTensor(embedding_mat)
        self.word_embeddings = nn.Embedding.from_pretrained(pretrained_weights).to(device)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim).to(device)
        self.hidden2tag = nn.Linear(hidden_dim, target_size).to(device)

        # Plain configuration attributes.
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.target_size = target_size
        self.start_tag = start_tag
        self.end_tag = end_tag
        self.tag_to_ix = tag_to_ix

    def forward(self, sentence):
        """Return per-token log-probabilities of shape ``(len(sentence), target_size)``.

        ``sentence`` is a 1-D tensor of word indices; it is run as a batch of one.
        """
        seq_len = len(sentence)
        embedded = self.word_embeddings(sentence).view(seq_len, 1, -1)
        hidden_states, _ = self.lstm(embedded)
        logits = self.hidden2tag(hidden_states.view(seq_len, -1))
        return nn.functional.log_softmax(logits, dim=1)

#### GRU model

In [12]:
class GRU_model(nn.Module):
    """Token tagger: pretrained embeddings -> ``nn.GRU`` -> linear layer -> log-softmax.

    The embedding is created with ``from_pretrained`` defaults, i.e. its
    weights are frozen.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, target_size, embedding_mat, start_tag, end_tag, tag_to_ix, device='cpu'):
        """Build the layers and record the configuration.

        Args:
            vocab_size: stored on the instance; not used to size any layer.
            embedding_dim: input feature size of the GRU.
            hidden_dim: hidden state size of the GRU.
            target_size: number of output tags.
            embedding_mat: pretrained embedding weights, converted with ``torch.FloatTensor``.
            start_tag, end_tag, tag_to_ix: stored on the instance as-is.
            device: device every sub-module is moved to.
        """
        super().__init__()

        # Layers, created in the order embedding -> recurrent -> projection.
        pretrained_weights = torch.FloatTensor(embedding_mat)
        self.word_embeddings = nn.Embedding.from_pretrained(pretrained_weights).to(device)
        self.gru = nn.GRU(embedding_dim, hidden_dim).to(device)
        self.hidden2tag = nn.Linear(hidden_dim, target_size).to(device)

        # Plain configuration attributes.
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.target_size = target_size
        self.start_tag = start_tag
        self.end_tag = end_tag
        self.tag_to_ix = tag_to_ix

    def forward(self, sentence):
        """Return per-token log-probabilities of shape ``(len(sentence), target_size)``.

        ``sentence`` is a 1-D tensor of word indices; it is run as a batch of one.
        """
        seq_len = len(sentence)
        embedded = self.word_embeddings(sentence).view(seq_len, 1, -1)
        hidden_states, _ = self.gru(embedded)
        logits = self.hidden2tag(hidden_states.view(seq_len, -1))
        return nn.functional.log_softmax(logits, dim=1)

#### Embedding matrices

In [13]:
# Pretrained embedding matrices passed to the model constructors below.
# Each pickle is read inside a context manager so its file handle is closed
# right after loading.
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
with open('../Utils/legal_bert_embedding_mat.pkl', 'rb') as bert_file:
    bert_embedding_mat = pickle.load(bert_file)
with open('../Utils/word2vec_embedding_mat.pkl', 'rb') as word2vec_file:
    word2vec_embedding_mat = pickle.load(word2vec_file)
with open('../Utils/glove_embedding_mat.pkl', 'rb') as glove_file:
    glove_embedding_mat = pickle.load(glove_file)

#### Glove + RNN 

In [19]:
# Experiment: RNN_model on the GloVe matrix (embedding_dim=300, hidden_dim=256).
# Every entry of train_data is one SGD update; per-entry loss and strict entity F1
# are summed and averaged over the split at the end of each epoch.
rnn_model = RNN_model(
    len(word_to_idx),
    300,
    256,
    len(tag_to_ix),
    glove_embedding_mat,
    tag_to_ix['START_TAG'],
    tag_to_ix['END_TAG'],
    tag_to_ix,
    device,
)
loss_function = nn.NLLLoss()
optimizer = optim.SGD(rnn_model.parameters(), lr=0.01)

epochs = 10
train_loss, val_loss = [], []
train_f1, val_f1 = [], []

for epoch in range(epochs):
    print("Epoch: ", epoch)
    train_loss_temp = val_loss_temp = 0
    train_f1_temp = val_f1_temp = 0

    # Training pass: one optimiser step per entry.
    rnn_model.train()
    for case in tqdm(train_data):
        sample = train_data[case]
        sentence = prepare_sequence(sample['text'].split(' '), word_to_idx)
        targets = prepare_sequence(sample['labels'], tag_to_ix)
        rnn_model.zero_grad()
        tag_scores = rnn_model(sentence)
        loss = loss_function(tag_scores, targets)
        loss.backward()
        optimizer.step()
        train_loss_temp += loss.item()
        predicted = tag_scores.argmax(dim=1).detach().numpy()
        train_f1_temp += strict_f1(targets.detach().numpy(), predicted, labels)
    train_loss.append(train_loss_temp / len(train_data))
    train_f1.append(train_f1_temp / len(train_data))

    # Validation pass: evaluation mode, no gradient tracking, no updates.
    rnn_model.eval()
    with torch.no_grad():
        for case in tqdm(val_data):
            sample = val_data[case]
            sentence = prepare_sequence(sample['text'].split(' '), word_to_idx)
            targets = prepare_sequence(sample['labels'], tag_to_ix)
            tag_scores = rnn_model(sentence)
            loss = loss_function(tag_scores, targets)
            val_loss_temp += loss.item()
            predicted = tag_scores.argmax(dim=1).detach().numpy()
            val_f1_temp += strict_f1(targets.detach().numpy(), predicted, labels)
    val_loss.append(val_loss_temp / len(val_data))
    val_f1.append(val_f1_temp / len(val_data))

    print(f'Train Loss: {train_loss[-1]}, Val Loss: {val_loss[-1]}, Train F1: {train_f1[-1]}, Val F1: {val_f1[-1]}')

Epoch:  0


  0%|          | 4/8019 [00:00<03:40, 36.38it/s]

100%|██████████| 8019/8019 [02:48<00:00, 47.69it/s]
100%|██████████| 1416/1416 [00:01<00:00, 886.35it/s]


Train Loss: 0.6217549527264596, Val Loss: 0.523366157995145, Train F1: 0.0033218636732406832, Val F1: 0.009070331739823244
Epoch:  1


100%|██████████| 8019/8019 [02:46<00:00, 48.29it/s]
100%|██████████| 1416/1416 [00:01<00:00, 873.31it/s]


Train Loss: 0.47405354631749674, Val Loss: 0.43529565876322185, Train F1: 0.01865396730716614, Val F1: 0.02360128101653522
Epoch:  2


  9%|▉         | 742/8019 [00:15<02:30, 48.51it/s]


KeyboardInterrupt: 

In [13]:
# Persist the Glove+RNN model and its per-epoch metric histories.
# Each pickle is written inside a context manager so the file is flushed and
# closed deterministically.
# NOTE(review): RNN_model builds its embedding with freeze=False, yet results are
# stored under 'Non Trainable Embeddings' — confirm the directory is the intended one.
torch.save(rnn_model, 'Non Trainable Embeddings/Glove+RNN/model.pt')
with open('Non Trainable Embeddings/Glove+RNN/train_loss.pkl', 'wb') as out_file:
    pickle.dump(train_loss, out_file)
with open('Non Trainable Embeddings/Glove+RNN/val_loss.pkl', 'wb') as out_file:
    pickle.dump(val_loss, out_file)
with open('Non Trainable Embeddings/Glove+RNN/train_f1.pkl', 'wb') as out_file:
    pickle.dump(train_f1, out_file)
with open('Non Trainable Embeddings/Glove+RNN/val_f1.pkl', 'wb') as out_file:
    pickle.dump(val_f1, out_file)

#### Word2vec + RNN

In [14]:
# Experiment: RNN_model on the word2vec matrix (embedding_dim=300, hidden_dim=256).
# Every entry of train_data is one SGD update; per-entry loss and token-level
# macro F1 (sklearn) are summed and averaged over the split after each epoch.
rnn_model = RNN_model(
    len(word_to_idx),
    300,
    256,
    len(tag_to_ix),
    word2vec_embedding_mat,
    tag_to_ix['START_TAG'],
    tag_to_ix['END_TAG'],
    tag_to_ix,
    device,
)
loss_function = nn.NLLLoss()
optimizer = optim.SGD(rnn_model.parameters(), lr=0.01)

epochs = 10
train_loss, val_loss = [], []
train_f1, val_f1 = [], []

for epoch in range(epochs):
    print("Epoch: ", epoch)
    train_loss_temp = val_loss_temp = 0
    train_f1_temp = val_f1_temp = 0

    # Training pass: one optimiser step per entry.
    rnn_model.train()
    for case in tqdm(train_data):
        sample = train_data[case]
        sentence = prepare_sequence(sample['text'].split(' '), word_to_idx)
        targets = prepare_sequence(sample['labels'], tag_to_ix)
        rnn_model.zero_grad()
        tag_scores = rnn_model(sentence)
        loss = loss_function(tag_scores, targets)
        loss.backward()
        optimizer.step()
        train_loss_temp += loss.item()
        predicted = tag_scores.argmax(dim=1).detach().numpy()
        train_f1_temp += f1_score(targets.detach().numpy(), predicted, average='macro')
    train_loss.append(train_loss_temp / len(train_data))
    train_f1.append(train_f1_temp / len(train_data))

    # Validation pass: evaluation mode, no gradient tracking, no updates.
    rnn_model.eval()
    with torch.no_grad():
        for case in tqdm(val_data):
            sample = val_data[case]
            sentence = prepare_sequence(sample['text'].split(' '), word_to_idx)
            targets = prepare_sequence(sample['labels'], tag_to_ix)
            tag_scores = rnn_model(sentence)
            loss = loss_function(tag_scores, targets)
            val_loss_temp += loss.item()
            predicted = tag_scores.argmax(dim=1).detach().numpy()
            val_f1_temp += f1_score(targets.detach().numpy(), predicted, average='macro')
    val_loss.append(val_loss_temp / len(val_data))
    val_f1.append(val_f1_temp / len(val_data))

    print(f'Train Loss: {train_loss[-1]}, Val Loss: {val_loss[-1]}, Train F1: {train_f1[-1]}, Val F1: {val_f1[-1]}')

Epoch:  0


100%|██████████| 8019/8019 [01:01<00:00, 131.22it/s]
100%|██████████| 1416/1416 [00:03<00:00, 376.71it/s]


Train Loss: 0.6568109598340905, Val Loss: 0.5123076539343265, Train F1: 0.45022232001638784, Val F1: 0.5075236501389049
Epoch:  1


100%|██████████| 8019/8019 [01:03<00:00, 127.22it/s]
100%|██████████| 1416/1416 [00:03<00:00, 393.91it/s]


Train Loss: 0.46343237259119485, Val Loss: 0.4099017054983349, Train F1: 0.5393963593588122, Val F1: 0.5650450992340578
Epoch:  2


100%|██████████| 8019/8019 [01:00<00:00, 132.63it/s]
100%|██████████| 1416/1416 [00:03<00:00, 379.85it/s]


Train Loss: 0.3935642132259288, Val Loss: 0.36856085436455155, Train F1: 0.5896951710526034, Val F1: 0.5976405353162305
Epoch:  3


100%|██████████| 8019/8019 [01:00<00:00, 133.05it/s]
100%|██████████| 1416/1416 [00:04<00:00, 351.28it/s]


Train Loss: 0.35709066869295225, Val Loss: 0.3457368135658299, Train F1: 0.6150465788912581, Val F1: 0.6155416658324357
Epoch:  4


100%|██████████| 8019/8019 [01:01<00:00, 129.91it/s]
100%|██████████| 1416/1416 [00:03<00:00, 378.96it/s]


Train Loss: 0.3315858886363667, Val Loss: 0.33031778971485054, Train F1: 0.6306646733857134, Val F1: 0.6285339054627583
Epoch:  5


100%|██████████| 8019/8019 [01:01<00:00, 131.31it/s]
100%|██████████| 1416/1416 [00:03<00:00, 379.35it/s]


Train Loss: 0.31367390837457154, Val Loss: 0.3201191327903758, Train F1: 0.6436502582115257, Val F1: 0.6374339891746351
Epoch:  6


100%|██████████| 8019/8019 [01:01<00:00, 129.47it/s]
100%|██████████| 1416/1416 [00:04<00:00, 352.66it/s]


Train Loss: 0.2988803275990276, Val Loss: 0.31305965234721667, Train F1: 0.6539981862033819, Val F1: 0.6425627022798542
Epoch:  7


100%|██████████| 8019/8019 [01:02<00:00, 128.24it/s]
100%|██████████| 1416/1416 [00:04<00:00, 323.03it/s]


Train Loss: 0.2866293002884059, Val Loss: 0.3094808140665271, Train F1: 0.6634050770304782, Val F1: 0.6430225970760293
Epoch:  8


100%|██████████| 8019/8019 [01:02<00:00, 129.14it/s]
100%|██████████| 1416/1416 [00:04<00:00, 333.15it/s]


Train Loss: 0.27520281374611794, Val Loss: 0.3058609219299124, Train F1: 0.6721823049135188, Val F1: 0.6471620169636737
Epoch:  9


100%|██████████| 8019/8019 [01:04<00:00, 125.12it/s]
100%|██████████| 1416/1416 [00:04<00:00, 345.87it/s]

Train Loss: 0.26516265273937567, Val Loss: 0.30503580675272624, Train F1: 0.6799706768963885, Val F1: 0.6527177738060994





In [15]:
# Persist the Word2vec+RNN model and its per-epoch metric histories.
# Each pickle is written inside a context manager so the file is flushed and
# closed deterministically.
# NOTE(review): RNN_model builds its embedding with freeze=False, yet results are
# stored under 'Non Trainable Embeddings' — confirm the directory is the intended one.
torch.save(rnn_model, 'Non Trainable Embeddings/Word2vec+RNN/model.pt')
with open('Non Trainable Embeddings/Word2vec+RNN/train_loss.pkl', 'wb') as out_file:
    pickle.dump(train_loss, out_file)
with open('Non Trainable Embeddings/Word2vec+RNN/val_loss.pkl', 'wb') as out_file:
    pickle.dump(val_loss, out_file)
with open('Non Trainable Embeddings/Word2vec+RNN/train_f1.pkl', 'wb') as out_file:
    pickle.dump(train_f1, out_file)
with open('Non Trainable Embeddings/Word2vec+RNN/val_f1.pkl', 'wb') as out_file:
    pickle.dump(val_f1, out_file)

#### BERT + RNN

In [16]:
# Experiment: RNN_model on the BERT matrix (embedding_dim=768, hidden_dim=512).
# Every entry of train_data is one SGD update; per-entry loss and token-level
# macro F1 (sklearn) are summed and averaged over the split after each epoch.
rnn_model = RNN_model(
    len(word_to_idx),
    768,
    512,
    len(tag_to_ix),
    bert_embedding_mat,
    tag_to_ix['START_TAG'],
    tag_to_ix['END_TAG'],
    tag_to_ix,
    device,
)
loss_function = nn.NLLLoss()
optimizer = optim.SGD(rnn_model.parameters(), lr=0.01)

epochs = 10
train_loss, val_loss = [], []
train_f1, val_f1 = [], []

for epoch in range(epochs):
    print("Epoch: ", epoch)
    train_loss_temp = val_loss_temp = 0
    train_f1_temp = val_f1_temp = 0

    # Training pass: one optimiser step per entry.
    rnn_model.train()
    for case in tqdm(train_data):
        sample = train_data[case]
        sentence = prepare_sequence(sample['text'].split(' '), word_to_idx)
        targets = prepare_sequence(sample['labels'], tag_to_ix)
        rnn_model.zero_grad()
        tag_scores = rnn_model(sentence)
        loss = loss_function(tag_scores, targets)
        loss.backward()
        optimizer.step()
        train_loss_temp += loss.item()
        predicted = tag_scores.argmax(dim=1).detach().numpy()
        train_f1_temp += f1_score(targets.detach().numpy(), predicted, average='macro')
    train_loss.append(train_loss_temp / len(train_data))
    train_f1.append(train_f1_temp / len(train_data))

    # Validation pass: evaluation mode, no gradient tracking, no updates.
    rnn_model.eval()
    with torch.no_grad():
        for case in tqdm(val_data):
            sample = val_data[case]
            sentence = prepare_sequence(sample['text'].split(' '), word_to_idx)
            targets = prepare_sequence(sample['labels'], tag_to_ix)
            tag_scores = rnn_model(sentence)
            loss = loss_function(tag_scores, targets)
            val_loss_temp += loss.item()
            predicted = tag_scores.argmax(dim=1).detach().numpy()
            val_f1_temp += f1_score(targets.detach().numpy(), predicted, average='macro')
    val_loss.append(val_loss_temp / len(val_data))
    val_f1.append(val_f1_temp / len(val_data))

    print(f'Train Loss: {train_loss[-1]}, Val Loss: {val_loss[-1]}, Train F1: {train_f1[-1]}, Val F1: {val_f1[-1]}')

Epoch:  0


100%|██████████| 8019/8019 [01:22<00:00, 96.96it/s] 
100%|██████████| 1416/1416 [00:04<00:00, 291.18it/s]


Train Loss: 0.7368409467210908, Val Loss: 0.6759775052331177, Train F1: 0.4277597931855873, Val F1: 0.4363632110328191
Epoch:  1


100%|██████████| 8019/8019 [01:22<00:00, 96.97it/s] 
100%|██████████| 1416/1416 [00:04<00:00, 328.16it/s]


Train Loss: 0.6338107338778406, Val Loss: 0.6241690827859828, Train F1: 0.46883327355645327, Val F1: 0.46872462492834777
Epoch:  2


100%|██████████| 8019/8019 [01:18<00:00, 102.77it/s]
100%|██████████| 1416/1416 [00:04<00:00, 301.09it/s]


Train Loss: 0.5837268076949951, Val Loss: 0.5889268368666749, Train F1: 0.4958340108381039, Val F1: 0.4847640812893645
Epoch:  3


100%|██████████| 8019/8019 [01:22<00:00, 96.62it/s] 
100%|██████████| 1416/1416 [00:05<00:00, 278.44it/s]


Train Loss: 0.5520310315170757, Val Loss: 0.5676649358928583, Train F1: 0.5151703970972682, Val F1: 0.49522527562232593
Epoch:  4


100%|██████████| 8019/8019 [01:21<00:00, 97.81it/s] 
100%|██████████| 1416/1416 [00:04<00:00, 291.54it/s]


Train Loss: 0.5288441904446184, Val Loss: 0.5486583332110218, Train F1: 0.5269894172271448, Val F1: 0.5062500949197646
Epoch:  5


100%|██████████| 8019/8019 [01:21<00:00, 98.78it/s] 
100%|██████████| 1416/1416 [00:04<00:00, 319.83it/s]


Train Loss: 0.5102261729334732, Val Loss: 0.5340848006058148, Train F1: 0.5382163843493825, Val F1: 0.5194841667735018
Epoch:  6


100%|██████████| 8019/8019 [01:21<00:00, 98.73it/s] 
100%|██████████| 1416/1416 [00:04<00:00, 315.50it/s]


Train Loss: 0.4927880446745838, Val Loss: 0.5229834659567029, Train F1: 0.5484856357378282, Val F1: 0.5284026581061378
Epoch:  7


100%|██████████| 8019/8019 [01:21<00:00, 98.76it/s] 
100%|██████████| 1416/1416 [00:04<00:00, 322.58it/s]


Train Loss: 0.47838883303430557, Val Loss: 0.5122087614355598, Train F1: 0.5566179090157893, Val F1: 0.5356131004121726
Epoch:  8


100%|██████████| 8019/8019 [01:20<00:00, 99.94it/s] 
100%|██████████| 1416/1416 [00:04<00:00, 289.39it/s]


Train Loss: 0.4688579626056207, Val Loss: 0.5046447033633473, Train F1: 0.5635292667932726, Val F1: 0.5387408807515905
Epoch:  9


100%|██████████| 8019/8019 [01:18<00:00, 101.60it/s]
100%|██████████| 1416/1416 [00:04<00:00, 308.58it/s]

Train Loss: 0.45632064041517484, Val Loss: 0.49597533890651274, Train F1: 0.5704571205647169, Val F1: 0.5436559214550242





In [17]:
# Persist the Bert+RNN model and its per-epoch metric histories.
# Each pickle is written inside a context manager so the file is flushed and
# closed deterministically.
# NOTE(review): RNN_model builds its embedding with freeze=False, yet results are
# stored under 'Non Trainable Embeddings' — confirm the directory is the intended one.
torch.save(rnn_model, 'Non Trainable Embeddings/Bert+RNN/model.pt')
with open('Non Trainable Embeddings/Bert+RNN/train_loss.pkl', 'wb') as out_file:
    pickle.dump(train_loss, out_file)
with open('Non Trainable Embeddings/Bert+RNN/val_loss.pkl', 'wb') as out_file:
    pickle.dump(val_loss, out_file)
with open('Non Trainable Embeddings/Bert+RNN/train_f1.pkl', 'wb') as out_file:
    pickle.dump(train_f1, out_file)
with open('Non Trainable Embeddings/Bert+RNN/val_f1.pkl', 'wb') as out_file:
    pickle.dump(val_f1, out_file)

#### Glove + GRU

In [18]:
# Experiment: GRU_model on the GloVe matrix (embedding_dim=300, hidden_dim=256).
# Every entry of train_data is one SGD update; per-entry loss and token-level
# macro F1 (sklearn) are summed and averaged over the split after each epoch.
gru_model = GRU_model(
    len(word_to_idx),
    300,
    256,
    len(tag_to_ix),
    glove_embedding_mat,
    tag_to_ix['START_TAG'],
    tag_to_ix['END_TAG'],
    tag_to_ix,
    device,
)
loss_function = nn.NLLLoss()
optimizer = optim.SGD(gru_model.parameters(), lr=0.01)

epochs = 10
train_loss, val_loss = [], []
train_f1, val_f1 = [], []

for epoch in range(epochs):
    print("Epoch: ", epoch)
    train_loss_temp = val_loss_temp = 0
    train_f1_temp = val_f1_temp = 0

    # Training pass: one optimiser step per entry.
    gru_model.train()
    for case in tqdm(train_data):
        sample = train_data[case]
        sentence = prepare_sequence(sample['text'].split(' '), word_to_idx)
        targets = prepare_sequence(sample['labels'], tag_to_ix)
        gru_model.zero_grad()
        tag_scores = gru_model(sentence)
        loss = loss_function(tag_scores, targets)
        loss.backward()
        optimizer.step()
        train_loss_temp += loss.item()
        predicted = tag_scores.argmax(dim=1).detach().numpy()
        train_f1_temp += f1_score(targets.detach().numpy(), predicted, average='macro')
    train_loss.append(train_loss_temp / len(train_data))
    train_f1.append(train_f1_temp / len(train_data))

    # Validation pass: evaluation mode, no gradient tracking, no updates.
    gru_model.eval()
    with torch.no_grad():
        for case in tqdm(val_data):
            sample = val_data[case]
            sentence = prepare_sequence(sample['text'].split(' '), word_to_idx)
            targets = prepare_sequence(sample['labels'], tag_to_ix)
            tag_scores = gru_model(sentence)
            loss = loss_function(tag_scores, targets)
            val_loss_temp += loss.item()
            predicted = tag_scores.argmax(dim=1).detach().numpy()
            val_f1_temp += f1_score(targets.detach().numpy(), predicted, average='macro')
    val_loss.append(val_loss_temp / len(val_data))
    val_f1.append(val_f1_temp / len(val_data))

    print(f'Train Loss: {train_loss[-1]}, Val Loss: {val_loss[-1]}, Train F1: {train_f1[-1]}, Val F1: {val_f1[-1]}')

Epoch:  0


100%|██████████| 8019/8019 [02:11<00:00, 60.95it/s]
100%|██████████| 1416/1416 [00:07<00:00, 195.19it/s]


Train Loss: 0.6882539816075419, Val Loss: 0.5976716481796387, Train F1: 0.42870465145343867, Val F1: 0.4430916613604604
Epoch:  1


100%|██████████| 8019/8019 [02:19<00:00, 57.65it/s]
100%|██████████| 1416/1416 [00:06<00:00, 207.10it/s]


Train Loss: 0.5495924759435642, Val Loss: 0.5153488048957726, Train F1: 0.47836394664500254, Val F1: 0.48875410650824846
Epoch:  2


100%|██████████| 8019/8019 [02:26<00:00, 54.74it/s]
100%|██████████| 1416/1416 [00:06<00:00, 214.45it/s]


Train Loss: 0.474431908247064, Val Loss: 0.452491026939571, Train F1: 0.5323216788718329, Val F1: 0.5428037358104196
Epoch:  3


100%|██████████| 8019/8019 [02:20<00:00, 57.01it/s]
100%|██████████| 1416/1416 [00:06<00:00, 204.83it/s]


Train Loss: 0.4226939980635995, Val Loss: 0.4179243547917544, Train F1: 0.5665807420094102, Val F1: 0.5700014063784595
Epoch:  4


100%|██████████| 8019/8019 [02:30<00:00, 53.28it/s]
100%|██████████| 1416/1416 [00:06<00:00, 208.56it/s]


Train Loss: 0.3920614507017979, Val Loss: 0.39534163706888914, Train F1: 0.5876207473111807, Val F1: 0.5884854919378038
Epoch:  5


100%|██████████| 8019/8019 [02:26<00:00, 54.69it/s]
100%|██████████| 1416/1416 [00:07<00:00, 185.11it/s]


Train Loss: 0.37001058997610237, Val Loss: 0.37807923638455715, Train F1: 0.6025012219368726, Val F1: 0.6006151393876423
Epoch:  6


100%|██████████| 8019/8019 [02:32<00:00, 52.55it/s]
100%|██████████| 1416/1416 [00:06<00:00, 203.83it/s]


Train Loss: 0.3524040918543519, Val Loss: 0.3645944263728774, Train F1: 0.6161772119032293, Val F1: 0.610336335187634
Epoch:  7


100%|██████████| 8019/8019 [02:22<00:00, 56.47it/s]
100%|██████████| 1416/1416 [00:06<00:00, 211.24it/s]


Train Loss: 0.3376779318998096, Val Loss: 0.35407042138165523, Train F1: 0.6265041783405458, Val F1: 0.618687691598818
Epoch:  8


100%|██████████| 8019/8019 [02:15<00:00, 59.38it/s]
100%|██████████| 1416/1416 [00:06<00:00, 219.14it/s]


Train Loss: 0.325041548060872, Val Loss: 0.34575062517185584, Train F1: 0.6359955101217711, Val F1: 0.626447280659448
Epoch:  9


100%|██████████| 8019/8019 [02:27<00:00, 54.47it/s]
100%|██████████| 1416/1416 [00:07<00:00, 196.38it/s]

Train Loss: 0.31393000045901454, Val Loss: 0.33899746557787597, Train F1: 0.6445698320951921, Val F1: 0.6314090060799529





In [19]:
# Persist the Glove+GRU model and its per-epoch metric histories.
# Each pickle is written inside a context manager so the file is flushed and
# closed deterministically.
torch.save(gru_model, 'Non Trainable Embeddings/Glove+GRU/model.pt')
with open('Non Trainable Embeddings/Glove+GRU/train_loss.pkl', 'wb') as out_file:
    pickle.dump(train_loss, out_file)
with open('Non Trainable Embeddings/Glove+GRU/val_loss.pkl', 'wb') as out_file:
    pickle.dump(val_loss, out_file)
with open('Non Trainable Embeddings/Glove+GRU/train_f1.pkl', 'wb') as out_file:
    pickle.dump(train_f1, out_file)
with open('Non Trainable Embeddings/Glove+GRU/val_f1.pkl', 'wb') as out_file:
    pickle.dump(val_f1, out_file)

#### Word2vec + GRU

In [20]:
# Experiment: GRU_model on the word2vec matrix (embedding_dim=300, hidden_dim=256).
# Every entry of train_data is one SGD update; per-entry loss and token-level
# macro F1 (sklearn) are summed and averaged over the split after each epoch.
gru_model = GRU_model(
    len(word_to_idx),
    300,
    256,
    len(tag_to_ix),
    word2vec_embedding_mat,
    tag_to_ix['START_TAG'],
    tag_to_ix['END_TAG'],
    tag_to_ix,
    device,
)
loss_function = nn.NLLLoss()
optimizer = optim.SGD(gru_model.parameters(), lr=0.01)

epochs = 10
train_loss, val_loss = [], []
train_f1, val_f1 = [], []

for epoch in range(epochs):
    print("Epoch: ", epoch)
    train_loss_temp = val_loss_temp = 0
    train_f1_temp = val_f1_temp = 0

    # Training pass: one optimiser step per entry.
    gru_model.train()
    for case in tqdm(train_data):
        sample = train_data[case]
        sentence = prepare_sequence(sample['text'].split(' '), word_to_idx)
        targets = prepare_sequence(sample['labels'], tag_to_ix)
        gru_model.zero_grad()
        tag_scores = gru_model(sentence)
        loss = loss_function(tag_scores, targets)
        loss.backward()
        optimizer.step()
        train_loss_temp += loss.item()
        predicted = tag_scores.argmax(dim=1).detach().numpy()
        train_f1_temp += f1_score(targets.detach().numpy(), predicted, average='macro')
    train_loss.append(train_loss_temp / len(train_data))
    train_f1.append(train_f1_temp / len(train_data))

    # Validation pass: evaluation mode, no gradient tracking, no updates.
    gru_model.eval()
    with torch.no_grad():
        for case in tqdm(val_data):
            sample = val_data[case]
            sentence = prepare_sequence(sample['text'].split(' '), word_to_idx)
            targets = prepare_sequence(sample['labels'], tag_to_ix)
            tag_scores = gru_model(sentence)
            loss = loss_function(tag_scores, targets)
            val_loss_temp += loss.item()
            predicted = tag_scores.argmax(dim=1).detach().numpy()
            val_f1_temp += f1_score(targets.detach().numpy(), predicted, average='macro')
    val_loss.append(val_loss_temp / len(val_data))
    val_f1.append(val_f1_temp / len(val_data))

    print(f'Train Loss: {train_loss[-1]}, Val Loss: {val_loss[-1]}, Train F1: {train_f1[-1]}, Val F1: {val_f1[-1]}')

Epoch:  0


100%|██████████| 8019/8019 [02:26<00:00, 54.72it/s]
100%|██████████| 1416/1416 [00:07<00:00, 182.47it/s]


Train Loss: 0.737562996353968, Val Loss: 0.6298637887660945, Train F1: 0.4227261132052267, Val F1: 0.41879864694068475
Epoch:  1


100%|██████████| 8019/8019 [02:25<00:00, 55.19it/s]
100%|██████████| 1416/1416 [00:07<00:00, 201.97it/s]


Train Loss: 0.57293946362301, Val Loss: 0.5307543434467072, Train F1: 0.4667838049308789, Val F1: 0.48871960265662123
Epoch:  2


100%|██████████| 8019/8019 [02:32<00:00, 52.61it/s]
100%|██████████| 1416/1416 [00:06<00:00, 214.65it/s]


Train Loss: 0.4934417404609714, Val Loss: 0.46526762644140746, Train F1: 0.5107597230959102, Val F1: 0.5227218723974749
Epoch:  3


100%|██████████| 8019/8019 [02:28<00:00, 53.98it/s]
100%|██████████| 1416/1416 [00:07<00:00, 200.48it/s]


Train Loss: 0.4372184418201015, Val Loss: 0.4215953272530917, Train F1: 0.545327936667112, Val F1: 0.5535032209425645
Epoch:  4


100%|██████████| 8019/8019 [02:21<00:00, 56.70it/s]
100%|██████████| 1416/1416 [00:06<00:00, 218.95it/s]


Train Loss: 0.39919055633732276, Val Loss: 0.39206782220785524, Train F1: 0.5751892419151332, Val F1: 0.5789999833861168
Epoch:  5


100%|██████████| 8019/8019 [02:20<00:00, 56.97it/s]
100%|██████████| 1416/1416 [00:06<00:00, 203.71it/s]


Train Loss: 0.37083544028278165, Val Loss: 0.3695542228412762, Train F1: 0.5982670078389624, Val F1: 0.5947915837945644
Epoch:  6


100%|██████████| 8019/8019 [02:20<00:00, 57.15it/s]
100%|██████████| 1416/1416 [00:07<00:00, 199.09it/s]


Train Loss: 0.34821875970447863, Val Loss: 0.3515243546245192, Train F1: 0.6172448474912244, Val F1: 0.6123087902573725
Epoch:  7


100%|██████████| 8019/8019 [02:28<00:00, 54.11it/s]
100%|██████████| 1416/1416 [00:07<00:00, 192.49it/s]


Train Loss: 0.3296885823105584, Val Loss: 0.33693415130675036, Train F1: 0.6300532342860933, Val F1: 0.62197680186225
Epoch:  8


100%|██████████| 8019/8019 [02:21<00:00, 56.70it/s]
100%|██████████| 1416/1416 [00:07<00:00, 198.70it/s]


Train Loss: 0.31441248062531857, Val Loss: 0.325275576554813, Train F1: 0.6397966262046584, Val F1: 0.6283436745609907
Epoch:  9


100%|██████████| 8019/8019 [02:21<00:00, 56.74it/s]
100%|██████████| 1416/1416 [00:07<00:00, 198.84it/s]

Train Loss: 0.30172139871192627, Val Loss: 0.3159502736656742, Train F1: 0.6484249065592157, Val F1: 0.633736342605233





In [21]:
# Persist the Word2vec+GRU model and its per-epoch metric histories.
# Each pickle is written inside a context manager so the file is flushed and
# closed deterministically.
torch.save(gru_model, 'Non Trainable Embeddings/Word2vec+GRU/model.pt')
with open('Non Trainable Embeddings/Word2vec+GRU/train_loss.pkl', 'wb') as out_file:
    pickle.dump(train_loss, out_file)
with open('Non Trainable Embeddings/Word2vec+GRU/val_loss.pkl', 'wb') as out_file:
    pickle.dump(val_loss, out_file)
with open('Non Trainable Embeddings/Word2vec+GRU/train_f1.pkl', 'wb') as out_file:
    pickle.dump(train_f1, out_file)
with open('Non Trainable Embeddings/Word2vec+GRU/val_f1.pkl', 'wb') as out_file:
    pickle.dump(val_f1, out_file)

#### BERT + GRU

In [23]:
# Experiment: GRU_model on the BERT matrix (embedding_dim=768, hidden_dim=512).
# Every entry of train_data is one SGD update; per-entry loss and token-level
# macro F1 (sklearn) are summed and averaged over the split after each epoch.
gru_model = GRU_model(
    len(word_to_idx),
    768,
    512,
    len(tag_to_ix),
    bert_embedding_mat,
    tag_to_ix['START_TAG'],
    tag_to_ix['END_TAG'],
    tag_to_ix,
    device,
)
loss_function = nn.NLLLoss()
optimizer = optim.SGD(gru_model.parameters(), lr=0.01)

epochs = 10
train_loss, val_loss = [], []
train_f1, val_f1 = [], []

for epoch in range(epochs):
    print("Epoch: ", epoch)
    train_loss_temp = val_loss_temp = 0
    train_f1_temp = val_f1_temp = 0

    # Training pass: one optimiser step per entry.
    gru_model.train()
    for case in tqdm(train_data):
        sample = train_data[case]
        sentence = prepare_sequence(sample['text'].split(' '), word_to_idx)
        targets = prepare_sequence(sample['labels'], tag_to_ix)
        gru_model.zero_grad()
        tag_scores = gru_model(sentence)
        loss = loss_function(tag_scores, targets)
        loss.backward()
        optimizer.step()
        train_loss_temp += loss.item()
        predicted = tag_scores.argmax(dim=1).detach().numpy()
        train_f1_temp += f1_score(targets.detach().numpy(), predicted, average='macro')
    train_loss.append(train_loss_temp / len(train_data))
    train_f1.append(train_f1_temp / len(train_data))

    # Validation pass: evaluation mode, no gradient tracking, no updates.
    gru_model.eval()
    with torch.no_grad():
        for case in tqdm(val_data):
            sample = val_data[case]
            sentence = prepare_sequence(sample['text'].split(' '), word_to_idx)
            targets = prepare_sequence(sample['labels'], tag_to_ix)
            tag_scores = gru_model(sentence)
            loss = loss_function(tag_scores, targets)
            val_loss_temp += loss.item()
            predicted = tag_scores.argmax(dim=1).detach().numpy()
            val_f1_temp += f1_score(targets.detach().numpy(), predicted, average='macro')
    val_loss.append(val_loss_temp / len(val_data))
    val_f1.append(val_f1_temp / len(val_data))

    print(f'Train Loss: {train_loss[-1]}, Val Loss: {val_loss[-1]}, Train F1: {train_f1[-1]}, Val F1: {val_f1[-1]}')

Epoch:  0


  0%|          | 0/8019 [00:00<?, ?it/s]

100%|██████████| 8019/8019 [04:16<00:00, 31.22it/s]
100%|██████████| 1416/1416 [00:08<00:00, 159.35it/s]


Train Loss: 0.7718177673970524, Val Loss: 0.712550135905523, Train F1: 0.4236934122815464, Val F1: 0.42170276101007276
Epoch:  1


100%|██████████| 8019/8019 [04:55<00:00, 27.14it/s]
100%|██████████| 1416/1416 [00:11<00:00, 122.34it/s]


Train Loss: 0.6646527963973724, Val Loss: 0.6401108220972036, Train F1: 0.44142563517677624, Val F1: 0.4487380815411967
Epoch:  2


100%|██████████| 8019/8019 [04:37<00:00, 28.87it/s]
100%|██████████| 1416/1416 [00:09<00:00, 144.06it/s]


Train Loss: 0.6034736071032343, Val Loss: 0.5962978212312811, Train F1: 0.47328289447101607, Val F1: 0.4715909469108296
Epoch:  3


100%|██████████| 8019/8019 [04:50<00:00, 27.59it/s]
100%|██████████| 1416/1416 [00:15<00:00, 90.91it/s] 


Train Loss: 0.5652169087843758, Val Loss: 0.5675093091400814, Train F1: 0.49156399682866636, Val F1: 0.4845472168531068
Epoch:  4


100%|██████████| 8019/8019 [04:14<00:00, 31.49it/s]
100%|██████████| 1416/1416 [00:08<00:00, 169.24it/s]


Train Loss: 0.5380313842882445, Val Loss: 0.5463344408054495, Train F1: 0.507321971787787, Val F1: 0.4958758372142531
Epoch:  5


100%|██████████| 8019/8019 [03:53<00:00, 34.31it/s]
100%|██████████| 1416/1416 [00:08<00:00, 159.00it/s]


Train Loss: 0.5163939018439502, Val Loss: 0.5297214849628403, Train F1: 0.5208051627273991, Val F1: 0.5034881854134534
Epoch:  6


100%|██████████| 8019/8019 [04:43<00:00, 28.25it/s]
100%|██████████| 1416/1416 [00:10<00:00, 131.12it/s]


Train Loss: 0.49807887467905165, Val Loss: 0.5163377769512393, Train F1: 0.5335578044374705, Val F1: 0.5107599031044531
Epoch:  7


100%|██████████| 8019/8019 [05:22<00:00, 24.90it/s]
100%|██████████| 1416/1416 [00:12<00:00, 115.50it/s]


Train Loss: 0.4821960475611798, Val Loss: 0.5054146561108656, Train F1: 0.5432516232031239, Val F1: 0.5186259280894187
Epoch:  8


100%|██████████| 8019/8019 [04:03<00:00, 32.91it/s]
100%|██████████| 1416/1416 [00:08<00:00, 158.14it/s]


Train Loss: 0.4682953209257489, Val Loss: 0.4963482150809292, Train F1: 0.5514721365929035, Val F1: 0.5250640637391422
Epoch:  9


100%|██████████| 8019/8019 [03:39<00:00, 36.51it/s]
100%|██████████| 1416/1416 [00:08<00:00, 166.67it/s]

Train Loss: 0.45596104070301746, Val Loss: 0.4886721015873623, Train F1: 0.5595118671059827, Val F1: 0.5309913574711819





In [24]:
# Persist gru_model and its loss/F1 histories under the Bert+GRU results directory.
# NOTE: torch.save on a whole module pickles the object, so a later torch.load
# needs the model class definition to be available.
torch.save(gru_model, 'Non Trainable Embeddings/Bert+GRU/model.pt')

# Context managers guarantee each pickle file is flushed and closed, instead of
# leaving the handles returned by bare open() calls to the garbage collector.
for metric_name, metric_history in (
    ('train_loss', train_loss),
    ('val_loss', val_loss),
    ('train_f1', train_f1),
    ('val_f1', val_f1),
):
    with open(f'Non Trainable Embeddings/Bert+GRU/{metric_name}.pkl', 'wb') as metric_file:
        pickle.dump(metric_history, metric_file)

#### GloVe + LSTM

In [25]:
# Build the LSTM tagger on top of the GloVe embedding matrix.
# NOTE(review): the positional args look like (vocab size, embedding dim, hidden dim,
# tagset size, embedding matrix, start-tag id, end-tag id, tag map, device) —
# confirm against the LSTM_model definition.
lstm_model = LSTM_model(
    len(word_to_idx),
    300,
    256,
    len(tag_to_ix),
    glove_embedding_mat,
    tag_to_ix['START_TAG'],
    tag_to_ix['END_TAG'],
    tag_to_ix,
    device,
)
loss_function = nn.NLLLoss()
optimizer = optim.SGD(lstm_model.parameters(), lr=0.01)

epochs = 10
# One entry is appended to each history list per epoch.
train_loss, val_loss = [], []
train_f1, val_f1 = [], []

for epoch in range(epochs):
    print("Epoch: ", epoch)

    # Training pass: one optimizer step per example, visited in dict order.
    running_train_loss = 0
    running_train_f1 = 0
    lstm_model.train()
    for case, record in tqdm(train_data.items()):
        sentence = prepare_sequence(record['text'].split(' '), word_to_idx)
        targets = prepare_sequence(record['labels'], tag_to_ix)

        lstm_model.zero_grad()
        tag_scores = lstm_model(sentence)
        loss = loss_function(tag_scores, targets)
        loss.backward()
        optimizer.step()

        # The macro F1 is computed per example and averaged over examples below.
        gold_tags = targets.detach().numpy()
        predicted_tags = tag_scores.argmax(dim=1).detach().numpy()
        running_train_loss += loss.item()
        running_train_f1 += f1_score(gold_tags, predicted_tags, average='macro')
    train_loss.append(running_train_loss / len(train_data))
    train_f1.append(running_train_f1 / len(train_data))

    # Validation pass: eval mode with gradients disabled; no parameter updates.
    running_val_loss = 0
    running_val_f1 = 0
    lstm_model.eval()
    with torch.no_grad():
        for case, record in tqdm(val_data.items()):
            sentence = prepare_sequence(record['text'].split(' '), word_to_idx)
            targets = prepare_sequence(record['labels'], tag_to_ix)
            tag_scores = lstm_model(sentence)
            loss = loss_function(tag_scores, targets)

            gold_tags = targets.detach().numpy()
            predicted_tags = tag_scores.argmax(dim=1).detach().numpy()
            running_val_loss += loss.item()
            running_val_f1 += f1_score(gold_tags, predicted_tags, average='macro')
    val_loss.append(running_val_loss / len(val_data))
    val_f1.append(running_val_f1 / len(val_data))

    print(f'Train Loss: {train_loss[-1]}, Val Loss: {val_loss[-1]}, Train F1: {train_f1[-1]}, Val F1: {val_f1[-1]}')

Epoch:  0


100%|██████████| 8019/8019 [00:54<00:00, 147.82it/s]
100%|██████████| 1416/1416 [00:05<00:00, 280.76it/s]


Train Loss: 0.7460598774406, Val Loss: 0.6544797988647942, Train F1: 0.42508995751908224, Val F1: 0.4216943793316018
Epoch:  1


100%|██████████| 8019/8019 [01:01<00:00, 129.50it/s]
100%|██████████| 1416/1416 [00:05<00:00, 246.85it/s]


Train Loss: 0.611286590423444, Val Loss: 0.5812828869773943, Train F1: 0.44119222107682715, Val F1: 0.45504164818478976
Epoch:  2


100%|██████████| 8019/8019 [01:05<00:00, 121.58it/s]
100%|██████████| 1416/1416 [00:05<00:00, 245.93it/s]


Train Loss: 0.5451993814715482, Val Loss: 0.5141528328950714, Train F1: 0.4784273578063643, Val F1: 0.49307966444325396
Epoch:  3


100%|██████████| 8019/8019 [01:08<00:00, 116.27it/s]
100%|██████████| 1416/1416 [00:06<00:00, 229.79it/s]


Train Loss: 0.48119108553700335, Val Loss: 0.46045840614574624, Train F1: 0.5172136748517872, Val F1: 0.5274958640883904
Epoch:  4


100%|██████████| 8019/8019 [01:09<00:00, 116.03it/s]
100%|██████████| 1416/1416 [00:05<00:00, 238.93it/s]


Train Loss: 0.43566570920179376, Val Loss: 0.4272615499736111, Train F1: 0.5513706001105356, Val F1: 0.556485275577772
Epoch:  5


100%|██████████| 8019/8019 [01:07<00:00, 118.14it/s]
100%|██████████| 1416/1416 [00:05<00:00, 272.62it/s]


Train Loss: 0.40367328855518564, Val Loss: 0.4032549147963087, Train F1: 0.575762640984738, Val F1: 0.574476640223407
Epoch:  6


100%|██████████| 8019/8019 [01:04<00:00, 123.85it/s]
100%|██████████| 1416/1416 [00:06<00:00, 232.86it/s]


Train Loss: 0.38053527020649297, Val Loss: 0.3877811541474664, Train F1: 0.5916291290045934, Val F1: 0.5873142158249975
Epoch:  7


100%|██████████| 8019/8019 [01:08<00:00, 116.95it/s]
100%|██████████| 1416/1416 [00:04<00:00, 285.30it/s]


Train Loss: 0.36223064416297013, Val Loss: 0.37481622979499396, Train F1: 0.6019901011110216, Val F1: 0.596579407022704
Epoch:  8


100%|██████████| 8019/8019 [01:01<00:00, 130.60it/s]
100%|██████████| 1416/1416 [00:04<00:00, 285.92it/s]


Train Loss: 0.34632849099186774, Val Loss: 0.36451842268310797, Train F1: 0.6137311961358733, Val F1: 0.607048867482402
Epoch:  9


100%|██████████| 8019/8019 [01:00<00:00, 132.55it/s]
100%|██████████| 1416/1416 [00:04<00:00, 290.04it/s]

Train Loss: 0.332572354783671, Val Loss: 0.35643205488945645, Train F1: 0.624278752916603, Val F1: 0.6170964888249129





In [26]:
# Persist lstm_model and its loss/F1 histories under the Glove+LSTM results directory.
# NOTE: torch.save on a whole module pickles the object, so a later torch.load
# needs the model class definition to be available.
torch.save(lstm_model, 'Non Trainable Embeddings/Glove+LSTM/model.pt')

# Context managers guarantee each pickle file is flushed and closed, instead of
# leaving the handles returned by bare open() calls to the garbage collector.
for metric_name, metric_history in (
    ('train_loss', train_loss),
    ('val_loss', val_loss),
    ('train_f1', train_f1),
    ('val_f1', val_f1),
):
    with open(f'Non Trainable Embeddings/Glove+LSTM/{metric_name}.pkl', 'wb') as metric_file:
        pickle.dump(metric_history, metric_file)

#### Word2vec + LSTM

In [27]:
# Build the LSTM tagger on top of the Word2vec embedding matrix.
# NOTE(review): the positional args look like (vocab size, embedding dim, hidden dim,
# tagset size, embedding matrix, start-tag id, end-tag id, tag map, device) —
# confirm against the LSTM_model definition.
lstm_model = LSTM_model(
    len(word_to_idx),
    300,
    256,
    len(tag_to_ix),
    word2vec_embedding_mat,
    tag_to_ix['START_TAG'],
    tag_to_ix['END_TAG'],
    tag_to_ix,
    device,
)
loss_function = nn.NLLLoss()
optimizer = optim.SGD(lstm_model.parameters(), lr=0.01)

epochs = 10
# One entry is appended to each history list per epoch.
train_loss, val_loss = [], []
train_f1, val_f1 = [], []

for epoch in range(epochs):
    print("Epoch: ", epoch)

    # Training pass: one optimizer step per example, visited in dict order.
    running_train_loss = 0
    running_train_f1 = 0
    lstm_model.train()
    for case, record in tqdm(train_data.items()):
        sentence = prepare_sequence(record['text'].split(' '), word_to_idx)
        targets = prepare_sequence(record['labels'], tag_to_ix)

        lstm_model.zero_grad()
        tag_scores = lstm_model(sentence)
        loss = loss_function(tag_scores, targets)
        loss.backward()
        optimizer.step()

        # The macro F1 is computed per example and averaged over examples below.
        gold_tags = targets.detach().numpy()
        predicted_tags = tag_scores.argmax(dim=1).detach().numpy()
        running_train_loss += loss.item()
        running_train_f1 += f1_score(gold_tags, predicted_tags, average='macro')
    train_loss.append(running_train_loss / len(train_data))
    train_f1.append(running_train_f1 / len(train_data))

    # Validation pass: eval mode with gradients disabled; no parameter updates.
    running_val_loss = 0
    running_val_f1 = 0
    lstm_model.eval()
    with torch.no_grad():
        for case, record in tqdm(val_data.items()):
            sentence = prepare_sequence(record['text'].split(' '), word_to_idx)
            targets = prepare_sequence(record['labels'], tag_to_ix)
            tag_scores = lstm_model(sentence)
            loss = loss_function(tag_scores, targets)

            gold_tags = targets.detach().numpy()
            predicted_tags = tag_scores.argmax(dim=1).detach().numpy()
            running_val_loss += loss.item()
            running_val_f1 += f1_score(gold_tags, predicted_tags, average='macro')
    val_loss.append(running_val_loss / len(val_data))
    val_f1.append(running_val_f1 / len(val_data))

    print(f'Train Loss: {train_loss[-1]}, Val Loss: {val_loss[-1]}, Train F1: {train_f1[-1]}, Val F1: {val_f1[-1]}')

Epoch:  0


100%|██████████| 8019/8019 [01:02<00:00, 128.69it/s]
100%|██████████| 1416/1416 [00:04<00:00, 299.94it/s]


Train Loss: 0.8227432623100498, Val Loss: 0.6962273388464362, Train F1: 0.42253654090304393, Val F1: 0.4191522214298386
Epoch:  1


100%|██████████| 8019/8019 [01:01<00:00, 131.24it/s]
100%|██████████| 1416/1416 [00:04<00:00, 289.97it/s]


Train Loss: 0.6502300609249382, Val Loss: 0.6238453137049699, Train F1: 0.4263505287644238, Val F1: 0.42403403350740615
Epoch:  2


100%|██████████| 8019/8019 [01:02<00:00, 128.13it/s]
100%|██████████| 1416/1416 [00:04<00:00, 292.24it/s]


Train Loss: 0.5865603690773127, Val Loss: 0.5654897332332218, Train F1: 0.4473758342387352, Val F1: 0.4622641498921814
Epoch:  3


100%|██████████| 8019/8019 [01:02<00:00, 129.02it/s]
100%|██████████| 1416/1416 [00:04<00:00, 290.90it/s]


Train Loss: 0.5323489941225131, Val Loss: 0.5103811479124601, Train F1: 0.48157647217528604, Val F1: 0.49081135755915
Epoch:  4


100%|██████████| 8019/8019 [01:01<00:00, 130.22it/s]
100%|██████████| 1416/1416 [00:04<00:00, 289.71it/s]


Train Loss: 0.479845833732062, Val Loss: 0.46069785070672664, Train F1: 0.5081355938904248, Val F1: 0.5171208162919425
Epoch:  5


100%|██████████| 8019/8019 [01:01<00:00, 129.52it/s]
100%|██████████| 1416/1416 [00:04<00:00, 290.96it/s]


Train Loss: 0.43604966501574927, Val Loss: 0.4222450606076219, Train F1: 0.5392481896692126, Val F1: 0.5512403706508415
Epoch:  6


100%|██████████| 8019/8019 [01:04<00:00, 124.11it/s]
100%|██████████| 1416/1416 [00:04<00:00, 289.10it/s]


Train Loss: 0.40142806201545295, Val Loss: 0.39359480250746176, Train F1: 0.566758820439511, Val F1: 0.5725770479065012
Epoch:  7


100%|██████████| 8019/8019 [01:02<00:00, 127.71it/s]
100%|██████████| 1416/1416 [00:04<00:00, 284.36it/s]


Train Loss: 0.37480780907984057, Val Loss: 0.37224403546700824, Train F1: 0.5864893015303219, Val F1: 0.5866231054262977
Epoch:  8


100%|██████████| 8019/8019 [01:02<00:00, 129.01it/s]
100%|██████████| 1416/1416 [00:05<00:00, 268.26it/s]


Train Loss: 0.3529992197997122, Val Loss: 0.3556284400818052, Train F1: 0.6030219802336886, Val F1: 0.6005365972403784
Epoch:  9


100%|██████████| 8019/8019 [01:02<00:00, 127.65it/s]
100%|██████████| 1416/1416 [00:06<00:00, 207.98it/s]

Train Loss: 0.3349447936105953, Val Loss: 0.3429008135387505, Train F1: 0.6171922774295108, Val F1: 0.6119369816960177





In [28]:
# Persist lstm_model and its loss/F1 histories under the Word2vec+LSTM results directory.
# NOTE: torch.save on a whole module pickles the object, so a later torch.load
# needs the model class definition to be available.
torch.save(lstm_model, 'Non Trainable Embeddings/Word2vec+LSTM/model.pt')

# Context managers guarantee each pickle file is flushed and closed, instead of
# leaving the handles returned by bare open() calls to the garbage collector.
for metric_name, metric_history in (
    ('train_loss', train_loss),
    ('val_loss', val_loss),
    ('train_f1', train_f1),
    ('val_f1', val_f1),
):
    with open(f'Non Trainable Embeddings/Word2vec+LSTM/{metric_name}.pkl', 'wb') as metric_file:
        pickle.dump(metric_history, metric_file)

#### BERT + LSTM

In [29]:
# Build the LSTM tagger on top of the BERT embedding matrix.
# NOTE(review): the positional args look like (vocab size, embedding dim, hidden dim,
# tagset size, embedding matrix, start-tag id, end-tag id, tag map, device) —
# confirm against the LSTM_model definition.
lstm_model = LSTM_model(
    len(word_to_idx),
    768,
    512,
    len(tag_to_ix),
    bert_embedding_mat,
    tag_to_ix['START_TAG'],
    tag_to_ix['END_TAG'],
    tag_to_ix,
    device,
)
loss_function = nn.NLLLoss()
optimizer = optim.SGD(lstm_model.parameters(), lr=0.01)

epochs = 10
# One entry is appended to each history list per epoch.
train_loss, val_loss = [], []
train_f1, val_f1 = [], []

for epoch in range(epochs):
    print("Epoch: ", epoch)

    # Training pass: one optimizer step per example, visited in dict order.
    running_train_loss = 0
    running_train_f1 = 0
    lstm_model.train()
    for case, record in tqdm(train_data.items()):
        sentence = prepare_sequence(record['text'].split(' '), word_to_idx)
        targets = prepare_sequence(record['labels'], tag_to_ix)

        lstm_model.zero_grad()
        tag_scores = lstm_model(sentence)
        loss = loss_function(tag_scores, targets)
        loss.backward()
        optimizer.step()

        # The macro F1 is computed per example and averaged over examples below.
        gold_tags = targets.detach().numpy()
        predicted_tags = tag_scores.argmax(dim=1).detach().numpy()
        running_train_loss += loss.item()
        running_train_f1 += f1_score(gold_tags, predicted_tags, average='macro')
    train_loss.append(running_train_loss / len(train_data))
    train_f1.append(running_train_f1 / len(train_data))

    # Validation pass: eval mode with gradients disabled; no parameter updates.
    running_val_loss = 0
    running_val_f1 = 0
    lstm_model.eval()
    with torch.no_grad():
        for case, record in tqdm(val_data.items()):
            sentence = prepare_sequence(record['text'].split(' '), word_to_idx)
            targets = prepare_sequence(record['labels'], tag_to_ix)
            tag_scores = lstm_model(sentence)
            loss = loss_function(tag_scores, targets)

            gold_tags = targets.detach().numpy()
            predicted_tags = tag_scores.argmax(dim=1).detach().numpy()
            running_val_loss += loss.item()
            running_val_f1 += f1_score(gold_tags, predicted_tags, average='macro')
    val_loss.append(running_val_loss / len(val_data))
    val_f1.append(running_val_f1 / len(val_data))

    print(f'Train Loss: {train_loss[-1]}, Val Loss: {val_loss[-1]}, Train F1: {train_f1[-1]}, Val F1: {val_f1[-1]}')

Epoch:  0


100%|██████████| 8019/8019 [03:08<00:00, 42.51it/s]
100%|██████████| 1416/1416 [00:11<00:00, 127.74it/s]


Train Loss: 0.8090987489394419, Val Loss: 0.7614968724859258, Train F1: 0.4218863639172591, Val F1: 0.41721149948390157
Epoch:  1


100%|██████████| 8019/8019 [03:06<00:00, 42.94it/s]
100%|██████████| 1416/1416 [00:12<00:00, 115.86it/s]


Train Loss: 0.7181812747980791, Val Loss: 0.707030585882817, Train F1: 0.4275592383267264, Val F1: 0.4230149647993177
Epoch:  2


100%|██████████| 8019/8019 [03:15<00:00, 40.99it/s]
100%|██████████| 1416/1416 [00:12<00:00, 110.16it/s]


Train Loss: 0.6607951759067479, Val Loss: 0.65577495609026, Train F1: 0.43772948992759475, Val F1: 0.43677108179533564
Epoch:  3


100%|██████████| 8019/8019 [03:29<00:00, 38.32it/s]
100%|██████████| 1416/1416 [00:12<00:00, 113.30it/s]


Train Loss: 0.613725942568483, Val Loss: 0.6140029739370139, Train F1: 0.46123876291067567, Val F1: 0.4583928132171114
Epoch:  4


100%|██████████| 8019/8019 [03:09<00:00, 42.26it/s]
100%|██████████| 1416/1416 [00:18<00:00, 74.90it/s] 


Train Loss: 0.5776995284269605, Val Loss: 0.5836308524933365, Train F1: 0.48168950027618285, Val F1: 0.4744164385757485
Epoch:  5


100%|██████████| 8019/8019 [03:08<00:00, 42.55it/s]
100%|██████████| 1416/1416 [00:08<00:00, 157.93it/s]


Train Loss: 0.5504277543694269, Val Loss: 0.5608321339599683, Train F1: 0.49485250182167684, Val F1: 0.4852158981371446
Epoch:  6


100%|██████████| 8019/8019 [02:44<00:00, 48.64it/s]
100%|██████████| 1416/1416 [00:09<00:00, 147.82it/s]


Train Loss: 0.5282529647711073, Val Loss: 0.543449678773976, Train F1: 0.5062130914485871, Val F1: 0.49412449231447125
Epoch:  7


100%|██████████| 8019/8019 [02:44<00:00, 48.84it/s]
100%|██████████| 1416/1416 [00:09<00:00, 153.80it/s]


Train Loss: 0.5096635013008204, Val Loss: 0.529840325638073, Train F1: 0.5178209172023773, Val F1: 0.4990178079725728
Epoch:  8


100%|██████████| 8019/8019 [02:40<00:00, 49.87it/s]
100%|██████████| 1416/1416 [00:09<00:00, 150.17it/s]


Train Loss: 0.49355230407467876, Val Loss: 0.5186011874883851, Train F1: 0.5295591679044606, Val F1: 0.5092878348683654
Epoch:  9


100%|██████████| 8019/8019 [02:47<00:00, 47.76it/s]
100%|██████████| 1416/1416 [00:09<00:00, 147.17it/s]

Train Loss: 0.47902098420054123, Val Loss: 0.5082034010027067, Train F1: 0.5395419626211806, Val F1: 0.5164417034263564





In [30]:
# Persist lstm_model and its loss/F1 histories under the Bert+LSTM results directory.
# NOTE: torch.save on a whole module pickles the object, so a later torch.load
# needs the model class definition to be available.
torch.save(lstm_model, 'Non Trainable Embeddings/Bert+LSTM/model.pt')

# Context managers guarantee each pickle file is flushed and closed, instead of
# leaving the handles returned by bare open() calls to the garbage collector.
for metric_name, metric_history in (
    ('train_loss', train_loss),
    ('val_loss', val_loss),
    ('train_f1', train_f1),
    ('val_f1', val_f1),
):
    with open(f'Non Trainable Embeddings/Bert+LSTM/{metric_name}.pkl', 'wb') as metric_file:
        pickle.dump(metric_history, metric_file)