<a href="https://colab.research.google.com/github/sm354/COL870-Assignment-1/blob/main/2_1BILSTM_NER_GMB.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [20]:
!ln -sf /opt/bin/nvidia-smi /usr/bin/nvidia-smi
!pip install gputil
!pip install psutil
!pip install humanize

import psutil
import humanize
import os
import GPUtil as GPU

GPUs = GPU.getGPUs()
# XXX: only one GPU on Colab and isn’t guaranteed
gpu = GPUs[0]
def printm():
    process = psutil.Process(os.getpid())
    print("Gen RAM Free: " + humanize.naturalsize(psutil.virtual_memory().available), " |     Proc size: " + humanize.naturalsize(process.memory_info().rss))
    print("GPU RAM Free: {0:.0f}MB | Used: {1:.0f}MB | Util {2:3.0f}% | Total     {3:.0f}MB".format(gpu.memoryFree, gpu.memoryUsed, gpu.memoryUtil*100, gpu.memoryTotal))
printm()


Collecting gputil
  Downloading https://files.pythonhosted.org/packages/ed/0e/5c61eedde9f6c87713e89d794f01e378cfd9565847d4576fa627d758c554/GPUtil-1.4.0.tar.gz
Building wheels for collected packages: gputil
  Building wheel for gputil (setup.py) ... [?25l[?25hdone
  Created wheel for gputil: filename=GPUtil-1.4.0-cp37-none-any.whl size=7411 sha256=ebd87a0e13d17c5e026e1e624b002aee6650791cfae1b1d3cdbd96517e91a4b9
  Stored in directory: /root/.cache/pip/wheels/3d/77/07/80562de4bb0786e5ea186911a2c831fdd0018bda69beab71fd
Successfully built gputil
Installing collected packages: gputil
Successfully installed gputil-1.4.0
Gen RAM Free: 10.5 GB  |     Proc size: 4.1 GB
GPU RAM Free: 585MB | Used: 14524MB | Util  96% | Total     15109MB


In [21]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from torch.utils.data import Dataset, DataLoader, TensorDataset

if torch.cuda.is_available():  
  device = "cuda:0" 
else:  
  device = "cpu"  

In [22]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [23]:
# reading text file in python and making list of sentences (list of lists) and list of tags(list of lists)
def load_data(datapath):
    all_words = []
    all_tags = []
    with open(datapath) as f:
        lines = f.readlines()
        sent_num = 0
        for line in lines[1:]: #1: so that the first blank line isn't taken into account
            if(line == "\n"):
                sent_num+=1
            else:
                line_sep = line.split(sep = " ")
                all_words.append(line_sep[0])
                all_tags.append(line_sep[3][:-1])
                
    words = list(set(all_words))
    tags = list(set(all_tags))

    vocab = {}
    vocab['<pad>'] = 0 # for padding input sequences
    for i, word in enumerate(words):
        vocab[word] = i+1
        
    nertags = {}
    nertags['padtag'] = 0
    for i,nertag in enumerate(tags):
        nertags[nertag] = i+1

    train_sent = []
    train_tags = []
    with open(datapath) as f:
        lines = f.readlines()
        sent_num = 0
        sentence = []
        tag = []
        for line in lines[1:]: #1: so that the first blank line isn't taken into account
            if(line == "\n"):
                sent_num+=1
                train_sent.append(sentence)
                train_tags.append(tag)
                sentence = []
                tag = []
            else:
                line_sep = line.split(sep = " ")
                sentence.append(vocab[line_sep[0]])
                tag.append(nertags[line_sep[3][:-1]])

    # padding the sentences at the end
    seq_maxlen = max(len(x) for x in train_sent)
    x_lengths = [len(x) for x in train_sent]
    Xtrain = []
    Ytrain = []
    for sent, tags in zip(train_sent, train_tags):
        length_toappend = seq_maxlen - len(sent)
        Xtrain.append(sent+[0]*length_toappend)
        Ytrain.append(tags+[0]*length_toappend)


    Xtrain = torch.Tensor(Xtrain)
    Ytrain = torch.Tensor(Ytrain)
    x_lengths = torch.Tensor(x_lengths)
    # print(Xtrain.shape, Ytrain.shape, x_lengths.shape)
    
    return Xtrain, Ytrain, x_lengths, vocab, nertags

### Training Data
### using dataloader to make data batches

In [34]:
traindatapath = "/content/drive/MyDrive/Q2_DL/train.txt"
devdatapath = "/content/drive/MyDrive/Q2_DL/dev.txt"

Xtrain, Ytrain, x_trainlengths, vocab, nertags = load_data(traindatapath)
Xdev, Ydev, x_devlengths, _, _ = load_data(devdatapath)

traindataset = TensorDataset(Xtrain, Ytrain, x_trainlengths)
Trainloader = DataLoader(traindataset, batch_size= 128, shuffle=True)

devdataset = TensorDataset(Xdev, Ydev, x_devlengths)
Devloader = DataLoader(devdataset, batch_size = 128)

In [35]:
# print(next(iter(Devloader))[0].shape)

# BiLSTM Model

In [36]:
class BiLSTM(nn.Module):
    def __init__(self, embedding_size, hidden_size, total_words, num_class, pretrained = False, pretrained_embed = None):
        super(BiLSTM, self).__init__()
        self.hidden_size = hidden_size
        
        self.wordembed = nn.Embedding(total_words, embedding_size) #weights initiallized from std normal dist, randomly
        if(pretrained == True):
            self.wordembed.weight = nn.Parameter(pretrained_embed) #pretrained embeds have size (total_words, embedding_size)

        self.bilstm = nn.LSTM(embedding_size,hidden_size, bidirectional = True, batch_first = True)
        self.linear = nn.Linear(2*hidden_size, num_class) # 2 because forward and backward concatenate

    def forward(self, x, xlengths):
        out = self.wordembed(x) # x is of size(batchsize, seq_len), out is of size (batchsize, seq_len, embedding_size = 100)
        
#         out = pack_padded_sequence(out, xlengths, batch_first=True, enforce_sorted=False)
        
        out, (h,c) = self.bilstm(out) #'out' has dimension(batchsize, seq_len, hidden_size)
        
#         out, out_lengths = pad_packed_sequence(out, batch_first=True)
#         assert(out.shape[-1] == self.hidden_size)
#         print(out.shape)

        out = self.linear(out) #now 'out' has dimension(batchsize, seq_len, num_class)

        out = F.softmax(out, dim=2) # take the softmax across the dimension num_class, 'out' has dimension(batchsize, seq_len, num_class)
        
        return out
        
        

In [41]:
model = BiLSTM(embedding_size = 100, hidden_size = 100, total_words = len(vocab), num_class = 18, pretrained = False).to(device)
optimizer = torch.optim.SGD(model.parameters(), lr = 0.01) 
lossfunction = nn.CrossEntropyLoss()

def calcloss(model, loader):
    model.eval()
    validloss = 0
    i = 0
    for step, (X, Y, xlen) in enumerate(loader):
        ypred = model(X.long().to(device), xlen.to(device)).permute(0, 2, 1)
        vloss = lossfunction(ypred.to('cpu'), Y.type(torch.LongTensor))
        validloss+=vloss
        i+=1

    model.train()
    return validloss/i

In [42]:
# print(model)

In [43]:
# Model is ready now we have to train using cross entropy loss
num_epochs = 100
trainloss = []
validloss = []
# validloss = []
model.train()
for epoch in range(num_epochs):
    for step, (Xbatch, Ybatch, xbatch_len) in enumerate(Trainloader):
        #make gradients 0
        optimizer.zero_grad()
        
        #get output from model and claculate loss
        ypred = model(Xbatch.long().to(device), xbatch_len.to(device)).permute(0, 2, 1)
        # print(ypred.shape, Ybatch.shape)
        
        loss = lossfunction(ypred.to('cpu'), Ybatch.type(torch.LongTensor)) #Ybatch has dimension (batchsize, seqlen), ypred has dimension(batchsize, num_classes, seqlen)
        
        #backward and step
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), 5) # clip gradient to 5
        optimizer.step()
    
    vloss = calcloss(model, Devloader)
    trainloss.append(loss)
    validloss.append(vloss)
    print('epoch = {}, training_loss = {}, validation_loss = {}'.format(epoch, trainloss[-1], validloss[-1]))        
        
        
        

epoch = 0, training_loss = 2.206904411315918, validation_loss = 2.2933318614959717
epoch = 1, training_loss = 2.182199478149414, validation_loss = 2.2774016857147217
epoch = 2, training_loss = 2.167578935623169, validation_loss = 2.2742362022399902
epoch = 3, training_loss = 2.1907167434692383, validation_loss = 2.272693395614624


RuntimeError: ignored

In [9]:
print(trainloss)

NameError: ignored

In [10]:
plt.plot(trainloss)

NameError: ignored