In [33]:
import numpy as np
import os
import math
import re
import matplotlib.pyplot as plt
import time
import copy
import h5py
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms.functional as TF

from itertools import chain
from numpy import expand_dims
from torchvision import *
from torchsummary import summary
from torch.utils.data import Dataset, DataLoader
from torch.optim import lr_scheduler
from torch.utils.data import TensorDataset
from torch.autograd import Variable
from torch.utils.mobile_optimizer import optimize_for_mobile

from gensim.models import Word2Vec, KeyedVectors
from gensim.test.utils import datapath
from gensim.models import FastText

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
CUDA_LAUNCH_BLOCKING="1"

In [2]:
def make_dict(file):
    with open(file, 'r') as f:
        lines = f.readlines()
        d = {}
        for x in lines:
            idx = 0
            if x.split(' ')[-1] != 'O\n':
                idx=1
            temp = x.split(' ')[0].lower()
            if temp in d and d[temp]==0:
                continue
            else:
                d[temp]=idx
    return d

In [3]:
main_dict = make_dict(r'train.txt')
main_dict_valid = make_dict(r'valid.txt')
main_dict_test = make_dict(r'test.txt')

In [4]:
main_dict, main_dict_valid, main_dict_test

({'-docstart-': 0,
  '\n': 1,
  'eu': 1,
  'rejects': 0,
  'german': 1,
  'call': 0,
  'to': 0,
  'boycott': 0,
  'british': 1,
  'lamb': 0,
  '.': 0,
  'peter': 1,
  'blackburn': 1,
  'brussels': 1,
  '1996-08-22': 0,
  'the': 0,
  'european': 1,
  'commission': 0,
  'said': 0,
  'on': 0,
  'thursday': 0,
  'it': 0,
  'disagreed': 0,
  'with': 0,
  'advice': 0,
  'consumers': 0,
  'shun': 0,
  'until': 0,
  'scientists': 0,
  'determine': 0,
  'whether': 0,
  'mad': 0,
  'cow': 0,
  'disease': 0,
  'can': 0,
  'be': 0,
  'transmitted': 0,
  'sheep': 0,
  'germany': 1,
  "'s": 0,
  'representative': 0,
  'union': 0,
  'veterinary': 0,
  'committee': 0,
  'werner': 1,
  'zwingmann': 1,
  'wednesday': 0,
  'should': 0,
  'buy': 0,
  'sheepmeat': 0,
  'from': 0,
  'countries': 0,
  'other': 0,
  'than': 0,
  'britain': 1,
  'scientific': 0,
  'was': 0,
  'clearer': 0,
  '"': 0,
  'we': 0,
  'do': 0,
  "n't": 0,
  'support': 0,
  'any': 0,
  'such': 0,
  'recommendation': 0,
  'because': 0

In [5]:
class load_dataset(Dataset):
    def __init__(self,train_dict,wvs):
        super(Dataset, self).__init__()
        self.count = 0
        self.train_dict = train_dict
        self.wvs = wvs
        self.keys = list(self.train_dict.keys())
        for i,x in enumerate(self.train_dict):
            try:
                self.wvs.wv[self.keys[i]]
            except:
                continue
                
            try:
                self.wvs.wv[self.keys[i+1]]
            except:
                continue                                                                                
                
            try:
                self.wvs.wv[self.keys[i+2]]
            except:
                continue
                
            if self.count==0:
                self.x_cat_sub = torch.cat((torch.tensor(self.wvs.wv[self.keys[i]]),torch.tensor(self.wvs.wv[self.keys[i+1]])),0)
                self.x_cat_sub = torch.cat((self.x_cat_sub,torch.tensor(self.wvs.wv[self.keys[i+2]])),0)
                self.X_train = torch.unsqueeze(self.x_cat_sub,0)
                self.Y_train = torch.unsqueeze(torch.tensor(self.train_dict[self.keys[i+1]]),0)
                self.count=self.count+1
                
            else:
                self.x_cat_sub = torch.cat((torch.tensor(self.wvs.wv[self.keys[i]]),torch.tensor(self.wvs.wv[self.keys[i+1]])),0)
                self.x_cat_sub = torch.cat((self.x_cat_sub,torch.tensor(self.wvs.wv[self.keys[i+2]])),0)
                self.x_cat_sub = torch.unsqueeze(self.x_cat_sub,0)
                
                self.temp_tensor_y = torch.unsqueeze(torch.tensor(self.train_dict[x]),0)
                
                self.X_train = torch.cat((self.X_train,self.x_cat_sub),0)
                self.Y_train = torch.cat((self.Y_train,self.temp_tensor_y),0)
        
    def __len__(self):
        return self.X_train.shape[0]
        
    def __getitem__(self, idx):
        return self.X_train[idx], self.Y_train[idx]

In [6]:
class Binary_classifier(nn.Module):
    def __init__(self):
        super(Binary_classifier, self).__init__()
        self.fci = nn.Linear(300,100)
        self.fc1 = nn.Linear(100,50)
        self.fc2 = nn.Linear(50,2)
        self.sigmoid  = nn.Sigmoid()
                
    def forward(self,x):
        x = self.fci(x)
        x = self.sigmoid(x)
        x = self.fc1(x)
        x = self.sigmoid(x)
        x = self.fc2(x)
        x = self.sigmoid(x)
        return x

In [156]:
def Named_Entity_Recognition(train, valid, save_file, num_of_epochs = 100):

    model = Binary_classifier()
    temp_tensor = torch.randn(64,300)
    model(temp_tensor).shape

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(filter(lambda p:p.requires_grad,model.parameters()) , lr = 0.000001) 
    scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)    

    pytorch_total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print("Number of trainable parameters: \n{}".format(pytorch_total_params))    

    print(summary(model,(1,300)))
    
    min_valid_loss = np.inf
    
    for e in range(num_of_epochs):

        train_loss = 0.0
        for data, labels in train:
            # Transfer Data to GPU if available
    #         if torch.cuda.is_available():
    #             data, labels = data.cuda(), labels.cuda()
            
            # Clear the gradients
            optimizer.zero_grad()
            # Forward Pass
            target = model(data)
            # Find the Loss
            loss = criterion(target,labels)
            # Calculate gradients
            loss.backward()
            # Update Weights
            optimizer.step()
            # Calculate Loss
            train_loss += loss.item()
        
        valid_loss = 0.0
        model.eval()     # Optional when not using Model Specific layer
        
        for data, labels in valid:
            # Transfer Data to GPU if available
    #         if torch.cuda.is_available():
    #             data, labels = data.cuda(), labels.cuda()
            
            # Forward Pass
            target = model(data)
            # Find the Loss
            loss = criterion(target,labels)
            # Calculate Loss
            valid_loss += loss.item()
    
        print(f'Epoch {e+1} \t\t Training Loss: {train_loss / len(train)} \t\t Validation Loss: { valid_loss / len(valid)}')
        
        if min_valid_loss > valid_loss:
            print(f'Validation Loss Decreased({min_valid_loss:.6f}--->{valid_loss:.6f}) \t Saving The Model')
            min_valid_loss = valid_loss
            
            # Saving State Dict
            torch.save(model.state_dict(), save_file)    
    

In [27]:
def evaluate(test, save_file):
    
    model = Binary_classifier()
    model.load_state_dict(torch.load(save_file))
    model.eval()

    cf_mat = np.zeros((2,2))
    for x, y in test:
        output = model(x)
        output_arr = np.asarray(output.detach())
        fin = np.argmax(output_arr, axis=None)
        cf_mat[y][fin] += 1

    print("Results of the model are:")
    print(cf_mat)
    
    # Calculating Accuracy
    accuracy = (cf_mat[0][0] + cf_mat[1][1])/(cf_mat[0][0] + cf_mat[0][1] + cf_mat[1][0] + cf_mat[1][1])
    print("Accuracy: {}".format(accuracy))

    # Calculating Precision
    precision = cf_mat[0][0]/(cf_mat[0][0] + cf_mat[0][1])
    print("Precision: {}".format(precision))

    # Calculating Recall

    recall = cf_mat[0][0]/(cf_mat[0][0] + cf_mat[1][0])
    print("Recall: {}".format(recall))

    # Calculating F1 Score
    f1_score = 2*(precision*recall)/(precision+recall)
    print("F1 Score: {}".format(f1_score))

### CBOW with Negative Sampling

In [10]:
word_embedding = Word2Vec.load(r'cbow_with_negative_model')
train = DataLoader(load_dataset(main_dict,word_embedding), batch_size=64)
valid = DataLoader(load_dataset(main_dict_valid,word_embedding), batch_size=64)

In [157]:
Named_Entity_Recognition(train, valid, r'cbow_negative.pth')

Number of trainable parameters: 
35252
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1               [-1, 1, 100]          30,100
           Sigmoid-2               [-1, 1, 100]               0
            Linear-3                [-1, 1, 50]           5,050
           Sigmoid-4                [-1, 1, 50]               0
            Linear-5                 [-1, 1, 2]             102
           Sigmoid-6                 [-1, 1, 2]               0
Total params: 35,252
Trainable params: 35,252
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.13
Estimated Total Size (MB): 0.14
----------------------------------------------------------------
None
Epoch 1 		 Training Loss: 0.6671067841418155 		 Validation Loss: 0.6664536790769608
Validation Loss Decreased(inf--->40.653674) 

In [158]:
test = DataLoader(load_dataset(main_dict_test,word_embedding), batch_size=1)

In [159]:
evaluate(test, r'cbow_negative.pth')

Results of the model are:
[[3263.    0.]
 [ 200.    0.]]
Accuracy: 0.9422466069881605
Precision: 1.0
Recall: 0.9422466069881605
F1 Score: 0.9702646446625037


### SKIP GRAM with Negative Sampling

In [29]:
word_embedding = Word2Vec.load(r'skip_with_negative_model')
train = DataLoader(load_dataset(main_dict,word_embedding), batch_size=64)
valid = DataLoader(load_dataset(main_dict_valid,word_embedding), batch_size=64)

In [160]:
Named_Entity_Recognition(train, valid, r'skip_negative.pth')

Number of trainable parameters: 
35252
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1               [-1, 1, 100]          30,100
           Sigmoid-2               [-1, 1, 100]               0
            Linear-3                [-1, 1, 50]           5,050
           Sigmoid-4                [-1, 1, 50]               0
            Linear-5                 [-1, 1, 2]             102
           Sigmoid-6                 [-1, 1, 2]               0
Total params: 35,252
Trainable params: 35,252
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.13
Estimated Total Size (MB): 0.14
----------------------------------------------------------------
None
Epoch 1 		 Training Loss: 0.6544987800958995 		 Validation Loss: 0.6539470680424424
Validation Loss Decreased(inf--->39.890771) 

In [161]:
test = DataLoader(load_dataset(main_dict_test,word_embedding), batch_size=1)

In [162]:
evaluate(test, r'skip_negative.pth')

Results of the model are:
[[3263.    0.]
 [ 200.    0.]]
Accuracy: 0.9422466069881605
Precision: 1.0
Recall: 0.9422466069881605
F1 Score: 0.9702646446625037


### GLOVE with Negative Sampling

In [141]:
word_embedding = Word2Vec.load(r'glove_with_negative_model')

In [142]:
train = DataLoader(load_dataset(main_dict,word_embedding), batch_size=64)
valid = DataLoader(load_dataset(main_dict_valid,word_embedding), batch_size=64)

In [165]:
Named_Entity_Recognition(train, valid, r'glove_negative.pth')

Number of trainable parameters: 
35252
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1               [-1, 1, 100]          30,100
           Sigmoid-2               [-1, 1, 100]               0
            Linear-3                [-1, 1, 50]           5,050
           Sigmoid-4                [-1, 1, 50]               0
            Linear-5                 [-1, 1, 2]             102
           Sigmoid-6                 [-1, 1, 2]               0
Total params: 35,252
Trainable params: 35,252
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.13
Estimated Total Size (MB): 0.14
----------------------------------------------------------------
None
Epoch 1 		 Training Loss: 0.6699392086750752 		 Validation Loss: 0.6693937006543894
Validation Loss Decreased(inf--->40.833016) 

In [163]:
test = DataLoader(load_dataset(main_dict_test,word_embedding), batch_size=1)

In [164]:
evaluate(test, r'glove_negative.pth')

Results of the model are:
[[3263.    0.]
 [ 200.    0.]]
Accuracy: 0.9422466069881605
Precision: 1.0
Recall: 0.9422466069881605
F1 Score: 0.9702646446625037
