In [139]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import math
import numpy as np
import spacy
from torchtext import data
import pandas as pd
from sklearn.metrics import accuracy_score

torch.manual_seed(1)

<torch._C.Generator at 0x7fdc4273feb0>

#Word Embedding:

In [140]:
class Embedder(nn.Module):
    def __init__(self, vocab_size, model_dim):
        super().__init__()
        
        self.embed = nn.Embedding(vocab_size, model_dim)
    
    def forward(self, X):
        return self.embed(X)

#Positional Encoding:

In [141]:
class PositionalEncoder(nn.Module):
    def __init__(self, model_dim, max_seq_len = 60):
        super().__init__()
        self.model_dim = model_dim
        
        # create constant 'pe' matrix with values dependant on 
        # pos and i
        pe = torch.zeros(max_seq_len, model_dim)
        for pos in range(max_seq_len):
            for i in range(0, model_dim, 2):
                pe[pos, i] = math.sin(pos / (10000 ** ((2 * i)/model_dim)))
                pe[pos, i + 1] = math.cos(pos / (10000 ** ((2 * (i + 1))/model_dim)))
                
        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)
 
    
    def forward(self, x):
    
        x = x * math.sqrt(self.model_dim)
        
        seq_len = x.size(1)
        
        x = x + self.pe[:,:seq_len].clone().detach()
        
        return x

#Multi-headed attention

In [142]:
class MultiHeadAttention(nn.Module):
    def __init__(self, heads, model_dim,dropout = 0.1):
        super().__init__()
        
        self.model_dim = model_dim
        self.d_k = model_dim//heads
        self.h = heads
        
        self.q_linear = nn.Linear(model_dim, model_dim,bias=False)
        self.k_linear = nn.Linear(model_dim, model_dim,bias=False)
        self.v_linear = nn.Linear(model_dim,model_dim,bias=False)
        self.dropout  = nn.Dropout(dropout)
        self.out  = nn.Linear(model_dim,model_dim,bias=False)
        
    def forward(self, q,k,v):
        
        
        bs = q.size(0)
        
        # perform linear operation and split into h heads
        
        k = self.k_linear(k).view(bs, -1, self.h, self.d_k)
        q = self.q_linear(q).view(bs, -1, self.h, self.d_k)
        v = self.v_linear(v).view(bs, -1, self.h, self.d_k)
        
        
       
        k  = k.transpose(1,2)
        q = q.transpose(1,2)
        v = v.transpose(1,2)
        
        scores = attention(q, k, v, self.d_k, self.dropout)
        
        # concatenate heads and put through final linear layer
        concat = scores.transpose(1,2).contiguous().view(bs, -1, self.model_dim)
        
        output = self.out(concat)
    
        return output


#Feed forward sublayer

In [143]:
class FeedForward(nn.Module):
    def __init__(self, model_dim, d_ff=512, dropout = 0.1):
        super().__init__() 
        
        self.linear_1 = nn.Linear(model_dim, d_ff)
        self.dropout = nn.Dropout(dropout)
        self.linear_2 = nn.Linear(d_ff, model_dim)
    def forward(self, x):
        x = self.dropout(F.relu(self.linear_1(x)))
        x = self.linear_2(x)
        return x

#Normalization

In [144]:
class Norm(nn.Module):
    def __init__(self, model_dim, eps = 1e-6):
        super().__init__()
    
        self.size = model_dim
        # create two learnable parameters to calibrate normalisation
        self.alpha = nn.Parameter(torch.ones(self.size))
        self.bias = nn.Parameter(torch.zeros(self.size))
        self.eps = eps
    def forward(self, x):
        norm = self.alpha * (x - x.mean(dim=-1, keepdim=True)) /(x.std(dim=-1, keepdim=True) + self.eps) + self.bias
        return norm

#Encoder layer

In [145]:
class EncoderLayer(nn.Module):
    def __init__(self, model_dim, heads, dropout = 0.1):
        super().__init__()
        self.norm_1 = Norm(model_dim)
        self.norm_2 = Norm(model_dim)
        self.attn = MultiHeadAttention(heads, model_dim)
        self.ff = FeedForward(model_dim)
        self.dropout_1 = nn.Dropout(dropout)
        self.dropout_2 = nn.Dropout(dropout)
        
    def forward(self, x):
        x2 = self.norm_1(x)
        x = x + self.dropout_1(self.attn(x2,x2,x2))
        x2 = self.norm_2(x)
        x = x + self.dropout_2(self.ff(x2))
        return x

#Attention Function

In [146]:
def attention(q, k, v, d_k, dropout=None):
        
        scores = torch.matmul(q, k.transpose(-2, -1)) /  math.sqrt(d_k)
        scores = F.softmax(scores, dim=-1)
        output = torch.matmul(scores, v)
        return output

#Encoder

In [147]:
class Encoder(nn.Module):
    def __init__(self, vocab_size, model_dim,heads):
        super().__init__()
        
        self.embed = Embedder(vocab_size, model_dim)
        self.pe = PositionalEncoder(model_dim)
        self.encode = EncoderLayer(model_dim, heads)
        self.norm = Norm(model_dim)
    def forward(self, src):
        x = self.embed(src)
        x = self.pe(x)
        x = self.encode(x)
        return self.norm(x)
    

#Transformer

In [148]:
class Transformer(nn.Module):
    def __init__(self, config,src_vocab):
        super().__init__()
        self.config = config
        h = self.config.h
        model_dim = self.config.model_dim
        self.encoder = Encoder(src_vocab, model_dim,h)
        self.softmax = nn.Softmax(dim=1)
        self.out = nn.Linear(self.config.model_dim, self.config.output_size)
    def forward(self, src):
        e_outputs = self.encoder(src)
        e_outputs = e_outputs[:,-1,:]
        output = self.out(e_outputs)
        return self.softmax(output )
    def add_optimizer(self, optimizer):
        self.optimizer = optimizer
        
    def add_loss_op(self, loss_op):
        self.loss_op = loss_op
    
    def reduce_lr(self):
        print("Reducing LR")
        for g in self.optimizer.param_groups:
            g['lr'] = g['lr'] / 2
                
    def run_epoch(self, train_iterator, val_iterator, epoch):
        train_losses = []
        val_accuracies = []
        losses = []
        
        # Reduce learning rate as number of epochs increase
        if (epoch == int(self.config.max_epochs/3)) or (epoch == int(2*self.config.max_epochs/3)):
            self.reduce_lr()
            
        for i, batch in enumerate(train_iterator):
            self.optimizer.zero_grad()
            x = batch.text
            y = (batch.label - 1).type(torch.LongTensor)
            y_pred = self.__call__(x)
            loss = self.loss_op(y_pred, y)
            loss.backward()
            losses.append(loss.data.cpu().numpy())
            self.optimizer.step()
    
            if i % 100 == 0:
                avg_train_loss = np.mean(losses)
                train_losses.append(avg_train_loss)
                print("\tAverage training loss: {:.5f}".format(avg_train_loss))
                losses = []
                
                # Evalute Accuracy on validation set
                val_accuracy = evaluate_model(self, val_iterator)
                print("\tVal Accuracy: {:.4f}".format(val_accuracy))
                self.train()
                
        return train_losses, val_accuracies

In [149]:
class Config(object):
    
    model_dim = 256
    h = 8
    output_size = 4
    lr = 0.0003
    max_epochs = 35
    batch_size = 128
    max_sen_len = 60

In [150]:
class Dataset(object):
    def __init__(self, config):
        self.config = config
        self.train_iterator = None
        self.test_iterator = None
        self.val_iterator = None
        self.vocab = []
        self.word_embeddings = {}
    
    

    def get_pandas_df(self, filename):
        '''
        Load the data into Pandas.DataFrame object
        
        '''
        with open(filename, 'r') as file:
            
            data = [line.strip().split(',', maxsplit=1) for line in file]
            data_text = list(map(lambda x: x[1], data))
            data_label = list(map(lambda x: x[0].strip()[-1], data))

        full_df = pd.DataFrame({"text":data_text, "label":data_label})
        return full_df
    
    def load_data(self, train_file, test_file=None, val_file=None):
        

        NLP = spacy.load('en_core_web_sm')
        tokenizer = lambda sent: [x.text for x in NLP.tokenizer(sent) if x.text != " "]
        
        # Creating Field for data
        TEXT = data.Field(sequential=True, tokenize=tokenizer, batch_first= True,lower=True, fix_length=self.config.max_sen_len)
        LABEL = data.Field(sequential=False, use_vocab=False)
        datafields = [("text",TEXT),("label",LABEL)]
        
        # Load data from pd.DataFrame into torchtext.data.Dataset
        train_df = self.get_pandas_df(train_file)
        train_examples = [data.Example.fromlist(i, datafields) for i in train_df.values.tolist()]
        train_data = data.Dataset(train_examples, datafields)
        
        test_df = self.get_pandas_df(test_file)
        test_examples = [data.Example.fromlist(i, datafields) for i in test_df.values.tolist()]
        test_data = data.Dataset(test_examples, datafields)
        
        
        train_data, val_data = train_data.split(split_ratio=0.8)
        
        TEXT.build_vocab(train_data)
        self.vocab = TEXT.vocab
        
        self.train_iterator = data.BucketIterator(
            (train_data),
            batch_size=self.config.batch_size,
            sort_key=lambda x: len(x.text),
            repeat=False,
            shuffle=True)
        
        self.val_iterator, self.test_iterator = data.BucketIterator.splits(
            (val_data, test_data),
            batch_size=self.config.batch_size,
            sort_key=lambda x: len(x.text),
            repeat=False,
            shuffle=False)
        
        print ("Loaded {} training examples".format(len(train_data)))
        print ("Loaded {} test examples".format(len(test_data)))
        print ("Loaded {} validation examples".format(len(val_data)))

In [151]:
def evaluate_model(model, iterator):
    all_preds = []
    all_y = []
    for idx,batch in enumerate(iterator):

        x = batch.text
        y_pred = model(x)
        predicted = torch.max(y_pred.cpu().data, 1)[1] + 1
        all_preds.extend(predicted.numpy())
        all_y.extend(batch.label.numpy())
    score = accuracy_score(all_y, np.array(all_preds).flatten())
    return score


In [152]:

if __name__=='__main__':
    config = Config()
    train_file = '/home/shrey/Documents/NLP reserach papers/tran'
    
    test_file = '/home/shrey/Documents/NLP reserach papers/test'
    
    
    dataset = Dataset(config)
    dataset.load_data('/home/shrey/Documents/NLP reserach papers/tran', '/home/shrey/Documents/NLP reserach papers/test')
    
    model = Transformer(config, len(dataset.vocab))
    model.train()
    optimizer = optim.Adam(model.parameters(), lr=config.lr)
    NLLLoss = nn.NLLLoss()
    model.add_optimizer(optimizer)
    model.add_loss_op(NLLLoss)
    
    train_losses = []
    val_accuracies = []
    
    for i in range(config.max_epochs):
        print ("Epoch: {}".format(i))
        train_loss,val_accuracy = model.run_epoch(dataset.train_iterator, dataset.val_iterator, i)
        train_losses.append(train_loss)
        val_accuracies.append(val_accuracy)

    train_acc = evaluate_model(model, dataset.train_iterator)
    val_acc = evaluate_model(model, dataset.val_iterator)
    test_acc = evaluate_model(model, dataset.test_iterator)

    print ('Final Training Accuracy: {:.4f}'.format(train_acc))
    print ('Final Validation Accuracy: {:.4f}'.format(val_acc))
    print ('Final Test Accuracy: {:.4f}'.format(test_acc))

Loaded 7788 training examples
Loaded 783 test examples
Loaded 1947 validation examples
Epoch: 0
	Average training loss: -0.25666
	Val Accuracy: 0.2722
Epoch: 1
	Average training loss: -0.27707
	Val Accuracy: 0.3071
Epoch: 2
	Average training loss: -0.44610
	Val Accuracy: 0.4212
Epoch: 3
	Average training loss: -0.49375
	Val Accuracy: 0.4710
Epoch: 4
	Average training loss: -0.59122
	Val Accuracy: 0.5265
Epoch: 5
	Average training loss: -0.57599
	Val Accuracy: 0.5501
Epoch: 6
	Average training loss: -0.64595
	Val Accuracy: 0.5670
Epoch: 7
	Average training loss: -0.64669
	Val Accuracy: 0.5763
Epoch: 8
	Average training loss: -0.63635
	Val Accuracy: 0.6091
Epoch: 9
	Average training loss: -0.81618
	Val Accuracy: 0.6364
Epoch: 10
	Average training loss: -0.79254
	Val Accuracy: 0.6662
Epoch: 11
Reducing LR
	Average training loss: -0.81670
	Val Accuracy: 0.6610
Epoch: 12
	Average training loss: -0.81410
	Val Accuracy: 0.6687
Epoch: 13
	Average training loss: -0.83793
	Val Accuracy: 0.6826
E