In [1]:
import pandas as pd
import numpy as np
import torch
import torchtext
from torchtext.data import Field, BucketIterator
import torch.nn as nn
import torch.nn.functional as F

In [2]:
from torchtext import data
from torchtext import datasets

In [3]:
import spacy
spacy_en = spacy.load('en_core_web_sm')

def tokenizer(text): # create a tokenizer function
    return [tok.text for tok in spacy_en.tokenizer(text)]

import nltk
from nltk.corpus import stopwords
import copy

nltk.download("stopwords")
stopwords = set(stopwords.words("english"))

sentences = data.Field(lower=True, tokenize=tokenizer,include_lengths=True)
ans = data.Field(sequential=False)

train, dev, test = datasets.SNLI.splits(sentences, ans, root='.dataset')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [4]:
sentences.build_vocab(train, dev, test,min_freq=3)
ans.build_vocab(train, dev, test)
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
    
Batch_Size=256
train_iter, dev_iter, test_iter = data.BucketIterator.splits(
            (train, dev, test), batch_size=Batch_Size, device=device)

In [11]:
n_layer=1
n_layer=1
class My_RNN_back(nn.Module):

    def __init__(self, embed_dim,hidden_dim, drop_p):
        super(My_RNN_back, self).__init__()
        self.hidden_dim = hidden_dim
        self.rnn = nn.LSTM(input_size=embed_dim, hidden_size=hidden_dim,
                        num_layers=1, dropout=drop_p,bidirectional=True)

    def forward(self, inputs, text_lengths):
        
        #print(inputs.shape)

        text_lengths = text_lengths.cpu()

        batch_size = inputs.size()[1]

        #pack sequence
        packed_embedded = nn.utils.rnn.pack_padded_sequence(inputs, text_lengths, enforce_sorted = False)
        
        packed_output, (hidden, cell) = self.rnn(packed_embedded)
        
        #unpack sequence
        outputs, output_lengths = nn.utils.rnn.pad_packed_sequence(packed_output)


        outputs=outputs.view(-1, batch_size, 2, self.hidden_dim)

        return outputs[:,:,1,:].view(-1, batch_size, self.hidden_dim)

class My_RNN(nn.Module):

    def __init__(self, embed_dim,hidden_dim,drop_p):
        super(My_RNN, self).__init__()
        self.rnn = nn.LSTM(input_size=embed_dim+hidden_dim, hidden_size=hidden_dim,
                        num_layers=1, dropout=drop_p,bidirectional=False)#True)

    def forward(self, inputs, hidden_backward, text_lengths):
        
        text_lengths = text_lengths.cpu()
        
        batch_size = inputs.size()[1]
        inputs=torch.cat([inputs,hidden_backward],2)
        #print(inputs.shape)

        #pack sequence
        packed_embedded = nn.utils.rnn.pack_padded_sequence(inputs, text_lengths, enforce_sorted = False)
        
        packed_output, (hidden, cell) = self.rnn(packed_embedded)
        
        #unpack sequence
        outputs, output_lengths = nn.utils.rnn.pad_packed_sequence(packed_output)


        return outputs #ht[-2:].transpose(0, 1).contiguous().view(batch_size, -1)  

class My_RNN1(nn.Module):

    def __init__(self, embed_dim,hidden_dim,drop_p):
        super(My_RNN1, self).__init__()
        self.hidden_dim = hidden_dim
        self.rnn = nn.LSTM(input_size=embed_dim+hidden_dim, hidden_size=hidden_dim,
                        num_layers=1, dropout=drop_p,bidirectional=False)#True)

    def forward(self, inputs, hidden_backward, text_lengths):

         
        text_lengths = text_lengths.cpu()

        batch_size = inputs.size()[1]

        #pack sequence
        packed_embedded = nn.utils.rnn.pack_padded_sequence(inputs, text_lengths, enforce_sorted = False)
        
        packed_output, (hidden, cell) = self.rnn(packed_embedded)
        
        #unpack sequence
        outputs, output_lengths = nn.utils.rnn.pad_packed_sequence(packed_output)
        #print(outputs.shape)

        #outputs=outputs.view(-1,batch_size,2,self.hidden_dim)


        return outputs[-1,:,:].view(batch_size, -1) #ht[-2:].transpose(0, 1).contiguous().view(batch_size, -1)    


class My_Bi_LSTM(nn.Module):
    def __init__(self, embed_dim, hidden_dim, drop_p, n_layers=1):
        super(My_Bi_LSTM, self).__init__()
        self.rnn_back=[]
        self.rnn=[]
        self.n_layers=n_layers
        for i in range(int(n_layers)):
            if (i==0):
                if (i==(n_layers-1)):
                    self.rnn_back.append(My_RNN_back(embed_dim,hidden_dim, drop_p).to(device))
                    self.rnn.append(My_RNN1(embed_dim,hidden_dim, drop_p).to(device))
                else:    
                    self.rnn_back.append(My_RNN_back(embed_dim,hidden_dim, drop_p).to(device))
                    self.rnn.append(My_RNN(embed_dim,hidden_dim, drop_p).to(device))
            else:
                
                if (i==(n_layers-1)):
                    self.rnn_back.append(My_RNN_back(2*hidden_dim,hidden_dim, drop_p).to(device))
                    self.rnn.append(My_RNN1(hidden_dim,hidden_dim, drop_p).to(device))
                else:    
                    self.rnn_back.append(My_RNN_back(2*hidden_dim,hidden_dim, drop_p).to(device))
                    self.rnn.append(My_RNN(hidden_dim,hidden_dim, drop_p).to(device))
        self.rnn_back = nn.ModuleList(self.rnn_back)
        self.rnn = nn.ModuleList(self.rnn)

    def forward(self, embeddings, embeddings_len):
        
        temp_output=embeddings
        
        batch_size = embeddings.size()[1]
        
        for i in range(self.n_layers):
            
            if (i==(self.n_layers-1)):
                hid_back=self.rnn_back[i](temp_output, embeddings_len)
                out1 = self.rnn[i](temp_output,hid_back, embeddings_len)
                temp_output=torch.cat([hid_back[0,:,:].view(batch_size, -1), out1], 1)


            else:
                if (i==0):
                    hid_back=self.rnn_back[i](temp_output, embeddings_len)
                    out1 = self.rnn[i](temp_output, hid_back, embeddings_len)
                    temp_output=torch.cat([hid_back, out1], 2)
                
                else:
                    hid_back=self.rnn_back[i](temp_output, embeddings_len)
                    out1 = self.rnn[i](temp_output, hid_back, embeddings_len)
                    temp_output=torch.cat([hid_back, out1], 2)
        return temp_output





class Output(nn.Module):

    def __init__(self, out_dim,inp_dim,drop_p):
        super(Output, self).__init__()
        self.fc1=nn.Linear(inp_dim,int(inp_dim/2))
        #self.fc2=nn.Linear(int(inp_dim/2),int(inp_dim/2))
        self.fc3=nn.Linear(int(inp_dim/2),int(inp_dim/4))
        self.fc4=nn.Linear(int(inp_dim/4),out_dim)
        self.p=drop_p

    def forward(self, x):
        x=F.dropout(F.relu(self.fc1(x)),p=self.p)
        #x=F.dropout(F.relu(self.fc2(x)),p=self.p)
        x=F.dropout(F.relu(self.fc3(x)),p=self.p)
        x=(self.fc4(x))
        return x

In [12]:
hidden_dim=128
embed_dim=200
out_dim=4
drop_p1=0.25
drop_p2=0.4
class Classifier2(nn.Module):
    def __init__(self):
        super(Classifier,self).__init__()
        self.embedding=nn.Embedding(len(sentences.vocab),embed_dim)
        self.RNN_add=My_RNN_back(embed_dim,hidden_dim,drop_p1)
        self.RNN=My_RNN(embed_dim,hidden_dim,drop_p1)
        #self.final_l=Output(out_dim,4*hidden_dim,drop_p2)
        self.final_l=Output(out_dim,2*hidden_dim,drop_p2)
        
    def forward(self,batch):
        sen1 = self.embedding(batch.premise)
        sen2 = self.embedding(batch.hypothesis)
        hid_back=self.RNN_add(sen1)
        hid_back2=self.RNN_add(sen2)
        premise = self.RNN(sen1,hid_back)
        hypothesis = self.RNN(sen2,hid_back2)
        out = self.final_l(torch.cat([premise, hypothesis], 1))
        return out

class Classifier(nn.Module):
    def __init__(self,n_layer):
        super(Classifier,self).__init__()
        self.embedding=nn.Embedding(len(sentences.vocab),embed_dim)
        self.RNN=My_Bi_LSTM(embed_dim,hidden_dim,drop_p1,n_layer)
        self.final_l=Output(out_dim,4*hidden_dim,drop_p2)
        
    def forward(self,batch):
        prem,prem_len=batch.premise
        hypo,hypo_len=batch.hypothesis
        sen1 = self.embedding(prem)
        sen2 = self.embedding(hypo)
        premise = self.RNN(sen1,prem_len)
        hypothesis = self.RNN(sen2,hypo_len)
        out = self.final_l(torch.cat([premise, hypothesis], 1))
        return out

In [13]:
def train(model,train_loader,val_loader,optimizer,criterion,scheduler,epochs,print_iter=5):
    train_loss=[]
    val_loss=[]
    for i in range(epochs):
        model.train()
        train_loader.init_epoch()
        running_loss_train=0 
        total=0.0
        for indx,inputs in enumerate(train_loader):
            #inputs=inputs.to(device)
            #labels=labels.to(device)
            optimizer.zero_grad()
            output=model(inputs)
            loss=criterion(output,inputs.label)
            running_loss_train+=loss.item()
            loss.backward()
            optimizer.step()
            total+=inputs.batch_size
        train_loss.append(running_loss_train/total)
        if (i%print_iter)==0:
            model.eval()
            running_corrects=0.0
            running_loss=0.0
            total=0.0
            with torch.no_grad():
                for inputs in val_loader:
                    #inputs=inputs.to(device)
                    #labels=labels.to(device)
                    optimizer.zero_grad()
                    output=model(inputs)
                    loss=criterion(output,inputs.label)
                    _,pred=torch.max(output, 1)
                    running_corrects += torch.sum(pred == inputs.label).item()
                    running_loss+=loss.item()
                    total+=inputs.batch_size
            print(' {} Loss: {:.6f} Acc: {:.6f}'.format(
                  i,running_loss/total,(running_corrects/total)))
            val_loss.append(running_loss/total)
        scheduler.step()
    return model,train_loss,val_loss

In [14]:
model2=Classifier(2)
model2.to(device)

  "num_layers={}".format(dropout, num_layers))


Classifier(
  (embedding): Embedding(21571, 200)
  (RNN): My_Bi_LSTM(
    (rnn_back): ModuleList(
      (0): My_RNN_back(
        (rnn): LSTM(200, 128, dropout=0.25, bidirectional=True)
      )
      (1): My_RNN_back(
        (rnn): LSTM(256, 128, dropout=0.25, bidirectional=True)
      )
    )
    (rnn): ModuleList(
      (0): My_RNN(
        (rnn): LSTM(328, 128, dropout=0.25)
      )
      (1): My_RNN1(
        (rnn): LSTM(256, 128, dropout=0.25)
      )
    )
  )
  (final_l): Output(
    (fc1): Linear(in_features=512, out_features=256, bias=True)
    (fc3): Linear(in_features=256, out_features=128, bias=True)
    (fc4): Linear(in_features=128, out_features=4, bias=True)
  )
)

In [15]:
import torch.optim as optim
lr=0.001
optimizer2=optim.Adam(model2.parameters(),lr,weight_decay=0.0001)
criterion2=nn.CrossEntropyLoss()
#device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
exp_lr_scheduler2 = optim.lr_scheduler.StepLR(optimizer2, step_size=4, gamma=0.5)
model2.to(device)

Classifier(
  (embedding): Embedding(21571, 200)
  (RNN): My_Bi_LSTM(
    (rnn_back): ModuleList(
      (0): My_RNN_back(
        (rnn): LSTM(200, 128, dropout=0.25, bidirectional=True)
      )
      (1): My_RNN_back(
        (rnn): LSTM(256, 128, dropout=0.25, bidirectional=True)
      )
    )
    (rnn): ModuleList(
      (0): My_RNN(
        (rnn): LSTM(328, 128, dropout=0.25)
      )
      (1): My_RNN1(
        (rnn): LSTM(256, 128, dropout=0.25)
      )
    )
  )
  (final_l): Output(
    (fc1): Linear(in_features=512, out_features=256, bias=True)
    (fc3): Linear(in_features=256, out_features=128, bias=True)
    (fc4): Linear(in_features=128, out_features=4, bias=True)
  )
)

In [16]:
_,train_loss,val_loss=train(model2,train_iter,dev_iter,optimizer2,criterion2,exp_lr_scheduler2,epochs=20,print_iter=1)

 0 Loss: 0.002955 Acc: 0.680045
 1 Loss: 0.002694 Acc: 0.720382
 2 Loss: 0.002596 Acc: 0.731762
 3 Loss: 0.002500 Acc: 0.741821
 4 Loss: 0.002390 Acc: 0.755029
 5 Loss: 0.002348 Acc: 0.764174
 6 Loss: 0.002337 Acc: 0.768746
 7 Loss: 0.002342 Acc: 0.764784
 8 Loss: 0.002272 Acc: 0.772201
 9 Loss: 0.002252 Acc: 0.777383
 10 Loss: 0.002244 Acc: 0.778602
 11 Loss: 0.002249 Acc: 0.780431
 12 Loss: 0.002252 Acc: 0.777078
 13 Loss: 0.002251 Acc: 0.777992
 14 Loss: 0.002233 Acc: 0.782666


KeyboardInterrupt: ignored

In [17]:
def accuracy(model,train_loader):
    model.eval()
    running_corrects=0.0
    running_loss=0.0
    total=0.0
    with torch.no_grad():
        for inputs in train_loader:
            #inputs=inputs.to(device)
            #labels=labels.to(device)
            output=model(inputs)
            _,pred=torch.max(output, 1)
            running_corrects += torch.sum(pred == inputs.label)
            total+=inputs.batch_size
    print(' Acc: {:.6f}'.format((running_corrects/total)))
    return running_corrects/total

In [18]:
accuracy(model2,test_iter)

 Acc: 0.774430


tensor(0.7744, device='cuda:0')

In [None]:
torch.save(model2.state_dict(), 'hs250.pt')

In [None]:
from matplotlib import pyplot as plt
plt.ion()
plt.figure()
plt.plot(train_loss,label='train_loss')
plt.plot(val_loss,label='validation_loss')
plt.xlabel('epochs')
plt.ylabel('loss')
plt.legend(loc='upper left')
plt.savefig("./loss1.jpg")

In [None]:
plt.savefig("./loss.jpg")

In [None]:
torch.save(sentences,'models/vocab')