In [1]:
import pickle
import numpy as np
import torch
from torch.nn.utils.rnn import pad_packed_sequence, pack_padded_sequence
from torch.autograd import Variable
import torch.nn.functional as F
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
torch.cuda.set_device(1)
from tqdm import tnrange, tqdm_notebook
from torch.autograd import Variable
from sklearn.metrics import confusion_matrix
from IPython.display import clear_output
import time
import math

In [2]:
with open('used_data-Copy1.p', 'rb') as handle:
    data = pickle.load(handle)

In [3]:
text =data[:100]

#load X and y
x =[[j[0] for j in i] for i in text]
y =[[j[1] for j in i] for i in text]

#unique words and tags
total_vocab =list(set([j[0]for i in text for j in i]))
total_tags = [ i for i in range(6)]

x =list(filter(None,x))
y= list(filter(None,y))

In [4]:
vocab = {}
vocab['PAD'] = 0
vocab['UNK'] = 1
#start at 1 becuase we add UNK at 0 index
for i,l in enumerate(total_vocab,2):
    vocab[l] =i

In [5]:
#filter UNK word from datasets
train_sentences = []
train_labels=[]
for sentence in x:
    s= [ vocab[token] if token in vocab else vocab['UNK'] for token in sentence]
    train_sentences.append(s)

In [6]:
train_labels =y

In [7]:
# seq_lengths = torch.cuda.LongTensor(list(map(len, train_sentences)))

In [8]:
#maximum len of seq
max_len = max(len(s) for s in train_sentences)
# get the length of each seq in your batch
seq_lengths = torch.LongTensor(list(map(len, train_sentences)))
# seq_lengths = np.stack(list(map(len, train_sentences)))


seq_tensor = Variable(torch.zeros((len(train_sentences), seq_lengths.max()))).long()
for idx, (seq, seqlen) in enumerate(zip(train_sentences, seq_lengths)):
    seq_tensor[idx, :seqlen] = torch.LongTensor(seq)
    
labels_tensor = Variable(torch.zeros((len(train_sentences), seq_lengths.max()))).long()
for idx, (label, seqlen) in enumerate(zip(y, seq_lengths)):
    labels_tensor[idx, :seqlen] = torch.LongTensor(label)

# SORT YOUR TENSORS BY LENGTH!
seq_lengths, perm_idx = seq_lengths.sort(0, descending=True)
seq_tensor = seq_tensor[perm_idx]
#we didnt transpose because we need batch in beginning
# seq_tensor = seq_tensor.transpose(0,1)
labels = labels_tensor[perm_idx]
# labels = labels.transpose(0,1)

In [28]:
def create_valid(x,y,percent):
    percent = percent/100
    x_train =(x[:round(len(x)*(percent))])
    y_train =(x[:round(len(x)*(1-percent))])
    x_test =(y[:round(len(y)*(percent))])
    y_test =(y[:round(len(y)*(1-percent))])
    return x_train,x_test,y_train,y_test

In [31]:
X_train, X_test, y_train, y_test =create_valid(seq_tensor,labels,90)

In [32]:
X_train.shape,X_test.shape,y_train.shape,y_test.shape

(torch.Size([90, 322]),
 torch.Size([90, 322]),
 torch.Size([10, 322]),
 torch.Size([10, 322]))

In [34]:
seq_tensor.shape

torch.Size([100, 322])

In [12]:
#hyperpramater
bs =512
hiddem_size = 64
emb_size = 100
lr = 1e-3
num_classes = 6

In [13]:
class trn_dataset(Dataset):
        def __init__(self):
            self.len = seq_tensor.shape[0]
            self.x_data = seq_tensor
            self.y_data = labels
            self.seq_len = seq_lengths
            self.perm_idx =perm_idx
            
        def __getitem__(self,index):
            return self.x_data[index],self.y_data[index],self.seq_len[index],self.perm_idx[index]
        def __len__(self):
            return self.len

In [None]:
class valid_dataset(Dataset):
        def __init__(self):
            self.len = seq_tensor.shape[0]
            self.x_data = seq_tensor
            self.y_data = labels
            self.seq_len = seq_lengths
            self.perm_idx =perm_idx
            
        def __getitem__(self,index):
            return self.x_data[index],self.y_data[index],self.seq_len[index],self.perm_idx[index]
        def __len__(self):
            return self.len

In [14]:
train_loader =  DataLoader(dataset=trn_dataset(),batch_size=bs,shuffle=False)

In [15]:
class lstm(nn.Module):
    def __init__(self):
        super(lstm,self).__init__()
        #layer
        self.embedding = nn.Embedding(len(vocab),emb_size)
        self.lstm =  nn.LSTM(emb_size,hiddem_size, bidirectional=True)
        self.fc = nn.Linear(hiddem_size*2,num_classes)
        self.hidden = self.init_hidden()
        self.init_hidden()
        
    def init_hidden(self,bs=bs):
        self.h = (Variable(torch.zeros(2, bs, hiddem_size).cuda()),Variable(torch.zeros(2, bs, hiddem_size).cuda()))
    
    
    def forward(self,inp,seq_lengths):
        bs = inp[0].size(0)
        if self.h[0].size() != bs:
            self.init_hidden(bs)

        emb = self.embedding(inp)

        packed_input = pack_padded_sequence(emb, seq_lengths.numpy())
        packed_output,h = self.lstm(packed_input,self.h)
        self.h = h
        out, _ = pad_packed_sequence(packed_output)
        s = self.fc(out)
        return F.log_softmax(s,dim=-1)

In [16]:
model = lstm()
model.cuda()

lstm(
  (embedding): Embedding(699, 100)
  (lstm): LSTM(100, 64, bidirectional=True)
  (fc): Linear(in_features=128, out_features=6, bias=True)
)

In [17]:
def timeSince(since):
    now = time.time()
    s = now - since
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)

In [18]:
def nll_loss_seq(inp,targ):
    sl,bs,nh = inp.size()
    #we want targ size match the inp size because we use pack pad only in x but y 
    targ = targ[:,:sl]
    targ = targ.transpose(0,1).contiguous().view(-1)
    
    return F.nll_loss(inp.view(-1,nh),targ.view(-1))
#     return F.nll_loss(inp.view(-1,nh),targ.view(-1))

In [19]:
def div(denominator,numerator):
    if numerator == 0:
        return 0
    else:
        result = denominator/numerator
        return result

In [20]:
def f1(precision,recall):
    try:
        
        return 2*((precision*recall)/(precision+recall))
    except:
        return 0

In [21]:
def evaluate(m,epoch):
    number_classes = int(m.shape[0])
    clear_output(wait=True)
    print('Epoch: {} Time: {}'.format(epoch,timeSince(start)))
    
    for i in range(number_classes):
#     for i in range(6):
#         if i >=number_classes:

#             print('Class: {} P: {:.2f}, R: {:.2f}, F1: {:.2f}'.format(i,0,0,0))
#             continue
        precsion = div(m[i,i],sum(m[:,i]))
        recall = div(m[i,i],sum(m[i,:]))
        f1_score = f1(precsion,recall)
        
        print('Class: {} P: {:.2f}, R: {:.2f}, F1: {:.2f}'.format(i,precsion,recall,f1_score))

In [35]:
def concat_bs_result(outputs,p):
    
    outc =outputs.transpose(0,1)
    all_preds = []
    all_acts = []
    for i,(data,p_idx) in enumerate(zip(outc,p)):
        t = np.argmax(data.cpu().detach().numpy(),axis=1)
        max_len_act =len(np.array(train_labels[p_idx]))
        pred = list(t[:max_len_act])
        act =list(np.array(train_labels[p_idx]))
        all_preds.extend(pred)
        all_acts.extend(act)
    return all_preds,all_acts

In [23]:
optimizer =optim.Adam(model.parameters(), lr=lr)

In [24]:
num_epochs=1

In [25]:
all_loss =[]
start = time.time()

for epoch in range(num_epochs):
    print('Epoch {}/{}'.format(epoch, num_epochs - 1))
    print('-' * 10)
    all_p=[]
    all_a=[]
    all_loss=0.0

    for index,data in enumerate(train_loader):
        
        numbers = index+1
        x,y,l,p = data
        x,y,l,p = Variable(x).cuda(), Variable(y).cuda(),Variable(l),Variable(p)
        x = x.transpose(0,1)
        optimizer.zero_grad()
        outputs =model(x,l)
        loss = nll_loss_seq(outputs,y)
        loss.backward()
        optimizer.step()
        all_loss += loss.item()
        if numbers%10==0:
            print('Avg.loss: {:.4f}, Time :{} '.format(all_loss/numbers,timeSince(start)))
#         all_loss.append(loss.item())


        all_p_minib,all_a_minib =concat_bs_result(outputs)
        all_p.extend(all_p_minib)
        all_a.extend(all_a_minib)
    m =confusion_matrix(all_a, all_p)
    evaluate(m,epoch)


Epoch: 0 Time: 0m 0s
Class: 0 P: 0.94, R: 0.01, F1: 0.02
Class: 1 P: 0.00, R: 0.00, F1: nan
Class: 2 P: 0.00, R: 0.43, F1: 0.01
Class: 3 P: 0.02, R: 0.17, F1: 0.03
Class: 4 P: 0.02, R: 0.63, F1: 0.03
Class: 5 P: 0.00, R: 0.00, F1: nan


  after removing the cwd from sys.path.
