In [1]:
import argparse
import time
import torch
from torch.autograd import Variable
import numpy as np
torch.manual_seed(5)
import sys
sys.path.append("..")
# ############################################################################
# param
##############################################################################
# Mini-batch size shared by both loaders and by the model's initial hidden state.
batch_size=50
# Truthy -> the model is moved to the GPU when it is built.
use_cuda=1
##############################################################################
# Load data
##############################################################################
import os
from data_loader import DataLoader
# Directory holding `pretrained_wordvec.npy` and `corpus.pt`.  The original
# machine-specific location stays the default; set LSTM_TEXT_DATA_DIR to run
# the notebook elsewhere.  Joining with '' guarantees a trailing separator so
# the plain string concatenations below keep working.
path=os.path.join(
    os.environ.get(
        'LSTM_TEXT_DATA_DIR',
        '/media/kwane/3160053c-937e-4de9-a540-b28bd2802040/kwane/NLP/lstm_text_class/data/rt/'),
    '')
pretrained_wordvec=np.load(path+'pretrained_wordvec.npy')
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code -- only point this at a corpus you produced or trust.
data = torch.load(path+'corpus.pt')
max_len = data["max_len"]
vocab_size = data['dict']['vocab_size']
label_size = data['dict']['label_size']

# Training batches (the loader's default shuffling behaviour applies).
training_data = DataLoader(
             data['train']['src'],
             data['train']['label'],
             data['train']['train_mask'],
             data['train']['train_mask_all'],
             max_len,
             batch_size=batch_size)

# Validation batches, explicitly unshuffled.
validation_data = DataLoader(
              data['valid']['src'],
              data['valid']['label'],
              data['valid']['valid_mask'],
              data['valid']['valid_mask_all'],
              max_len,
              batch_size=batch_size,
              shuffle=False)

In [1]:
#model
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.nn import init
import torch.nn.functional as F
import const
torch.manual_seed(5)


class LayerNorm(nn.Module):
    """Normalise the last dimension and apply a learnable scale and shift.

    Each vector along the last axis is centred on its mean and divided by its
    standard deviation (as computed by ``torch.std``), with the deviation
    clamped from below at ``eps`` so the division never hits zero.

    Args:
        hidden_size: length of the last dimension of the inputs.
        eps: lower bound applied to the standard deviation.
    """

    def __init__(self, hidden_size, eps=1e-6):
        super(LayerNorm, self).__init__()
        self.eps = eps
        # Affine parameters start as the identity transform.
        self.weight = nn.Parameter(torch.ones(hidden_size))
        self.bias = nn.Parameter(torch.zeros(hidden_size))

    def forward(self, input):
        """Return the normalised tensor; same shape as ``input``."""
        centered = input - input.mean(dim=-1, keepdim=True)
        spread = input.std(dim=-1, keepdim=True).clamp(min=self.eps)
        normalized = centered / spread
        scale = self.weight.expand_as(normalized)
        shift = self.bias.expand_as(normalized)
        return normalized * scale + shift

class LSTM_Text(nn.Module):
    """Bidirectional-LSTM text classifier over pretrained word embeddings.

    Pipeline of ``forward``: embedding lookup -> layer normalisation of the
    embeddings -> LSTM -> tanh -> mask-weighted sum over time -> linear layer
    producing ``label_size`` scores.

    Args:
        vocab_size: number of rows in the embedding table.
        batch_size: default batch size used by ``init_hidden``.
        embed_dim: embedding width (must match the pretrained vectors).
        label_size: number of output classes.

    Note:
        ``_init_weights`` reads the notebook-level ``pretrained_wordvec``
        array, so that global has to exist before the model is constructed.
    """

    def __init__(self,vocab_size,batch_size,embed_dim,label_size):
        super(LSTM_Text,self).__init__()
        self.vocab_size=vocab_size
        self.embed_dim=embed_dim
        self.hidden_size=200
        self.lstm_layers=1
        # NOTE(review): per the nn.LSTM documentation dropout is applied
        # between stacked layers only, so with lstm_layers=1 it looks like
        # this value has no effect -- confirm before relying on it.
        self.dropout=0.5
        self.batch_size=batch_size
        self.bidirectional=True
        self.label_size=label_size
        self.num_directions = 2 if self.bidirectional else 1

        self.lookup_table = nn.Embedding(self.vocab_size, self.embed_dim,
                                padding_idx=const.PAD)
        self.lstm = nn.LSTM(self.embed_dim,
                            self.hidden_size,
                            self.lstm_layers,
                            dropout=self.dropout,
                            bidirectional=self.bidirectional)
        # Not used by forward(); kept so the parameter list and state_dict
        # layout stay exactly as before.
        self.ln = LayerNorm(self.hidden_size*self.num_directions)
        self.encode_ln = LayerNorm(self.embed_dim)

        self.logistic = nn.Linear(self.hidden_size*self.num_directions,
                                self.label_size)

        self._init_weights()

    def _init_weights(self, scope=1.):
        """Load the pretrained embeddings and initialise the output layer.

        The output weights are drawn uniformly from ``[-scope, scope]`` and
        the output bias is zeroed.
        """
        self.lookup_table.weight.data.copy_(torch.from_numpy(pretrained_wordvec))
        self.logistic.weight.data.uniform_(-scope, scope)
        self.logistic.bias.data.fill_(0)

    def init_hidden(self, batch_size=None):
        """Return a zeroed ``(h_0, c_0)`` pair matching the model's parameters.

        Args:
            batch_size: batch dimension of the state; defaults to the
                ``batch_size`` given at construction time.  Pass the real
                size when a batch is smaller than the default.

        Returns:
            Tuple of two Variables shaped
            ``(lstm_layers * num_directions, batch_size, hidden_size)``.
        """
        if batch_size is None:
            batch_size = self.batch_size
        num_layers = self.lstm_layers*self.num_directions

        # Allocating via an existing parameter gives the state the same
        # tensor type (and therefore device) as the model.
        weight = next(self.parameters()).data
        h_0 = Variable(weight.new(num_layers, batch_size, self.hidden_size).zero_())
        c_0 = Variable(weight.new(num_layers, batch_size, self.hidden_size).zero_())
        return (h_0, c_0)

    def forward(self, input,mask, hidden):
        """Score a batch of token-id sequences.

        Args:
            input: token ids; indexed as (batch, time) since it is transposed
                to time-major before the LSTM.
            mask: per-position weights with the same two leading dimensions
                as ``input`` (presumably 0/1 flags selecting the steps to
                pool -- confirm against the data loader).
            hidden: ``(h, c)`` state handed to the LSTM.

        Returns:
            ``(logits, hidden)`` where ``logits`` has one row per sequence and
            ``hidden`` is the LSTM state after the batch.
        """
        encode = self.lookup_table(input)
        encode = self.encode_ln(encode)  # layer normalisation of the embeddings
        lstm_out, hidden = self.lstm(encode.transpose(0, 1), hidden)

        output = F.tanh(lstm_out)
        # Cast the mask to whatever type/device the activations live on
        # instead of hard-coding torch.cuda.FloatTensor, which made the model
        # unusable on CPU.
        step_weights = mask[:, :, None].transpose(0, 1).type_as(output)
        final_h = torch.mul(output, step_weights)
        # Back to batch-major, then collapse the time axis.
        final_h=torch.sum(final_h.transpose(0, 1), 1)

        return self.logistic(final_h), hidden
# ##############################################################################
# Build model
# ##############################################################################
import model


# Instantiate the classifier; 300 is the embedding width handed to the model.
rnn = LSTM_Text(
    vocab_size=vocab_size,
    batch_size=batch_size,
    embed_dim=300,
    label_size=label_size,
)
# Move the parameters to the GPU before the optimizer captures them.
if use_cuda:
    rnn = rnn.cuda()

# Loss and optimizer used by the training and evaluation cells.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(rnn.parameters(), lr=0.001)
print(rnn)

ImportError: No module named const

In [6]:
# ##############################################################################
# Training
# ##############################################################################
import datetime
import matplotlib.pyplot as plt  

# Number of passes over the training set; the main cell below assigns this
# again before the training loop runs.
epoches=100
# Wall-clock reference for timing; also assigned again in the main cell.
starttime = datetime.datetime.now()
def repackage_hidden(h):
    """Cut a hidden state loose from the graph that produced it.

    Args:
        h: a Variable/tensor, or an arbitrarily nested iterable of them
           (e.g. the ``(h, c)`` pair returned by an LSTM).

    Returns:
        A fresh Variable wrapping the same data when ``h`` is a single state,
        otherwise a tuple with every element repackaged recursively.
    """
    # isinstance (rather than an exact type comparison) also recognises plain
    # tensors on PyTorch versions where Variable and Tensor are merged; the
    # exact comparison sent those into the tuple branch and failed.
    if isinstance(h, Variable):
        # Re-wrapping .data keeps the values on the device they already live
        # on, so no global CUDA flag needs to be consulted here.
        return Variable(h.data)
    return tuple(repackage_hidden(v) for v in h)
    
def train():
    """Run one optimisation pass over ``training_data``.

    Uses the notebook-level ``rnn``, ``optimizer`` and ``criterion``.  The
    LSTM state is carried from one batch to the next, detached between
    batches so gradients never flow across batch boundaries.

    Returns:
        list of float: the loss of every batch, in iteration order.
    """
    # test() switches the model to eval mode and never switches it back, so
    # every epoch after the first would otherwise train in eval mode.
    rnn.train()
    loss_plt=[]
    hidden = rnn.init_hidden()
    for data, label, mask, _mask_all in training_data:
        optimizer.zero_grad()
        hidden = repackage_hidden(hidden)
        target, hidden = rnn(data,mask, hidden)
        loss = criterion(target, label)

        loss.backward()
        optimizer.step()

        # Flatten before indexing so this works both when the loss is a
        # one-element tensor and when it is 0-dimensional (where
        # `loss.data[0]` raises); float() yields a plain number either way.
        loss_plt.append(float(loss.data.view(-1)[0]))
    return loss_plt

def test():
    """Measure classification accuracy on ``validation_data``.

    Puts the notebook-level ``rnn`` into eval mode (and leaves it there),
    then counts how many arg-max predictions equal their labels.

    Returns:
        tuple: ``(accuracy_percent, corrects, _size)`` where ``corrects`` is
        the number of correct predictions as an int and ``_size`` is
        ``validation_data.sents_size``.
    """
    rnn.eval()
    corrects = 0
    _size = validation_data.sents_size
    hidden = rnn.init_hidden()
    for data, label, mask, _mask_all in validation_data:
        hidden = repackage_hidden(hidden)
        pred, hidden = rnn(data,mask, hidden)

        predicted = torch.max(pred, 1)[1].view(label.size()).data
        # int() keeps the running count a plain Python integer even when
        # .sum() hands back a tensor, so the accuracy below is a float and
        # the printed summary stays readable.
        corrects += int((predicted == label.data).sum())

    return 1.0*corrects/_size * 100.0,corrects,_size


In [None]:
#main

# Number of full passes over the training data for this run.
epoches=10

starttime = datetime.datetime.now()
loss_plt=[]      # per-batch training loss, concatenated across epochs
Accuracy_=[]     # validation accuracy (percent) after each epoch
for epoch in range(epoches):
    # One optimisation pass, then a validation pass.
    loss_plt.extend(train())
    Accuracy,corrects,_size=test()
    Accuracy_.append(Accuracy)
    print(' '.join(['epoch:', str(epoch),
                    'Accuracy:', str(Accuracy)+'%',
                    str(corrects)+'/'+str(_size)]))

# Draw the loss curve first, then the accuracy curve, each in its own figure.
for series, x_label, y_label in ((loss_plt, 'num', 'loss'),
                                 (Accuracy_, 'epochs', 'Accuracy')):
    plt.plot(series)
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.show()

print('Finished Training')
endtime = datetime.datetime.now()

print(' '.join(['starttime:', str(starttime), '|endtime:', str(endtime)]))
