In [None]:
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data.dataloader import DataLoader
from torch.utils.data.dataset import TensorDataset
from torch.optim import Adam
from torch.autograd import Variable
from torch import cuda

In [None]:
from keras.datasets import reuters
from keras.preprocessing.sequence import  pad_sequences
from tqdm import trange

In [None]:
import numpy as np

In [70]:
(x_train, y_train), (x_test, y_test) = reuters.load_data()

Downloading data from https://s3.amazonaws.com/text-datasets/reuters.npz


In [105]:
# (x_train, y_train), (x_test, y_test)

In [241]:
vocab_len = 5000

In [242]:
x_train_p,x_test_p = tuple(np.clip(np.array(pad_sequences(a,maxlen=200,value = vocab_len-1 )),0,vocab_len-1) for a in [x_train,x_test])

y_train_oh, y_test_oh = tuple(np.eye(max(y_train)+1)[a] for a in [y_train,y_test])

In [243]:
train_ds = TensorDataset(torch.from_numpy(x_train_p),torch.from_numpy(y_train))
test_ds = TensorDataset(torch.from_numpy(x_test_p),torch.from_numpy(y_test))

train_dl = DataLoader(train_ds, batch_size=128, num_workers=2,shuffle=True)
test_dl = DataLoader(test_ds, batch_size=128, num_workers=2,shuffle=True)

In [245]:
class senti(nn.Module):
    def __init__(self,seq_len,in_size,hidden,rnnmodel,nb_class,fc1_size=256):
        super(senti,self).__init__()
        self.seq_len = seq_len
        self.in_size = in_size
        self.hidden = hidden
        self.fc1_size = fc1_size
        self.nb_class = nb_class
        
        self.ebd = nn.Embedding(vocab_len,self.in_size)
        
        self.rnn = rnnmodel(seq_len=self.seq_len,in_size=self.in_size,hidden=self.hidden)

        self.bn1 = nn.BatchNorm1d(self.hidden)
        self.fc1 = nn.Linear(self.hidden,self.fc1_size)
        
        self.bn2 = nn.BatchNorm1d(self.fc1_size)
        self.fc2 = nn.Linear(self.fc1_size,self.nb_class)
        
    def forward(self,x):
        x = self.ebd(x)
        x = self.rnn(x)
        
        x = self.bn1(x)
        x = F.relu(self.fc1(x))
        x = self.bn2(x)
        x = F.softmax(self.fc2(x))
        return x

In [246]:
class rnn(nn.Module):
    def __init__(self,seq_len,in_size,hidden):
        super(rnn,self).__init__()
        self.seq_len = seq_len
        self.hidden = hidden
        self.in_size = in_size
        
        self.dense1 = nn.Linear(self.in_size+self.hidden,self.hidden)
        
    def forward(self,xs):
        bs = int(xs.size()[0])
        cellout = Variable(torch.zeros(bs,self.hidden))
        
        for i in range(self.seq_len):
            ipt = torch.cat([xs[:,i],cellout],1)
            cellout += F.tanh(self.dense1(ipt))
        return cellout

In [247]:
class rnn_forget(nn.Module):
    def __init__(self,seq_len,in_size,hidden):
        super(rnn_forget,self).__init__()
        self.seq_len = seq_len
        self.hidden = hidden
        self.in_size = in_size
        
        self.gate_fgt = nn.Linear(self.in_size+self.hidden,self.hidden)
        
        self.dense1 = nn.Linear(self.in_size+self.hidden,self.hidden)
        
    def forward(self,xs):
        bs = int(xs.size()[0])
        cellout = Variable(torch.zeros(bs,self.hidden))
        
        for i in range(self.seq_len):
            ipt = torch.cat([xs[:,i],cellout],1)
            fgt = F.sigmoid(self.gate_fgt(ipt))
            
            cellout = cellout * fgt
            
            cellout += F.tanh(self.dense1(ipt))
        return cellout
    
class rnn_in(nn.Module):
    def __init__(self,seq_len,in_size,hidden):
        super(rnn_in,self).__init__()
        self.seq_len = seq_len
        self.hidden = hidden
        self.in_size = in_size
        
#         self.gate_fgt = nn.Linear(self.in_size+self.hidden,self.hidden)
        self.gate_in = nn.Linear(self.in_size+self.hidden,self.hidden)
        
        self.dense1 = nn.Linear(self.in_size+self.hidden,self.hidden)
        
    def forward(self,xs):
        bs = int(xs.size()[0])
        cellout = Variable(torch.zeros(bs,self.hidden))
        
        for i in range(self.seq_len):
            ipt = torch.cat([xs[:,i],cellout],1)
#             fgt = F.sigmoid(self.gate_fgt(ipt))
            igt = F.sigmoid(self.gate_in(ipt))
            
#             cellout = cellout * fgt
            
            cellout += igt * F.tanh(self.dense1(ipt))
        return cellout

In [256]:
class lstm(nn.Module):
    def __init__(self,seq_len,in_size,hidden):
        """Long short term memory"""
        super(lstm,self).__init__()
        self.seq_len = seq_len
        self.hidden = hidden
        self.in_size = in_size
        
        self.gate_fgt = nn.Linear(self.in_size+self.hidden,self.hidden)
        self.gate_in = nn.Linear(self.in_size+self.hidden,self.hidden)
        self.gate_out = nn.Linear(self.in_size+self.hidden,self.hidden)
        
        self.dense1 = nn.Linear(self.in_size+self.hidden,self.hidden)
        self.dense2 = nn.Linear(self.hidden,self.hidden)
        
    def forward(self,xs):
        bs = int(xs.size()[0])
        cell = Variable(torch.zeros(bs,self.hidden))
        h = Variable(torch.ones(bs,self.hidden))
        
        for i in range(self.seq_len):
            ipt = torch.cat([xs[:,i],h],1)
            
            # 3 gates
            fgt = F.sigmoid(self.gate_fgt(ipt))
            igt = F.sigmoid(self.gate_in(ipt))
            ogt = F.sigmoid(self.gate_out(ipt))
            
            cell = cell * fgt
            
            cell += igt * F.tanh(self.dense2(F.relu(self.dense1(ipt))))
            
            h = ogt * F.tanh(cell)
            
        return h

In [None]:
class lstm_cell(nn.)

In [257]:
model = senti(seq_len=200,in_size=50,hidden=60,nb_class=46,rnnmodel=lstm)
if CUDA:
    model.CUDA()

In [258]:
ce_loss = nn.CrossEntropyLoss()
opt = Adam(model.parameters())

In [9]:
def accuracy(y_,y):
    y_max,y_idx = torch.max(y_,1)
    acc = torch.mean(torch.eq(y_idx,y).type(torch.FloatTensor))
    return acc

In [275]:
CUDA = cuda.is_available()

In [277]:
window = 5
for epoch in range(2):
    train_gen = iter(train_dl)
    test_gen = iter(test_dl)
    r_acc = 0
    r_ce = 0
    t=trange(len(train_dl))
    for i in t:
        x,y = train_gen.__next__()
        x,y = Variable(x.type(torch.LongTensor)),Variable(y)
        if CUDA:
            x.cuda()
            y.cuda()
        
        y_ = model(x)
        ce = ce_loss(y_,y)
        # calc accuracy
        y_max,y_idx = torch.max(y_,1)
        acc = accuracy(y_,y)
        
        opt.zero_grad()
        
        ce.backward()
        
        opt.step()
        r_acc += acc.data[0]
        r_ce += ce.data[0]
        if i % window ==(window-1):
            t.set_description("ep%s_bt%s\t cross_ent:\t%.2f\tacc:\t%.3f"%(epoch,
                                             i,
                                             r_ce/window,
                                             r_acc/window,
                                             ))
            r_acc,r_ce = 0,0


  0%|          | 0/71 [00:00<?, ?it/s][A
ep0_bt69	 cross_ent:	3.54	acc:	0.328: 100%|██████████| 71/71 [01:41<00:00,  1.43s/it]
ep1_bt69	 cross_ent:	3.53	acc:	0.338: 100%|██████████| 71/71 [01:24<00:00,  1.19s/it]


rnn, relu:

ep0_bt69	 cross_ent:	3.50	acc:	0.383: 100%|██████████| 71/71 [00:56<00:00,  1.26it/s]

ep1_bt69	 cross_ent:	3.52	acc:	0.345: 100%|██████████| 71/71 [00:57<00:00,  1.24it/s]

rnn_forget:

ep0_bt69	 cross_ent:	3.72	acc:	0.205: 100%|██████████| 71/71 [01:02<00:00,  1.14it/s]

ep1_bt69	 cross_ent:	3.66	acc:	0.208: 100%|██████████| 71/71 [01:00<00:00,  1.17it/s]

rnn,tanh

ep0_bt69	 cross_ent:	3.57	acc:	0.295: 100%|██████████| 71/71 [00:58<00:00,  1.22it/s]

ep1_bt69	 cross_ent:	3.53	acc:	0.342: 100%|██████████| 71/71 [01:00<00:00,  1.17it/s]

rnn_in, tanh

ep0_bt69	 cross_ent:	3.53	acc:	0.347: 100%|██████████| 71/71 [01:05<00:00,  1.09it/s]

ep1_bt69	 cross_ent:	3.47	acc:	0.397: 100%|██████████| 71/71 [01:06<00:00,  1.07it/s]

lstm, hidden:60

ep0_bt69	 cross_ent:	3.54	acc:	0.336: 100%|██████████| 71/71 [01:27<00:00,  1.24s/it]

ep1_bt69	 cross_ent:	3.51	acc:	0.359: 100%|██████████| 71/71 [01:36<00:00,  1.36s/it]

lstm, with vocab length:5000

ep0_bt69	 cross_ent:	3.63	acc:	0.355: 100%|██████████| 71/71 [01:32<00:00,  1.30s/it]

ep1_bt69	 cross_ent:	3.50	acc:	0.364: 100%|██████████| 71/71 [01:30<00:00,  1.28s/it]

ep0_bt69	 cross_ent:	3.52	acc:	0.347: 100%|██████████| 71/71 [01:34<00:00,  1.33s/it]

ep1_bt69	 cross_ent:	3.49	acc:	0.377: 100%|██████████| 71/71 [01:35<00:00,  1.35s/it]