In [17]:
from __future__ import print_function
import numpy as np
# fix random seed for reproducibility
np.random.seed(1337)

import os
from datetime import datetime
import time

import torch
import torch.nn as nn 
from torch.autograd import Variable
import torch.nn.functional as F

import torch.optim as optim
import torch.utils.data
from  torch.nn.utils import clip_grad_norm

from bnlstm import LSTM, LSTMCell, BNLSTMCell

import matplotlib
import matplotlib.pyplot as plt

In [2]:
exp_name = 'singleFwLSTM'

datapath='/root/data/data_pororo/'
savepath='/root/data/save_pororo/'

# diagEpisodes: (171,) --- (35,4800), ...
#inp = np.load(datapath + 'diagEpisodes.npy')
#inp = np.load(datapath + 'pororo_combVec_rebuild.npy') # (16066,9600)
inp = np.load(datapath + 'combEmbEpisodes_rebuild.npy') # (171,) --- (35,50), ...
stendIdx=np.load(datapath + 'stendIdx.npy')
rmIdx=np.load(datapath + 'rmIdx.npy')

inp[0].shape

(36, 50)

In [3]:
# inp.shape (171,)
# inp[0].shape (36,50)

test_idx=[9]
X_all=inp

test_indices = [i for i in range(len(X_all)) if i % 10 in test_idx]
train_indices = [i for i in range(len(X_all)) if i not in test_indices]

print( 'test: ',len(test_indices), ' train: ', len(train_indices), 
                  ' sum:', len(test_indices)+len(train_indices)  )

test:  17  train:  154  sum: 171


In [4]:
X_train = [X_all[i] for i in train_indices]
X_test = [X_all[i] for i in test_indices]

lenEp=[len(oneEp) for oneEp in X_all]

maxLen = 210
pairSkVec_train = [zip(oneEp[:-1], oneEp[1:]) for oneEp in X_train]
pairSkVec_test = [zip(oneEp[:-1], oneEp[1:]) for oneEp in X_test]
#pairSkVec_train = [ (oneEp[:-1], oneEp[1:]) for oneEp in X_train]
#pairSkVec_test = [(oneEp[:-1], oneEp[1:]) for oneEp in X_test]

i,t=0,0
print(len(pairSkVec_train)) #: 154
print(len(pairSkVec_train[i])) #: 35 (varible-length) (원래는 36)
print(len(pairSkVec_train[i][t])) #: 2
print(len(pairSkVec_train[i][t][0])) #: 50
batch_size=32

154
35
2
50


In [5]:
from sequence import pad_sequences
X_train_pad = pad_sequences(pairSkVec_train, dtype='float32', padding='pre', truncating='pre', value=0.0)
X_test_pad = pad_sequences(pairSkVec_test, dtype='float32', padding='pre', truncating='pre', value=0.0)

In [6]:
#X_train_pad = nn.utils.rnn.pack_padded_sequence(X_train_pad, lenEp)

In [7]:
kwargs = {'num_workers': 1, 'pin_memory': True} #if args.cuda else {}
train_loader = torch.utils.data.DataLoader( X_train_pad, batch_size=batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader( X_test_pad, batch_size=batch_size, shuffle=True, **kwargs)

In [8]:
N, D_in=X_train[0].shape
D_out=D_in
embD=50
hidden_size=200
num_layers=1
use_gpu=True
batch_first=True

In [9]:
#model_name='bnlstm'
#if model_name == 'bnlstm':
#    model = LSTM(cell_class=BNLSTMCell, input_size=D_in,
#                 hidden_size=hidden_size, batch_first=batch_first, max_length=maxLen)
#elif model_name == 'lstm':
#    model = LSTM(cell_class=LSTMCell, input_size=D_in,
#                 hidden_size=hidden_size, batch_first=batch_first)
#else:
#    raise ValueError

In [10]:
class Net(nn.Module):
    def __init__(self, inpD, rnn_inpD, hidden_size, rnn_name='bnlstm', maxLen=250, batch_first=True, bias=True):
        super(Net, self).__init__()
        self.batch_first = batch_first
        self.inpD = inpD
        self.rnn_inpD = rnn_inpD
        self.hidden_size = hidden_size
        
        self.fc_in = nn.Linear(inpD, rnn_inpD, bias)
        if rnn_name == 'bnlstm':
            self.rnn = LSTM(cell_class=BNLSTMCell, input_size=rnn_inpD,
                         hidden_size=hidden_size, batch_first=batch_first, max_length=maxLen)
        elif rnn_name == 'lstm':
            self.rnn = LSTM(cell_class=LSTMCell, input_size=rnn_inpD,
                         hidden_size=hidden_size, batch_first=batch_first)
        else:
            raise ValueError    
        self.fc_out = nn.Linear(hidden_size, inpD, bias)
        
    def forward(self, x):
        
        x_rnn_in = F.tanh(self.fc_in(x))
        
        hx = None

        h0 = Variable(x.data.new(x_rnn_in.size(0), hidden_size)
                          .normal_(0, 0.1))
        c0 = Variable(x.data.new(x_rnn_in.size(0), hidden_size)
                          .normal_(0, 0.1))
        hx = (h0, c0)
        o_n, (h_n, c_n) = self.rnn(input_=x_rnn_in, hx=hx)
        if self.batch_first:
            o_n = o_n.transpose(0,1)
            
        out=F.tanh(self.fc_out(o_n))
        
        return out, x_rnn_in

In [11]:
model = Net(D_in,embD,hidden_size, rnn_name='bnlstm',maxLen=250)

#model =torch.nn.DataParallel(model).cuda()
#x=x.cuda(async=True)# there is no difference no matter whether we include async=True or not
#yt=yt.cuda(async=True)#

loss_fn = nn.MSELoss()
params = list(model.parameters())
#optimizer = optim.RMSprop(params=params, lr=1e-4, momentum=0.9)
optimizer = optim.Adam(model.parameters(), lr=1e-4) 

if use_gpu:
    model.cuda()

In [12]:
def to_np(x):
    return x.data.cpu().numpy()

In [25]:
def test(model, test_loader):
    test_loss = 0
    for test_batch in test_loader:
        test_data = test_batch[:,:,0,:]
        test_target = test_batch[:,:,1,:]
        
        test_data = Variable(test_data, volatile=True)
        test_target = Variable(test_target, volatile=True)
        if use_gpu:
            test_data = test_data.cuda()
            test_target = test_target.cuda()

        model.train(False)

        out, _  = model(test_data)
        
        test_loss += loss_fn(out, test_target).data[-1]

    test_loss /= len(test_loader.dataset)
    print('\t\tTest set loss: {0:0.6f}'.format(test_loss))
    return test_loss

In [27]:
from logger import Logger  # https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/04-utils/tensorboard/logger.py

# Set the logger
cur_t = datetime.now()
folder_id='{:%Y%m%d_%H%M%S}'.format(cur_t)
#log_dir=savepath+'./logs/BiLSTM_LA_emb/'+folder_id + '_s' + str(state_size) + '/'
log_dir='./logs/pytorch_emb_story/' + exp_name + '/'+folder_id + '_s' + str(hidden_size) + '/'

logger = Logger(log_dir)

log_interval = 20
save_interval = 10
loss_test_hist=[]

eidx = 0

In [None]:
iter_cnt=0
start_time=time.time()
for epoch in range(eidx, eidx+3000):
    model.train(True)
    for train_batch in train_loader:
        #print(train_batch.shape)
        train_data = train_batch[:,:,0,:]
        train_target = train_batch[:,:,1,:]
        #print(train_data.shape, train_target.shape)
        train_data = Variable(train_data)
        train_target = Variable(train_target)
        if use_gpu:
            train_data = train_data.cuda()
            train_target = train_target.cuda()
        model.zero_grad()
        
        out, _ =model(train_data)
        
        train_loss = loss_fn(out, train_target)
        train_loss.backward()
        clip_grad_norm(parameters=params, max_norm=1)
        optimizer.step()

        if iter_cnt % log_interval == 0:
            print("Epoch: {0}\ttrain_loss: {1:0.6f}".format(epoch, train_loss.data[0]) )
            
            loss_test_hist.append(test(model,test_loader))

            #============ TensorBoard logging ============#
            # (1) Log the scalar values
            info = {
                'loss': train_loss.data[0],
                'val_loss': loss_test_hist[-1]
            }

            for tag, value in info.items():
                logger.scalar_summary(tag, value, epoch+1)

            # (2) Log values and gradients of the parameters (histogram)
            for tag, value in model.named_parameters():
                tag = tag.replace('.', '/')
                logger.histo_summary(tag, to_np(value), epoch+1)
                logger.histo_summary(tag+'/grad', to_np(value.grad), epoch+1)
            #=============================================#
            
        iter_cnt += 1
    if epoch % save_interval == 0:
        save_filename = '{}/{}_h{}_epoch{}'.format(savepath, exp_name, hidden_size, epoch)
        torch.save(model, save_filename)
        print('saving.... by now Elapsed time: ', time.time() - start_time)
    eidx += 1

Epoch: 3	train_loss: 0.045522
		Test set loss: 0.008930
Epoch: 7	train_loss: 0.043307
		Test set loss: 0.008070
64.4683749676
Epoch: 11	train_loss: 0.040625
		Test set loss: 0.006362
Epoch: 15	train_loss: 0.043230
		Test set loss: 0.004274
Epoch: 19	train_loss: 0.040220
		Test set loss: 0.004027
145.422455788
Epoch: 23	train_loss: 0.040594
		Test set loss: 0.004227
Epoch: 27	train_loss: 0.034258
		Test set loss: 0.003843


In [30]:
iter_cnt

10000

In [None]:
class simpleFWLSTM(nn.Module):
    def __init__(self, D_in, lstm_dim, num_layers=1,
                use_bias=True, batch_first=False, dropout=0, **kwargs):
        super(simpleFWLSTM, self).__init__()
        self.D_in = D_in
        self.lsmt_dim = lstm_dim
        self.embD = embD
        self.num_layers = num_layers
        self.encoder = nn.Linear(D_in,embD)
        self.lstm = nn.LSTMCell(embD, lstm_dim)
        self.decoder = nn.Linear(lstm_dim, D_in)
        
    def forward(self, x, future = 0):
        
        outputs = []
        embVec = self.encoder(x, self.embD)
        hx,cx = self.lstm(embVec, self.lstm_dim)
        for i, input_t in enumerate(input.chunk(input.size(1), dim=1)):
            h_t, c_t = self.lstm(input_t, (h_t, c_t))
            h_t2, c_t2 = self.lstm2(c_t, (h_t2, c_t2))
            outputs += [c_t2]
        #for i in range(future):# if we should predict the future
        #    h_t, c_t = self.lstm1(c_t2, (h_t, c_t))
        #    h_t2, c_t2 = self.lstm2(c_t, (h_t2, c_t2))
        #    outputs += [c_t2]
        outputs = torch.stack(outputs, 1).squeeze(2)
        return outputs    
