In [10]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import argparse
import math
import time
import numpy as np;
import importlib

import torch.optim
from torch.autograd import Variable


In [11]:
class LSTNet(nn.Module):
    def __init__(self, args, data):
        super(LSTNet, self).__init__()
        self.P = args.window;
        self.m = data.m
        self.hidR = args.hidRNN;
        self.hidC = args.hidCNN;
        self.hidS = args.hidSkip;
        self.Ck = args.CNN_kernel;
        self.skip = args.skip;
        self.pt = int((self.P - self.Ck)/self.skip)
        self.hw = args.highway_window
        self.conv1 = nn.Conv2d(1, self.hidC, kernel_size = (self.Ck, self.m));
        self.GRU1 = nn.GRU(self.hidC, self.hidR);
        self.dropout = nn.Dropout(p = args.dropout);
        if (self.skip > 0):
            self.GRUskip = nn.GRU(self.hidC, self.hidS);
            self.linear1 = nn.Linear(self.hidR + self.skip * self.hidS, self.m);
        else:
            self.linear1 = nn.Linear(self.hidR, self.m);
        self.output = None;
        if (args.output_fun == 'sigmoid'):
            self.output = F.sigmoid;
        if (args.output_fun == 'tanh'):
            self.output = F.tanh;
 
    def forward(self, x):
        batch_size = x.size(0);
        
        #CNN
        c = x.view(-1, 1, self.P, self.m);
        c = F.relu(self.conv1(c));
        c = self.dropout(c);
        c = torch.squeeze(c, 3);
        
        # RNN 
        r = c.permute(2, 0, 1).contiguous();
        _, r = self.GRU1(r);
        r = self.dropout(torch.squeeze(r,0));

        
        #skip-rnn
        
        if (self.skip > 0):
            s = c[:,:, int(-self.pt * self.skip):].contiguous();
            s = s.view(batch_size, self.hidC, self.pt, self.skip);
            s = s.permute(2,0,3,1).contiguous();
            s = s.view(self.pt, batch_size * self.skip, self.hidC);
            _, s = self.GRUskip(s);
            s = s.view(batch_size, self.skip * self.hidS);
            s = self.dropout(s);
            r = torch.cat((r,s),1);
        
        res = self.linear1(r);
        
            
        if (self.output):
            res = self.output(res);
        return res;

In [12]:
def normal_std(x):
    return x.std() * np.sqrt((len(x) - 1.)/(len(x)))

class Data_utility(object):
    # train and valid is the ratio of training set and validation set. test = 1 - train - valid
    def __init__(self, file_name, train, valid,  horizon, window, normalize = 2):
#         self.cuda = cuda;
        self.P = window
        self.h = horizon
        fin = open(file_name)
        self.rawdat = np.loadtxt(fin,delimiter=',')
        self.dat = np.zeros(self.rawdat.shape)
        self.n, self.m = self.dat.shape
        self.normalize = 2
        self.scale = np.ones(self.m)
        self._normalized(normalize)
        self._split(int(train * self.n), int((train+valid) * self.n), self.n)
        self.scale = torch.from_numpy(self.scale).float()
        tmp = self.test[1] * self.scale.expand(self.test[1].size(0), self.m)

        self.rse = normal_std(tmp)
        self.rae = torch.mean(torch.abs(tmp - torch.mean(tmp)))
    
    def _normalized(self, normalize):
        #normalized by the maximum value of entire matrix.
       
        if (normalize == 0):
            self.dat = self.rawdat
            
        if (normalize == 1):
            self.dat = self.rawdat / np.max(self.rawdat)
            
        #normlized by the maximum value of each row(sensor).
        if (normalize == 2):
            for i in range(self.m):
                self.scale[i] = np.max(np.abs(self.rawdat[:,i]))
                self.dat[:,i] = self.rawdat[:,i] / np.max(np.abs(self.rawdat[:,i]))
            
        
    def _split(self, train, valid, test):
        
        train_set = range(self.P+self.h-1, train)
        valid_set = range(train, valid)
        test_set = range(valid, self.n)
        self.train = self._batchify(train_set, self.h)
        self.valid = self._batchify(valid_set, self.h)
        self.test = self._batchify(test_set, self.h)
        
        
    def _batchify(self, idx_set, horizon):
        
        n = len(idx_set)
        X = torch.zeros((n,self.P,self.m))
        Y = torch.zeros((n,self.m))
        
        for i in range(n):
            end = idx_set[i] - self.h + 1
            start = end - self.P
            X[i,:,:] = torch.from_numpy(self.dat[start:end, :])
            Y[i,:] = torch.from_numpy(self.dat[idx_set[i], :])

        return [X, Y]

    def get_batches(self, inputs, targets, batch_size, shuffle=True):
        length = len(inputs)
        if shuffle:
            index = torch.randperm(length)
        else:
            index = torch.LongTensor(range(length))
        start_idx = 0

        while (start_idx < length):
            end_idx = min(length, start_idx + batch_size)
            excerpt = index[start_idx:end_idx]
            X = inputs[excerpt]; Y = targets[excerpt]
#             if (self.cuda):
#                 X = X.cuda()
#                 Y = Y.cuda()  
            yield Variable(X), Variable(Y)
            start_idx += batch_size

In [13]:
def evaluate(data, X, Y, model, evaluateL2, evaluateL1, batch_size):
    model.eval()
    total_loss = 0
    total_loss_l1 = 0
    n_samples = 0
    predict = None
    test = None
    
    for X, Y in data.get_batches(X, Y, batch_size, False):
        output = model(X)
        if predict is None:
            predict = output
            test = Y
        else:
            predict = torch.cat((predict,output))
            test = torch.cat((test, Y))
        
        scale = data.scale.expand(output.size(0), data.m)
        total_loss += evaluateL2(output * scale, Y * scale).data
        total_loss_l1 += evaluateL1(output * scale, Y * scale).data
        n_samples += (output.size(0) * data.m)
    rse = math.sqrt(total_loss / n_samples)/data.rse
    rae = (total_loss_l1/n_samples)/data.rae
    
    predict = predict.data.cpu().numpy()
    Ytest = test.data.cpu().numpy()
    sigma_p = (predict).std(axis = 0)
    sigma_g = (Ytest).std(axis = 0)
    mean_p = predict.mean(axis = 0)
    mean_g = Ytest.mean(axis = 0)
    index = (sigma_g!=0)
    correlation = ((predict - mean_p) * (Ytest - mean_g)).mean(axis = 0)/(sigma_p * sigma_g)
    correlation = (correlation[index]).mean()
    return rse, rae, correlation

def train(data, X, Y, model, criterion, batch_size):
    model.train()
    total_loss = 0
    n_samples = 0
    for X, Y in data.get_batches(X, Y, batch_size, False):
        model.zero_grad()
        output = model(X)
        scale = data.scale.expand(output.size(0), data.m)
        loss = criterion(output * scale, Y * scale)
        loss.backward()
#         grad_norm = optim.step()
        total_loss += loss.data
        n_samples += (output.size(0) * data.m)
    return total_loss

In [14]:
class Arguments():
    def __init__(self,data,hidCNN=100,hidRNN=100,window=35,CNN_kernel=6,highway_window=24,clip=10,epochs=100,batch_size=128,dropout=0.2,seed=54321,save="save.pt",optim="adam",lr=0.001,horizon=12,skip=24,hidskip=5,L1loss=True,normalize=2,output_fun="sigmoid"):
        self.data=data
        self.hidCNN=hidCNN
        self.hidRNN=hidRNN
        self.window=window
        self.CNN_kernel=CNN_kernel
        self.highway_window=highway_window
        self.clip=clip
        self.epochs=epochs
        self.batch_size=batch_size
        self.dropout=dropout
        self.seed=seed
        self.optim=optim
        self.lr=lr
        self.skip=skip
        self.normalize=normalize
        self.horizon=horizon
        self.save=save
        self.output_fun=output_fun
        self.hidSkip=hidskip
        self.L1Loss=L1loss

In [28]:
args=Arguments(horizon=24,hidCNN=50, hidRNN=50,L1loss=True,data="data/exchange_rate.txt",save="save/exch.pt",output_fun=None)


In [29]:
Data = Data_utility(args.data, 0.6, 0.2, args.horizon, args.window, args.normalize);
print(Data.rse)

tensor(0.4558)


In [30]:
model = LSTNet(args, Data)
nParams = sum([p.nelement() for p in model.parameters()])
print('* number of parameters: %d' % nParams)

if args.L1Loss:
    criterion = nn.L1Loss(size_average=False);
else:
    criterion = nn.MSELoss(size_average=False);
evaluateL2 = nn.MSELoss(size_average=False);
evaluateL1 = nn.L1Loss(size_average=False)

best_val = 10000000;

# optim = Optim.Optim(
#     model.parameters(), args.optim, args.lr, args.clip,
# )


* number of parameters: 19973


In [31]:

try:
    print('begin training');
    for epoch in range(1, args.epochs+1):
        epoch_start_time = time.time()
        train_loss = train(Data, Data.train[0], Data.train[1], model, criterion, args.batch_size)
        val_loss, val_rae, val_corr = evaluate(Data, Data.valid[0], Data.valid[1], model, evaluateL2, evaluateL1, args.batch_size);
        print('| end of epoch {:3d} | time: {:5.2f}s | train_loss {:5.4f} | valid rse {:5.4f} | valid rae {:5.4f} | valid corr  {:5.4f}'.format(epoch, (time.time() - epoch_start_time), train_loss, val_loss, val_rae, val_corr))
        # Save the model if the validation loss is the best we've seen so far.

        if val_loss < best_val:
            with open(args.save, 'wb') as f:
                torch.save(model, f)
            best_val = val_loss
        if epoch % 5 == 0:
            test_acc, test_rae, test_corr  = evaluate(Data, Data.test[0], Data.test[1], model, evaluateL2, evaluateL1, args.batch_size);
            print ("test rse {:5.4f} | test rae {:5.4f} | test corr {:5.4f}".format(test_acc, test_rae, test_corr))

except KeyboardInterrupt:
    print('-' * 89)
    print('Exiting from training early')

# Load the best saved model.
with open(args.save, 'rb') as f:
    model = torch.load(f)
test_acc, test_rae, test_corr  = evaluate(Data, Data.test[0], Data.test[1], model, evaluateL2, evaluateL1, args.batch_size);
print ("test rse {:5.4f} | test rae {:5.4f} | test corr {:5.4f}".format(test_acc, test_rae, test_corr))


begin training
| end of epoch   1 | time:  1.42s | train_loss 21752.0898 | valid rse 1.9185 | valid rae 2.1628 | valid corr  0.0092
| end of epoch   2 | time:  1.34s | train_loss 21764.4180 | valid rse 1.9185 | valid rae 2.1628 | valid corr  0.0092
| end of epoch   3 | time:  1.34s | train_loss 21755.4512 | valid rse 1.9185 | valid rae 2.1628 | valid corr  0.0092
| end of epoch   4 | time:  1.34s | train_loss 21762.7930 | valid rse 1.9185 | valid rae 2.1628 | valid corr  0.0092
| end of epoch   5 | time:  1.31s | train_loss 21751.4375 | valid rse 1.9185 | valid rae 2.1628 | valid corr  0.0092
test rse 1.8149 | test rae 2.0701 | test corr 0.0042
| end of epoch   6 | time:  1.34s | train_loss 21754.3965 | valid rse 1.9185 | valid rae 2.1628 | valid corr  0.0092
| end of epoch   7 | time:  1.34s | train_loss 21716.2441 | valid rse 1.9185 | valid rae 2.1628 | valid corr  0.0092
| end of epoch   8 | time:  1.31s | train_loss 21749.4043 | valid rse 1.9185 | valid rae 2.1628 | valid corr  0.0

| end of epoch  66 | time:  1.40s | train_loss 21778.3574 | valid rse 1.9185 | valid rae 2.1628 | valid corr  0.0092
| end of epoch  67 | time:  1.37s | train_loss 21725.3438 | valid rse 1.9185 | valid rae 2.1628 | valid corr  0.0092
| end of epoch  68 | time:  1.36s | train_loss 21757.1133 | valid rse 1.9185 | valid rae 2.1628 | valid corr  0.0092
| end of epoch  69 | time:  1.29s | train_loss 21737.8867 | valid rse 1.9185 | valid rae 2.1628 | valid corr  0.0092
| end of epoch  70 | time:  1.33s | train_loss 21758.8594 | valid rse 1.9185 | valid rae 2.1628 | valid corr  0.0092
test rse 1.8149 | test rae 2.0701 | test corr 0.0042
| end of epoch  71 | time:  1.34s | train_loss 21690.5840 | valid rse 1.9185 | valid rae 2.1628 | valid corr  0.0092
| end of epoch  72 | time:  1.23s | train_loss 21788.3711 | valid rse 1.9185 | valid rae 2.1628 | valid corr  0.0092
| end of epoch  73 | time:  1.35s | train_loss 21762.7129 | valid rse 1.9185 | valid rae 2.1628 | valid corr  0.0092
| end of ep