In [31]:
import pandas as pd
import datetime
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import numpy as np
import argparse
from copy import deepcopy # Add Deepcopy for args
from sklearn.metrics import mean_absolute_error

import seaborn as sns
import matplotlib.pyplot as plt

# Report the installed PyTorch version in the cell output.
print(torch.__version__)
# Render matplotlib figures inline in the notebook.
%matplotlib inline
# NOTE(review): `%pylab inline` populates the interactive namespace from numpy
# and matplotlib (see the cell output); the `pylab` name used on the next line
# is presumably provided by this magic -- confirm before removing it.
%pylab inline
# Default matplotlib figure size as (width, height).
pylab.rcParams['figure.figsize'] = (15, 9)
# NOTE(review): `py` is not referenced in the cells visible here -- confirm it is used elsewhere.
import chart_studio.plotly as py
import cufflinks as cf
# Put cufflinks in offline mode; the `DataFrame.iplot` call in the plotting
# cell depends on cufflinks being imported.
cf.go_offline(connected=True)

1.7.1
Populating the interactive namespace from numpy and matplotlib


In [3]:
class CoinDataset(Dataset):
    """Sliding-window dataset over price rows read from a CSV file.

    The CSV must contain a 'Timestamp' column (used as the index and sliced
    by 'YYYY-MM-DD' labels) and the columns 'Close', 'Open', 'High', 'Low'.

    Each item is a window of ``x_frames + y_frames`` consecutive rows whose
    four price columns are converted to log-returns relative to the last
    input row: ``log(p + 1) - log(p_last_x + 1)``.

    Args:
        csv_path: path of the CSV file to read.
        x_frames: number of input rows per window (must be >= 1).
        y_frames: number of target rows per window.
        start: date as a tuple accepted by ``datetime.datetime(*start)``,
            e.g. ``(2020, 1, 1)``; first label kept (inclusive).
        end: same format as ``start``; last label kept (inclusive).

    Raises:
        ValueError: if ``x_frames`` is smaller than 1.
    """

    # Columns returned in X / y, in this order.
    FEATURE_COLUMNS = ['Close', 'Open', 'High', 'Low']

    def __init__(self, csv_path, x_frames, y_frames, start, end):
        if x_frames < 1:
            raise ValueError('x_frames must be at least 1')
        self.x_frames = x_frames
        self.y_frames = y_frames

        _start = datetime.datetime(*start).strftime('%Y-%m-%d')
        _end = datetime.datetime(*end).strftime('%Y-%m-%d')

        _data = pd.read_csv(csv_path).set_index('Timestamp')
        # Explicit label-based slice on the Timestamp index.
        self.data = _data.loc[_start:_end]

        print(f'Dataset length: {len(self.data)}')

    def __len__(self):
        """Number of complete windows; 0 when the data is shorter than one window."""
        # Clamp at zero: a negative value would make len() raise ValueError.
        return max(0, len(self.data) - (self.x_frames + self.y_frames) + 1)

    def __getitem__(self, idx):
        """Return ``(X, y, price)`` for the window starting at row ``idx``.

        Returns:
            X: float array of shape (x_frames, 4) with input log-returns.
            y: float array of shape (y_frames, 4) with target log-returns.
            price: values of every CSV column of the last input row (the row
                the log-returns are relative to).

        Raises:
            IndexError: if ``idx`` is outside ``[-len(self), len(self))``.
        """
        size = len(self)
        if idx < 0:
            idx += size
        if not 0 <= idx < size:
            # Without this check an out-of-range idx yields a silently truncated window.
            raise IndexError('CoinDataset index out of range')

        anchor = self.x_frames - 1  # position of the last input row inside the window
        window = self.data.iloc[idx:idx + self.x_frames + self.y_frames]
        price = self.data.iloc[idx + anchor]

        # Work on the array so the anchor row is selected by position; an
        # integer lookup on a Timestamp-labelled Series relies on pandas'
        # deprecated positional fallback.
        log_prices = np.log(window[self.FEATURE_COLUMNS].to_numpy(dtype=float) + 1)
        log_returns = log_prices - log_prices[anchor]

        X = log_returns[:self.x_frames]
        y = log_returns[self.x_frames:]

        return X, y, price.values


In [4]:
# Model Define
class LSTM(nn.Module):
    """Stacked LSTM followed by a small fully connected regressor.

    The input to ``forward`` is passed to ``nn.LSTM`` with its default
    layout (sequence first), and the output of the last time step is fed to
    the regressor.

    Args:
        input_dim: number of features per time step.
        hidden_dim: LSTM hidden size.
        output_dim: number of values predicted per sample.
        num_layers: number of stacked LSTM layers.
        batch_size: default batch size used by ``init_hidden``.
        dropout: dropout probability applied before the regressor's linear layers.
        use_bn: if true, a ``BatchNorm1d`` layer is placed in front of the regressor.

    The hidden state is kept in ``self.hidden`` and is overwritten by every
    ``forward`` call; callers that want independent samples should assign
    ``model.hidden = model.init_hidden()`` before each call.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers, batch_size, dropout, use_bn):
        super(LSTM, self).__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.num_layers = num_layers

        self.batch_size = batch_size
        self.dropout = dropout
        self.use_bn = use_bn

        self.lstm = nn.LSTM(self.input_dim, self.hidden_dim, self.num_layers)
        self.hidden = self.init_hidden()
        self.regressor = self.make_regressor()

    def init_hidden(self, batch_size=None, device=None):
        """Return a zero ``(h_0, c_0)`` pair for the LSTM.

        Args:
            batch_size: batch dimension of the state; defaults to ``self.batch_size``.
            device: device to allocate on; defaults to torch's default device.
        """
        if batch_size is None:
            batch_size = self.batch_size
        shape = (self.num_layers, batch_size, self.hidden_dim)
        return (torch.zeros(shape, device=device),
                torch.zeros(shape, device=device))

    def make_regressor(self):
        """Build the head: [BatchNorm] -> Dropout -> Linear -> ReLU -> Linear."""
        layers = []
        if self.use_bn:
            layers.append(nn.BatchNorm1d(self.hidden_dim))
        layers.append(nn.Dropout(self.dropout))

        layers.append(nn.Linear(self.hidden_dim, self.hidden_dim // 2))
        layers.append(nn.ReLU())
        layers.append(nn.Linear(self.hidden_dim // 2, self.output_dim))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Predict from ``x`` of shape (seq_len, batch, input_dim).

        Returns a tensor of shape (batch, output_dim).
        """
        batch = x.size(1)
        # self.hidden is a plain attribute, so model.to(device) does not move
        # it; bring it to the input's device here (no-op when already there).
        hidden = tuple(state.to(x.device) for state in self.hidden)
        if hidden[0].size(1) != batch:
            # Stored state was built for another batch size (e.g. a final
            # partial batch): start this batch from a fresh zero state.
            hidden = self.init_hidden(batch_size=batch, device=x.device)

        lstm_out, self.hidden = self.lstm(x, hidden)
        # lstm_out[-1] is already (batch, hidden_dim); reshaping with the
        # constructor-time batch size would break for any other batch size.
        y_pred = self.regressor(lstm_out[-1])
        return y_pred

In [75]:
# Load Model
MODEL_PATH = './models/test2.tar'
# NOTE(review): torch.load unpickles arbitrary Python objects (here the
# argparse Namespace stored under 'args'); only load checkpoints you trust.
# map_location='cpu' lets the checkpoint load on machines without the GPU it
# was saved from; load_state_dict copies the weights onto the model's device.
checkpoint = torch.load(MODEL_PATH, map_location='cpu')
args = checkpoint['args']
print(args)
args.batch_size = 1  # evaluate one window at a time
if str(args.device).startswith('cuda') and not torch.cuda.is_available():
    # The checkpoint stores the training device; fall back when CUDA is absent.
    args.device = 'cpu'

# Load Dataset
DATASET_PATH = '/home/greenstar/sensorcloud/LSTM-playground/coin_desk_data.csv'
testset = CoinDataset(DATASET_PATH, args.x_frames, args.y_frames, (2020,1,1), (2021,1,12))

# Model Define
model = LSTM(args.input_dim, args.hid_dim, args.y_frames, args.n_layers, args.batch_size, args.dropout, args.use_bn)
model.to(args.device)
# Optimizer Define
# The optimizer is only rebuilt so its saved state can be restored; it is not
# stepped in this cell.
if args.optim == 'SGD':
    # Previously this branch built RMSprop by mistake.
    # NOTE(review): if the training script had the same mistake, an 'SGD'
    # checkpoint actually holds RMSprop state -- confirm against the trainer.
    optimizer = optim.SGD(model.parameters(), lr=args.lr, weight_decay=args.l2)
elif args.optim == 'RMSprop':
    optimizer = optim.RMSprop(model.parameters(), lr=args.lr, weight_decay=args.l2)
elif args.optim == 'Adam':
    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.l2)
else:
    raise ValueError('In-valid optimizer choice')

# Restore Model State
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

model.eval()

testloader = DataLoader(testset,
                        batch_size=args.batch_size,
                        shuffle=False, drop_last=True)

# Index of the last window to print. The loop stops after this window, so the
# X / y / y_pred / base_price left in the namespace belong to it (the plotting
# cell reads them).
LAST_PREVIEW_INDEX = 30

test_acc = 0.0
with torch.no_grad():
    for i, (X, y, price) in enumerate(testloader):

        # DataLoader yields (batch, seq, feature); the LSTM expects (seq, batch, feature).
        X = X.transpose(0, 1).float().to(args.device)
        y_true = y[:, :, 0].float().to(args.device)  # column 0 is 'Close'
        # Start every window from a zero hidden state.
        model.hidden = [hidden.to(args.device) for hidden in model.init_hidden()]

        y_pred = model(X)
        # First column of the last input row; X and y are log-returns relative
        # to that row, so exp(.) * base_price maps them back to prices.
        # NOTE(review): this assumes the first CSV column is 'Close' -- confirm.
        base_price = price[0][0].numpy()
        #print(np.exp(X[:, :, 0].cpu().detach().numpy()) * base_price)
        print(np.exp(y_pred.cpu().detach().numpy()) * base_price)
        print(np.exp(y[:, :, 0].numpy()) * base_price)
        if i == LAST_PREVIEW_INDEX:
            break
        #print(f'i: {i}, y_pred: {y_pred}')
        #test_acc += metric(y_pred, y_true)[0]

#test_acc = test_acc / len(testloader)


Namespace(batch_size=128, csv_path='/home/greenstar/sensorcloud/LSTM-playground/coin_desk_data.csv', device='cuda', dropout=0.0, epoch=100, exp_name='exp180_30', hid_dim=50, input_dim=4, l2=1e-05, lr=0.0001, n_layers=4, optim='Adam', use_bn=True, x_frames=180, y_frames=30)
Dataset length: 378
[[8768.538  9061.599  8464.683  9481.761  8805.284  9455.12   9655.404
  9343.487  8667.028  8546.325  8561.049  8655.079  8992.681  8177.4023
  9080.986  8114.343  9287.686  9767.201  9129.125  9080.374  9088.412
  9359.443  8885.333  9022.485  8985.848  9525.77   8526.945  9230.024
  9859.427  8579.242 ]]
[[ 9101.84122494  9188.04298859  9148.43086222  9236.2930356
   9097.78898311  9094.31656262  9124.64217476  9055.46079082
   9278.77719334  9244.39118212  9471.71014536  9236.11953811
   9243.15076422  9229.83673572  9286.70771938  9238.96972335
   9260.44456844  9211.00550089  9133.76545949  9160.39526904
   9176.66926387  9190.25290238  9172.57166051  9395.02311769
   9530.73451513  9617.311

In [76]:
# Convert the last evaluated window back from log-returns to prices.
# X, y, y_pred and base_price are the values left over from the final
# iteration of the evaluation loop in the previous cell.
X_data = np.exp(X[:, :, 0].cpu().detach().numpy()) * base_price
y_real_data = (np.exp(y[:, :, 0].numpy()) * base_price)[0]
y_pred_data = (np.exp(y_pred.cpu().detach().numpy()) * base_price)[0]

y_data = np.transpose([y_real_data, y_pred_data])

# Place the targets right after the inputs on the x-axis. The offsets come
# from the data itself instead of the hard-coded 180/210, so the cell keeps
# working for any x_frames / y_frames.
n_inputs = len(X_data)
n_targets = len(y_real_data)

df_y = pd.DataFrame(y_data, columns=['real', 'pred'],
                    index=range(n_inputs, n_inputs + n_targets))
df_x = pd.DataFrame(X_data, columns=['X'])

df = pd.concat([df_x, df_y])

df.iplot(kind='line')