In [1]:
import os
import time
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils import data
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from sklearn.preprocessing import StandardScaler
from datetime import datetime
totimestamp = lambda s: np.int32(time.mktime(datetime.strptime(s, "%d/%m/%Y").timetuple()))
train_window = [totimestamp("01/01/2019"), totimestamp("30/05/2021")]
test_window = [totimestamp("01/06/2021"), totimestamp("30/06/2021")]
BATCH_SIZE    = 10
SEQ_LENGTH    = 2160
EPOCHS        = 40
DROPOUT       = 0.1
NUM_LAYERS    = 10
INPUT_DIM     = 238
OUTPUT_SIZE   = 1
HIDDEN_SIZE   = 60
LEARNING_RATE = 0.001
STATE_DIM     = NUM_LAYERS, BATCH_SIZE, HIDDEN_SIZE

ModuleNotFoundError: No module named 'pandas'

In [2]:
df = pd.read_csv('./g-research-crypto-forecasting/train.csv')
df.dropna(axis = 0, inplace = True)

def log_return(series, periods=1):
    return np.log(series).diff(periods=periods)

data = df.set_index("timestamp")
upper_shadow = lambda asset: asset.High - np.maximum(asset.Close,asset.Open)
lower_shadow = lambda asset: np.minimum(asset.Close,asset.Open)- asset.Low
gap          = lambda asset: asset.High - asset.Low
trend        = lambda asset: asset.Close - asset.Open

In [3]:
inform_list = []
for i in range(14):
    inform_list.extend([
        log_return(data[data["Asset_ID"]==i].VWAP,periods=1), \
        log_return(data[data["Asset_ID"]==i].VWAP,periods=2), \
        log_return(data[data["Asset_ID"]==i].VWAP,periods=3), \
        log_return(data[data["Asset_ID"]==i].VWAP,periods=4), \
        log_return(data[data["Asset_ID"]==i].VWAP,periods=5), \
        log_return(data[data["Asset_ID"]==i].VWAP,periods=6), \
        log_return(data[data["Asset_ID"]==i].VWAP,periods=7), \
        log_return(data[data["Asset_ID"]==i].VWAP,periods=8), \
        log_return(data[data["Asset_ID"]==i].VWAP,periods=9), \
        log_return(data[data["Asset_ID"]==i].VWAP,periods=10), \
        log_return(data[data["Asset_ID"]==i].VWAP,periods=50), \
        log_return(data[data["Asset_ID"]==i].VWAP,periods=100), \
        log_return(data[data["Asset_ID"]==i].VWAP,periods=500), \
        upper_shadow(data[data["Asset_ID"]==i]), \
        lower_shadow(data[data["Asset_ID"]==i]), \
        gap(data[data["Asset_ID"]==i]), \
        trend(data[data["Asset_ID"]==i])
    ])
X = pd.concat(inform_list, axis = 1)
y = data[data["Asset_ID"]==0].Target
X = X.loc[y.index].replace([np.inf, -np.inf], np.nan).fillna(0)
X_train = X.loc[train_window[0]:train_window[1]].fillna(0).to_numpy()  # filling NaN's with zeros
y_train = y.loc[train_window[0]:train_window[1]].fillna(0).to_numpy()
X_test = X.loc[test_window[0]:test_window[1]].fillna(0).to_numpy() 
y_test = y.loc[test_window[0]:test_window[1]].fillna(0).to_numpy() 
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
dl = DataLoader(X_train_scaled, batch_size = 64, shuffle = True, drop_last = False)

  result = getattr(ufunc, method)(*inputs, **kwargs)


In [4]:
class AE(nn.Module):
    """
    #dim ==> 8 ==> #dim
    """
    def __init__(self):
        super().__init__()
        
        self.encoder = nn.Sequential(
            nn.Linear(238, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 16),
            nn.ReLU(),
            nn.Linear(16, 8)
        )
        
        self.decoder = nn.Sequential(
            nn.Linear(8, 16),
            nn.ReLU(),
            nn.Linear(16, 32),
            nn.ReLU(),
            nn.Linear(32, 64),
            nn.ReLU(),
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Linear(128, 238),
            nn.Sigmoid()
        )
        
    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return encoded, decoded

In [5]:
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")

model = AE().to(device)
criteon = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

for epoch in tqdm(range(100)):
    running_training_loss = 0.0
    for idx, (x_batch) in enumerate(dl):
        x_batch = x_batch.float().to(device)
        optimizer.zero_grad()
        _, output = model(x_batch)
        loss = criteon(output, x_batch)
        optimizer.zero_grad()
        loss.backward()
        running_training_loss += loss.item()
        optimizer.step()
    if epoch % 10 == 0:
        training_loss = running_training_loss/idx
        print(epoch, 'loss:', training_loss)

  1%|▍                                          | 1/100 [00:26<43:47, 26.54s/it]

0 loss: 0.8977740478416228


 11%|████▌                                     | 11/100 [04:47<38:09, 25.73s/it]

10 loss: 0.8145235352109436


 21%|████████▊                                 | 21/100 [09:06<34:25, 26.15s/it]

20 loss: 0.8066206114010864


 31%|█████████████                             | 31/100 [13:25<29:53, 26.00s/it]

30 loss: 0.8032873481189333


 41%|█████████████████▏                        | 41/100 [17:47<26:11, 26.63s/it]

40 loss: 0.8018188981833761


 51%|█████████████████████▍                    | 51/100 [22:13<21:42, 26.57s/it]

50 loss: 0.8008817999879263


 61%|█████████████████████████▌                | 61/100 [26:39<17:21, 26.70s/it]

60 loss: 0.8001096879772592


 71%|█████████████████████████████▊            | 71/100 [31:01<12:33, 25.98s/it]

70 loss: 0.7996038359066329


 81%|██████████████████████████████████        | 81/100 [35:23<08:18, 26.24s/it]

80 loss: 0.7994603278748269


 91%|██████████████████████████████████████▏   | 91/100 [39:52<04:03, 27.07s/it]

90 loss: 0.7992111459549432


100%|█████████████████████████████████████████| 100/100 [43:48<00:00, 26.29s/it]
