In [8]:
import os
import time
import numpy as np
import pandas as pd
from datetime import datetime
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils import data
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from sklearn.preprocessing import StandardScaler

# Load the raw training table and discard every row that has a missing value
# in any column, so later feature code never sees NaN inputs from the file.
df = pd.read_csv('/kaggle/input/g-research-crypto-forecasting/train.csv').dropna(axis=0)

def totimestamp(s):
    """Convert a ``"dd/mm/YYYY"`` date string to a Unix timestamp in seconds.

    The date is interpreted as midnight UTC. The elapsed time since the epoch
    is computed by subtracting two naive datetimes, so the result does not
    depend on the time zone configured on the machine running the notebook
    (``time.mktime`` would interpret the date in local time).

    Parameters
    ----------
    s : str
        Date formatted as day/month/year, e.g. ``"01/05/2021"``.

    Returns
    -------
    numpy.int32
        Whole seconds elapsed since 1970-01-01 00:00:00.

    Raises
    ------
    ValueError
        If ``s`` does not match the ``%d/%m/%Y`` format.
    """
    midnight = datetime.strptime(s, "%d/%m/%Y")
    elapsed = midnight - datetime(1970, 1, 1)
    return np.int32(int(elapsed.total_seconds()))

In [9]:
# [start, end] timestamps (in seconds) that bound the training and evaluation
# periods; they are later used as label-based slice bounds on the time index.
train_window = [totimestamp(day) for day in ("01/05/2021", "30/05/2021")]
test_window = [totimestamp(day) for day in ("01/06/2021", "30/06/2021")]

# Number of consecutive rows in each input window, and windows per mini-batch.
SEQ_LENGTH = 20
BATCH_SIZE = 5

def log_return(series, periods=1):
    """Return the difference of ``log(series)`` over ``periods`` rows.

    The first ``periods`` entries of the result are NaN because they have no
    earlier row to be compared with.
    """
    log_values = np.log(series)
    return log_values.diff(periods=periods)

def create_xy_pairs(X_series, y_series, seq_length):
    """Build sliding-window ``(x, y)`` samples from aligned sequences.

    For every start position ``i`` in ``range(len(X_series) - seq_length)``
    the sample is ``x = X_series[i : i + seq_length]`` paired with the
    length-one slice ``y = y_series[i + seq_length : i + seq_length + 1]``,
    i.e. the target that immediately follows the window.

    Returns a list of ``(x, y)`` tuples; the list is empty when the input is
    not longer than ``seq_length``.
    """
    n_windows = len(X_series) - seq_length
    return [
        (X_series[start:start + seq_length],
         y_series[start + seq_length:start + seq_length + 1])
        for start in range(n_windows)
    ]

In [10]:
# Index the table by timestamp so rows can be selected with time-window slices.
data = df.set_index("timestamp")


def upper_shadow(asset):
    """High minus the larger of Close and Open, row by row."""
    return asset.High - np.maximum(asset.Close, asset.Open)


def lower_shadow(asset):
    """The smaller of Close and Open minus Low, row by row."""
    return np.minimum(asset.Close, asset.Open) - asset.Low


# Asset_ID values whose features are stacked side by side (0..13).
ASSET_IDS = range(14)


def _build_feature_matrix(frame, asset_ids):
    """Concatenate four feature series per asset, column-wise.

    For every id in ``asset_ids`` the rows of ``frame`` with that ``Asset_ID``
    are selected once and contribute, in this order: the 5-row VWAP log
    return, the absolute 1-row VWAP log return, the upper shadow and the
    lower shadow. Series are aligned on the frame's index by ``pd.concat``.
    """
    columns = []
    for asset_id in asset_ids:
        # Filter once per asset instead of once per feature.
        asset = frame[frame["Asset_ID"] == asset_id]
        columns.extend([
            log_return(asset.VWAP, periods=5),
            log_return(asset.VWAP, periods=1).abs(),
            upper_shadow(asset),
            lower_shadow(asset),
        ])
    return pd.concat(columns, axis=1)


X = _build_feature_matrix(data, ASSET_IDS)

# The prediction target is the Target column of asset 0.
y = data[data["Asset_ID"] == 0].Target

# Keep only timestamps for which a target exists; gaps left by the alignment
# (assets without a row at that timestamp, leading diff rows) become zeros.
X = X.loc[y.index].fillna(0)

# Label-based slices on the timestamp index; X has no NaN left at this point.
X_train = X.loc[train_window[0]:train_window[1]].to_numpy()
y_train = y.loc[train_window[0]:train_window[1]].fillna(0).to_numpy()

X_test = X.loc[test_window[0]:test_window[1]].to_numpy()
y_test = y.loc[test_window[0]:test_window[1]].fillna(0).to_numpy()

# Fit the scaler on the training window only, then reuse its statistics for
# the test window so no test information leaks into the scaling.
scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Sliding windows of SEQ_LENGTH rows, each paired with the next target value.
train = create_xy_pairs(X_train_scaled, y_train, SEQ_LENGTH)
test = create_xy_pairs(X_test_scaled, y_test, SEQ_LENGTH)

# Order is preserved (shuffle=False) and a trailing partial batch is dropped,
# so every batch has exactly BATCH_SIZE windows.
train_dl = DataLoader(train, batch_size=BATCH_SIZE, shuffle=False, drop_last=True)
test_dl = DataLoader(test, batch_size=BATCH_SIZE, shuffle=False, drop_last=True)

In [11]:
# Run on the first CUDA device when one is available, otherwise on the CPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Hyper-parameters for the 2-layer, 200-unit experiment.
EPOCHS = 100
DROPOUT = 0.1
NUM_LAYERS = 2
INPUT_DIM = 56  # 14 assets x 4 features per asset
OUTPUT_SIZE = 1
HIDDEN_SIZE = 200
LEARNING_RATE = 0.0001
# Shape of each recurrent state tensor: (layers, batch, hidden).
STATE_DIM = (NUM_LAYERS, BATCH_SIZE, HIDDEN_SIZE)

class LSTM(nn.Module):
    """Stacked LSTM followed by a linear head applied at every time step.

    Parameters
    ----------
    input_size : int
        Number of features per time step.
    hidden_size : int
        Width of each LSTM layer.
    num_layers : int
        Number of stacked LSTM layers.
    output_size : int
        Number of values produced per time step by the linear head.
    dropout_prob : float
        Dropout probability handed to ``nn.LSTM`` and to ``self.dropout``.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout_prob):
        super().__init__()

        self.num_layers = num_layers
        self.hidden_size = hidden_size

        # batch_first=True: inputs and outputs are (batch, seq, feature).
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout_prob)
        # Kept as an attribute for compatibility; forward() does not apply it.
        self.dropout = nn.Dropout(dropout_prob)
        self.linear = nn.Linear(hidden_size, output_size)

    def init_hidden(self, batch_size):
        """Return zero-filled ``(h_0, c_0)`` for a batch of ``batch_size``.

        Each tensor has shape ``(num_layers, batch_size, hidden_size)`` and is
        created on the device that holds the module's parameters, so the
        states always match the model regardless of any global device name.
        """
        state_dim = (self.num_layers, batch_size, self.hidden_size)
        param_device = self.linear.weight.device
        return (torch.zeros(state_dim, device=param_device),
                torch.zeros(state_dim, device=param_device))

    def forward(self, x, states):
        """Run the LSTM and project every time step through the linear head.

        Returns ``(out, (h, c))`` where ``out`` holds one ``output_size``
        vector per input time step and ``(h, c)`` are the final LSTM states.
        """
        x, (h, c) = self.lstm(x, states)
        out = self.linear(x)
        return out, (h, c)

# Build the network from the hyper-parameters above and move it to `device`.
model = LSTM(
    input_size=INPUT_DIM,
    hidden_size=HIDDEN_SIZE,
    num_layers=NUM_LAYERS,
    output_size=OUTPUT_SIZE,
    dropout_prob=DROPOUT,
).to(device)

criterion = nn.MSELoss()
# Optimise every trainable parameter. Passing only `model.linear.parameters()`
# would leave the LSTM layers frozen at their random initial weights even
# though gradients are computed (and clipped) for them during training.
optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=0.01)

In [12]:
def training(model, epochs, validate_every=2):
    """Train ``model`` on ``train_dl`` and periodically score it on ``test_dl``.

    The function reads these notebook-level names: ``optimizer``,
    ``criterion``, ``train_dl``, ``test_dl``, ``y_test``, ``BATCH_SIZE``,
    ``SEQ_LENGTH`` and ``device``.

    Parameters
    ----------
    model : nn.Module
        Must provide ``init_hidden(batch_size)`` and a forward pass
        ``model(x, states) -> (output, states)``.
    epochs : int
        Number of passes over ``train_dl``.
    validate_every : int, optional
        NOTE(review): accepted but not consulted; evaluation always runs
        every 5 epochs. Kept so existing calls stay valid.

    Returns
    -------
    None
        Prints the correlation between predictions and ``y_test`` at each
        evaluation and shows two plots (training loss, correlation).
    """
    eval_interval = 5  # epochs between evaluations on test_dl

    training_losses = []
    coefficient = []

    # Recurrent states have shape (num_layers, batch, hidden_size) and are
    # carried from batch to batch (and epoch to epoch) during training.
    states = model.init_hidden(BATCH_SIZE)

    for epoch in tqdm(range(epochs)):

        running_training_loss = 0.0

        model.train()

        for x_batch, y_batch in train_dl:
            x_batch = x_batch.float().to(device)
            y_batch = y_batch.float().to(device)

            # Truncated backpropagation: keep the state values but cut the
            # graph so gradients do not flow into earlier batches.
            states = tuple(state.detach() for state in states)

            optimizer.zero_grad()

            output, states = model(x_batch, states)

            # Only the prediction at the last time step is compared with y.
            loss = criterion(output[:, -1, :], y_batch)
            loss.backward()
            running_training_loss += loss.item()

            torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
            optimizer.step()

        # Mean loss per batch for this epoch.
        training_losses.append(running_training_loss / len(train_dl))

        if (epoch + 1) % eval_interval == 0:
            model.eval()

            pred = []
            # No gradients are needed for scoring; skipping graph
            # construction saves memory and time.
            with torch.no_grad():
                # Every test batch starts from the states left by training.
                validation_states = tuple(state.detach() for state in states)
                for x_batch, _ in test_dl:
                    x_batch = x_batch.float().to(device)
                    output, _ = model(x_batch, validation_states)
                    pred += output[:, -1, :].flatten().tolist()

            pred_btc = np.array(pred)
            # The first window's target sits SEQ_LENGTH rows into y_test; the
            # slice length follows pred because the loader drops a partial
            # final batch.
            test_btc = y_test[SEQ_LENGTH:len(pred_btc) + SEQ_LENGTH]
            coef = np.corrcoef(pred_btc, test_btc)[0, 1]
            coefficient.append(coef)
            print('Epoch %i : corrcoef is %f.' %(epoch+1,coef))

    # Visualize loss
    epoch_count = range(1, len(training_losses) + 1)
    plt.plot(epoch_count, training_losses, 'r--')
    plt.legend(['Training Loss'])
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.show()

    # One correlation value per evaluation, placed at its epoch number.
    coef_epoch_count = range(eval_interval, (len(coefficient) + 1) * eval_interval, eval_interval)
    plt.plot(coef_epoch_count, coefficient, 'b--')
    plt.xticks(coef_epoch_count)
    plt.legend(['Correlation Coefficient'])
    plt.xlabel('Epoch')
    plt.ylabel('Coefficient')
    plt.show()

In [13]:
# Train the 2-layer model for 100 epochs.
training(model, epochs=100)

In [14]:
# Run on the first CUDA device when one is available, otherwise on the CPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Hyper-parameters for the 3-layer, 100-unit experiment.
EPOCHS = 100
DROPOUT = 0.1
NUM_LAYERS = 3
INPUT_DIM = 56  # 14 assets x 4 features per asset
OUTPUT_SIZE = 1
HIDDEN_SIZE = 100
LEARNING_RATE = 0.0001
# Shape of each recurrent state tensor: (layers, batch, hidden).
STATE_DIM = (NUM_LAYERS, BATCH_SIZE, HIDDEN_SIZE)

class LSTM(nn.Module):
    """Stacked LSTM followed by a linear head applied at every time step.

    Parameters
    ----------
    input_size : int
        Number of features per time step.
    hidden_size : int
        Width of each LSTM layer.
    num_layers : int
        Number of stacked LSTM layers.
    output_size : int
        Number of values produced per time step by the linear head.
    dropout_prob : float
        Dropout probability handed to ``nn.LSTM`` and to ``self.dropout``.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout_prob):
        super().__init__()

        self.num_layers = num_layers
        self.hidden_size = hidden_size

        # batch_first=True: inputs and outputs are (batch, seq, feature).
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout_prob)
        # Kept as an attribute for compatibility; forward() does not apply it.
        self.dropout = nn.Dropout(dropout_prob)
        self.linear = nn.Linear(hidden_size, output_size)

    def init_hidden(self, batch_size):
        """Return zero-filled ``(h_0, c_0)`` for a batch of ``batch_size``.

        Each tensor has shape ``(num_layers, batch_size, hidden_size)`` and is
        created on the device that holds the module's parameters, so the
        states always match the model regardless of any global device name.
        """
        state_dim = (self.num_layers, batch_size, self.hidden_size)
        param_device = self.linear.weight.device
        return (torch.zeros(state_dim, device=param_device),
                torch.zeros(state_dim, device=param_device))

    def forward(self, x, states):
        """Run the LSTM and project every time step through the linear head.

        Returns ``(out, (h, c))`` where ``out`` holds one ``output_size``
        vector per input time step and ``(h, c)`` are the final LSTM states.
        """
        x, (h, c) = self.lstm(x, states)
        out = self.linear(x)
        return out, (h, c)

# Build the network from the hyper-parameters above and move it to `device`.
model = LSTM(
    input_size=INPUT_DIM,
    hidden_size=HIDDEN_SIZE,
    num_layers=NUM_LAYERS,
    output_size=OUTPUT_SIZE,
    dropout_prob=DROPOUT,
).to(device)

criterion = nn.MSELoss()
# Optimise every trainable parameter. Passing only `model.linear.parameters()`
# would leave the LSTM layers frozen at their random initial weights even
# though gradients are computed (and clipped) for them during training.
optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=0.01)

In [15]:
# Train the 3-layer model for 40 epochs.
training(model, epochs=40)

In [16]:
# Run on the first CUDA device when one is available, otherwise on the CPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Hyper-parameters for the 4-layer, 56-unit experiment.
EPOCHS = 100
DROPOUT = 0.1
NUM_LAYERS = 4
INPUT_DIM = 56  # 14 assets x 4 features per asset
OUTPUT_SIZE = 1
HIDDEN_SIZE = 56
LEARNING_RATE = 0.0001
# Shape of each recurrent state tensor: (layers, batch, hidden).
STATE_DIM = (NUM_LAYERS, BATCH_SIZE, HIDDEN_SIZE)

class LSTM(nn.Module):
    """Stacked LSTM followed by a linear head applied at every time step.

    Parameters
    ----------
    input_size : int
        Number of features per time step.
    hidden_size : int
        Width of each LSTM layer.
    num_layers : int
        Number of stacked LSTM layers.
    output_size : int
        Number of values produced per time step by the linear head.
    dropout_prob : float
        Dropout probability handed to ``nn.LSTM`` and to ``self.dropout``.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout_prob):
        super().__init__()

        self.num_layers = num_layers
        self.hidden_size = hidden_size

        # batch_first=True: inputs and outputs are (batch, seq, feature).
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout_prob)
        # Kept as an attribute for compatibility; forward() does not apply it.
        self.dropout = nn.Dropout(dropout_prob)
        self.linear = nn.Linear(hidden_size, output_size)

    def init_hidden(self, batch_size):
        """Return zero-filled ``(h_0, c_0)`` for a batch of ``batch_size``.

        Each tensor has shape ``(num_layers, batch_size, hidden_size)`` and is
        created on the device that holds the module's parameters, so the
        states always match the model regardless of any global device name.
        """
        state_dim = (self.num_layers, batch_size, self.hidden_size)
        param_device = self.linear.weight.device
        return (torch.zeros(state_dim, device=param_device),
                torch.zeros(state_dim, device=param_device))

    def forward(self, x, states):
        """Run the LSTM and project every time step through the linear head.

        Returns ``(out, (h, c))`` where ``out`` holds one ``output_size``
        vector per input time step and ``(h, c)`` are the final LSTM states.
        """
        x, (h, c) = self.lstm(x, states)
        out = self.linear(x)
        return out, (h, c)

# Build the network from the hyper-parameters above and move it to `device`.
model = LSTM(
    input_size=INPUT_DIM,
    hidden_size=HIDDEN_SIZE,
    num_layers=NUM_LAYERS,
    output_size=OUTPUT_SIZE,
    dropout_prob=DROPOUT,
).to(device)

criterion = nn.MSELoss()
# Optimise every trainable parameter. Passing only `model.linear.parameters()`
# would leave the LSTM layers frozen at their random initial weights even
# though gradients are computed (and clipped) for them during training.
optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=0.01)
# Train the 4-layer model for 50 epochs.
training(model, 50)

In [17]:
# Run on the first CUDA device when one is available, otherwise on the CPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Hyper-parameters for the 10-layer, 56-unit experiment.
EPOCHS = 100
DROPOUT = 0.1
NUM_LAYERS = 10
INPUT_DIM = 56  # 14 assets x 4 features per asset
OUTPUT_SIZE = 1
HIDDEN_SIZE = 56
LEARNING_RATE = 0.0001
# Shape of each recurrent state tensor: (layers, batch, hidden).
STATE_DIM = (NUM_LAYERS, BATCH_SIZE, HIDDEN_SIZE)

class LSTM(nn.Module):
    """Stacked LSTM followed by a linear head applied at every time step.

    Parameters
    ----------
    input_size : int
        Number of features per time step.
    hidden_size : int
        Width of each LSTM layer.
    num_layers : int
        Number of stacked LSTM layers.
    output_size : int
        Number of values produced per time step by the linear head.
    dropout_prob : float
        Dropout probability handed to ``nn.LSTM`` and to ``self.dropout``.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout_prob):
        super().__init__()

        self.num_layers = num_layers
        self.hidden_size = hidden_size

        # batch_first=True: inputs and outputs are (batch, seq, feature).
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout_prob)
        # Kept as an attribute for compatibility; forward() does not apply it.
        self.dropout = nn.Dropout(dropout_prob)
        self.linear = nn.Linear(hidden_size, output_size)

    def init_hidden(self, batch_size):
        """Return zero-filled ``(h_0, c_0)`` for a batch of ``batch_size``.

        Each tensor has shape ``(num_layers, batch_size, hidden_size)`` and is
        created on the device that holds the module's parameters, so the
        states always match the model regardless of any global device name.
        """
        state_dim = (self.num_layers, batch_size, self.hidden_size)
        param_device = self.linear.weight.device
        return (torch.zeros(state_dim, device=param_device),
                torch.zeros(state_dim, device=param_device))

    def forward(self, x, states):
        """Run the LSTM and project every time step through the linear head.

        Returns ``(out, (h, c))`` where ``out`` holds one ``output_size``
        vector per input time step and ``(h, c)`` are the final LSTM states.
        """
        x, (h, c) = self.lstm(x, states)
        out = self.linear(x)
        return out, (h, c)

# Build the network from the hyper-parameters above and move it to `device`.
model = LSTM(
    input_size=INPUT_DIM,
    hidden_size=HIDDEN_SIZE,
    num_layers=NUM_LAYERS,
    output_size=OUTPUT_SIZE,
    dropout_prob=DROPOUT,
).to(device)

criterion = nn.MSELoss()
# Optimise every trainable parameter. Passing only `model.linear.parameters()`
# would leave the LSTM layers frozen at their random initial weights even
# though gradients are computed (and clipped) for them during training.
optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=0.01)
# Train the 10-layer model for 100 epochs.
training(model, 100)

In [18]:
# Continue training the same `model` for a further 100 epochs. Its weights and
# the optimizer persist from the previous cell; training() itself starts again
# from freshly zeroed hidden states.
training(model, epochs=100)