In [1]:
import warnings
warnings.filterwarnings("ignore")

import sys 
sys.path.append("./l1tf")
import pandas_wrapper

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as T
from torch.utils.data import Dataset

from FeatureEngineering import FeatureEngineering

import os
os.environ['CUDA_VISIBLE_DEVICES'] = "2"

SEED = 10
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
np.random.seed(SEED)

data_folder = './data/'

In [2]:
train = pd.read_csv(data_folder + 'NASDAQ_m_l1tf0.005_wFE_train.csv')
test = pd.read_csv(data_folder + 'NASDAQ_m_l1tf0.005_wFE_test.csv')

In [3]:
num_points_for_train = 1950 # see 64 window and predict after 30m
offset = 1
target_col = train.columns.get_loc("close") # target column index
batch_size = 1
num_workers = 4
pin_memory = True
device = 'cuda'
num_epoch = 100
lr = 1e-4

In [4]:
class MyDataset(Dataset):
    def __init__(self, data, window, offset, target_col):
        self.data = torch.Tensor(data)
        self.window = window #num_points_for_train
        self.used_cols = [x for x in range(data.shape[1]) if x!=target_col] # X = except target column
        self.target_col = target_col #y
        
        self.shape = self.__getshape__()
        self.size = self.__getsize__()

    def __getitem__(self, index):
        x = self.data[index:index+self.window, self.used_cols] #for train window length
        y = self.data[index+self.window+offset, self.target_col]
        return x, y

    def __len__(self):
        return len(self.data) -  self.window - offset # train data length
    
    def __getshape__(self):
        return (self.__len__(), *self.__getitem__(0)[0].shape) # row, col
    
    def __getsize__(self):
        return (self.__len__())

In [5]:
total_indices = train.index
train_indices = total_indices[int(len(total_indices)*0):int(len(total_indices)*0.7)] #train index length
valid_indices = total_indices[int(len(total_indices)*0.7):int(len(total_indices)*1.0)] #valid index length

In [6]:
train_dataset = MyDataset(train.loc[train_indices].values, 
                          num_points_for_train, offset, target_col)
val_dataset = MyDataset(train.loc[valid_indices].values, 
                          num_points_for_train, offset, target_col)
test_dataset = MyDataset(test.values, 
                          num_points_for_train, offset, target_col)

In [7]:
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, 
                                     drop_last=False, 
                                     num_workers=num_workers, pin_memory=pin_memory)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, 
                                     drop_last=False, 
                                     num_workers=num_workers, pin_memory=pin_memory)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, 
                                     drop_last=False, 
                                     num_workers=num_workers, pin_memory=pin_memory)

In [8]:
#simple FCN regression model
device='cuda'
class DNN(nn.Module):
    def __init__(self, in_features, hidden_dim, out_features):
        super().__init__()        
        self.fc1 = nn.Linear(in_features, hidden_dim) 
        self.fc2 = nn.Linear(hidden_dim, hidden_dim*2) 
        self.fc3 = nn.Linear(hidden_dim*2, hidden_dim*4)
        self.fc4 = nn.Linear(hidden_dim*4, hidden_dim*2)
        self.fc5 = nn.Linear(hidden_dim*2, hidden_dim) 
        self.out = nn.Linear(hidden_dim, out_features)

    def forward(self, x):
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        x = self.fc4(x)
        x = self.fc5(x)
        x = self.out(x)
        return x

In [9]:
model = DNN(in_features= 15,
            hidden_dim = 64,
            out_features=1).to(device) 
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
criterion = nn.MSELoss()


for epoch in range(0, num_epoch+1):
    model = model.train()
    train_loss = []
    for i, (x, y) in enumerate(train_loader):
        x = x.to(device)
        y = y.to(device)
        
        optimizer.zero_grad()
        outputs = model(x)
        loss = criterion(outputs, y)
        loss.backward()
        optimizer.step()
        
        train_loss.append(loss.item())
    
    test_loss1 = [] 
    with torch.no_grad():
        model = model.eval()
        for i, (x, y) in enumerate(val_loader):
            x = x.to(device)
            y = y.to(device)

            outputs = model(x)
            loss = criterion(outputs, y)

            test_loss1.append(loss.item())
            
    if epoch %10 ==0:
        print(f"{epoch} Step Regression Loss: ", np.mean(train_loss), "|| val: ", np.mean(test_loss1) )

0 Step Regression Loss:  44369.40213434537 || val:  21520.53189918106
10 Step Regression Loss:  5554.133003223722 || val:  24709.199625675363


KeyboardInterrupt: 

In [None]:
with torch.no_grad():
    model = model.eval()
    test_loss1 = []
    test_loss2 = []
    test_loss3 = []
    test_loss4 = []
    for i, (x, y) in enumerate(test_loader):
        x = x.to(device)
        y = y.to(device)

        outputs = model(x)
        loss = criterion(outputs, y)
                
        criterion3 = nn.L1Loss()
        loss3 = criterion3(input=outputs, target = y.float().to(device))
                
        loss4 = torch.mean(torch.abs(torch.subtract(outputs, y.float().to(device)) / y.float().to(device)))
        
        test_loss1.append(loss.item())
        test_loss2.append(torch.sqrt(loss).item())
        test_loss3.append(loss3.item())
        test_loss4.append(loss4.item())
        
print("MSE: ", np.mean(test_loss1))
print("rMSE: ", np.mean(test_loss2))
print("MAE: ", np.mean(test_loss3))
print("MAPE: ", np.mean(test_loss4))

In [None]:
torch.save({
    'epoch': 100,
    'model_state_dict' : model.state_dict(),
    'optimizer_state_dict' : optimizer.state_dict(),
    'loss' : criterion
}, './checkpoint-reg-baseline/fc.pt')