In [1]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

In [2]:
# Read the raw train and test tables from the local data/ directory.
train_o, test_o = map(pd.read_csv, ("data/train.csv", "data/test.csv"))

In [3]:
def fillize(data):
    """Zero-fill missing values and remember where the quantity was missing.

    Returns a new frame in which every NaN is replaced by 0 and a boolean
    column ``nans`` marks the rows whose ``opened_position_qty`` was NaN
    before filling. The input frame is not modified.
    """
    # The mask has to be taken before filling, otherwise the information is lost.
    qty_missing = np.isnan(data["opened_position_qty"])
    filled = data.fillna(0)
    filled["nans"] = qty_missing
    return filled

def deidize(data):
    """Return a copy of ``data`` without its ``id`` column."""
    without_id = data.drop(columns=["id"])
    return without_id

def relize(data):
    """Convert absolute order-book prices into relative offsets.

    Works on a copy of ``data``:

    * levels 5..2 of ``bid``/``ask`` become the difference to the next
      lower level of the same side;
    * level 1 of each side becomes the difference to ``last_price``.
    """
    out = data.copy()

    # Go from the deepest level down to level 2 so that each subtraction still
    # sees the untouched (absolute) value of the level below it.
    for level in reversed(range(2, 6)):
        for side in ("bid", "ask"):
            upper = f"{side}{level}"
            lower = f"{side}{level-1}"
            out[upper] = out[upper] - out[lower]

    # Level 1 is expressed relative to the last traded price.
    for side in ("ask", "bid"):
        first = f"{side}1"
        out[first] = out[first] - out["last_price"]

    return out

def normize(train, test):
    """Standardise the price columns of both frames with training statistics.

    The mean and standard deviation of ``train["last_price"]`` are used to
    rescale the ``last_price`` and ``mid`` columns of copies of ``train`` and
    ``test``. Returns the pair ``(scaled_train, scaled_test)``; the inputs are
    left untouched.
    """
    price_cols = ["last_price", "mid"]
    center = train["last_price"].mean()
    scale = train["last_price"].std()

    def _rescaled(frame):
        # Same statistics for every frame, so test data never influences them.
        scaled = frame.copy()
        scaled.loc[:, price_cols] = (scaled.loc[:, price_cols] - center) / scale
        return scaled

    return _rescaled(train), _rescaled(test)

In [5]:
# Per-frame preprocessing, applied identically to both splits:
# drop the id column -> relative order-book prices -> zero-fill NaNs (+ flag).
train_d = train_o.pipe(deidize).pipe(relize).pipe(fillize)
test_d = test_o.pipe(deidize).pipe(relize).pipe(fillize)

# Scale the price columns of both splits with statistics taken from train only.
train_d, test_d = normize(train_d, test_d)

In [38]:
class StockDataset(Dataset):
    """Map-style dataset over a preprocessed DataFrame.

    Every column except ``"y"`` is used as a feature and stored as one
    ``float32`` tensor; the ``"y"`` column is stored as an ``int64`` tensor of
    labels.

    Args:
        df: DataFrame holding the feature columns and a ``"y"`` label column.

    Raises:
        KeyError: if ``df`` has no ``"y"`` column.
    """

    def __init__(self, df):
        features = df.drop("y", axis=1).values.astype(np.float32)
        # Use an explicit 64-bit integer dtype. The old ``np.long`` alias was
        # removed in NumPy 1.24, and in NumPy 2.x it denotes the platform's
        # C ``long``, which is not guaranteed to be 64 bits wide.
        labels = df["y"].values.astype(np.int64)

        self.X = torch.tensor(features)
        self.y = torch.tensor(labels)

    def __len__(self):
        """Number of rows (samples) in the dataset."""
        return self.X.shape[0]

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at ``idx``."""
        return (self.X[idx], self.y[idx])

In [39]:
# Wrap the preprocessed training frame and serve it in mini-batches of 32
# (no shuffle argument is passed to the loader).
train = StockDataset(df=train_d)
loader = DataLoader(dataset=train, batch_size=32)

# Uncomment to peek at a single (features, labels) batch:
# next(iter(loader))

In [40]:
# Small feed-forward classifier: 27 inputs -> 10 hidden units (ReLU) -> 2 outputs.
model = nn.Sequential(
    nn.Linear(in_features=27, out_features=10),
    nn.ReLU(),
    nn.Linear(in_features=10, out_features=2),
)

In [44]:
# One pass over the training loader, optimising cross-entropy with Adam.
model.train()

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# Print the batch index roughly 20 times per pass. The interval is computed
# once, outside the loop, and clamped to at least 1: with fewer than 20 batches
# the unclamped value would be 0 and `i % 0` raises ZeroDivisionError.
log_every = max(1, len(loader) // 20)

for i, (data, target) in enumerate(loader):
    if i % log_every == 0:
        print(f"{i} ", end="")
    optimizer.zero_grad()           # clear gradients left over from the previous step
    output = model(data)
    loss = loss_fn(output, target)
    loss.backward()
    optimizer.step()

0 925 1850 2775 3700 4625 5550 6475 7400 8325 9250 10175 11100 12025 12950 13875 14800 15725 16650 17575 18500 

In [45]:
# Take the first batch from the loader, run it through the model and show the
# loss on that single batch as the cell output.
# NOTE(review): the model is still in train mode and autograd is recording here
# (the displayed tensors carry a grad_fn); consider model.eval() together with
# torch.no_grad() if this cell is meant purely for inspection.
d, t = next(iter(loader))
p = model(d)
loss_fn(p,t)

tensor(0.5638, grad_fn=<NllLossBackward>)

In [46]:
# Display the raw two-column model outputs computed for the inspected batch.
p

tensor([[-4.5612, -3.3968],
        [-2.9975, -2.2433],
        [-4.2436, -4.0971],
        [-2.2189, -0.9703],
        [-2.7718, -1.0973],
        [-0.0991, -1.1571],
        [-1.1637, -1.8702],
        [-1.7730, -0.7834],
        [-1.9753, -1.9643],
        [-1.6277, -1.0757],
        [-0.2062, -0.6001],
        [-0.6880, -0.3104],
        [-0.7328, -1.0500],
        [-0.2756, -0.3346],
        [ 0.0204, -0.3752],
        [-0.5357,  0.0910],
        [ 0.0204, -0.3752],
        [ 0.0970, -0.7680],
        [-0.2841, -0.8246],
        [-0.0758, -0.4474],
        [-0.2002, -0.4690],
        [-0.2338,  0.1357],
        [ 0.7511, -0.2473],
        [ 0.0134, -0.9611],
        [-0.0405, -0.7741],
        [ 0.2607,  0.1703],
        [-0.4224,  0.1860],
        [ 0.0635, -0.4187],
        [-0.1772, -0.5140],
        [-0.0628, -0.5233],
        [-0.4378, -1.0469],
        [-1.0409,  0.3420]], grad_fn=<AddmmBackward>)