Copyright 2021 Google LLC

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    https://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F

from sklearn.decomposition import PCA
from torch import nn
from torch.autograd import Variable
from torch.utils.data import DataLoader

# Model Definition

In [3]:
class MLP(nn.Module):
    """Two-branch multilayer perceptron with five sigmoid outputs.

    One branch takes the 51-wide "PCA" input (described in this file as the
    top 49 PCA feature components plus feature_0 and weight); the other takes
    the 80-wide "Other" input.  The two branch outputs (256 and 128 values)
    are concatenated and passed through a two-layer head that ends in a
    sigmoid.
    """

    def __init__(self):
        super().__init__()

        # "Other" branch: 80 -> 256 -> 128.  Every stage is
        # Linear -> ReLU -> BatchNorm -> Dropout.
        self.other_fc1 = nn.Linear(in_features=80, out_features=256)
        self.other_ReLU1 = nn.ReLU()
        self.other_batch_norm1 = nn.BatchNorm1d(num_features=256)
        self.other_dropout1 = nn.Dropout(p=0.8)
        self.other_fc2 = nn.Linear(in_features=256, out_features=128)
        self.other_ReLU2 = nn.ReLU()
        self.other_batch_norm2 = nn.BatchNorm1d(num_features=128)
        self.other_dropout2 = nn.Dropout(p=0.6)

        # "PCA" branch: a single 51 -> 256 stage of the same shape.
        self.pca_fc = nn.Linear(in_features=51, out_features=256)
        self.pca_ReLU = nn.ReLU()
        self.pca_batch_norm = nn.BatchNorm1d(num_features=256)
        self.pca_dropout = nn.Dropout(p=0.6)

        # Head over the concatenated branches: 256 + 128 = 384 -> 128 -> 5.
        self.final_fc1 = nn.Linear(in_features=384, out_features=128)
        self.final_ReLU = nn.ReLU()
        self.final_batch_norm = nn.BatchNorm1d(num_features=128)
        self.final_dropout = nn.Dropout(p=0.2)
        self.final_fc2 = nn.Linear(in_features=128, out_features=5)
        self.final_sigmoid = nn.Sigmoid()

    @staticmethod
    def _chain(tensor, *stages):
        """Feed ``tensor`` through ``stages`` one after another."""
        for stage in stages:
            tensor = stage(tensor)
        return tensor

    def forward(self, pca_x, other_x):
        """Return the five sigmoid outputs for a batch.

        Args:
            pca_x: batch for the "PCA" branch; last dimension must be 51.
            other_x: batch for the "Other" branch; last dimension must be 80.

        Returns:
            Tensor with one row per sample and five columns of sigmoid
            activations.
        """
        # The "Other" branch runs first, as in the layer declaration order.
        other_out = self._chain(
            other_x,
            self.other_fc1,
            self.other_ReLU1,
            self.other_batch_norm1,
            self.other_dropout1,
            self.other_fc2,
            self.other_ReLU2,
            self.other_batch_norm2,
            self.other_dropout2,
        )

        pca_out = self._chain(
            pca_x,
            self.pca_fc,
            self.pca_ReLU,
            self.pca_batch_norm,
            self.pca_dropout,
        )

        # PCA activations come first in the concatenated vector.
        merged = torch.cat((pca_out, other_out), dim=1)
        return self._chain(
            merged,
            self.final_fc1,
            self.final_ReLU,
            self.final_batch_norm,
            self.final_dropout,
            self.final_fc2,
            self.final_sigmoid,
        )


# Custom Binary Cross Entropy

In [4]:
# Custom weighted binary cross entropy function
def weighted_binary_cross_entropy(y_pred, y_true, device):
    """Return the summed, weighted binary cross entropy as a scalar tensor.

    Args:
        y_pred: predicted probabilities, five columns per row.
        y_true: signed targets; a value > 0 is treated as the positive class.
        device: device on which the per-column weight tensor is placed.

    Returns:
        Negated sum of the weighted log-likelihood terms over all elements.
    """
    eps = 1e-9  # keeps log() away from zero
    target = (y_true > 0).float()
    decision = (y_pred > 0.5).float()
    hit = (target == decision).float()

    # The negative-class term carries a 1.02 tuning factor: predicting
    # "trade" when the true action is "do not trade" costs slightly more.
    positive_term = target * torch.log(y_pred + eps)
    negative_term = 1.02 * ((1 - target) * torch.log(1 - y_pred + eps))
    log_likelihood = positive_term + negative_term

    # resp gets half of the weight; resp_1..resp_4 share the other half.
    column_weights = torch.tensor([[0.5, 0.125, 0.125, 0.125, 0.125]]).to(device)
    # Wrong calls are scaled up by |y_true|; correct calls keep a factor of 1.
    miss_penalty = (1 - hit) * torch.abs(y_true) + 1

    weighted = log_likelihood * column_weights * miss_penalty
    return torch.neg(torch.sum(weighted))

# Loading Data

In [5]:
# PyTorch class for loading data
class Dataset(torch.utils.data.Dataset):
    """In-memory dataset that yields (x1 row, x2 row, y row) tensor triples."""

    def __init__(self, x1, x2, y):
        # x1 and x2 must expose ``.values`` (train_model passes the
        # DataFrames returned by load_data); y is given to torch.Tensor as is.
        self.x1, self.x2 = (torch.Tensor(frame.values) for frame in (x1, x2))
        self.y = torch.Tensor(y)

    def __len__(self):
        # The number of samples is taken from the target tensor.
        return len(self.y)

    def __getitem__(self, index):
        # Both inputs and the label for the requested position.
        return tuple(store[index] for store in (self.x1, self.x2, self.y))

# Loads training data from path and normalizes + splits it into the "PCA" / "Other" buckets
def load_data(path):
    """Load the training CSV and split it into model inputs and targets.

    Steps, in order:
      1. Read the CSV at ``path``.
      2. Compute per-column mean and absolute maximum over the whole file.
      3. Drop rows whose ``weight`` is 0.
      4. Fill missing values with the column mean and divide every column
         by its absolute maximum.
      5. Split the columns into the "PCA" bucket (49 selected features plus
         ``feature_0`` and ``weight``) and the "Other" bucket (the remaining
         80 ``feature_*`` columns).

    A column whose absolute maximum is 0 is divided by 1 instead, so it
    stays at 0 rather than turning into NaN (0 / 0).

    Args:
        path: location of the training CSV.  It must contain the columns
            ``feature_0`` .. ``feature_129``, ``weight``, ``resp`` and
            ``resp_1`` .. ``resp_4``.

    Returns:
        Tuple ``(pca_x, other_x, y)``: two DataFrames with 51 and 80 columns
        and a NumPy array with 5 columns holding normalized weight times
        normalized resp / resp_1 .. resp_4.
    """
    print("loading data...")
    train = pd.read_csv(path)

    # Divide the features into the top 49 PCA features components + feature_0 + weights and the Other features
    all_features = [f"feature_{i}" for i in range(130)]
    pca_features = ['feature_56', 'feature_24', 'feature_87', 'feature_126', 'feature_50', 'feature_77', 'feature_12',
                    'feature_63', 'feature_2', 'feature_1', 'feature_59', 'feature_123', 'feature_3', 'feature_44',
                    'feature_49', 'feature_58', 'feature_55', 'feature_8', 'feature_60', 'feature_18', 'feature_51',
                    'feature_78', 'feature_35', 'feature_41', 'feature_52', 'feature_121', 'feature_54', 'feature_40',
                    'feature_57', 'feature_6', 'feature_74', 'feature_98', 'feature_83', 'feature_70', 'feature_82',
                    'feature_9', 'feature_16', 'feature_43', 'feature_34', 'feature_125', 'feature_45', 'feature_72',
                    'feature_69', 'feature_92', 'feature_65', 'feature_4', 'feature_28', 'feature_36', 'feature_117',
                    'feature_0', 'weight']
    pca_lookup = set(pca_features)  # O(1) membership for the exclusion below
    other_features = [f for f in all_features if f not in pca_lookup]

    # Normalization statistics come from the entire dataset, i.e. they are
    # computed before the zero-weight rows are dropped.
    mean = train.mean()
    abs_max = train.abs().max(axis=0)
    # An all-zero column has abs_max == 0; dividing by it would yield NaN
    # (0 / 0) for every row.  Use 1 as the divisor so the column stays 0.
    abs_max = abs_max.mask(abs_max == 0, 1.0)

    train = train[train["weight"] != 0]
    # Fill in any blanks with the column mean, then scale by abs_max.
    train = train.fillna(mean) / abs_max
    train = train.reset_index(drop=True)

    # Split data into x1, x2, y
    pca_x = train[pca_features]
    other_x = train[other_features]

    # y is calculated as weight * resp (both already normalized above); the
    # extra axis lets the weight column broadcast over the five resp columns.
    x_train_weights = np.expand_dims(train['weight'].values, axis=-1)
    x_train_resps = train[['resp', 'resp_1', 'resp_2', 'resp_3', 'resp_4']].values
    y = x_train_weights * x_train_resps

    return pca_x, other_x, y

# Train Model

In [6]:
# Training configuration read by train_model().
DATA_LOCATION = "train.csv"  # Replace with train data path
EPOCHS = 15  # passes over the training set per model
BATCH_SIZE = 4096  # rows per DataLoader batch
LR = 1e-4  # learning rate handed to AdamW
# Use the GPU when PyTorch reports one as available, otherwise the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

def train_model(save_name):
    """Train a fresh MLP on the data at DATA_LOCATION and save it.

    The data is loaded with load_data(), the network is optimized with AdamW
    for EPOCHS passes using weighted_binary_cross_entropy, and the trained
    module object is written to ``save_name`` with torch.save.

    Args:
        save_name: destination passed to torch.save for the trained model.
    """
    pca_frame, other_frame, targets = load_data(DATA_LOCATION)

    net = MLP()
    net.to(DEVICE)
    net.train()  # training mode
    optimizer = torch.optim.AdamW(net.parameters(), lr=LR)

    loader = DataLoader(
        Dataset(pca_frame, other_frame, targets),
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=4,
    )

    print("training model...")
    for epoch in range(EPOCHS):
        print(f"[epoch {epoch}]")

        for pca_batch, other_batch, y_batch in loader:
            # Move the batch to the training device before the forward pass.
            pca_batch = pca_batch.to(DEVICE)
            other_batch = other_batch.to(DEVICE)
            y_batch = y_batch.to(DEVICE)

            optimizer.zero_grad()
            predictions = net(pca_batch, other_batch)
            batch_loss = weighted_binary_cross_entropy(predictions, y_batch, DEVICE)
            batch_loss.backward()
            optimizer.step()

    # The whole module object is saved, not just its state_dict.
    torch.save(net, save_name)

# Train three models one after another, each saved to its own file.
for _checkpoint_name in ("model1.pth", "model2.pth", "model3.pth"):
    train_model(_checkpoint_name)

loading data...
training model...
[epoch 0]
[epoch 1]
[epoch 2]
[epoch 3]
[epoch 4]
[epoch 5]
[epoch 6]
[epoch 7]
[epoch 8]
[epoch 9]
[epoch 10]
[epoch 11]
[epoch 12]
[epoch 13]
[epoch 14]
loading data...
training model...
[epoch 0]
[epoch 1]
[epoch 2]
[epoch 3]
[epoch 4]
[epoch 5]
[epoch 6]
[epoch 7]
[epoch 8]
[epoch 9]
[epoch 10]
[epoch 11]
[epoch 12]
[epoch 13]
[epoch 14]
loading data...
training model...
[epoch 0]
[epoch 1]
[epoch 2]
[epoch 3]
[epoch 4]
[epoch 5]
[epoch 6]
[epoch 7]
[epoch 8]
[epoch 9]
[epoch 10]
[epoch 11]
[epoch 12]
[epoch 13]
[epoch 14]
