In [1]:
import pandas as pd
import numpy as np
import os
import torch
import torch.nn as nn
from dataset import Dataset, to_device
from model import ResNet28
from time import time
from tqdm.notebook import tqdm
import json
from sklearn.preprocessing import StandardScaler

# Model Saving/Loading Methods

In [2]:
def load_model_only(config):
    """Load just the network and its loss history from a saved checkpoint.

    The checkpoint is read from ``<model_path>/<model_identifier>.pth``;
    ``model_path`` falls back to an empty string when absent from ``config``.

    Args:
        config: Mapping with a required ``'model_identifier'`` entry and an
            optional ``'model_path'`` entry.

    Returns:
        ``(net, mean_losses)`` taken from the checkpoint's ``'net'`` and
        ``'mean_losses'`` entries.
    """
    model_dir = config.get('model_path', '')
    checkpoint_file = f"{model_dir}/{config['model_identifier']}.pth"
    print("Loading existing model")
    state = torch.load(checkpoint_file)
    return state['net'], state['mean_losses']

def init_h1_weights(m):
    """Initialise one module in place; intended for use with ``net.apply``.

    Only modules whose exact type is ``nn.Linear`` are touched: the weight
    gets Kaiming-uniform initialisation (fan-in mode, ReLU gain) and the bias,
    when the layer has one, is set to the constant 0.01. Any other module is
    left unchanged.

    Args:
        m: The module to initialise.
    """
    if type(m) is not nn.Linear:
        return
    nn.init.kaiming_uniform_(m.weight, mode='fan_in', nonlinearity='relu')
    # A layer built with bias=False exposes ``bias`` as None; skip it rather
    # than failing on an attribute access on None.
    if m.bias is not None:
        nn.init.constant_(m.bias, 0.01)
        
def load_model_with_config(config, X_train=None, model_width=None, force_train=False):
    """Resume training state from a checkpoint, or build and persist a fresh one.

    The checkpoint lives at ``<model_path>/<model_identifier>.pth``. It is
    loaded when the file exists and ``force_train`` is false. Otherwise a new
    ``ResNet28``, loss function, SGD optimizer and ``StandardScaler`` are
    created and saved straight away to that same path.

    Args:
        config: Mapping with ``'model_identifier'`` and optional
            ``'model_path'`` (defaults to ``''``). Building a new model also
            reads ``'model_width'`` and ``'optimizer'`` (``'lr'``,
            ``'momentum'`` and optional ``'weight_decay'``).
        X_train: Training features. Used to fit the scaler of a new model,
            and to refit it when a checkpoint carries no ``'scaler'`` entry.
        model_width: ``(input_size, output_size)`` pair for a new model.
        force_train: When true, ignore any existing checkpoint.

    Returns:
        ``(net, loss_func, optimizer, scaler, mean_losses, next_epoch)``.

    Raises:
        Exception: A new model is needed but ``X_train`` is None.
        ValueError: A new model is needed but ``model_width`` is None.
        KeyError: The checkpoint has no scaler and ``X_train`` is None.
    """
    path = config.get('model_path', '')
    f = f"{path}/{config['model_identifier']}.pth"
    if os.path.exists(f) and not force_train:
        print("Loading existing model")
        # NOTE(review): the checkpoint stores whole pickled Python objects, so
        # only load files you trust. Newer PyTorch releases may require
        # weights_only=False for such files -- confirm for the installed version.
        checkpoint = torch.load(f)
        net = checkpoint['net']
        next_epoch = checkpoint['next_epoch']
        loss_func = checkpoint['loss_func']
        optimizer = checkpoint['optimizer']
        mean_losses = checkpoint['mean_losses']
        # A checkpoint may have been written without a 'scaler' entry. The
        # scaler is always fitted on X_train, so refit it from the caller's
        # X_train instead of failing (assumes the same training data is
        # passed on resume -- TODO confirm).
        scaler = checkpoint.get('scaler')
        if scaler is None:
            if X_train is None:
                raise KeyError('scaler')
            scaler = StandardScaler().fit(X_train)
    else:
        if X_train is None:
            raise Exception('Cannot create model without X_train')
        if model_width is None:
            raise ValueError('Cannot create model without model_width')
        print("New model created")
        net = ResNet28(input_size=model_width[0], output_size=model_width[1], width=config['model_width'])
        net.apply(init_h1_weights)
        loss_func = nn.BCEWithLogitsLoss()
        opt_config = config['optimizer']
        # Honour the configured weight decay; 0 matches SGD's own default for
        # configs that do not set it.
        optimizer = torch.optim.SGD(net.parameters(),
                                    lr=opt_config['lr'],
                                    momentum=opt_config['momentum'],
                                    weight_decay=opt_config.get('weight_decay', 0))
        # The feature scaler is fitted once on the training data and stored
        # in the checkpoint so it can be reused wherever the model is loaded.
        scaler = StandardScaler().fit(X_train)
        mean_losses = []
        next_epoch = 0
        save_model_with_config(config, net=net, loss_func=loss_func, optimizer=optimizer,
                               scaler=scaler,
                               mean_losses=mean_losses, next_epoch=next_epoch,
                              )
    return net, loss_func, optimizer, scaler, mean_losses, next_epoch

def save_model_with_config(config, **kwargs):
    """Write the given training state to the model's checkpoint file.

    The keyword arguments are stored as one dict at
    ``<model_path>/<model_identifier>.pth``, replacing any existing file.

    Args:
        config: Mapping with ``'model_identifier'`` and optional
            ``'model_path'`` (defaults to ``''``).
        **kwargs: Objects to persist; the keyword names become the
            checkpoint's keys.
    """
    path = config.get('model_path', '')

    f = f"{path}/{config['model_identifier']}.pth"
    # Create the target directory first so a first run on a fresh checkout
    # does not fail just because the models folder is missing.
    target_dir = os.path.dirname(f)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    torch.save(kwargs, f)

# Model Training/Predicting Methods

In [3]:
def _scale_features(scaler, X):
    """Run ``scaler.transform`` on ``X``, keeping DataFrame labels if ``X`` has them."""
    scaled = scaler.transform(X)
    if isinstance(X, pd.DataFrame):
        # Re-attach the original index/columns so the scaled features come
        # back in the same container type, with the same labels, as the input.
        return pd.DataFrame(scaled, index=X.index, columns=X.columns)
    return scaled


def train_model(X_train, y_train, X_test, y_test, configurations, force_train=False):
    """Train the network described by ``configurations``, resuming if possible.

    Training state comes from ``load_model_with_config`` (existing checkpoint
    or a freshly created model). Each epoch runs one pass over the training
    loader, optionally one no-gradient pass over the test loader, records the
    mean losses and writes a checkpoint.

    Args:
        X_train, y_train: Training features and targets, passed to ``Dataset``.
        X_test, y_test: Hold-out features and targets used for validation.
        configurations: Mapping read for ``'random_seed'`` (default 0),
            ``'device'`` (default ``'cpu'``), ``'train_params'`` and
            ``'test_params'`` (DataLoader keyword arguments),
            ``'max_epochs'``, ``'do_validate'``, plus the keys used by
            ``load_model_with_config`` / ``save_model_with_config``.
        force_train: Forwarded to ``load_model_with_config``; when true an
            existing checkpoint is ignored.

    Returns:
        ``(net, mean_losses)``: the network in eval mode and a list of
        ``(train_loss, val_loss)`` pairs, one per epoch. ``val_loss`` is 0
        for epochs run with ``'do_validate'`` false.
    """
    torch.manual_seed(configurations.get('random_seed', 0))
    pyt_device = torch.device(configurations.get('device', 'cpu'))

    # The model's input/output sizes are read from the Dataset wrapper and are
    # needed before the scaler exists, so probe the raw data for them first.
    width_probe = Dataset(X_train, y_train)
    model_width = (width_probe.num_X, width_probe.num_y)
    net, loss_func, optimizer, scaler, mean_losses, next_epoch = load_model_with_config(
        configurations, X_train, model_width, force_train)

    # Build the loaders from the *scaled* features. Previously the scaling was
    # applied after the loaders had been created, so it never reached the
    # network.
    training_set = Dataset(_scale_features(scaler, X_train), y_train)
    training_generator = torch.utils.data.DataLoader(training_set, **configurations['train_params'])
    testing_set = Dataset(_scale_features(scaler, X_test), y_test)
    testing_generator = torch.utils.data.DataLoader(testing_set, **configurations['test_params'])

    to_device(net, pyt_device)
    net.train()
    print(net)

    max_epochs = configurations['max_epochs']
    # >= also covers a checkpoint trained for more epochs than now requested.
    if next_epoch >= max_epochs:
        print("Model finished training. To retrain set force_train = True ")
        net.eval()
        return net, mean_losses

    epbar = tqdm(range(next_epoch, max_epochs))
    for epoch in epbar:
        epbar.set_description(f"Epoch {epoch+1}")

        running_eloss = 0
        running_vloss = 0

        ipbar = tqdm(training_generator, leave=False)
        ipbar.set_description("Training")

        for x, y in ipbar:
            x = to_device(x, pyt_device)
            y = to_device(y, pyt_device)

            optimizer.zero_grad()
            prediction = net(x)                 # forward pass
            loss = loss_func(prediction, y)     # argument order: (output, target)
            loss.backward()                     # backpropagation, compute gradients
            optimizer.step()                    # apply gradients
            running_eloss += loss.item()

        # Switch to eval mode for validation and for the checkpoint below.
        net.eval()
        mean_vlosses = 0
        if configurations['do_validate']:
            with torch.no_grad():
                vpbar = tqdm(testing_generator, leave=False)
                vpbar.set_description("Validating")
                for x, y in vpbar:
                    x = to_device(x, pyt_device)
                    y = to_device(y, pyt_device)
                    prediction = net(x)
                    loss = loss_func(prediction, y)
                    running_vloss += loss.item()
            mean_vlosses = running_vloss / len(testing_generator)

        mean_elosses = running_eloss / len(training_generator)
        mean_losses.append((mean_elosses, mean_vlosses))
        # The scaler must be stored with every checkpoint: the loader reads
        # checkpoint['scaler'] when resuming.
        save_model_with_config(configurations, net=net, loss_func=loss_func, optimizer=optimizer,
                               scaler=scaler,
                               mean_losses=mean_losses, next_epoch=epoch+1,)
        net.train()

        epbar.set_postfix({'train_loss':f"{mean_elosses:.9f}", 'val_loss':f"{mean_vlosses:.9f}"})
    net.eval()
    return net, mean_losses

# Load and Clean Data

In [4]:
def train_test_split(X, y, train_idx=None, test_idx=None):
    """Select the training and test rows of ``X`` and ``y`` by index label.

    Args:
        X: Feature table supporting ``.loc`` lookup.
        y: Target table supporting ``.loc`` lookup.
        train_idx: Labels of the training rows.
        test_idx: Labels of the test rows.

    Returns:
        ``(X_train, y_train, X_test, y_test)``.
    """
    return tuple(
        table.loc[labels]
        for labels in (train_idx, test_idx)
        for table in (X, y)
    )

def load_split_data(suffix=None, split=False, data_dir='../data',
                    train_end='2019', test_start='2020'):
    """Load the pickled feature/target tables, drop NA rows, optionally split.

    Reads ``<data_dir>/X_<suffix>.pkl`` and ``<data_dir>/y_<suffix>.pkl``.
    Rows where any feature in ``X`` is NA are removed from both tables.

    Args:
        suffix: Dataset tag used in the file names; ``None`` means
            ``'DEFAULT'``.
        split: When true, return a train/test split instead of the full
            tables.
        data_dir: Directory holding the pickle files.
        train_end: Last index label (inclusive ``.loc`` slice) of the
            training rows.
        test_start: First index label of the test rows.

    Returns:
        ``(X, y)`` when ``split`` is false, otherwise
        ``(X_train, y_train, X_test, y_test)``.
    """
    if suffix is None:
        suffix = 'DEFAULT'

    # NOTE(review): unpickling can execute arbitrary code; only point this at
    # trusted files.
    X = pd.read_pickle(f'{data_dir}/X_{suffix}.pkl')
    y = pd.read_pickle(f'{data_dir}/y_{suffix}.pkl')

    # Drop rows with any missing feature; y is filtered with the same mask so
    # the two tables stay aligned.
    na_rows = X.isna().any(axis=1)
    X = X[~na_rows]
    y = y[~na_rows]

    if not split:
        return X, y

    # Label-based slicing; presumably the index is a sorted date index so the
    # defaults mean "through 2019" / "2020 onwards" -- TODO confirm.
    X_train, y_train, X_test, y_test = train_test_split(
        X, y, X.loc[:train_end].index, X.loc[test_start:].index)
    return X_train, y_train, X_test, y_test
    
# Load the '20210726' dataset with split=True: index labels up to '2019' form
# the training set, labels from '2020' onwards form the test set.
X_train, y_train, X_test, y_test = load_split_data(suffix='20210726', split=True)

# Model Creation

In [5]:
model_id = 'alpha3'

# Run configuration: read by train_model and the model load/save helpers.
config = {
    'model_identifier' : model_id,
    'model_path' : './models',
    'device' : 'cuda',
    'random_seed' : 0,
    'max_epochs' : 20,
    'do_validate' : True,
    'model_width' : 128,
    'optimizer' : {
        'lr' : 0.001,
        'weight_decay' : 0.00025,
        'momentum' : 0.9,
    },
    # Keyword arguments for the training DataLoader.
    'train_params' : {
        'batch_size': 2000,
        'shuffle': True,
        'num_workers': 5,
        'pin_memory': True,
    },
    # Keyword arguments for the validation DataLoader.
    'test_params' : {
        'batch_size': 20000,
        'num_workers': 5,
        'pin_memory': True,
    },
}


# Store the configuration next to the checkpoint. The location is taken from
# config['model_path'] so the .cfg and .pth files cannot drift apart, and the
# directory is created so a fresh checkout does not fail here.
os.makedirs(config['model_path'], exist_ok=True)
with open(f"{config['model_path']}/{model_id}.cfg", 'w') as f:
    json.dump(config, f)

# Train

In [6]:
# force_train=True ignores any existing checkpoint for this model id: a new
# model is created and saved, and training starts again from epoch 0.
net, mean_losses = train_model(X_train, y_train, X_test, y_test, config, force_train=True)

New model created
ResNet28(
  (stack1): ResNetStack(
    (stack): Sequential(
      (dense_1): ResnetDenseBlock(
        (dense1): Linear(in_features=382, out_features=128, bias=True)
        (bn1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU()
        (dense2): Linear(in_features=128, out_features=128, bias=True)
        (bn2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (dense3): Linear(in_features=128, out_features=128, bias=True)
        (bn3): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (shortcut): Sequential(
          (dense_sc): Linear(in_features=382, out_features=128, bias=True)
          (bn_sc): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
      )
      (identity_1a): ResnetIdentityBlock(
        (dense1): Linear(in_features=128, out_features=128, bias=True)
        (bn1): BatchNorm1

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

In [10]:
# Column-wise sum of the training targets; this is the number of positive
# examples if the labels are 0/1 -- TODO confirm the label encoding.
y_train.sum()

buy    25150
dtype: int64