# Import libraries

In [None]:
import numpy as np
import pickle
import os
from sklearn.model_selection import GroupKFold
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
import random
from torch.utils.data import Dataset, DataLoader
import gc
from sklearn.metrics import precision_score, roc_auc_score
import pickle
import pandas as pd
import datatable as dt
import matplotlib.pylab as plt
import time

In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

def seed_everything(seed=42):
    """Seed every random number generator used in this file.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices)
    and puts cuDNN into deterministic mode.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # Only influences hash randomisation of interpreters started after this
    # point (e.g. worker processes); the running interpreter is unaffected.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not just the current one (no-op without CUDA).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Autotuning may pick different kernels between runs, so disable it.
    torch.backends.cudnn.benchmark = False

# Competition score

In [None]:
def utility_score_numba(date, weight, resp, action):
    """Compute the competition utility score for a set of trade decisions.

    The per-row value ``weight * resp * action`` is summed per ``date`` to
    get daily totals ``Pi``. With ``t = sum(Pi) / sqrt(sum(Pi**2)) *
    sqrt(250 / len(Pi))``, the score is ``clip(t, 0, 6) * sum(Pi)``.

    Args:
        date: Non-negative integer array used as the ``np.bincount`` bin index.
        weight: Per-row weights.
        resp: Per-row returns.
        action: Per-row decisions (0 or 1 in this file).

    Returns:
        The utility score; ``0.0`` when every daily total is zero (for
        example when no trade is taken), where ``t`` would otherwise be 0/0.
    """
    Pi = np.bincount(date, weight * resp * action)
    total = np.sum(Pi)
    norm = np.sqrt(np.sum(Pi ** 2))
    if norm == 0:
        # All daily totals are zero (or there are no days): nothing was earned.
        return 0.0
    t = total / norm * np.sqrt(250 / len(Pi))
    return min(max(t, 0), 6) * total

# Random Ensemble Neural Net Architecture 

In [None]:
################################# ENSEMBLE MODELS STRUCTURE GENERATION #####################################

def ensemble_structure(number_models, num_features, MODEL_ROOT):
    """Create, or reload from disk, the random structure of the ensemble.

    For a single model the structure is fixed: hidden width 150 and all
    features. Otherwise each member gets a random hidden width in [96, 160)
    and a random number of feature indices (between 70% and 100% of
    ``num_features``), drawn with ``np.random.randint``, i.e. with
    replacement, so a member can see the same feature more than once.

    The generated structure is cached as three pickle files in
    ``MODEL_ROOT``. The cache is used only when all three files exist; a
    partial cache is regenerated and overwritten instead of raising
    ``FileNotFoundError``.

    Args:
        number_models: Number of ensemble members.
        num_features: Number of input features available.
        MODEL_ROOT: Directory holding the cached structure files.

    Returns:
        Tuple ``(hidden_dims, number_of_dims, input_dims)`` with one entry
        per member: hidden width, number of selected features, and the array
        of selected feature indices.
    """
    if number_models == 1:
        return [150], [num_features], [np.arange(num_features)]

    names = ('hidden_dims', 'number_of_dims', 'input_dims')
    paths = [os.path.join(MODEL_ROOT, name + '.pkl') for name in names]

    if all(os.path.isfile(path) for path in paths):
        # NOTE: pickle.load can execute arbitrary code; these files are
        # expected to be ones this function wrote earlier.
        loaded = []
        for path in paths:
            with open(path, 'rb') as handle:
                loaded.append(pickle.load(handle))
        hidden_dims, number_of_dims, input_dims = loaded
        return hidden_dims, number_of_dims, input_dims

    # Draw order is kept (widths, counts, then indices per member) so a
    # seeded NumPy generator reproduces the same structure.
    hidden_dims = np.random.randint(96, 160, number_models)
    number_of_dims = np.random.randint(int(num_features * 0.7), num_features, number_models)
    input_dims = [np.random.randint(0, num_features, count) for count in number_of_dims]

    for path, obj in zip(paths, (hidden_dims, number_of_dims, input_dims)):
        with open(path, 'wb') as handle:
            pickle.dump(obj, handle, protocol=pickle.HIGHEST_PROTOCOL)

    return hidden_dims, number_of_dims, input_dims


# Fold Selection

In [None]:
################################# FOLD SELECTION #####################################
def fold_selection(folds, kf=0, random = False,
                   date=450, Debug=False):
    """Add an integer ``fold`` column marking validation rows.

    ``folds`` is modified in place and also returned. Strategies, in order
    of precedence:

    * ``Debug`` or ``random`` truthy: a random 10% of rows gets fold 1.
    * ``kf > 0``: ``GroupKFold`` with ``kf`` splits grouped by ``date``;
      the test rows of split *k* get fold *k* (1-based).
    * otherwise: rows with ``date`` greater than the ``date`` argument get
      fold 1.

    Rows not selected keep fold 0.

    Args:
        folds: DataFrame with a ``date`` column (and ``resp`` when ``kf > 0``).
        kf: Number of group k-fold splits; 0 disables k-fold.
        random: Use a random 10% hold-out.
        date: Date threshold for the time-based hold-out.
        Debug: Same effect as ``random``.

    Returns:
        The same DataFrame object with the ``fold`` column set.
    """
    folds['fold'] = 0

    if Debug or random:
        val = folds.sample(n=int(folds.shape[0] / 10))
        folds.loc[val.index, 'fold'] = 1

    elif kf > 0:
        group_kfold = GroupKFold(n_splits=kf)
        fold_col = folds.columns.get_loc('fold')
        splits = group_kfold.split(folds, folds['resp'], folds['date'])
        for f, (_, test_index) in enumerate(splits, start=1):
            # split() yields positional indices, so index by position; label
            # based .loc is only equivalent for a default RangeIndex.
            folds.iloc[test_index, fold_col] = f

    else:
        folds.loc[folds['date'] > date, 'fold'] = 1

    folds['fold'] = folds['fold'].astype(int)

    return folds

# Modelling

In [None]:
############## Models ######################################################################
class simple_linear_layer(nn.Module):
    """Linear -> BatchNorm1d -> Dropout(0.2) building block (no activation)."""

    def __init__(self, input_dim, out_dim):
        """Create the three sub-layers mapping ``input_dim`` to ``out_dim``."""
        super().__init__()
        self.dense = nn.Linear(input_dim, out_dim)
        self.batch_norm = nn.BatchNorm1d(out_dim)
        self.dropout = nn.Dropout(0.2)

    def forward(self, x):
        """Apply the dense layer, then batch norm, then dropout to ``x``."""
        projected = self.dense(x)
        normalised = self.batch_norm(projected)
        return self.dropout(normalised)

class MLP_Model(nn.Module):
    """Two ``simple_linear_layer`` blocks with ReLU, then a linear head."""

    def __init__(self, input_dim, hidden_dim, out_dim=4):
        """Build the network.

        Args:
            input_dim: Number of input features.
            hidden_dim: Width of both hidden blocks.
            out_dim: Number of output logits.
        """
        super().__init__()

        self.block1 = simple_linear_layer(input_dim, hidden_dim)
        self.block2 = simple_linear_layer(hidden_dim, hidden_dim)
        self.block3 = nn.Linear(int(hidden_dim), out_dim)

    def forward(self, x):
        """Return the raw logits for the batch ``x``."""
        hidden = x
        for block in (self.block1, self.block2):
            hidden = F.relu(block(hidden))
        return self.block3(hidden)

class ResidualBlock(nn.Module):
    """Two Linear-BatchNorm-ReLU-Dropout stages with a concatenated skip path.

    The skip path is concatenated with (not added to) the block output, so
    the result has ``output_dim`` plus the skip width features.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, downsample=None):
        """Build the block.

        Args:
            input_dim: Number of input features.
            hidden_dim: Width of the first stage.
            output_dim: Width of the second stage.
            downsample: Optional module applied to the input before it is
                concatenated; ``None`` passes the input through unchanged.
        """
        super().__init__()

        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.bn1 = nn.BatchNorm1d(hidden_dim)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(0.2)

        self.fc2 = nn.Linear(hidden_dim, output_dim)
        self.bn2 = nn.BatchNorm1d(output_dim)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(0.2)
        self.downsample = downsample

    def forward(self, x):
        """Return ``cat([block(x), skip(x)], dim=1)``."""
        out = self.dropout1(self.relu1(self.bn1(self.fc1(x))))
        out = self.dropout2(self.relu2(self.bn2(self.fc2(out))))

        # Compare against None explicitly: the truthiness of a module depends
        # on whether it defines __len__, which is not what is being asked here.
        residual = x if self.downsample is None else self.downsample(x)

        return torch.cat([out, residual], dim=1)

class ResNet(nn.Module):
    """Two stacked ``block`` layers followed by a linear output head.

    The second layer and the head are sized for a ``2 * hidden_dim`` wide
    input.
    """

    def __init__(self, input_dim, hidden_dim, block, num_classes=4):
        """Build the network.

        Args:
            input_dim: Number of input features.
            hidden_dim: Hidden and output width handed to each block.
            block: Callable ``block(input_dim, hidden_dim, output_dim,
                downsample)`` returning a module.
            num_classes: Number of output logits.
        """
        super().__init__()

        concat_dim = 2 * hidden_dim
        self.layer1 = self.make_layer(block, input_dim, hidden_dim, hidden_dim)
        self.layer2 = self.make_layer(block, concat_dim, hidden_dim, hidden_dim)
        self.out = nn.Linear(concat_dim, num_classes)

    def make_layer(self, block, input_dim, hidden_dim, output_dim):
        """Instantiate ``block`` with a linear projection when widths differ."""
        if input_dim == output_dim:
            projection = None
        else:
            projection = nn.Sequential(nn.Linear(input_dim, output_dim))

        return block(input_dim, hidden_dim, output_dim, projection)

    def forward(self, x):
        """Return the raw logits for the batch ``x``."""
        return self.out(self.layer2(self.layer1(x)))


####################### Final Ensemble Model ######################################
class Ensemble_MoaModel(nn.Module):
    """Ensemble of independent sub-networks, each fed its own feature subset."""

    def __init__(self, input_dims, number_of_dims, hidden_dims,
                 model_name, out_dim=4):
        """Build one sub-network per entry of ``hidden_dims``.

        Args:
            input_dims: Per-member index arrays selecting columns of the input.
            number_of_dims: Per-member input width.
            hidden_dims: Per-member hidden width.
            model_name: ``'Simple_MLP'`` or ``'ResNet'``.
            out_dim: Number of output logits per member.

        Raises:
            ValueError: If ``model_name`` is not a supported architecture.
        """
        super().__init__()

        # Fail at construction time: an ensemble without members would only
        # surface later as an IndexError inside forward().
        if model_name not in ('Simple_MLP', 'ResNet'):
            raise ValueError(
                f"Unknown model name {model_name!r}; expected 'Simple_MLP' or 'ResNet'.")

        self.models = torch.nn.ModuleList()
        self.input_dims = input_dims
        self.model_name = model_name

        for i, hidden_dim in enumerate(hidden_dims):
            if model_name == 'Simple_MLP':
                member = MLP_Model(input_dim=number_of_dims[i],
                                   hidden_dim=hidden_dim, out_dim=out_dim)
            else:
                member = ResNet(input_dim=number_of_dims[i],
                                hidden_dim=hidden_dim,
                                block=ResidualBlock,
                                num_classes=out_dim)
            self.models.append(member)

    def forward(self, x):
        """Run every member on its own columns of ``x``.

        Returns:
            Tensor of shape ``(batch, n_members, out_dim)`` holding raw logits.
        """
        member_outputs = [
            self.models[i](x[:, columns])
            for i, columns in enumerate(self.input_dims)
        ]
        # Stack along a new member axis right after the batch axis.
        return torch.stack(member_outputs, dim=1)

# Pytorch Dataset

In [None]:
class JaneDataset:
    """Map-style dataset pairing feature rows with their target rows."""

    def __init__(self, features, targets):
        """Store the feature and target arrays (indexed along axis 0)."""
        self.features = features
        self.targets = targets

    def __len__(self):
        """Return the number of feature rows."""
        n_rows = self.features.shape[0]
        return n_rows

    def __getitem__(self, idx):
        """Return ``{'x': features[idx], 'y': targets[idx]}`` as float tensors."""
        x = torch.tensor(self.features[idx, :], dtype=torch.float32)
        y = torch.tensor(self.targets[idx], dtype=torch.float32)
        return {'x': x, 'y': y}

class TestDataset:
    """Map-style dataset exposing feature rows only (no targets)."""

    def __init__(self, features):
        """Store the feature array (indexed along axis 0)."""
        self.features = features

    def __len__(self):
        """Return the number of feature rows."""
        n_rows = self.features.shape[0]
        return n_rows

    def __getitem__(self, idx):
        """Return ``{'x': features[idx]}`` as a float tensor."""
        row = self.features[idx, :]
        return {'x': torch.tensor(row, dtype=torch.float32)}

# Utils for training

In [None]:
################################# training inference #####################################
def train_fn(model, optimizer, scheduler,
             loss_fn, dataloader, device):
    """Train ``model`` for one pass over ``dataloader``.

    The loss of a batch is the sum of ``loss_fn`` over the ensemble members
    (axis 1 of the model output), each compared with the same targets.

    Args:
        model: Module returning logits with the ensemble members on axis 1.
        optimizer: Optimizer stepped once per batch.
        scheduler: Unused here; kept so existing callers keep working.
        loss_fn: Callable ``loss_fn(logits, targets)`` returning a scalar.
        dataloader: Iterable of dicts with ``'x'`` and ``'y'`` tensors.
        device: Device the batches are moved to.

    Returns:
        ROC AUC of the member-averaged sigmoid predictions collected during
        the pass, computed against the targets.
    """
    model.train()
    train_preds = []
    train_y = []
    for data in dataloader:
        optimizer.zero_grad()
        inputs, targets = data['x'].to(device), data['y'].to(device)
        train_y.append(targets.detach().cpu().numpy())
        outputs = model(inputs)

        # One loss term per ensemble member; targets are already on device.
        loss = sum(loss_fn(outputs[:, i], targets)
                   for i in range(outputs.shape[1]))

        loss.backward()
        optimizer.step()

        # Average the member probabilities for the AUC bookkeeping.
        train_preds.append(np.mean(outputs.sigmoid().detach().cpu().numpy(), axis=1))

    train_preds = np.concatenate(train_preds)
    train_y = np.concatenate(train_y)

    return roc_auc_score(train_y, train_preds)

In [None]:
################################# validation inference #####################################
def valid_fn(model, df, dataloader, device,
             scheduler=None, loss_fn=None):
    """Evaluate ``model`` on the validation loader.

    Args:
        model: Module returning logits with the ensemble members on axis 1.
        df: Validation frame providing ``date``, ``weight`` and ``resp`` for
            the utility score; its rows must be in the loader's order.
        dataloader: Iterable of dicts with ``'x'`` and ``'y'`` tensors.
        device: Device the batches are moved to.
        scheduler: Optional scheduler; stepped with the validation AUC.
        loss_fn: Accepted for backward compatibility. No loss is computed,
            because none of the returned values depends on it.

    Returns:
        Tuple ``(auc, score, valid_preds)``: ROC AUC of the member-averaged
        probabilities, the utility score of thresholding the first output
        column at 0.5, and the averaged probabilities themselves.
    """
    model.eval()
    valid_preds = []
    val_y = []

    # No gradients are needed for evaluation; skipping the autograd graph
    # saves memory and time.
    with torch.no_grad():
        for data in dataloader:
            inputs, targets = data['x'].to(device), data['y'].to(device)
            val_y.append(targets.cpu().numpy())

            outputs = model(inputs)
            valid_preds.append(np.mean(outputs.sigmoid().cpu().numpy(), axis=1))

    valid_preds = np.concatenate(valid_preds)
    val_y = np.concatenate(val_y)

    actual = np.where(valid_preds >= 0.5, 1, 0)
    score = utility_score_numba(df['date'].values, df['weight'].values,
                                df['resp'].values, actual[:, 0])

    auc = roc_auc_score(val_y, valid_preds)
    if scheduler is not None:
        scheduler.step(auc)

    return auc, score, valid_preds

In [None]:
################################# Model Training Phase #####################################
def run_training(folds, target, feature_cols, target_cols,
                 input_dims, number_of_dims, hidden_dims,
                 model_name, BATCH_SIZE, LEARNING_RATE, WEIGHT_DECAY,
                 EPOCHS, EARLY_STOP, EARLY_STOPPING_STEPS, fold, seed, MODEL_ROOT):
    """Train (or reload) the ensemble for one fold and return its predictions.

    If ``MODEL_ROOT/FOLD_{fold}_{seed}.pth`` exists, it is loaded and only
    evaluated. Otherwise a new ensemble is trained with Adam, BCE-with-logits
    loss and ReduceLROnPlateau on the validation AUC; the model is saved
    whenever the validation AUC improves.

    Args:
        folds: DataFrame with feature, target and ``fold`` columns.
        target: Names of the binary label columns.
        feature_cols: Names of the input feature columns.
        target_cols: Names of the return columns; only their count is used
            (as the number of model outputs).
        input_dims, number_of_dims, hidden_dims: Ensemble structure.
        model_name: Architecture name passed to ``Ensemble_MoaModel``.
        BATCH_SIZE, LEARNING_RATE, WEIGHT_DECAY, EPOCHS: Training settings.
        EARLY_STOP: Enable early stopping.
        EARLY_STOPPING_STEPS: Epochs without improvement before stopping.
        fold: Fold id used as the validation set.
        seed: Random seed; also part of the checkpoint file name.
        MODEL_ROOT: Directory for checkpoints.

    Returns:
        Tuple ``(oof, valid_df)``: validation predictions of the best epoch
        (or of the reloaded checkpoint) and the validation frame.

    Raises:
        RuntimeError: If training finishes without any epoch improving on the
            initial score, so that no predictions and no checkpoint exist.
    """
    seed_everything(seed)

    # NOTE(review): training uses every row, including the validation fold,
    # so validation metrics are optimistic. Restrict to
    # folds[folds['fold'] != fold] for an honest hold-out; confirm intent.
    train_df = folds.reset_index(drop=True)
    valid_df = folds[folds['fold'] == fold].reset_index(drop=True)

    x_train, y_train = train_df[feature_cols].values, train_df[target].values
    x_valid, y_valid = valid_df[feature_cols].values, valid_df[target].values

    train_dataset = JaneDataset(x_train, y_train)
    valid_dataset = JaneDataset(x_valid, y_valid)
    trainloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    validloader = DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False)

    model_path = os.path.join(MODEL_ROOT, f"FOLD_{fold}_{seed}.pth")

    if os.path.isfile(model_path):
        # map_location lets a checkpoint saved on GPU load on a CPU-only host.
        # NOTE(review): the checkpoint is a whole pickled module; recent
        # PyTorch releases may need weights_only=False here. Confirm against
        # the installed version.
        model_new = torch.load(model_path, map_location=DEVICE)
        model_new.to(DEVICE)
        model_new.eval()
        _, _, oof = valid_fn(model_new, valid_df, validloader, DEVICE, scheduler=None, loss_fn=None)
        return oof, valid_df

    model_new = Ensemble_MoaModel(input_dims, number_of_dims, hidden_dims, model_name, out_dim=len(target_cols))
    print(model_new)
    model_new.to(DEVICE)

    optimizer = torch.optim.Adam(model_new.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer, mode='max', factor=0.1, patience=3, verbose=True)
    loss_fn = nn.BCEWithLogitsLoss()

    best_auc = -np.inf
    early_step = 0
    oof = None

    for epoch in range(EPOCHS):
        train_auc = train_fn(model_new, optimizer, scheduler,
                             loss_fn, trainloader, DEVICE)
        valid_auc, valid_score, valid_preds = valid_fn(model_new, valid_df, validloader, DEVICE, scheduler=scheduler, loss_fn=loss_fn)
        print(f"FOLD: {fold}, EPOCH: {epoch}, train_auc: {train_auc}, valid_auc: {valid_auc},  valid_score: {valid_score}")

        if valid_auc > best_auc:
            best_auc = valid_auc
            oof = valid_preds
            early_step = 0
            torch.save(model_new, model_path)
        elif EARLY_STOP:
            early_step += 1
            if early_step >= EARLY_STOPPING_STEPS:
                break

        gc.collect()

    if oof is None:
        raise RuntimeError(
            f"No epoch produced a usable validation AUC for fold {fold} "
            f"(EPOCHS={EPOCHS}); nothing was saved to {model_path}.")

    return oof, valid_df

# Running Phase

In [None]:
######################## MAIN ####################################
# Training hyper-parameters
EPOCHS = 50                  # maximum number of training epochs
BATCH_SIZE = 5000            # rows per mini-batch (training and validation)
LEARNING_RATE = 1e-3         # Adam learning rate
WEIGHT_DECAY = 1e-5          # Adam weight decay
EARLY_STOPPING_STEPS = 10    # epochs without AUC improvement before stopping
EARLY_STOP = True            # enable early stopping

# Validation-split switches handed to fold_selection
Date = 475                   # time-based split: rows with date > Date validate
Debug = False                # random 10% hold-out when True
Random = True                # random 10% hold-out when True
kf = 0                       # number of GroupKFold splits; 0 disables k-fold

In [None]:
# Return columns of the training data; a binary label is derived from each.
target_cols = ['resp'] + [f'resp_{i}' for i in range(1, 5)]
# Columns that are neither model inputs nor targets.
useless_cols = ['date', 'weight', 'ts_id']
# Names of the binary label columns, aligned one-to-one with target_cols.
target = ['action'] + [f'action_{i}' for i in range(1, 5)]

In [None]:
# True: run the competition inference loop with a pre-trained checkpoint.
# False: prepare the data and train the ensemble from scratch.
inference = True
if inference:
    import pickle
    from tqdm import tqdm

    # NOTE: torch.load and pickle.load can execute arbitrary code from the
    # file; only load artefacts you produced yourself.
    # map_location lets a GPU-saved checkpoint load on a CPU-only host.
    model = torch.load('../input/prerained-model-with-50-epoch/ResNet_5/FOLD_1_10.pth',
                       map_location=DEVICE)
    model.to(DEVICE)
    # Inference feeds one row at a time: BatchNorm must use its running
    # statistics and dropout must be off.
    model.eval()
    f_mean = np.load('../input/prerained-model-with-50-epoch/fmean.npy')
    with open('../input/prerained-model-with-50-epoch/feature_cols.pkl', 'rb') as handle:
        feature_cols = pickle.load(handle)

    import janestreet
    env = janestreet.make_env()

    for (test_df, pred_df) in tqdm(env.iter_test()):

        if test_df['weight'].item() > 0:

            x_tst = test_df.loc[:, feature_cols].values

            if np.isnan(x_tst[:, :].sum()):
                # Replace NaNs (all columns but the first) by the training means.
                x_tst[:, 1:] = np.nan_to_num(x_tst[:, 1:]) + np.isnan(x_tst[:, 1:]) * f_mean

            with torch.no_grad():
                logits = model(torch.tensor(x_tst, dtype=torch.float).to(DEVICE))
            # Average over ensemble members, then over the target columns.
            pred = np.mean(logits.sigmoid().cpu().numpy(), axis=1)
            pred = np.mean(pred, axis=1)
            pred_df.action = np.where(pred >= 0.5, 1, 0).astype(int)

        else:
            # Zero-weight rows do not contribute to the score; skip the model.
            pred_df.action = 0

        env.predict(pred_df)

else:
    train = dt.fread('../input/jane-street-market-prediction/train.csv').to_pandas()

    feature_cols = list(train.drop(useless_cols + target_cols, axis=1))

    for action, col in zip(target, target_cols):
        train[action] = np.where(train[col]>0, 1, 0)

    # Drop features with more than 5% missing values. Build a new list:
    # removing from the list being iterated skips the element after each
    # removal, so some sparse columns used to slip through.
    null_fraction = train[feature_cols].isnull().mean()
    feature_cols = [col for col in feature_cols if null_fraction[col] <= 0.05]

    with open('feature_cols.pkl', 'wb') as handle:
        pickle.dump(feature_cols, handle, protocol=pickle.HIGHEST_PROTOCOL)

    train = train[train['weight'] != 0]
    train = train.loc[train['date']>85]
    train = train.reset_index(drop=True)

    train.fillna(train.mean(),inplace=True)
    # Means of every feature but the first; used to fill NaNs at inference.
    f_mean = np.mean(train[feature_cols[1:]].values,axis=0)
    np.save('fmean.npy', f_mean)

    list_number_of_models=[5]
    model_names = ["ResNet"] #ResNet
    SEED = 10

    num_features=len(feature_cols)
    num_targets=len(target_cols)

    weights_mod = [0.5, 0.5]

    oof_ensemble = np.zeros((int(train.shape[0]/5), len(target_cols)))
    for m, model_name in enumerate(model_names):
        for w, number_of_models in enumerate(list_number_of_models):
            MODEL_ROOT = "%s_%s" % (model_name, number_of_models)
            fold_name = 'fold.pkl'

            if not os.path.exists(MODEL_ROOT):
                os.makedirs(MODEL_ROOT)

            train = fold_selection(train, kf=kf, random=Random, date=Date, Debug=Debug)
            hidden_dims, number_of_dims, input_dims = ensemble_structure(number_of_models, num_features, MODEL_ROOT)

            # A single hold-out (fold 1) when k-fold is disabled, otherwise
            # one run per fold 1..kf.
            fold_ids = [1] if kf == 0 else range(1, kf + 1)
            for fold in fold_ids:
                oof, val_df = run_training(folds=train, target=target, feature_cols=feature_cols,
                                           target_cols=target_cols, input_dims=input_dims,
                                           number_of_dims=number_of_dims, hidden_dims=hidden_dims,
                                           model_name=model_name, BATCH_SIZE=BATCH_SIZE, LEARNING_RATE=LEARNING_RATE,
                                           WEIGHT_DECAY=WEIGHT_DECAY, EPOCHS=EPOCHS, EARLY_STOP=EARLY_STOP,
                                           EARLY_STOPPING_STEPS=EARLY_STOPPING_STEPS, fold=fold, seed=SEED,
                                           MODEL_ROOT=MODEL_ROOT)

            # Threshold sweep 0.490 .. 0.519 on the first output column.
            # NOTE(review): with kf > 0 only the last fold's predictions are
            # scored here; confirm whether a per-fold sweep was intended.
            for th in range(490, 520):
                action = np.where(oof>th/1000, 1, 0)
                print(model_name, th, 'score:', utility_score_numba(date=val_df['date'].values, weight=val_df['weight'].values,
                                          resp=val_df['resp'].values, action=action[:, 0]))