# Import

In [None]:
# Show the status of the NVIDIA GPU(s) visible to this kernel.
!nvidia-smi

In [None]:
import pandas as pd
import numpy as np
import cupy as cp
import os
import gc
import time
import torch
import torchvision
from torch import nn
import torch.nn.functional as F
from tqdm.notebook import tqdm
from torch.utils.data import DataLoader
print(torch.__version__)
import matplotlib.pyplot as plt
from numba import njit
%matplotlib inline
from janest_model import MLPNet , CustomDataset, train_model
from utils import PurgedGroupTimeSeriesSplit

In [None]:
#%%writefile test.py
#print('hello world!')

## Parameter setting

In [None]:
# ---- Run configuration ----
# NOTE(review): TRAINING, USE_FINETUNE and NUM_LYR are not read by any cell
# visible in this notebook — confirm whether they are still needed.
TRAINING = True
USE_FINETUNE = True
FOLDS = 5                 # n_splits handed to PurgedGroupTimeSeriesSplit
GROUP_GAP = 20            # group_gap handed to PurgedGroupTimeSeriesSplit
SEED = 66
INPUTPATH = '../../input'
NUM_EPOCH = 500           # epoch budget handed to train_model
BATCH_SIZE = 16384        # used by the DataLoaders and by the inference loop
PATIANCE = 15             # handed to train_model (name kept as-is: other cells reference it)
LR = 0.001                # Adam learning rate
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(DEVICE)
MDL_PATH  = '../models'
MDL_NAME = 'mlp'
NUM_LYR = 5
VER = 'cv_base'

# SEED was previously defined but never applied, so weight initialisation (and
# any other random draw) differed between runs. Seed NumPy and PyTorch here so
# that Restart & Run All is repeatable.
np.random.seed(SEED)
torch.manual_seed(SEED)

## Import Data 

In [None]:
%%time
# Training data comes from parquet; the example test rows and the sample
# submission are the two CSV files read from the same input directory.
train = pd.read_parquet(f'{INPUTPATH}/train.parquet')
test_df, pred_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        f'{INPUTPATH}/example_test.csv',
        f'{INPUTPATH}/example_sample_submission.csv',
    )
)

In [None]:
# Preview the first rows of the example test file.
test_df.head()

In [None]:
# Preview the first rows of the raw training frame (before any filtering).
train.head()

# Features

In [None]:
%%time
# Keep only rows with date strictly greater than 85.
train = train.query('date > 85').reset_index(drop = True)
print(train.shape)

# Impute every missing value with its column mean. The means are taken
# before the zero-weight rows are removed on the next line.
train = train.fillna(train.mean())
train = train.query('weight > 0').reset_index(drop = True)

# action = 1 only when all five response horizons exceed 1e-5.
# NOTE(review): the training targets `y` below use 1e-6 instead — confirm
# whether the two thresholds are meant to differ.
train['action'] = (
    (train[['resp_1', 'resp_2', 'resp_3', 'resp_4', 'resp']] > 0.00001)
    .all(axis=1)
    .astype('int')
)

# Model inputs: every column whose name contains 'feature'.
features = train.columns[train.columns.str.contains('feature', regex=False)].tolist()

# Target order fixes the column order of `y` (and of the model outputs).
resp_cols = ['resp_1', 'resp_2', 'resp_3', 'resp', 'resp_4']

X = train[features].to_numpy()
# One binary target per response column: 1 where the response exceeds 1e-6.
y = (train[resp_cols].to_numpy() > 0.000001).astype('int')

# Feature means used for NaN imputation at inference time are loaded from disk.
# They were previously derived with:
#   f_mean = np.mean(train[features[1:]].values, axis=0)
f_mean = np.load(f'{INPUTPATH}/f_mean.npy')

In [None]:
#%%time
#np.save( f'{INPUTPATH}/f_mean.npy',f_mean)

In [None]:
# Width of the feature matrix, then width of the target matrix (one per line).
print(X.shape[-1], y.shape[-1], sep='\n')

# Training

## Data Split

In [None]:
# Cross-validation splitter configured with FOLDS splits and a gap of GROUP_GAP.
# NOTE(review): the exact meaning of group_gap is defined in utils.PurgedGroupTimeSeriesSplit;
# presumably it is the number of groups skipped between train and validation — confirm.
gkf =  PurgedGroupTimeSeriesSplit(n_splits = FOLDS,  group_gap = GROUP_GAP)

## MLP

### CV 

In [None]:
# Network, loss and optimiser are created once, before the fold loop.
# NOTE(review): because they are not re-created per fold, every fold resumes
# from the weights and Adam state left by the previous one — confirm that this
# warm start is intended.
model = MLPNet(input_size = X.shape[-1], output_size = y.shape[-1]).to(DEVICE)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(
    model.parameters(), lr=LR, weight_decay=1e-5)

# Create the history directory up front so that a missing folder cannot make
# the final to_csv fail after all folds have already been trained.
hist_dir = f'{MDL_PATH}/{MDL_NAME}_{VER}'
os.makedirs(hist_dir, exist_ok=True)

sts = time.time()
learn_hist_list = []
save_path_list = []
# The split receives train['date'] as the grouping key for the folds.
for fold, (tr, vl) in enumerate(gkf.split(train['action'].values, train['action'].values, train['date'].values)):
    print('Fold : {}'.format(fold+1))

    X_tr, X_val = X[tr], X[vl]
    y_tr, y_val = y[tr], y[vl]
    trn_dat = CustomDataset(X_tr, y_tr)
    val_dat = CustomDataset(X_val, y_val)
    # Both loaders iterate in the original row order (shuffle=False).
    trn_loader = DataLoader(trn_dat , batch_size=BATCH_SIZE, shuffle=False)
    val_loader = DataLoader(val_dat , batch_size=BATCH_SIZE, shuffle=False)
    loaders = {'train':trn_loader, 'valid': val_loader}
    trained_model, learn_hist, save_path =\
        train_model(model, criterion, optimizer, loaders, DEVICE, NUM_EPOCH, PATIANCE, \
                MDL_PATH, MDL_NAME, VER, fold+1)

    # Learning curve: validation loss on the left axis, training loss on the right.
    fig, ax1 = plt.subplots()
    ax1.plot(learn_hist.epoch, learn_hist.valid_bce_loss, color = 'blue')
    ax2 = ax1.twinx()
    ax2.plot(learn_hist.epoch, learn_hist.train_bce_loss, color = 'red')
    ax1.set_ylabel('Valid BCE Loss')
    ax2.set_ylabel('Train BCE Loss')
    # The x label must go on ax1: the twin axes created by twinx() has an
    # invisible x-axis, so a label set there is never drawn.
    ax1.set_xlabel('Epochs')
    ax1.set_title('Learning Curve')
    plt.show()
    plt.close(fig)  # release the figure; one is created per fold

    learn_hist['Fold'] = fold+1
    learn_hist_list.append(learn_hist)
    save_path_list.append(save_path)

# Stack the per-fold histories into one table and persist it.
all_hist = pd.concat(learn_hist_list, axis=0).reset_index(drop=True)
all_hist.to_csv(f'{hist_dir}/{MDL_NAME}_learning_history.csv', index=False)
ed = time.time()
print('Training process takes {:.2f} min.'.format((ed-sts)/60))

In [None]:
@njit(fastmath = True)
def utility_score_numba(date, weight, resp, action):
    """Compute the Jane Street utility score for a set of trade decisions.

    Parameters
    ----------
    date : 1-D array of non-negative integers (required by np.bincount);
        the trading day of each row.
    weight, resp, action : 1-D arrays aligned with ``date``; their
        element-wise product is the per-row contribution that is summed per day.

    Returns
    -------
    float
        ``clip(t, 0, 6) * sum(p)`` where ``p`` holds the daily sums and
        ``t = sum(p) / sqrt(sum(p**2)) * sqrt(250 / n_days)``.
        Returns 0.0 when every daily sum is zero (e.g. no trade was taken).
    """
    Pi = np.bincount(date, weight * resp * action)
    total = np.sum(Pi)
    norm = np.sqrt(np.sum(Pi ** 2))
    # With all-zero daily sums the ratio below is 0/0; numba's default error
    # model raises ZeroDivisionError there. Such a run simply earns nothing.
    if norm == 0.0:
        return 0.0
    # Count the days actually present. len(Pi) is max(date) + 1, which also
    # counts empty bins for days that were filtered out of the data and would
    # therefore shrink t.
    n_days = len(np.unique(date))
    t = total / norm * np.sqrt(250 / n_days)
    u = min(max(t, 0), 6) * total
    return u

#https://www.kaggle.com/gogo827jz/jane-street-super-fast-utility-score-function

In [None]:
# Release cached, currently unused GPU memory held by PyTorch's CUDA allocator.
torch.cuda.empty_cache()

In [None]:
th = 0
# Ceil, not round: rounding drops the final partial batch whenever
# len(X) % BATCH_SIZE is less than half a batch, leaving pred_all shorter
# than the date/weight/resp arrays it is scored against.
loop = int(np.ceil(len(X) / BATCH_SIZE))
pred_batches = []

# Inference only: switch dropout / batch-norm style layers to evaluation
# behaviour and skip autograd bookkeeping.
model.eval()
with torch.no_grad():
    for n in tqdm(range(loop)):
        # Basic slicing returns a view, so the imputation below writes through to X.
        x_tt = X[BATCH_SIZE*n:BATCH_SIZE*(n+1),:]
        if np.isnan(x_tt[:, 1:].sum()):
            # Replace NaNs in every column except the first with the stored means.
            x_tt[:, 1:] = np.nan_to_num(x_tt[:, 1:]) + np.isnan(x_tt[:, 1:]) * f_mean
        X_test = torch.FloatTensor(x_tt).to(DEVICE)
        pred = model(X_test).cpu().numpy()
        pred_batches.append(pred)

# Concatenate once at the end instead of re-stacking the growing array on
# every iteration (which copies all earlier predictions each time).
pred_all = np.concatenate(pred_batches, axis=0) if pred_batches else np.array([])

In [None]:
# Shape of the last inference batch (X_test is reassigned on every loop iteration above).
X_test.shape

In [None]:
# Reference ("oracle") action: 1 exactly when the realised resp is positive.
# This replaces the 'action' column built during feature preparation, which
# required all five resp horizons to exceed 1e-5.
train['action'] = (train['resp'] > 0).astype('int')

# Plain NumPy views of the columns consumed by utility_score_numba.
date, weight, resp, action_ans = (
    train[column].values for column in ('date', 'weight', 'resp', 'action')
)

In [None]:
# Trade whenever the first model output reaches the threshold.
# NOTE(review): output column 0 lines up with 'resp_1' in resp_cols, while the
# utility is computed on 'resp' (column 3) — confirm the intended column.
th=0.5
action = (pred_all[:, 0] >= th).astype(int)
utility_score_numba(date, weight, resp, action)

In [None]:
# Reference score: utility obtained with the oracle actions (resp > 0) on the same rows.
utility_score_numba(date, weight, resp, action_ans)

In [None]:
# Force a garbage-collection pass; the cell output is the return value of gc.collect().
gc.collect()

## Predict Test 

In [None]:
# Show the model directory derived from MDL_PATH, MDL_NAME and VER.
# NOTE(review): with the settings above this is '../models/mlp_cv_base', whereas the
# upload cells below use the hard-coded '../models/mlp_base' — confirm which is intended.
print(f'{MDL_PATH}/{MDL_NAME}_{VER}')

In [None]:
#!kaggle datasets init -p ../models/autoencoder_test

In [None]:
%%writefile ../models/mlp_base/dataset-metadata.json
{
    "title": "Jane-Street",
    "id": "shinsei66/Jane-Street",
    "subtitle": "",
    "description": "",
    "isPrivate": true,
    "licenses": [
        {
            "name": "unknown" 
        }
    ],
    "keywords": [],
    "collaborators": [],
    "data": [
        {
            "description": null,
            "name": "autoencoder_99.pth",
            "totalBytes": 848,
            "columns": []
        },
        {
            "description": null,
            "name": "autoencoder_254.pth",
            "totalBytes": 856,
            "columns": []
        },
        {
            "description": null,
            "name": "mlp_base_984.pth",
            "totalBytes": 1316,
            "columns": []
        }
    ]
}

In [None]:
#!kaggle datasets create -p  ../models/autoencoder_early_stopping

In [None]:
# Report the on-disk size of the checkpoint file.
# NOTE(review): the dataset metadata written above lists 'mlp_base_984.pth', not 'mlp_984.pth' — confirm the file name.
!du ../models/mlp_base/mlp_984.pth

In [None]:
# Upload the contents of ../models/mlp_base as a new version of the Kaggle dataset, with the given version message.
!kaggle datasets version -p  ../models/mlp_base -m "mlp base 984 epoch"