In [None]:
# Unpack the RAPIDS 0.15.0 archive found under ../input/rapids into
# /opt/conda/envs and make its Python packages importable from this kernel.
import sys
!cp ../input/rapids/rapids.0.15.0 /opt/conda/envs/rapids.tar.gz
# tar's verbose file listing is redirected to /dev/null to keep the output short.
!cd /opt/conda/envs/ && tar -xzvf rapids.tar.gz > /dev/null
# Each statement prepends, so the final search order is:
# lib, lib/python3.7, lib/python3.7/site-packages, then the original sys.path.
sys.path = ["/opt/conda/envs/rapids/lib/python3.7/site-packages"] + sys.path
sys.path = ["/opt/conda/envs/rapids/lib/python3.7"] + sys.path
sys.path = ["/opt/conda/envs/rapids/lib"] + sys.path 
# Copy the environment's libxgboost.so into the base conda lib directory.
!cp /opt/conda/envs/rapids/lib/libxgboost.so /opt/conda/lib/
# NOTE(review): the cudf/cupy imports in the import cell are commented out —
# confirm this environment setup is still needed.

## <font size='4' color='blue'><a> Imports </a></font>

In [None]:
# import cudf
import torch
import joblib
import janestreet
import numpy as np
# import cupy as cp
from time import time
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from contextlib import contextmanager
from sklearn.metrics import roc_auc_score
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import QuantileTransformer
# from cupyx.scipy.special import erfinv as cupy_erfinv

import gc
import datatable as dt


In [None]:
# Global training hyper-parameters.
EPOCHS, EARLY = 10, 4
LEARNING_RATE, WEIGHT_DECAY = 1e-3, 1e-5

# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'

In [None]:
import random
import os

def seed_everything(seed=42):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and CUDA), exports
    ``PYTHONHASHSEED`` and asks cuDNN for deterministic kernels.

    Args:
        seed: integer seed applied to all generators.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)
    torch.backends.cudnn.deterministic = True


seed_everything(seed=42)

## <font size='4' color='blue'><a> Read Data </a></font>

In [None]:
@contextmanager
def timer(name):
    """Context manager that prints how long the enclosed block took.

    Args:
        name: label shown in the printed message.

    The message is printed even when the enclosed block raises; the exception
    then propagates unchanged.
    """
    # Imported locally: a later cell runs `import time`, which rebinds the
    # global name `time` to the module and would make a bare `time()` call
    # here fail with "'module' object is not callable".
    from time import perf_counter

    t0 = perf_counter()
    try:
        yield
    finally:
        print(f'[{name}] done in {perf_counter() - t0:.2f} s')


In [None]:
%%time
# Read the competition CSV files with datatable's fread.
train_dt = dt.fread('../input/jane-street-market-prediction/train.csv')
test_dt = dt.fread("../input/jane-street-market-prediction/example_test.csv")


In [None]:
%%time
import pandas as pd
# Convert the datatable frames to pandas DataFrames; the rest of the notebook
# works on `train` and `test_`.
train = train_dt.to_pandas()
test_ = test_dt.to_pandas()


In [None]:
# Display the example test frame.
test_

In [None]:
# Display the training rows whose `resp` is strictly positive.
train[train["resp"]>0]

## <font size='4' color='blue'><a> Dataset </a></font>

In [None]:
class janeDataset(Dataset):
    """Row-wise torch ``Dataset`` over selected feature columns of a DataFrame.

    Args:
        df: frame containing every column named in ``num_features`` and
            ``cat_features``.
        num_features: names of the numeric feature columns.
        cat_features: names of the categorical feature columns; they are
            appended after the numeric ones.
        label: per-row targets (pandas Series or any array-like). Only read
            when ``mode == "train"``.
        mode: ``"train"`` makes items ``(x, y)`` pairs; any other value makes
            items ``x`` only.

    Raises:
        ValueError: if ``mode == "train"`` and ``label`` is ``None``.
    """

    def __init__(self,df,num_features, cat_features,label,mode="train"):
        self.df = df[list(num_features) + list(cat_features)].values
        self.mode = mode
        if self.mode == 'train':
            if label is None:
                raise ValueError("label is required when mode == 'train'")
            # np.asarray accepts a Series as well as plain arrays/lists.
            self.label = np.asarray(label)

    def __len__(self):
        return len(self.df)

    def __getitem__(self,idx):
        # torch.tensor always copies, so the returned tensors never alias the
        # underlying NumPy buffers.
        x = torch.tensor(self.df[idx], dtype=torch.float32)
        if self.mode=="train":
            y = torch.tensor(self.label[idx], dtype=torch.float32)
            return x, y  # train
        return x  # test

## <font size='4' color='blue'><a> Model </a></font>

In [None]:
class TabularNN(nn.Module):
    """Three-layer MLP producing one logit per input row.

    Each stage is BatchNorm -> Dropout -> weight-normalised Linear, with PReLU
    after the first two Linear layers.

    Args:
        cfg: object exposing ``num_features`` (list), ``hidden_size`` (int),
            ``dropout`` (float) and, optionally, ``cat_features`` (list).
    """

    def __init__(self, cfg):
        super().__init__()

        # The dataset feeds num_features + cat_features side by side, so the
        # input width has to count both lists (cat_features may be absent).
        n_inputs = len(cfg.num_features) + len(getattr(cfg, "cat_features", []))

        self.mlp = nn.Sequential(
            nn.BatchNorm1d(n_inputs),
            nn.Dropout(cfg.dropout),
            nn.utils.weight_norm(nn.Linear(n_inputs, cfg.hidden_size)),
            nn.PReLU(),

            nn.BatchNorm1d(cfg.hidden_size),
            nn.Dropout(cfg.dropout),
            nn.utils.weight_norm(nn.Linear(cfg.hidden_size, cfg.hidden_size)),
            nn.PReLU(),

            nn.BatchNorm1d(cfg.hidden_size),
            nn.Dropout(cfg.dropout),
            nn.utils.weight_norm(nn.Linear(cfg.hidden_size, 1)),
        )

    def forward(self,x):
        """Return raw logits of shape ``(batch, 1)``; no sigmoid is applied."""
        y = self.mlp(x)
        return y


### <font size='4' ><a> Preprocess </a></font>

In [None]:
# Keep only rows with a non-zero weight. The explicit .copy() makes `train` an
# independent frame, so adding the target column below does not raise
# SettingWithCopyWarning.
train = train.loc[train['weight'] != 0].copy()

# Binary target: 1 when the response is strictly positive, else 0.
target_col = "action"
train['action'] = (train['resp'] > 0).astype('int')
train.shape

In [None]:
from sklearn.preprocessing import StandardScaler

# Fitted per-column scalers kept in memory so that inference (mode=0) does not
# re-read every pickle from disk on each call.
_SCALER_CACHE = {}


def do_preprocess(train,mode=1):
    """Impute and standardise ``feature_1`` .. ``feature_129`` and ``weight``.

    Args:
        train: DataFrame containing the columns above. It is modified in place
            and also returned.
        mode: truthy -> fit one ``StandardScaler`` per column and save it as
            ``<column>.pkl`` in the working directory; falsy -> load those
            scalers and apply them.

    Returns:
        The same DataFrame, with missing values in the listed columns replaced
        by the training mean, the columns standardised, and any NaN left
        anywhere in the frame set to -1.
    """
    features = [f'feature_{i}' for i in range(1,130)]+['weight']

    for col in features:
        if mode:
            # Plain assignment instead of a chained `fillna(inplace=True)`,
            # which may act on a temporary copy.
            train[col] = train[col].fillna(train[col].mean())
            transformer = StandardScaler()
            train[[col]] = transformer.fit_transform(train[[col]])
            joblib.dump(transformer,f'{col}.pkl')
            _SCALER_CACHE[col] = transformer
        else:
            transformer = _SCALER_CACHE.get(col)
            if transformer is None:
                # joblib.load unpickles: only load files written by this notebook.
                transformer = joblib.load(f'{col}.pkl')
                _SCALER_CACHE[col] = transformer
            # Mean-filling does not move the mean, so the scaler's mean_ is the
            # training mean of the observed values: impute with it so that
            # inference matches what the model saw during training.
            train[col] = train[col].fillna(transformer.mean_[0])
            train[[col]] = transformer.transform(train[[col]])

    # Catch-all for NaNs that survive (all-NaN columns, columns outside `features`).
    train.fillna(-1,inplace=True)

    return train



# Preprocess the training frame; the default mode=1 takes the fit-and-save
# branch of do_preprocess.
train = do_preprocess(train)


In [None]:
# Model inputs: every column of the example test frame except identifiers and
# the target. The competition files name the day index "date"; the original
# "data" spelling is kept in the list for backward compatibility.
# NOTE(review): this drops "date" from the inputs (one fewer input column) —
# confirm it was not meant to be a feature.
num_features = [c for c in test_.columns if c not in ["date","data","ts_id","action"]]

In [None]:
class CFG:
    """Hyper-parameters and column lists shared by the model and training code."""

    # Optimisation
    lr = 1e-4
    weight_decay = 1e-5
    batch_size = 128
    epochs = 1
    max_grad_norm = 1000
    gradient_accumulation_steps = 1

    # Network shape
    hidden_size = 512
    dropout = 0.3

    # Columns
    num_features = num_features
    cat_features = []
    target_col = target_col


In [None]:
def train_fn(train_loader, model, optimizer, scheduler, device):
    """Run one training epoch.

    Args:
        train_loader: iterable of ``(features, targets)`` batches.
        model: network returning one logit per row.
        optimizer: optimizer stepped after every batch.
        scheduler: accepted for interface compatibility; not used here.
        device: device the batches are moved to.

    Returns:
        Tuple ``(mean batch loss, sigmoid predictions as one NumPy array)``.
    """
    model.train()
    criterion = nn.BCEWithLogitsLoss()
    running_loss = 0
    batch_probs = []

    for features, targets in train_loader:
        features = features.to(device)
        targets = targets.to(device)

        logits = model(features)
        # Targets arrive flat; the loss needs them shaped like the logits.
        batch_loss = criterion(logits, targets.view(-1, 1))
        batch_loss.backward()
        running_loss += batch_loss.item()

        optimizer.step()
        optimizer.zero_grad()

        batch_probs.append(logits.sigmoid().detach().cpu().numpy())

    all_probs = np.concatenate(batch_probs)
    return running_loss / len(train_loader), all_probs

def valid_fn(valid_loader, model, device):
    """Evaluate the model on a validation loader without updating it.

    Args:
        valid_loader: iterable of ``(features, targets)`` batches.
        model: network returning one logit per row.
        device: device the batches are moved to.

    Returns:
        Tuple ``(mean batch loss, sigmoid predictions as one NumPy array)``.
    """
    model.eval()
    criterion = nn.BCEWithLogitsLoss()
    running_loss = 0
    batch_probs = []

    for features, targets in valid_loader:
        features = features.to(device)
        targets = targets.to(device)
        with torch.no_grad():
            logits = model(features)
        running_loss += criterion(logits, targets.view(-1, 1)).item()
        batch_probs.append(logits.sigmoid().detach().cpu().numpy())

    all_probs = np.concatenate(batch_probs)
    return running_loss / len(valid_loader), all_probs
    
    
    
def inference_fn(test_loader, model, device):
    """Predict probabilities for every batch of an unlabeled loader.

    Args:
        test_loader: iterable of feature batches (no targets).
        model: network returning one logit per row.
        device: device the batches are moved to.

    Returns:
        Sigmoid predictions for all batches, concatenated into one NumPy array.
    """
    model.eval()
    batch_probs = []

    for features in test_loader:
        features = features.to(device)
        with torch.no_grad():
            logits = model(features)
        batch_probs.append(logits.sigmoid().detach().cpu().numpy())

    return np.concatenate(batch_probs)

## <font size='4' color='blue'><a> Training </a></font>

In [None]:
import time
def run_single_nn(cfg, train, device, fold_num, seed=42, shallow=False):
    """Train one model on every fold except ``fold_num`` and validate on that fold.

    Args:
        cfg: configuration object (column lists, target column, batch size,
            learning rate, weight decay, epochs).
        train: DataFrame with the feature columns, the target column and an
            integer ``"fold"`` column.
        device: device used for training.
        fold_num: fold held out for validation.
        seed: seed passed to ``seed_everything``.
        shallow: accepted for interface compatibility; not used here.

    Returns:
        Array of shape ``(len(train), 1)`` holding the best epoch's validation
        predictions at the rows of the held-out fold and 0 everywhere else.
        The best weights are saved to ``fold{fold_num}_seed{seed}.pth``.
    """
    seed_everything(seed)

    # Positional indices: valid for .iloc and for the NumPy `oof` array
    # whatever the frame's index labels are.
    is_valid = (train["fold"] == fold_num).to_numpy()
    trn_idx = np.flatnonzero(~is_valid)
    val_idx = np.flatnonzero(is_valid)

    train_fold = train.iloc[trn_idx].reset_index(drop=True)
    valid_fold = train.iloc[val_idx].reset_index(drop=True)

    train_target = train[cfg.target_col].iloc[trn_idx]
    valid_target = train[cfg.target_col].iloc[val_idx]

    train_dataset = janeDataset(train_fold,cfg.num_features, cfg.cat_features,train_target,mode="train")
    valid_dataset = janeDataset(valid_fold, cfg.num_features, cfg.cat_features, valid_target, mode="train")

    train_loader = DataLoader(train_dataset, batch_size=cfg.batch_size, shuffle=True,
                              num_workers=4, pin_memory=True, drop_last=True)
    valid_loader = DataLoader(valid_dataset, batch_size=cfg.batch_size, shuffle=False,
                              num_workers=4, pin_memory=True, drop_last=False)

    model = TabularNN(cfg)
    model.to(device)
    optimizer = optim.Adam(model.parameters(),lr=cfg.lr, weight_decay=cfg.weight_decay)
    scheduler = None
    best_loss = np.inf

    # Allocated up front so the function still returns an array when the
    # validation loss never improves (e.g. NaN loss) or cfg.epochs == 0.
    oof = np.zeros((len(train),1))

    for epoch in range(cfg.epochs):
        start = time.time()
        # train_fn returns (loss, predictions); only the loss is kept.
        train_loss, _ = train_fn(train_loader, model, optimizer, scheduler, device)
        valid_loss, val_preds = valid_fn(valid_loader, model,device)

        if valid_loss<best_loss:
            print(f'epoch {epoch} best loss : {valid_loss} ...time| {time.time()-start}')
            best_loss = valid_loss
            oof[val_idx] = val_preds
            torch.save(model.state_dict(),f'fold{fold_num}_seed{seed}.pth')
        else:
            print(f'epoch {epoch} loss : {valid_loss} ...time| {time.time()-start}')

    torch.cuda.empty_cache()
    return oof

In [None]:
def run_kfold_nn(cfg, train, device,n_folds=5, seed=42,):
    """Train folds ``0 .. n_folds-1`` and sum their out-of-fold predictions.

    Each call to ``run_single_nn`` fills only the rows of its own validation
    fold, so the sum holds one prediction per row of every fold that was run
    and 0 for rows of folds that were not.

    Args:
        cfg: configuration object forwarded to ``run_single_nn``.
        train: DataFrame with a ``"fold"`` column.
        device: device used for training.
        n_folds: number of folds to train, starting at fold 0.
        seed: seed forwarded to ``run_single_nn``.

    Returns:
        Array of shape ``(len(train), 1)``.
    """
    oof = np.zeros((len(train),1))
    for _fold in range(n_folds):
        print('fold',_fold)
        oof += run_single_nn(cfg, train, device, fold_num=_fold, seed=seed)

    return oof

In [None]:
from sklearn.model_selection import StratifiedKFold, GroupKFold, KFold

def separate_for_folds(folds, n_split=5,target_col=None):
    """Add an integer ``"fold"`` column using a stratified K-fold split.

    Args:
        folds: DataFrame to split; its index is reset before splitting.
        n_split: number of folds.
        target_col: column whose values the split is stratified on.

    Returns:
        A re-indexed copy of ``folds`` with the new ``"fold"`` column.
    """
    folds = folds.reset_index(drop=True)
    splitter = StratifiedKFold(n_splits=n_split)
    splits = splitter.split(folds, folds[target_col])
    for fold_id, (_, holdout_rows) in enumerate(splits):
        # Every row is tagged with the fold in which it is held out.
        folds.loc[holdout_rows, "fold"] = int(fold_id)
    folds["fold"] = folds["fold"].astype(int)
    return folds


In [None]:
# Seeds to train and ensemble over. Defined unconditionally so that SEED exists
# even when `train` already carries a "fold" column and the branch is skipped.
SEED = [0]

if "fold" not in train.columns:
    train = separate_for_folds(train, n_split=2,target_col=target_col)
gc.collect()

In [None]:
# Average the out-of-fold predictions over every seed in SEED.
# n_folds=1 means run_kfold_nn trains fold 0 only, so only the rows of that
# fold receive predictions; all other rows of `oof` stay 0.
oof = np.zeros((len(train),1))
for i,seed in enumerate(SEED):
    print(f'set seed {seed}')
    _oof = run_kfold_nn(CFG, train, DEVICE, n_folds=1, seed=seed)
    oof += _oof/len(SEED)

## <font size='4' color='blue'><a> Inference </a></font>

In [None]:
def inference_fold(cfg, test, device,seed,folds=1):
    """Average the predictions of the saved fold models for one seed.

    Args:
        cfg: configuration object used to rebuild ``TabularNN``.
        test: DataLoader (or other iterable of feature batches) to predict on.
        device: device used for inference.
        seed: seed whose checkpoints ``fold{fold}_seed{seed}.pth`` are loaded.
        folds: number of fold checkpoints to average, starting at fold 0.

    Returns:
        Array of shape ``(n_rows, 1)`` with the mean predicted probability.
    """
    # len(DataLoader) is the number of batches, not rows; use the dataset
    # length when it is available.
    n_rows = len(test.dataset) if hasattr(test, "dataset") else len(test)
    predictions = np.zeros((n_rows,1))
    for fold in range(folds):
        model = TabularNN(cfg)
        # map_location lets checkpoints written on a GPU load on a CPU-only machine.
        state = torch.load(f"fold{fold}_seed{seed}.pth", map_location=device)
        model.load_state_dict(state)
        # inference_fn moves each batch to `device`, so the model must live there too.
        model.to(device)
        _predictions = inference_fn(test, model, device)
        predictions += _predictions/folds
    return predictions

In [None]:
# model = JaneModel()
# model.load_state_dict(torch.load("jane_model.pth"))
# model.to(DEVICE)
# test = None

In [None]:
# Create the competition environment and the iterator that the submission loop
# consumes; each item is unpacked there as (test frame, prediction frame).
env = janestreet.make_env() 
iter_test = env.iter_test()

In [None]:
from tqdm import tqdm

for (test,sample_pred) in tqdm(iter_test):
    # .item() requires exactly one row per step.
    # NOTE(review): assumes the API serves one row at a time — confirm.
    if test["weight"].item()>0:
        # Copy the slice so do_preprocess mutates our own frame, not a view of
        # the frame handed out by the API.
        test = test.loc[:, CFG.num_features].copy()
        test = do_preprocess(test,mode=0)
        # Local names: rebinding the global `test_` (the example DataFrame that
        # num_features was derived from) would break re-running earlier cells.
        test_dataset = janeDataset(test,CFG.num_features, CFG.cat_features,None,mode="test")
        test_loader = DataLoader(test_dataset,batch_size=2**12,shuffle=False)
        predictions = 0.
        for seed in SEED:
            _predictions = inference_fold(CFG,test_loader,DEVICE,seed)
            predictions+=_predictions/len(SEED)
        # Threshold the averaged probability at 0.5 -> action in {0, 1}.
        sample_pred["action"] = (predictions.ravel() > 0.5).astype(int)
    else:
        # Zero-weight rows are not traded.
        sample_pred["action"] = 0
    env.predict(sample_pred)
