In [None]:
# project-local modules (made importable via the sys.path entries below)
import sys
sys.path.append('../input/moa-models')
sys.path.append('../input/moa-scripts')
from pytorch import *
# Fix the random seed before anything else runs. `seed_everything` is provided
# by the star-import from the local `pytorch` module on the previous line.
# NOTE(review): which RNGs it seeds (random / numpy / torch / CUDA) is not
# visible from this notebook — confirm in the helper module.
seed_everything(seed=42)

from moa import *
from moa_pipeline import *
from inference import *
from validation import *
from pretraining import *
from metrics import logloss

# numpy, pandas, scipy
import numpy as np
import pandas as pd 
from scipy import stats 

# misc
import warnings
from tqdm.auto import tqdm
import os
import random
import warnings 
import copy
import joblib
import gc 
from functools import partial
import glob 

# sklearn
from sklearn import preprocessing
from sklearn.model_selection import KFold

# torch 
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# viz 
import matplotlib.pyplot as plt
import seaborn as sns

# Set CUDA_LAUNCH_BLOCKING=1 for this process. CUDA then launches kernels
# synchronously, which makes device-side errors surface at the call that
# caused them (at the cost of speed).
os.environ.update(CUDA_LAUNCH_BLOCKING="1")

# Silence every warning category for the rest of the session.
warnings.simplefilter(action="ignore")

In [None]:
## load raw data
# `load2` is a project helper (star-imported in the first cell); it returns the
# eleven objects unpacked below. An older `load()` variant returned the same
# data without `y_n` and `drug_id`.
X, y, y_n, X_test, test_control, drug_id, genes, cells, classnames, features, submission = load2()
print(X.shape, y.shape)

## load splits
# Every file matching the pattern holds one (fold, pipe) pair. Sorting keeps
# the order of folds/pipes/seeds stable between runs.
split_pattern = "../input/moa-pipelines/x*"
splits = sorted(glob.glob(split_pattern))
if not splits:
    # Without this guard an empty match fails later with a cryptic
    # "not enough values to unpack" error from the zip(*...) below.
    raise FileNotFoundError(f"No pipeline split files match {split_pattern!r}")
print(splits)

# NOTE: joblib.load unpickles its input and can execute arbitrary code;
# only load split files that come from a trusted source.
folds, pipes = zip(*(joblib.load(split) for split in splits))

# Network input width, taken from the `columns_` attribute of the first step
# of the first pipeline.
# NOTE(review): presumably all pipelines select the same number of columns — confirm.
input_dim = len(pipes[0][0].columns_)

# One integer seed (0, 1, 2, ...) per loaded split.
seeds = np.arange(len(folds))

In [None]:
%%time

# Settings shared by every training stage; forwarded verbatim to get_params
# through **fitter_params.
fitter_params = {
    "activation": nn.GELU,
    "dropout": [0.3, 0.3],
    "estimator": MLPmem,
    "memory": 16,
    "smoothing": 4e-4,
    "clipping": 4e-4,
    "hidden_size": (128, 128),
    "early_stopping": 8,
}

# Main stage: output width is y.shape[1].
fitter, train_params = get_params(
    input_dim,
    y.shape[1],
    epochs=100,
    optim=partial(LookaheadAdamW, k=10),
    optim_params={"weight_decay": 1e-5, "lr": 1e-2},
    scheduler=optim.lr_scheduler.ReduceLROnPlateau,
    sched_params={"mode": "min", "factor": 0.1, "patience": 2},
    bn_momentum=0.05,
    verbose=100,
    **fitter_params
)

# Second stage: output width is y_n.shape[1]; shorter schedule (10 epochs),
# smaller learning rate, silent logging.
# NOTE(review): judging by the names, y_n holds the non-scored targets used
# for pretraining — confirm against load2 / mlp_oof_pretraining.
fitter_nonscored, train_params_nonscored = get_params(
    input_dim,
    y_n.shape[1],
    epochs=10,
    optim=partial(LookaheadAdam, k=10),
    optim_params={"lr": 5e-3, "weight_decay": 1e-5},
    scheduler=optim.lr_scheduler.ReduceLROnPlateau,
    sched_params={"mode": "min", "factor": 0.1, "patience": 2},
    bn_momentum=0.05,
    verbose=0,
    **fitter_params
)

# The third ("pseudo") stage is currently not configured: both of its slots
# are None. An earlier experiment built it with get_params(input_dim,
# y_n.shape[1], epochs=5, optim=torch.optim.SGD,
# optim_params=dict(lr=1e-3, nesterov=True, momentum=0.9),
# ReduceLROnPlateau with sched_params=dict(mode='min', factor=0.1, patience=1),
# verbose=100, **fitter_params).
# NOTE(review): presumably mlp_oof_pretraining skips a stage whose fitter is
# None — confirm in the helper.
fitter_pseudo = None
train_params_pseudo = None

# Run the out-of-fold training on the test rows where `test_control` is false.
oof_train, oof_test = mlp_oof_pretraining(
    X,
    y,
    y_n,
    X_test[~test_control],
    (fitter, fitter_nonscored, fitter_pseudo),
    (train_params, train_params_nonscored, train_params_pseudo),
    folds,
    seeds,
    pipes
)

In [None]:
# Bound both prediction sets to [preds_clipping, 1 - preds_clipping] so no
# value is exactly 0 or 1 (presumably to keep a log-loss style metric finite).
preds_clipping = 1e-5
oof_train, oof_test = (
    np.clip(preds, preds_clipping, 1 - preds_clipping)
    for preds in (oof_train, oof_test)
)

# Report on the clipped predictions; the call's return value is the cell output.
oof_result(y, oof_train, oof_test)

In [None]:
# Combine the test predictions with the control-group rows, then hand the
# result to the project's submission helper.
# NOTE(review): submit_preds presumably writes "submission.csv", which is read
# back below — confirm in the helper.
prediction = join_control_group(
    oof_test, test_control, submission, classnames
)
submit_preds(
    prediction, submission, test_control, classnames
)

# Persist the out-of-fold train predictions.
joblib.dump(oof_train, "oof_train.pkl")

# Sanity check: show the top-left 5x5 corner of the written submission file.
pd.read_csv("submission.csv").head(5).iloc[:, :5]