In [None]:
%%time
import warnings, sys
warnings.filterwarnings("ignore")

# Thanks to Chris's RAPIDS dataset, it only takes around 1 min to install offline
!cp ../input/rapids/rapids.0.15.0 /opt/conda/envs/rapids.tar.gz
!cd /opt/conda/envs/ && tar -xzvf rapids.tar.gz > /dev/null
sys.path = ["/opt/conda/envs/rapids/lib/python3.7/site-packages"] + sys.path
sys.path = ["/opt/conda/envs/rapids/lib/python3.7"] + sys.path
sys.path = ["/opt/conda/envs/rapids/lib"] + sys.path 
!cp /opt/conda/envs/rapids/lib/libxgboost.so /opt/conda/lib/

In [None]:
import sys
# Extend the import search path with the attached helper-scripts dataset.
sys.path.append("../input/moa-scripts")
# Project-local helpers; presumably resolved from ../input/moa-scripts — confirm.
from moa import load_datasets, preprocess, split, submit, submit_preds
from metrics import logloss
from oof import OOFTrainer, update_dict
from multilabel import MultiLabel

import pandas as pd 
import numpy as np 
import joblib
from tqdm.auto import tqdm
import warnings
warnings.filterwarnings("ignore")

# The scikit-learn estimator is kept below for reference only; the notebook
# uses the cuML class of the same name instead.
# from sklearn.linear_model import LogisticRegression
from cuml import LogisticRegression

## Preprocessing

In [None]:
# Load the competition tables from the lish-moa input directory.
X, y, y_nonscored, test, submission = load_datasets("../input/lish-moa")
# Preprocess with standard=False and onehot=True.
# NOTE(review): test_control presumably identifies control rows of the test set — confirm in moa.preprocess.
X, y, test, test_control = preprocess(X, y, test, standard=False, onehot=True)
# Build a 5-fold split with holdout disabled; X_holdout / y_holdout are unpacked
# here but not used anywhere else in the visible notebook.
X, y, X_holdout, y_holdout, split_index, index, classnames, features = split(X, y, n_folds=5, holdout=False)
# Persist the row index, fold assignment and feature list.
joblib.dump((index, split_index, features), "index.pkl") # for ensemble sanity check

In [None]:
# Seed NumPy's global RNG for the cells that follow.
# NOTE(review): this runs after split() above; if split() draws from NumPy's
# global RNG, the fold assignment is not covered by this seed — confirm.
np.random.seed(42)

## Fine-tune hyperparameters for every class

In [None]:
# Disabled cell, kept for reference: a per-label grid search over the
# LogisticRegression regularisation strength `C`. The last line reads the
# result from `multilabel_lr.hparams_`; the hard-coded `hparams` list in the
# OOF section presumably comes from a run of this cell — confirm.
# NOTE(review) before re-enabling:
#   * `lr_hparams` is not defined anywhere in the visible notebook.
#   * The grid lists 7.5e-3, 1e-2 and 5e-2 twice; the second occurrences may
#     have been meant as larger values — confirm.
#   * `crossval` copies `hparams` into `hp` but never uses `hp`.
# def crossval(X, y, hparams, split_index, loss_function):
#     hp = hparams.copy()
#     history = []
#     for i in np.unique(split_index):
#         it, iv = split_index!=i, split_index==i
#         xt, yt, xv, yv = X[it], y[it], X[iv], y[iv]
#         model = LogisticRegression(**hparams)
#         model.fit(xt, yt)
#         p = model.predict_proba(xv)[:, 1]
#         history.append(loss_function(p, yv))
#     return np.mean(history)

# multilabel_lr = MultiLabel(LogisticRegression, {}, 1)

# # find best `C` by grid search individually for each label
# grid = [
#     update_dict(lr_hparams,  {'C':1e-4})
#     ,update_dict(lr_hparams, {'C':1e-3})
#     ,update_dict(lr_hparams, {'C':2.5e-3})
#     ,update_dict(lr_hparams, {'C':5e-3})
#     ,update_dict(lr_hparams, {'C':7.5e-3})
#     ,update_dict(lr_hparams, {'C':1e-2})
#     ,update_dict(lr_hparams, {'C':2.5e-2})
#     ,update_dict(lr_hparams, {'C':5e-2})
#     ,update_dict(lr_hparams, {'C':7.5e-3})
#     ,update_dict(lr_hparams, {'C':1e-2})
#     ,update_dict(lr_hparams, {'C':5e-2})
#     ,update_dict(lr_hparams, {'C':1.0})
#     ,update_dict(lr_hparams, {'C':5.0})
# ]
# multilabel_lr.grid_search(X, y, grid, split_index, logloss)
# hparams = multilabel_lr.hparams_

## Out-of-fold (OOF) prediction

In [None]:
# Hard-coded hyperparameters: a list of keyword dicts, each holding one `C`
# value. It is passed to the model as `hparams` in the next cell.
# NOTE(review): presumably entry i belongs to label column i of `y` and the
# values come from the disabled grid search above — confirm before editing.
hparams = \
[{'C': 0.05}, {'C': 0.0025}, {'C': 0.0001}, {'C': 0.0025}, {'C': 0.001}, {'C': 0.001}, {'C': 0.001}, {'C': 0.0025}, {'C': 0.05}, {'C': 0.005}, {'C': 0.001}, {'C': 0.01}, {'C': 0.05}, {'C': 0.01}, {'C': 0.001}, {'C': 0.01}, {'C': 0.001}, {'C': 0.001}, {'C': 0.001}, {'C': 0.001}, {'C': 0.0025}, {'C': 0.0025}, {'C': 0.0025}, {'C': 0.01}, {'C': 0.0025}, {'C': 0.01}, {'C': 0.01}, {'C': 0.01}, {'C': 0.005}, {'C': 0.0025}, {'C': 0.0025}, {'C': 0.005}, {'C': 0.005}, {'C': 0.025}, {'C': 0.05}, {'C': 0.005}, {'C': 0.05}, {'C': 0.025}, {'C': 0.01}, {'C': 0.0025}, {'C': 0.01}, {'C': 0.01}, {'C': 0.0025}, {'C': 0.0025}, {'C': 0.0075}, {'C': 0.05}, {'C': 0.01}, {'C': 0.0025}, {'C': 0.005}, {'C': 0.001}, {'C': 0.0075}, {'C': 0.01}, {'C': 0.01}, {'C': 0.001}, {'C': 0.0025}, {'C': 0.005}, {'C': 0.0025}, {'C': 0.005}, {'C': 0.01}, {'C': 0.0025}, {'C': 0.001}, {'C': 0.01}, {'C': 0.0075}, {'C': 0.005}, {'C': 0.025}, {'C': 0.005}, {'C': 0.005}, {'C': 0.0025}, {'C': 0.0025}, {'C': 0.005}, {'C': 0.0025}, {'C': 0.0025}, {'C': 0.0075}, {'C': 0.01}, {'C': 0.01}, {'C': 0.005}, {'C': 0.0025}, {'C': 0.0025}, {'C': 0.0025}, {'C': 0.025}, {'C': 0.005}, {'C': 0.0025}, {'C': 0.005}, {'C': 0.0075}, {'C': 0.025}, {'C': 0.0025}, {'C': 0.0075}, {'C': 0.01}, {'C': 0.025}, {'C': 0.001}, {'C': 0.0075}, {'C': 0.0025}, {'C': 0.001}, {'C': 0.01}, {'C': 0.01}, {'C': 0.01}, {'C': 0.001}, {'C': 0.001}, {'C': 0.0025}, {'C': 0.0075}, {'C': 0.0025}, {'C': 0.01}, {'C': 0.001}, {'C': 0.001}, {'C': 0.05}, {'C': 0.01}, {'C': 0.005}, {'C': 0.025}, {'C': 0.01}, {'C': 0.01}, {'C': 0.01}, {'C': 0.0025}, {'C': 0.005}, {'C': 0.0025}, {'C': 0.01}, {'C': 0.005}, {'C': 0.025}, {'C': 0.025}, {'C': 0.01}, {'C': 0.01}, {'C': 0.0025}, {'C': 0.001}, {'C': 0.005}, {'C': 0.025}, {'C': 0.05}, {'C': 0.01}, {'C': 0.0025}, {'C': 0.0025}, {'C': 0.001}, {'C': 0.001}, {'C': 0.025}, {'C': 0.0075}, {'C': 0.0025}, {'C': 0.005}, {'C': 0.005}, {'C': 0.01}, {'C': 0.0025}, {'C': 0.005}, {'C': 0.001}, {'C': 0.01}, {'C': 0.01}, {'C': 0.001}, {'C': 
0.0025}, {'C': 0.005}, {'C': 0.01}, {'C': 0.0025}, {'C': 0.01}, {'C': 0.01}, {'C': 0.01}, {'C': 0.0025}, {'C': 0.005}, {'C': 0.0075}, {'C': 0.01}, {'C': 0.0025}, {'C': 0.001}, {'C': 0.01}, {'C': 0.001}, {'C': 0.0075}, {'C': 0.01}, {'C': 0.0025}, {'C': 0.001}, {'C': 0.05}, {'C': 0.001}, {'C': 0.01}, {'C': 0.005}, {'C': 0.0001}, {'C': 0.005}, {'C': 0.05}, {'C': 0.01}, {'C': 0.025}, {'C': 0.01}, {'C': 0.01}, {'C': 0.005}, {'C': 0.0025}, {'C': 0.001}, {'C': 0.0025}, {'C': 0.005}, {'C': 0.0025}, {'C': 0.0025}, {'C': 0.01}, {'C': 0.001}, {'C': 0.005}, {'C': 0.01}, {'C': 0.0025}, {'C': 0.01}, {'C': 0.0025}, {'C': 0.025}, {'C': 0.0075}, {'C': 0.0075}, {'C': 0.001}, {'C': 0.005}, {'C': 0.0075}, {'C': 0.01}, {'C': 0.001}, {'C': 0.0075}, {'C': 0.01}, {'C': 0.005}, {'C': 0.01}, {'C': 0.0025}, {'C': 0.01}, {'C': 0.01}, {'C': 0.001}, {'C': 0.05}, {'C': 0.005}]
# Persist the list; the trailing `;` suppresses the cell's output.
joblib.dump(hparams, 'hparams.pkl');

In [None]:
%%time
estimator_params = \
dict(
    estimator=LogisticRegression,
    hparams=hparams,
    verbose=0
)
oof_model = OOFTrainer(MultiLabel, estimator_params, logloss, save_models=False)
oof_model.fit(X, y, test, split_index, verbose=False)
print(f'logloss={logloss(y, oof_model.train_prediction_):.6f}')
joblib.dump(oof_model, "oof.pkl");

## Post-processing

In [None]:
# Disabled cell, kept for reference: a per-class lower-threshold filter.
# For every label column it scans candidate thresholds `t`, sets predictions
# <= t to 0, and records in `t_lower` the threshold giving the lowest logloss
# on the training predictions. The chosen thresholds are then applied to both
# the training and test predictions, the resulting loss is printed, `t_lower`
# is dumped, and the filtered test predictions are submitted.
# NOTE(review): if re-enabled, the following cell calls submit_preds again
# with the unfiltered predictions — presumably replacing this submission;
# confirm and drop one of the two calls.
# # hack logloss 1: filter
# n = y.shape[1]
# t_lower = [0] * n

# for i in tqdm(range(n)):
#     baseline = logloss(y[:, i], oof_model.train_prediction_[:, i])
#     best_loss = baseline
#     pred = oof_model.train_prediction_[:, i].copy()
#     for t in [1e-7, 1e-6, 1e-5] + list(np.arange(1e-4, 1e-2, 1e-4)):
#         pred = np.where(pred <= t, 0, pred)
#         loss = logloss(y[:, i], pred)
#         if loss < best_loss:
#             t_lower[i] = t
#             best_loss = loss
            
# filt_pred = oof_model.train_prediction_.copy()
# test_pred = oof_model.test_prediction_.copy()

# for i,tl in zip(range(n), t_lower):
#     pred = filt_pred[:, i].copy()
#     filt_pred[:, i] = np.where(pred <= tl, 0, pred)
#     pred = test_pred[:, i].copy()
#     test_pred[:, i] = np.where(pred <= tl, 0, pred)

# print(f'post processing: logloss={logloss(y, filt_pred):.6f}')
# joblib.dump(t_lower, 't_lower.pkl');
# submit_preds(test_pred, submission, test_control, classnames)

In [None]:
# Submit the unfiltered test predictions from the OOF trainer.
# NOTE(review): submit_preds presumably writes submission.csv (the next cell
# reads that file) and uses test_control to handle control rows — confirm in moa.py.
submit_preds(oof_model.test_prediction_, submission, test_control, classnames)

In [None]:
# Sanity check: show the first five rows and first eight columns of the written file.
pd.read_csv('submission.csv').iloc[:5, :8]