In [None]:
# !pip install autograd --quiet

In [None]:
import datetime
import pandas as pd
from time import time
import tensorflow as tf
# from autograd import grad
# import autograd.numpy as np
import numpy as np
np.set_printoptions(suppress = True)
from numba import njit
from scipy.optimize import minimize, fsolve
from tqdm.notebook import tqdm

import lightgbm as lgb
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold

import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
%matplotlib inline
%config InlineBackend.figure_format = 'svg'

# Objective Function

In [None]:
def log_loss_metric(y_true, y_pred):
    """Mean binary cross-entropy between labels and predicted probabilities.

    Predictions are clipped to [1e-15, 1 - 1e-15] before the logarithms are
    taken, so exact 0 or 1 probabilities cannot produce infinities. The mean
    is taken over every element of the (broadcast) result.
    """
    eps = 1e-15
    probs = np.clip(y_pred, eps, 1 - eps)
    positive_term = y_true * np.log(probs)
    negative_term = (1 - y_true) * np.log(1 - probs)
    return - np.mean(positive_term + negative_term)

def log_loss_numpy(y_pred):
    """Mean binary log loss of `y_pred` against the notebook-level `y_true`.

    Both arrays are flattened, so the result is the mean over every
    (row, target) pair. Predictions are clipped to [1e-15, 1 - 1e-15].
    Elements whose label equals 1 contribute -log(p); all others contribute
    -log(1 - p).

    Note: `y_true` is read from the global namespace, so it must be defined
    before this function is called.
    """
    labels = np.asarray(y_true).ravel()
    probs = np.clip(np.asarray(y_pred).ravel(), 1e-15, 1 - 1e-15)
    per_element = np.where(labels == 1, - np.log(probs), - np.log(1 - probs))
    return per_element.mean()

# Model OOF Scores

In [None]:
# Scored targets as a plain NumPy array; 'sig_id' becomes the index and is
# therefore not part of `.values`.
train_targets = pd.read_csv('../input/lish-moa/train_targets_scored.csv', index_col = 'sig_id').values
# Feature table. In the cells shown here only its 'cp_type' column is used,
# to select treated ('trt_cp') rows when the OOF predictions are loaded.
# NOTE(review): rows are assumed to be in the same order as train_targets - confirm.
train = pd.read_csv('../input/lish-moa/train_features.csv')
# Ground truth read as a global by log_loss_numpy and grad_func.
y_true = train_targets.copy()
# Alternative (disabled): score on treated rows only.
# y_true = train_targets[train['cp_type'] == 'trt_cp']
print(y_true.shape)

In [None]:
# oof_dict = {
#             'MLP 2': '../input/groupcvbestmlp/MLP_2_oof.npy',##
#             'MLP 3L 2': '../input/groupcvbestmlp/MLP_3L_2_oof.npy',##
#             'MLP 4L 0': '../input/groupcvbestmlp/MLP_4L_0_oof.npy',##
#             'RTN 2': '../input/groupcvbestmlp/RTN_oof.npy',##
#             'TabNet': '../input/groupcv-tabnet/tabnet_oof.npy',##
#            }

# Model name -> path of its saved out-of-fold (OOF) prediction array.
# Commented-out entries are earlier candidates kept for reference.
oof_dict = {
#             'MLP V2': '../input/groupcv-v2/EModel_oof.npy',##
#             'MLP V3': '../input/groupcv-v3/EModel_Stack_oof.npy', 
#             'MLP': '../input/moa-mlp/EModel_Stack_oof.npy',##
#             'ResNet': '../input/groupcv-v4/EModel_Stack_oof.npy',
#             'RTNS': '../input/groupcv-rethinknet-single/Model0_oof.npy',##
#             'TabNet': '../input/groupcv-tabnet/tabnet_oof.npy',##
#             'TabNet': '../input/groupcv-1803pre-tabnet/tabnet_oof.npy',##
#             'GrowNet': '../input/groupcv-5foldgrownet/grownet_oof.npy',
            'MLP 2': '../input/groupcv-pbestpre-mlp/MLP_2_oof.npy',##
            'MLP 3L 2': '../input/groupcv-pbestpre-mlp/MLP_3L_2_oof.npy',##
            'MLP 4L 0': '../input/groupcv-pbestpre-mlp/MLP_4L_0_oof.npy',##
            'RTN': '../input/groupcv-pbestpre-mlp/RTN_oof.npy',##
            'TabNet': '../input/groupcv-pbestpre-tabnet/tabnet_oof.npy',##
            'GrowNet': '../input/groupcv-7foldpbestpre-grownet/grownet_oof.npy',
            'ResDT': '../input/groupcv-pbestpre-decisiontree/Net_oof.npy',
           }

# Boolean mask of treated rows, computed once instead of on every iteration.
# NOTE(review): each saved OOF file is assumed to hold predictions for the
# treated rows only, in file order - confirm against the training notebooks.
treated_mask = (train['cp_type'] == 'trt_cp').values

# oof[m] holds model m's predictions with the same shape as train_targets;
# rows outside the mask (controls) keep their initial value of 0.
oof = np.zeros((len(oof_dict), train_targets.shape[0], train_targets.shape[1]))
for i, oof_path in enumerate(oof_dict.values()):
    oof[i][treated_mask] = np.load(oof_path)

In [None]:
# Score every model's OOF predictions individually (lower is better).
log_loss_scores = {}
for n, key in enumerate(oof_dict):
    score_oof = log_loss_numpy(oof[n])
    print(f'{key} OOF:\t', score_oof)
    log_loss_scores[key] = score_oof

In [None]:
# List the models from best (lowest log loss) to worst.
for w, w_score in sorted(log_loss_scores.items(), key = lambda item: item[1]):
    print(w, w_score)

# Scipy

In [None]:
def func(weights):
    """Objective for the optimiser: log loss of the weighted OOF blend.

    `weights` holds one coefficient per model. It is contracted with the
    first (model) axis of the global `oof` array, and the resulting blend is
    scored with `log_loss_numpy`.
    """
    return log_loss_numpy(np.tensordot(weights, oof, axes = (0, 0)))

@njit
def grad_func(weights):
    """Analytic gradient of the blend log loss with respect to each weight.

    With p = sum_j weights[j] * oof_clip[j] and labels y, the mean binary
    log loss has partial derivative

        dL/dw_i = -mean( oof_clip[i] * (p - y) / (p * p - p) )

    which is the original expression after expanding p = weights[i] * b + c.
    The blend p is accumulated once and reused for every model, rather than
    re-summing the other models' contributions for each weight.

    Reads the globals `oof` (models x rows x targets) and `y_true`
    (rows x targets). Each model's predictions are clipped to
    [1e-15, 1 - 1e-15] so the denominator cannot be exactly zero for rows
    where the raw predictions are 0.

    Returns a 1-D array with one gradient entry per model.
    """
    oof_clip = np.minimum(1 - 1e-15, np.maximum(oof, 1e-15))
    n_models = oof.shape[0]

    # Weighted blend of the clipped predictions, shared by every partial derivative.
    blend = np.zeros((oof.shape[1], oof.shape[2]))
    for j in range(n_models):
        blend += weights[j] * oof_clip[j]

    # Model-independent factor (p - y) / (p^2 - p).
    common = (blend - y_true) / (blend * blend - blend)

    gradients = np.zeros(n_models)
    for i in range(n_models):
        gradients[i] = -np.mean(oof_clip[i] * common)
    return gradients

In [None]:
# Constrained optimisation of the blend weights with SLSQP:
#   * start from equal weights,
#   * keep each weight in [0, 1],
#   * force the weights to sum to 1 (equality constraint with constant Jacobian).
tol = 1e-10
init_guess = [1 / oof.shape[0]] * oof.shape[0]
bnds = [(0, 1) for _ in range(oof.shape[0])]
cons = {'type': 'eq', 
        'fun': lambda x: np.sum(x) - 1,
        'jac': lambda x: [1] * len(x),
       }

print('Initial Blend OOF:', func(init_guess))
start_time = time()
res_scipy = minimize(fun = func, 
                     x0 = init_guess, 
                     method = 'SLSQP', 
                     jac = grad_func, 
                     bounds = bnds, 
                     constraints = cons, 
                     tol = tol, 
                     options = {'disp': True})
# minimize() does not raise on failure; report it so the weights printed
# below are not mistaken for a converged optimum.
if not res_scipy.success:
    print('WARNING: SLSQP did not converge:', res_scipy.message)
# str(timedelta)[2:7] keeps the 'MM:SS' part of 'H:MM:SS.ffffff'.
print(f'[{str(datetime.timedelta(seconds = time() - start_time))[2:7]}] Optimised Blend OOF:', res_scipy.fun)
print('Optimised Weights:', res_scipy.x)

In [None]:
# Zero out negligible weights (<= 1e-6), then rescale so the rest sum to 1.
weights = np.array(res_scipy.x)
weights[weights <= 1e-6] = 0.
print(weights.sum())
weights = weights / weights.sum()
print(weights)

# Test

In [None]:
# Optional manual override of the optimised weights (disabled).
# weights = np.array([0.05, 0.05, 0.25, 0.15, 0.2, 0.3])
# Sanity check: the weights used for the blend below should sum to 1.
print(weights.sum())

In [None]:
# Show the blend weight assigned to each model. The value printed is the
# weight, not an OOF score, so the label says so.
for n, key in enumerate(oof_dict.keys()):
    print(f'{key} Weight:\t', weights[n])

In [None]:
# Blend the per-model OOF predictions with the final weights and score the result.
oof_blend = np.tensordot(weights, oof, axes = (0, 0))
score = log_loss_numpy(oof_blend)
print(score)

In [None]:
# 0.015273820378635507

In [None]:
# train_features = pd.read_csv('../input/lish-moa/train_features.csv')
# tar_nonctr = train_targets[train_features['cp_type'] == 'trt_cp']
# print(log_loss_metric(tar_nonctr, oof_blend[train_features['cp_type'] == 'trt_cp']))

In [None]:
def post_process(pred, low, high):
    """Snap confidently predicted rows to hard 0/1 values.

    A row is rewritten only when every entry is either <= `low` or >= `high`
    and at least one entry is non-zero. Such rows are rounded to the nearest
    integer (halves go to the even value, as with the built-in `round`).
    All other rows are returned unchanged. The input array is not modified.

    Parameters
    ----------
    pred : 2-D NumPy array of predictions (rows x targets).
    low, high : thresholds below / above which an entry counts as confident.

    Returns
    -------
    (pred_copy, idx) : the processed copy and the list of row indices that
    were rounded, in ascending order.
    """
    pred_copy = pred.copy()
    # Entry-wise confidence test, evaluated for the whole array at once.
    confident = (pred_copy <= low) | (pred_copy >= high)
    # Rows that are confident everywhere and not identically zero.
    rows = confident.all(axis = 1) & pred_copy.any(axis = 1)
    pred_copy[rows] = np.rint(pred_copy[rows])
    idx = np.flatnonzero(rows).tolist()
    return pred_copy, idx

@njit
def post_process_jit(pred, low, high):
    """Numba-compiled variant of `post_process` that returns only the array.

    A row is rounded to the nearest integers when every entry is <= `low` or
    >= `high` and the row contains at least one non-zero entry. Other rows
    are left as they are. Works on a copy; `pred` itself is not modified.
    """
    out = pred.copy()
    n_rows = out.shape[0]
    n_cols = out.shape[1]
    for i in range(n_rows):
        all_confident = True
        has_nonzero = False
        for j in range(n_cols):
            value = out[i, j]
            # An entry strictly between the thresholds disqualifies the row.
            if not (value <= low or value >= high):
                all_confident = False
            if value != 0:
                has_nonzero = True
        if all_confident and has_nonzero:
            for j in range(n_cols):
                out[i, j] = round(out[i, j])
    return out

In [None]:
# Grid search over the post-processing thresholds. A (low, high) pair is kept
# only when it strictly improves on the best log loss so far, starting from
# the un-processed blend score.
best_low, best_high, best_score = np.inf, 0, score
# NOTE(review): np.arange with a float step can include a value at or next to
# the stop bound because of rounding - confirm the grids are as intended.
low_grid = np.arange(0.001, 0.021, 0.001)
high_grid = np.arange(0.98, 1, 0.001)
for low in tqdm(low_grid):
    for high in high_grid:
        # Pure-Python alternative: oof_blend_pp, idx = post_process(oof_blend, low, high)
        oof_blend_pp = post_process_jit(oof_blend, low, high)
        score_pp = log_loss_numpy(oof_blend_pp)
        if score_pp < best_score:
            best_low, best_high, best_score = low, high, score_pp
            print(best_low, best_high, best_score, best_score - score)

In [None]:
# Best thresholds found by the grid search, the resulting log loss, and its
# change relative to the un-processed blend score (negative = improvement).
print(best_low, best_high, best_score, best_score - score)

In [None]:
# 0.015257296886519013

In [None]:
# oof_blend_pp = post_process_jit(oof_blend, 0.015, 0.987)
# score_pp = log_loss_numpy(oof_blend_pp)
# print(score_pp)

In [None]:
# from sklearn.metrics import roc_auc_score

# print(score_pp)
# for average in ['micro', 'macro', 'weighted']:
#     roc_auc = roc_auc_score(y_true, oof_blend_pp, average = average)
#     print(f'{average} ROC AUC Score:\t', roc_auc)

In [None]:
# @njit
# def pp_bycol(pred, low, high):
#     pred_copy = pred.copy()
#     pred_copy[pred_copy <= low] = 0
#     pred_copy[pred_copy >= high] = 1
#     return pred_copy

# @njit
# def pp(pred, low, high):
#     pred_copy = pred.copy()
#     for i in range(pred_copy.shape[1]):
#         pred_copy[:, i] = pp_bycol(pred_copy[:, i], low[i], high[i])
#     return pred_copy

# def log_loss(y_t, y_p):
#     y_pred_clip = np.clip(y_p, 1e-15, 1 - 1e-15)
#     loss = - np.mean(y_t * np.log(y_pred_clip) + (1 - y_t) * np.log(1 - y_pred_clip))
#     return loss

In [None]:
# bins = 50

# best_low = np.zeros(oof_blend.shape[1])
# best_high = np.ones(oof_blend.shape[1])
# for col in tqdm(range(oof_blend.shape[1])):
#     start_time = time()
#     best_score = log_loss(y_true[:, col], oof_blend[:, col])
#     low_bound = oof_blend[:, col].min()
#     high_bound = oof_blend[:, col].max()
#     gap = high_bound - low_bound
#     for low in np.arange(low_bound, high_bound, gap / bins):
#         for high in np.arange(low, high_bound, gap / bins):
#             oof_blend_col_pp = pp_bycol(oof_blend[:, col], low, high)
#             score_pp = log_loss(y_true[:, col], oof_blend_col_pp)
#             if score_pp < best_score:
#                 best_score = score_pp
#                 best_low[col] = low
#                 best_high[col] = high
# #                 print(f'Column {col}:', best_low[col], best_high[col])
#     print(f'[{str(datetime.timedelta(seconds = time() - start_time))[2:7]}] Column {col}:', best_low[col], best_high[col])

In [None]:
# np.save('best_low.npy', best_low)
# np.save('best_high.npy', best_high)

In [None]:
# oof_blend_col_pp = pp(oof_blend, best_low, best_high)
# score_bol_pp = log_loss_numpy(oof_blend_col_pp)
# print(score)
# print(score_bol_pp)
# print(score_bol_pp - score)

# Adversarial Validation

In [None]:
# def preprocess(df):
#     df.loc[:, 'cp_type'] = df.loc[:, 'cp_type'].map({'trt_cp': 0, 'ctl_vehicle': 1})
#     df.loc[:, 'cp_time'] = df.loc[:, 'cp_time'].map({24: 0, 48: 1, 72: 2})
#     df.loc[:, 'cp_dose'] = df.loc[:, 'cp_dose'].map({'D1': 0, 'D2': 1})
#     del df['sig_id']
#     return df

In [None]:
# x_train = pd.read_csv('../input/lish-moa/train_features.csv')
# x_test = pd.read_csv('../input/lish-moa/test_features.csv')

# x_train = preprocess(x_train)
# x_test = preprocess(x_test)

# GENES = [col for col in x_train.columns if col.startswith('g-')]
# CELLS = [col for col in x_train.columns if col.startswith('c-')]

In [None]:
# from sklearn.preprocessing import QuantileTransformer

# qt = QuantileTransformer(output_distribution = 'normal', random_state = 42)
# data0 = pd.concat([x_train, x_test])
# qt.fit(data0[GENES+CELLS])

In [None]:
# x_train[GENES+CELLS] = qt.transform(x_train[GENES+CELLS])
# x_test[GENES+CELLS] = qt.transform(x_test[GENES+CELLS])

In [None]:
# np.random.seed(42)

# std = 0.05
# data0[GENES+CELLS] += np.random.normal(0, std, size = data0[GENES+CELLS].shape)
# data0[GENES+CELLS] = qt.transform(data0[GENES+CELLS])

In [None]:
# param = {'objective': 'binary', 
#          'metric': 'binary_logloss', 
#          'device_type': 'cpu', 
#          'num_thread': 4, 
#          'verbosity': -1, 
#          'bagging_fraction': 0.9811046327087707, 
#          'feature_fraction': 0.8933337503617897, 
#          'learning_rate': 0.010969545979403403, 
#          'max_bin': 24, 
#          'max_depth': 26, 
#          'min_data_in_leaf': 51, 
#          'min_sum_hessian_in_leaf': 7.765264256486626, 
#          'num_leaves': 54,                   
#         }

# data = pd.concat([x_train, x_test, data0]).reset_index(drop = True)
# targets = np.zeros(x_train.shape[0] + x_test.shape[0] + data0.shape[0])
# targets[x_train.shape[0] + x_test.shape[0]:] = 1

# res = targets.copy()

# skf = StratifiedKFold(n_splits = 5, shuffle = True, random_state = 42)
# for n, (tr, te) in enumerate(skf.split(targets, targets)):
#     x_tr, x_val = data.values[tr], data.values[te]
#     y_tr, y_val = targets[tr], targets[te]
    
#     lgb_tr = lgb.Dataset(x_tr, label = y_tr, categorical_feature = [0, 1, 2])
#     lgb_val = lgb.Dataset(x_val, label = y_val, categorical_feature = [0, 1, 2])
    
#     clf = lgb.train(param, lgb_tr, 1000, [lgb_val], ['eval'], early_stopping_rounds = 25, 
#                     verbose_eval = 0, categorical_feature = [0, 1, 2])
    
#     res[te] = clf.predict(x_val)
#     fold_score = roc_auc_score(y_val, res[te])
#     print(f'Fold {n}:\t', fold_score)
    
# oof_score = roc_auc_score(targets, res)
# print('-' * 30)
# print('OOF:\t', oof_score)

In [None]:
# plt.hist(res)
# plt.show()

In [None]:
# length = x_test.shape[0]
# # length = 1000
# res_tr = -res[targets == 1]
# idx = res_tr.argsort()[-length:][::-1]
# print(idx.shape)

In [None]:
# np.save('res.npy', res)
# data.to_csv('data_all.csv', index = False)

In [None]:
# def log_loss_numpy2(y_true2, y_pred2):
#     loss2 = 0
#     y_pred_clip2 = np.clip(y_pred2, 1e-15, 1 - 1e-15)
#     for i in range(y_pred2.shape[1]):
#         loss2 += - np.mean(y_true2[:, i] * np.log(y_pred_clip2[:, i]) + (1 - y_true2[:, i]) * np.log(1 - y_pred_clip2[:, i]))
#     return loss2 / y_pred2.shape[1]

In [None]:
# for i in range(oof.shape[0]):
#     print(log_loss_numpy2(y_true[idx], oof[i][idx]))

# Blending Weights Optimisation

In [None]:
# def Lagrange_func(params):
#     w1, w2, w3, w4, w5, _lambda = params
#     oof_blend = w1 * oof1 + w2 * oof2 + w3 * oof3 + w4 * oof4 + w5 * oof5
#     return log_loss_numpy(oof_blend) - _lambda * (w1 + w2 + w3 + w4 + w5 - 1)

In [None]:
# grad_L = grad(Lagrange_func)

In [None]:
# def Lagrange_obj(params):
#     w1, w2, w3, w4, w5, _lambda = params
#     dLdw1, dLdw2, dLdw3, dLdw4, dLdw5, dLdlam = grad_L(params)
#     return [dLdw1, dLdw2, dLdw3, dLdw4, dLdw5, w1 + w2 + w3 + w4 + w5 - 1]

In [None]:
# start_time = time()
# w1, w2, w3, w4, w5, _lambda = fsolve(Lagrange_obj, [0.1, 0.1, 0.3, 0.3, 0.2, 1.0])
# print(f'[{str(datetime.timedelta(seconds = time() - start_time))[2:7]}] Optimised Weights:', [w1, w2, w3, w4, w5])
# oof_b = w1 * oof1 + w2 * oof2 + w3 * oof3 + w4 * oof4 + w5 * oof5
# print('Optimised Blend OOF:', log_loss_numpy(oof_b))

In [None]:
# print('Check Condition (1a):', w1 + w2 + w3 + w4 + w5 + w6)
# if w1 + w2 + w3 + w4 + w5 + w6 - 1 <= 1e-10:
#     print('Great! The sum of all weights equals to 1!')
# else:
#     print('Manual adjustment is needed to modify the weights.')