In [None]:
import gc
from glob import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time

import category_encoders as ce
import lightgbm as lgb
from sklearn.model_selection import StratifiedKFold
from sklearn import metrics
from scipy import special
from scipy import optimize

import optuna
from optuna.visualization import (
    plot_contour
    , plot_edf
    , plot_intermediate_values
    , plot_optimization_history
    , plot_parallel_coordinate
    , plot_param_importances
    , plot_slice
)

import sys
sys.path.append("../utils")
from metrics import compute_recall_at4, compute_normalized_gini, compute_amex_metric

# Never truncate columns when a wide frame is displayed.
pd.set_option("display.max_columns", None)
# Seed NumPy's global RNG so repeated runs draw the same numbers.
np.random.seed(seed=2112)

***
## load and prepare data

In [None]:
# List the files in the processed dataset directory that the next cell reads from.
!ls ../data/processed/dsv02

In [None]:
# Labels are indexed by customer_ID so they can be joined onto the feature
# frame by index in a later cell.
train_labels = pd.read_csv(
    "../data/raw/train_labels.csv",
    index_col="customer_ID",
)
# Feature table (dataset version dsv02).
train = pd.read_parquet("../data/processed/dsv02/train.parquet")

In [None]:
# Model input columns. Captured before the labels are merged into `train`,
# so the `target` column is not part of this list.
input_feats = train.columns.tolist()
# Columns passed to LightGBM as categorical: every base feature below appears
# twice, once with the `_first` suffix and once with the `_last` suffix.
categ_feats = [
    f"{base}_{suffix}"
    for suffix in ("first", "last")
    for base in (
        "B_30", "B_38", "D_114", "D_116", "D_117", "D_120",
        "D_126", "D_63", "D_64", "D_66", "D_68",
    )
]
# Display the number of model input features.
len(input_feats)

In [None]:
# Attach the labels by index (inner join), then move the index into a regular
# column so the frame ends up with a fresh 0..n-1 index.
train = (
    train
    .merge(train_labels, how="inner", left_index=True, right_index=True)
    .reset_index()
)

# The labels now live inside `train`; release the standalone copy.
del train_labels
gc.collect()

***
## loss tuning

In [None]:
# 3-fold stratified CV with a fixed seed. The folds are materialised once so
# every call that receives `skf_split` trains and validates on the same rows.
skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=2112)
skf_split = [fold for fold in skf.split(train, train["target"].values)]

In [None]:
# LightGBM parameters shared by every trial; only the focal-loss settings and
# the number of boosting rounds are varied by the tuning objective.
model_params = dict(
    metric='None',
    # tree shape / learning rate
    num_leaves=31,
    learning_rate=0.05,
    # histogram construction
    max_bin=511,
    bin_construct_sample_cnt=100_000_000,
    # row / column subsampling
    bagging_freq=1,
    bagging_fraction=0.95,
    feature_fraction=0.15,
    # regularisation
    lambda_l1=10.352308845012756,
    lambda_l2=1.569788743184169,
    min_data_in_leaf=2000,
    path_smooth=30.4965047619009,
    # reproducibility / runtime behaviour
    seed=2112,
    force_col_wise=True,
    feature_pre_filter=False,
    verbosity=-1,
)

In [None]:
class FocalLoss:
    """Binary focal loss with analytic derivatives, usable as a LightGBM custom objective.

    Per-sample loss: ``-a_t * (1 - p_t) ** gamma * log(p_t)``, where ``p_t`` is
    the probability given to the true class and ``a_t`` is an optional class
    weight (``alpha`` for positives, ``1 - alpha`` for negatives).

    Parameters
    ----------
    gamma : float
        Focusing exponent. ``gamma == 0`` with ``alpha is None`` reduces to
        the plain log loss.
    alpha : float or None
        Weight of the positive class; ``None`` weights both classes by 1.
    """

    def __init__(self, gamma, alpha=None):
        self.alpha = alpha
        self.gamma = gamma

    def at(self, y):
        """Return the per-sample class weight ``a_t`` for labels ``y``."""
        if self.alpha is None:
            # Explicit float dtype: ones_like(y) would inherit a bool dtype
            # from boolean labels, and the unary minus applied to it in
            # __call__/hess raises TypeError on bool arrays.
            return np.ones_like(y, dtype=float)
        return np.where(y, self.alpha, 1 - self.alpha)

    def pt(self, y, p):
        """Return ``p_t``: ``p`` where the label is truthy, ``1 - p`` elsewhere."""
        # Keep probabilities away from 0 and 1 so log(p_t) stays finite.
        p = np.clip(p, 1e-15, 1 - 1e-15)
        return np.where(y, p, 1 - p)

    def __call__(self, y_true, y_pred):
        """Per-sample focal loss for labels ``y_true`` and probabilities ``y_pred``."""
        at = self.at(y_true)
        pt = self.pt(y_true, y_pred)
        return -at * (1 - pt) ** self.gamma * np.log(pt)

    def grad(self, y_true, y_pred):
        """First derivative of the loss w.r.t. the raw score (logit) of ``y_pred``."""
        y = 2 * y_true - 1  # {0, 1} -> {-1, 1}
        at = self.at(y_true)
        pt = self.pt(y_true, y_pred)
        g = self.gamma
        return at * y * (1 - pt) ** g * (g * pt * np.log(pt) + pt - 1)

    def hess(self, y_true, y_pred):
        """Second derivative of the loss w.r.t. the raw score (logit) of ``y_pred``."""
        y = 2 * y_true - 1  # {0, 1} -> {-1, 1}
        at = self.at(y_true)
        pt = self.pt(y_true, y_pred)
        g = self.gamma

        # grad == u * v; differentiate with the product rule w.r.t. p_t ...
        u = at * y * (1 - pt) ** g
        du = -at * y * g * (1 - pt) ** (g - 1)
        v = g * pt * np.log(pt) + pt - 1
        dv = g * np.log(pt) + g + 1

        # ... then apply the chain rule: d p_t / d score = y * p_t * (1 - p_t).
        return (du * v + u * dv) * y * (pt * (1 - pt))

    def compute_init_score(self, y_true):
        """Find the constant prediction that minimises the summed loss on ``y_true``.

        The minimising probability is searched in (0, 1) and converted to
        log-odds. The result is stored on ``self.init_score`` and also returned.
        """
        res = optimize.minimize_scalar(
            lambda p: self(y_true, p).sum(),
            bounds=(0, 1),
            method='bounded'
        )
        p = res.x
        self.init_score = np.log(p / (1 - p))
        return self.init_score

    def lgb_obj(self, preds, train_data):
        """Custom objective: return ``(grad, hess)`` for raw scores ``preds``.

        ``train_data`` only needs a ``get_label()`` method.
        """
        y = train_data.get_label()
        p = special.expit(preds)
        return self.grad(y, p), self.hess(y, p)

    def lgb_eval(self, preds, train_data):
        """Custom metric: return ``('focal_loss', mean loss, is_higher_better=False)``."""
        y = train_data.get_label()
        p = special.expit(preds)
        is_higher_better = False
        return 'focal_loss', self(y, p).mean(), is_higher_better

In [None]:
def train_models(
        dataframe:pd.DataFrame,
        split:list,
        model_params:dict,
        fl_params:dict,
        use_init_score:bool,
        num_boost_round:int,
    ) -> pd.DataFrame:
    """Train one focal-loss LightGBM model per fold and collect out-of-fold predictions.

    Relies on the module-level ``input_feats`` and ``categ_feats`` lists for
    the feature columns.

    Parameters
    ----------
    dataframe : frame holding the feature columns and a ``target`` column.
    split : list of ``(train_idx, valid_idx)`` pairs of positional row indices.
    model_params : LightGBM parameters forwarded to ``lgb.train``.
    fl_params : keyword arguments for ``FocalLoss`` (``gamma``, ``alpha``).
    use_init_score : if True, start boosting from the loss-optimal constant
        log-odds and add the same offset back at prediction time.
    num_boost_round : number of boosting iterations per fold.

    Returns
    -------
    Frame indexed like ``dataframe`` with columns ``target`` and ``pred``
    (out-of-fold probability; rows never used for validation keep -1.0).
    """
    # dataframe to store the oof predictions
    oof = dataframe[["target"]].copy()
    # Float sentinel so the column is created with a float dtype up front.
    oof["pred"] = -1.0
    pred_col = oof.columns.get_loc("pred")

    for train_idx, valid_idx in split:

        # The fold indices are positions, so select rows positionally.
        train_df = dataframe.iloc[train_idx]
        valid_df = dataframe.iloc[valid_idx]

        fl = FocalLoss(**fl_params)
        if use_init_score:
            fl.compute_init_score(train_df["target"].values)
            # Build the offset as float explicitly: deriving the dtype from an
            # integer target array would truncate the log-odds to an integer
            # and no longer match the offset added at prediction time.
            init_score = np.full(len(train_df), fl.init_score, dtype=float)
        else:
            init_score = None

        train_dset = lgb.Dataset(
            data=train_df[input_feats],
            label=train_df["target"].values,
            categorical_feature=categ_feats,
            free_raw_data=True,
            init_score=init_score
        )
        # NOTE(review): `fobj` is the custom-objective hook of the LightGBM
        # version this notebook targets; confirm before upgrading LightGBM.
        model = lgb.train(
            params=model_params,
            train_set=train_dset,
            fobj=fl.lgb_obj,
            num_boost_round=num_boost_round,
        )

        # The model outputs raw scores; re-apply the offset used in training
        # (if any) before mapping to probabilities.
        raw_scores = model.predict(valid_df[input_feats])
        if use_init_score:
            raw_scores = raw_scores + fl.init_score
        oof.iloc[valid_idx, pred_col] = special.expit(raw_scores)

        del train_dset, model
        gc.collect()

    return oof

In [None]:
def objective(trial):
    """Optuna objective: sample focal-loss settings, run the CV training, score the OOF predictions.

    Sampled parameters:
      * ``use_alpha`` / ``alpha`` - whether to use class weighting and, if so,
        the positive-class weight in [0, 1].
      * ``gamma`` - focusing exponent in [0, 20].
      * ``use_init_score`` - whether boosting starts from the optimal constant.
      * ``num_boost_round`` - 1000..3000 in steps of 50.

    Returns the value of ``compute_amex_metric`` on the out-of-fold predictions.
    """
    if trial.suggest_categorical("use_alpha", [False, True]):
        alpha = trial.suggest_float('alpha', 0, 1)
    else:
        alpha = None
    gamma = trial.suggest_float('gamma', 0, 20)
    fl_params = dict(alpha=alpha, gamma=gamma)

    use_init_score = trial.suggest_categorical("use_init_score", [False, True])
    # `step` is passed by keyword: newer Optuna releases deprecate passing it
    # positionally, and the keyword form works on older releases too.
    num_boost_round = trial.suggest_int('num_boost_round', 1000, 3000, step=50)

    oof = train_models(
        train,
        skf_split,
        model_params,
        fl_params,
        use_init_score,
        num_boost_round,
    )
    metric = compute_amex_metric(oof.target.values, oof.pred.values)
    return metric

In [None]:
# Switch: when False the stored study is only loaded (no new trials are run),
# so the inspection cells below can be used on existing results.
do_optimize = True

# The study is persisted in a local SQLite file and reopened if it already exists.
study = optuna.create_study(
    direction="maximize",
    study_name="lgbm-focal-dsv02",
    storage="sqlite:///lgbm-focal-dsv02.db",
    load_if_exists=True,
)

if do_optimize:
    study.optimize(
        objective,
        n_trials=1000,
        timeout=24 * 60 * 60,  # one day, in seconds
        n_jobs=1,
        gc_after_trial=True,
    )

In [None]:
# Show the 20 trials with the highest objective value, best first.
study.trials_dataframe().sort_values("value", ascending=False).head(20)

In [None]:
# Objective value per trial over the course of the study.
plot_optimization_history(study)

In [None]:
# Estimated importance of each tuned parameter for the objective value.
plot_param_importances(study)

In [None]:
# Objective value against each tuned parameter, one panel per parameter.
plot_slice(study)

In [None]:
# Empirical distribution function of the objective values reached by the trials.
plot_edf(study)

In [None]:
# Parallel-coordinate view of parameter combinations and their objective values.
plot_parallel_coordinate(study)

In [None]:
# Parameter values of the best trial, as a plain dict.
dict(study.best_params)

***