# Libraries
---

In [None]:
from pathlib import Path
import pandas as pd
import numpy as np
import pickle
import random
import time
import os

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error

from scipy.optimize import minimize

import lightgbm as lgb
import xgboost as xgb
import catboost as ctb
from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import LinearRegression

from scipy.optimize import minimize

import matplotlib.pyplot as plt
import seaborn as sns

# Let displayed DataFrames show up to 120 rows and 100 columns before truncating.
pd.options.display.max_rows = 120
pd.options.display.max_columns = 100

import warnings
# NOTE(review): this silences every warning for the rest of the notebook, which
# also hides deprecation warnings raised by the ML libraries used below —
# consider narrowing the filter to specific categories.
warnings.simplefilter('ignore')

# Parameters
---

In [None]:
# Number of KFold splits; also the divisor used when averaging per-fold test predictions.
N_SPLITS = 10
# Passed as `n_estimators` to the LightGBM, XGBoost and CatBoost parameter dicts.
N_ESTIMATORS = 10000
# Passed as `early_stopping_rounds` to each booster's fit() call.
EARLY_STOPPING_ROUNDS = 200
# Passed as `verbose` to each booster's fit() call.
VERBOSE = 1000
# Single seed shared by seed_everything, the KFold splitters and every model.
SEED = 299792458

In [None]:
def seed_everything(seed=42):
    """Seed the global random number generators used by this notebook.

    Seeds Python's `random` module and NumPy's legacy global RNG with `seed`,
    and stores `str(seed)` in the PYTHONHASHSEED environment variable.

    NOTE(review): PYTHONHASHSEED is read at interpreter start-up, so setting it
    here presumably only affects processes launched afterwards — confirm if
    hash determinism of the running kernel matters.
    """
    np.random.seed(seed)
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = f"{seed}"

# Seed the global RNGs once, before any data is loaded or split.
seed_everything(SEED)

# Datasets
---

In [None]:
# Input locations: the competition data and a dataset with saved predictions.
INPUT = Path("../input/tabular-playground-series-aug-2021")
INPUT_PRED = Path("../input/tps08-pred")

# Each frame gets an extra 'pred' column loaded from the saved .npy arrays
# (presumably an earlier model's out-of-fold / test predictions — confirm
# against the tps08-pred dataset).
train = pd.read_csv(INPUT / "train.csv").assign(pred=np.load(INPUT_PRED / "oof.npy"))
test = pd.read_csv(INPUT / "test.csv").assign(pred=np.load(INPUT_PRED / "pred.npy"))

submission = pd.read_csv(INPUT / "sample_submission.csv")

target = 'loss'

# Columns to standardise: every test column whose name contains the letter 'f'.
# 'pred' is appended to the model inputs but is not in the list to be scaled.
scale_features = list(filter(lambda name: 'f' in name, test.columns))
features = [*scale_features, 'pred']

In [None]:
# Feature pairs whose element-wise product is added as a `mul_<a>_<b>` column.
mul_list = [('f13', 'f31'),
            ('f13', 'f46'),
            ('f13', 'f74'),
            ('f25', 'f34'),
            ('f25', 'f58'),
            ('f25', 'f63'),
            ('f25', 'f66'),
            ('f25', 'f73'),
            ('f25', 'f96')
           ]

# Feature pairs whose element-wise ratio (first / second) is added as a `div_<a>_<b>` column.
div_list = [('f13', 'f58'),
            ('f25', 'f9'),
            ('f25', 'f12'),
            ('f25', 'f30'),
            ('f25', 'f46'),
            ('f25', 'f64'),
            ('f25', 'f78'),
            ('f25', 'f79'),
            ('f25', 'f84')
           ]


def _add_interaction_features(df, mul_pairs, div_pairs):
    """Add product and ratio columns for the given column pairs to `df` in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding every column named in the pairs; it is modified in place.
    mul_pairs : iterable of (str, str)
        For each (a, b) a column `mul_a_b = df[a] * df[b]` is written.
    div_pairs : iterable of (str, str)
        For each (a, b) a column `div_a_b = df[a] / df[b]` is written.
    """
    for left, right in mul_pairs:
        df[f'mul_{left}_{right}'] = df[left] * df[right]
    for left, right in div_pairs:
        # NOTE(review): a zero in the divisor column would produce inf/NaN here;
        # confirm the data never contains zeros in these columns.
        df[f'div_{left}_{right}'] = df[left] / df[right]


# Apply the exact same transformations to both frames.
for _frame in (train, test):
    _add_interaction_features(_frame, mul_list, div_list)

fe_features = [col for col in test.columns if ('mul_' in col) or ('div_' in col)]

# Rebuild the list instead of using `+=`, so re-running this cell does not
# append the engineered names a second time (which would duplicate columns in
# `scale_features` and `features`).
# NOTE(review): the cell is still only value-reproducible before the scaling
# cell has run, because scaling overwrites the source columns in place.
scale_features = [col for col in scale_features if col not in fe_features] + fe_features
features = scale_features + ['pred']

In [None]:
# Standardise the raw and engineered features. The scaler statistics come from
# the training frame only and are then re-used for the test frame.
# Note: the scaled values overwrite the original columns, so this cell should
# be executed exactly once per kernel session.
ss = StandardScaler()
scaled_train_values = ss.fit_transform(train[scale_features])
scaled_test_values = ss.transform(test[scale_features])

train[scale_features] = scaled_train_values
test[scale_features] = scaled_test_values

In [None]:
# Display the (rows, columns) of both frames after feature engineering and scaling.
train.shape, test.shape

# LGBM/XGB/CatBoost
---

In [None]:
# Keyword arguments unpacked into lgb.LGBMRegressor inside the CV loop.
# Shares N_ESTIMATORS, SEED and a 5e-3 learning rate with the other boosters.
# NOTE(review): 'device': 'gpu' presumably needs a GPU-enabled LightGBM build —
# confirm for the target environment.
lgb_params = {
    'objective': 'regression',
    'metric': 'rmse',
    'n_estimators': N_ESTIMATORS,
    'random_state': SEED,
    'learning_rate': 5e-3,
    'subsample': 0.8,
    'subsample_freq': 1,
    'colsample_bytree': 0.6,
    'reg_alpha': 6.4,
    'reg_lambda': 1.8,
    'min_child_weight': 256,
    'min_child_samples': 20,
    'importance_type': 'gain',
    'device': 'gpu',
    'gpu_platform_id': 0,
    'gpu_device_id': 0
}

# Keyword arguments unpacked into xgb.XGBRegressor inside the CV loop.
# NOTE(review): 'tree_method': 'gpu_hist' presumably requires a GPU — confirm.
xgb_params = {
    'objective': 'reg:squarederror',
    'learning_rate': 5e-3,
    'seed': SEED,
    'subsample': 0.8,
    'colsample_bytree': 0.6,
    'n_estimators': N_ESTIMATORS,
    'max_depth': 11,
    'alpha': 20,
    'lambda': 9,
    'min_child_weight': 256,
    'importance_type': 'total_gain',
    'tree_method': 'gpu_hist'
}

# Keyword arguments unpacked into ctb.CatBoostRegressor inside the CV loop.
# NOTE(review): 'task_type': 'GPU' presumably requires a GPU — confirm.
ctb_params = {
    'bootstrap_type': 'Poisson',
    'loss_function': 'RMSE',
    'eval_metric': 'RMSE',
    'random_seed': SEED,
    'task_type': 'GPU',
    'max_depth': 8,
    'learning_rate': 5e-3,
    'n_estimators': N_ESTIMATORS,
    'max_bin': 280,
    'min_data_in_leaf': 64,
    'l2_leaf_reg': 0.01,
    'subsample': 0.8
}

In [None]:
# Out-of-fold buffers: one slot per training row, filled fold by fold.
mlp_oof, lgb_oof, xgb_oof, ctb_oof = (np.zeros(len(train)) for _ in range(4))

# Test-prediction accumulators: each fold adds its share of the average.
mlp_pred, lgb_pred, xgb_pred, ctb_pred = (np.zeros(len(test)) for _ in range(4))

# Shuffled K-fold splitter, seeded so the folds are reproducible.
kf = KFold(n_splits=N_SPLITS, shuffle=True, random_state=SEED)

# Cross-validated training of four regressors (MLP, LightGBM, XGBoost, CatBoost).
# For every fold, each model is fit on the training part of the fold; its
# predictions for the validation rows are written into the matching *_oof
# array, and its test predictions are added to the matching *_pred array
# divided by N_SPLITS, so that *_pred ends up as the average over folds.
for fold, (trn_idx, val_idx) in enumerate(kf.split(X=train[features], y=train[target])):
    print(f"===== fold {fold} =====")
    X_train, y_train = train[features].iloc[trn_idx], train[target].iloc[trn_idx]
    X_valid, y_valid = train[features].iloc[val_idx], train[target].iloc[val_idx]
    # The test matrix is the same in every fold.
    X_test = test[features]
    
    # --- MLP ---------------------------------------------------------------
    # The validation fold is not passed to fit() here.
    # NOTE(review): with early_stopping=True the estimator presumably holds out
    # part of X_train internally — confirm in the scikit-learn docs.
    start = time.time()
    model = MLPRegressor(
        hidden_layer_sizes=50,
        early_stopping=True,
        n_iter_no_change=100,
        solver='adam',
        shuffle=True,
        random_state=SEED
    )
    model.fit(X_train,y_train)

    mlp_oof[val_idx] = model.predict(X_valid)
    mlp_pred += model.predict(X_test) / N_SPLITS
    
    elapsed = time.time() - start
    
    rmse = mean_squared_error(y_valid, mlp_oof[val_idx], squared=False)
    print(f"fold {fold} - mlp rmse: {rmse:.6f}, elapsed time: {elapsed:.2f}sec\n")
    
    # --- LightGBM ----------------------------------------------------------
    # The validation fold is used as eval_set for early stopping.
    # NOTE(review): the same fold is then scored, so the reported rmse may be
    # slightly optimistic — confirm whether that matters for model selection.
    start = time.time()
    model = lgb.LGBMRegressor(**lgb_params)
    model.fit(
        X_train, 
        y_train,
        eval_set=[(X_valid, y_valid)],
        eval_metric='rmse',
        early_stopping_rounds=EARLY_STOPPING_ROUNDS,
        verbose=VERBOSE,
    )
    
    lgb_oof[val_idx] = model.predict(X_valid)
    lgb_pred += model.predict(X_test) / N_SPLITS
    
    elapsed = time.time() - start
    rmse = mean_squared_error(y_valid, lgb_oof[val_idx], squared=False)
    print(f"fold {fold} - lgb rmse: {rmse:.6f}, elapsed time: {elapsed:.2f}sec\n")

    # --- XGBoost -----------------------------------------------------------
    # Same pattern as LightGBM: validation fold as eval_set, early stopping.
    start = time.time()
    model = xgb.XGBRegressor(**xgb_params)
    model.fit(
        X_train, 
        y_train,
        eval_set=[(X_valid, y_valid)],
        eval_metric='rmse',
        early_stopping_rounds=EARLY_STOPPING_ROUNDS,
        verbose=VERBOSE
    )

    xgb_oof[val_idx] = model.predict(X_valid)
    xgb_pred += model.predict(X_test) / N_SPLITS

    elapsed = time.time() - start
    rmse = mean_squared_error(y_valid, xgb_oof[val_idx], squared=False)
    print(f"fold {fold} - xgb rmse: {rmse:.6f}, elapsed time: {elapsed:.2f}sec\n")

    # --- CatBoost ----------------------------------------------------------
    # Validation fold as eval_set with early stopping; use_best_model=True is
    # passed explicitly to fit().
    start = time.time()
    model = ctb.CatBoostRegressor(**ctb_params)
    model.fit(
        X_train,
        y_train,
        eval_set=[(X_valid, y_valid)],
        use_best_model=True,
        early_stopping_rounds=EARLY_STOPPING_ROUNDS,
        verbose=VERBOSE
    )

    ctb_oof[val_idx] = model.predict(X_valid)
    ctb_pred += model.predict(X_test) / N_SPLITS

    elapsed = time.time() - start
    rmse = mean_squared_error(y_valid, ctb_oof[val_idx], squared=False)
    print(f"fold {fold} - ctb rmse: {rmse:.6f}, elapsed time: {elapsed:.2f}sec\n")
        
# Base models in the order they were trained, with their OOF and test arrays.
_base_model_outputs = (
    ("mlp", mlp_oof, mlp_pred),
    ("lgb", lgb_oof, lgb_pred),
    ("xgb", xgb_oof, xgb_pred),
    ("ctb", ctb_oof, ctb_pred),
)

# Overall out-of-fold score of every base model against the full training target.
for _name, _oof, _pred in _base_model_outputs:
    print(f"oof {_name}_rmse = {mean_squared_error(train[target], _oof, squared=False)}")

# Persist both arrays of every model as <name>_oof.npy / <name>_pred.npy.
for _name, _oof, _pred in _base_model_outputs:
    np.save(f"{_name}_oof.npy", _oof)
    np.save(f"{_name}_pred.npy", _pred)

# Post process
---

## Ensemble

In [None]:
def class_optimizer(X, a0, a1, a2, a3):
    """Objective for the blend-weight search: score of a four-way weighted blend.

    Parameters
    ----------
    X : sequence of 3 floats
        The free blend weights applied to `a0`, `a1` and `a2`.
    a0, a1, a2, a3 : array-like
        Prediction arrays to blend. `a3` receives the remaining weight
        `1 - X[0] - X[1] - X[2]`, so the four weights always sum to one.

    Returns
    -------
    The `mean_squared_error(..., squared=False)` of the blend against
    `train[target]` (read from the notebook's global scope).
    """
    w0, w1, w2 = X[0], X[1], X[2]
    w3 = 1 - w0 - w1 - w2
    blended = w0 * a0 + w1 * a1 + w2 * a2 + w3 * a3
    return mean_squared_error(train[target], blended, squared=False)

# Search the three free blend weights with Nelder-Mead; the base models'
# out-of-fold predictions are forwarded to the objective as extra arguments.
# NOTE(review): the weights are unconstrained and the search stops after at
# most 300 iterations — check `res.success` in the printed result.
base_oofs = (mlp_oof, lgb_oof, xgb_oof, ctb_oof)

res = minimize(
    class_optimizer,
    [0.1, 0.3, 0.3],
    args=base_oofs,
    method='Nelder-Mead',
    options={'maxiter': 300},
)

print(res)

coef0, coef1, coef2 = res.x[0], res.x[1], res.x[2]
print(f"coef0 {coef0}, coef1 {coef1}, coef2 {coef2}, coef3 {1-coef0-coef1-coef2}")

In [None]:
# Optimised blend weights; CatBoost takes whatever is left so they sum to one.
w_mlp, w_lgb, w_xgb = res.x[0], res.x[1], res.x[2]
w_ctb = 1 - w_mlp - w_lgb - w_xgb

# Apply the same weights to the out-of-fold and to the test predictions.
ensemble_oof = w_mlp * mlp_oof + w_lgb * lgb_oof + w_xgb * xgb_oof + w_ctb * ctb_oof
ensemble_pred = w_mlp * mlp_pred + w_lgb * lgb_pred + w_xgb * xgb_pred + w_ctb * ctb_pred

## Stacking

In [None]:
# Inputs of the stacking model, one column per source in this order:
# mlp, lgb, xgb, ctb, weighted ensemble.
# NOTE(review): the ensemble column is a linear combination of the other four
# columns, so the design matrix is rank-deficient — confirm this is intended.
_stack_oof_columns = (mlp_oof, lgb_oof, xgb_oof, ctb_oof, ensemble_oof)
_stack_pred_columns = (mlp_pred, lgb_pred, xgb_pred, ctb_pred, ensemble_pred)

train_npy = np.concatenate([col.reshape(-1, 1) for col in _stack_oof_columns], axis=1)
y_npy = train[target].to_numpy()
test_npy = np.concatenate([col.reshape(-1, 1) for col in _stack_pred_columns], axis=1)

In [None]:
# Second-level model: a linear regression on the base-model predictions,
# cross-validated with the same splitter settings as the first level.
stack_oof = np.zeros(len(train))
stack_pred = np.zeros(len(test))

kf = KFold(n_splits=N_SPLITS, shuffle=True, random_state=SEED)

# The test matrix is identical for every fold.
X_test = test_npy

for fold, (trn_idx, val_idx) in enumerate(kf.split(X=train_npy, y=y_npy)):
    print(f"===== fold {fold} =====")
    X_train, X_valid = train_npy[trn_idx], train_npy[val_idx]
    y_train, y_valid = y_npy[trn_idx], y_npy[val_idx]

    start = time.time()
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Keep the fitted fold model on disk.
    with open(f"stacking_model{fold}.pkl", 'wb') as model_file:
        pickle.dump(model, model_file)

    valid_pred = model.predict(X_valid)
    stack_oof[val_idx] = valid_pred
    # Each fold contributes 1/N_SPLITS of the final test prediction.
    stack_pred += model.predict(X_test) / N_SPLITS

    elapsed = time.time() - start
    rmse = mean_squared_error(y_valid, stack_oof[val_idx], squared=False)
    print(f"fold {fold} - stack rmse: {rmse:.6f}, elapsed time: {elapsed:.2f}sec\n")

print(f"oof stack_rmse = {mean_squared_error(train[target], stack_oof, squared=False)}")

for _file_name, _array in (("stack_oof.npy", stack_oof), ("stack_pred.npy", stack_pred)):
    np.save(_file_name, _array)

## Blending

In [None]:
# Fixed blend of previously saved test predictions with the stacked ones.
# NOTE(review): the 0.7 / 0.3 split is hard-coded; nothing in this notebook
# derives it — confirm how it was chosen.
BEST_PRED_WEIGHT = 0.7
STACK_PRED_WEIGHT = 0.3

best_pred = np.load(INPUT_PRED / "best_pred.npy")

final_pred = BEST_PRED_WEIGHT * best_pred + STACK_PRED_WEIGHT * stack_pred

# Submission
---

In [None]:
# Put the blended predictions into the sample submission and write the CSV.
submission = submission.assign(loss=final_pred)
submission.to_csv("submission.csv", index=False)

# Show the final frame as the cell output.
submission