In [None]:
!pip install pytorch-tabnet
!pip install rgf_python

In [None]:
from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder, MinMaxScaler, RobustScaler, QuantileTransformer
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold, KFold
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import Ridge, BayesianRidge, LinearRegression, ElasticNet
from scipy.optimize import minimize
from pytorch_tabnet.tab_model import TabNetRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.tree import DecisionTreeRegressor
from rgf.sklearn import RGFRegressor
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor
from sklearn import model_selection
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import category_encoders as ce
import catboost
import lightgbm as lgbm
import xgboost as xgb
import optuna
import tqdm
import pickle
import warnings
import sklearn.exceptions
warnings.filterwarnings('ignore', category=UserWarning)

In [None]:
# Read the three competition CSV files from `input_dir` in one pass.
input_dir = Path('../input/tabular-playground-series-aug-2021/')
train_df, test_df, sample_submission = (
    pd.read_csv(input_dir / csv_name)
    for csv_name in ('train.csv', 'test.csv', 'sample_submission.csv')
)

In [None]:
# Features are every column except the row id (and, for train, the target).
# The target is the 'loss' column of the training frame.
X = train_df.drop(columns=['id', 'loss']).to_numpy()
y = train_df['loss'].to_numpy()
X_test = test_df.drop(columns=['id']).to_numpy()

In [None]:
# Fit the scaler on the training features only, then apply the same
# transformation to both the training and the test features.
scaler = StandardScaler().fit(X)
X = scaler.transform(X)
X_test = scaler.transform(X_test)

# XGBoost Hyperparameter Tuning with Optuna

In [None]:
def objectivexgb(trial):
    """Optuna objective for XGBoost: RMSE on a fixed 20% hold-out split.

    Reads the module-level arrays ``X`` and ``y``, samples one set of
    hyperparameters from ``trial``, fits a GPU ``XGBRegressor`` on the
    training part and returns the RMSE of its predictions on the hold-out
    part (lower is better).

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample the hyperparameters.

    Returns
    -------
    float
        Root mean squared error on the hold-out split.
    """
    # The split uses a fixed seed, so every trial is scored on the same rows.
    # NOTE(review): stratify=y requires every distinct target value to occur
    # at least twice; confirm this holds for the target used here.
    X_train, X_valid, y_train, y_valid = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)

    params = {
        'max_depth': trial.suggest_int('max_depth', 6, 12),
        'subsample': trial.suggest_discrete_uniform('subsample', 0.05, 1.0, 0.1),
        'n_estimators': trial.suggest_int('n_estimators', 500, 2000, 100),
        # Only 'learning_rate' is tuned. 'eta' is an alias of the same XGBoost
        # parameter, so sampling both made one of the two search dimensions
        # pure noise and left the effective step size ambiguous.
        'learning_rate': trial.suggest_discrete_uniform('learning_rate', 0.01, 0.1, 0.01),
        'reg_alpha': trial.suggest_int('reg_alpha', 1, 50),
        'reg_lambda': trial.suggest_int('reg_lambda', 5, 100),
        'min_child_weight': trial.suggest_int('min_child_weight', 5, 20),
    }

    reg = xgb.XGBRegressor(tree_method='gpu_hist', **params)
    reg.fit(X_train, y_train, eval_set=[(X_valid, y_valid)], eval_metric='rmse', verbose=False)

    y_preds = reg.predict(X_valid)
    return np.sqrt(mean_squared_error(y_valid, y_preds))

In [None]:
# Search for the XGBoost hyperparameters that minimise the value returned by
# `objectivexgb`, using 50 trials.
study = optuna.create_study(study_name='XGBoostOptuna', direction='minimize')
study.optimize(objectivexgb, n_trials=50)

print(f'Number of finished trials: {len(study.trials)}')
print(f'Best trial: score {study.best_trial.value}, params {study.best_trial.params}')

In [None]:
# Best XGBoost trial parameters plus the fixed regression objective.
xgb_params = {**study.best_trial.params, 'objective': 'reg:squarederror'}

In [None]:
def objectivecatb(trial):
    """Optuna objective for CatBoost: RMSE on a fixed 20% hold-out split.

    Reads the module-level arrays ``X`` and ``y``, samples hyperparameters
    from ``trial``, fits a GPU ``CatBoostRegressor`` with early stopping on
    the hold-out part and returns the hold-out RMSE (lower is better).
    """
    # Local names deliberately avoid `X_test` so they cannot be confused with
    # the module-level test matrix.
    X_fit, X_hold, y_fit, y_hold = train_test_split(X, y, test_size=0.2, random_state=42)

    search_space = dict(
        iterations=trial.suggest_int("iterations", 1000, 12000),
        od_wait=trial.suggest_int('od_wait', 500, 2000),
        loss_function='RMSE',
        task_type="GPU",
        eval_metric='RMSE',
        learning_rate=trial.suggest_uniform('learning_rate', 0.01, 0.3),
        reg_lambda=trial.suggest_uniform('reg_lambda', 1e-5, 100),
        subsample=trial.suggest_uniform('subsample', 0, 1),
        random_strength=trial.suggest_uniform('random_strength', 1, 50),
        depth=trial.suggest_int('depth', 3, 14),
        min_data_in_leaf=trial.suggest_int('min_data_in_leaf', 1, 30),
        leaf_estimation_iterations=trial.suggest_int('leaf_estimation_iterations', 1, 15),
    )

    regressor = CatBoostRegressor(**search_space)
    regressor.fit(
        X_fit,
        y_fit,
        eval_set=[(X_hold, y_hold)],
        early_stopping_rounds=100,
        verbose=False,
    )

    hold_preds = regressor.predict(X_hold)
    return np.sqrt(mean_squared_error(y_hold, hold_preds))


In [None]:
# Search for the CatBoost hyperparameters that minimise the value returned by
# `objectivecatb`, using 50 trials.
study2 = optuna.create_study(study_name='CatBoostOptuna', direction='minimize')
study2.optimize(objectivecatb, n_trials=50)

print(f'Number of finished trials: {len(study2.trials)}')
print(f'Best trial: score {study2.best_trial.value}, params {study2.best_trial.params}')

In [None]:
# Best CatBoost trial parameters, extended with the fixed settings that were
# not part of the search.
catb_params = {
    **study2.best_trial.params,
    'loss_function': 'RMSE',
    'eval_metric': 'RMSE',
    'leaf_estimation_method': 'Newton',
    'random_state': 42,
}

# LightGBM Hyperparameter Tuning with Optuna

In [None]:
def objectivelgbm(trial):
    """Optuna objective for LightGBM: RMSE on a fixed 25% hold-out split.

    Reads the module-level arrays ``X`` and ``y``, samples hyperparameters
    from ``trial``, fits a GPU ``LGBMRegressor`` and returns the RMSE of its
    predictions on the hold-out part (lower is better).
    """
    # Local names deliberately avoid `X_test` so they cannot be confused with
    # the module-level test matrix.
    X_fit, X_hold, y_fit, y_hold = train_test_split(X, y, test_size=0.25, random_state=42)

    # Hyperparameters sampled by Optuna (sampling order is kept unchanged).
    tuned = {
        'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
        'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
        'num_leaves': trial.suggest_int('num_leaves', 2, 256),
        'learning_rate': trial.suggest_uniform('learning_rate', 0.01, 0.5),
        'feature_fraction': trial.suggest_uniform('feature_fraction', 0.1, 1.0),
        'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.1, 1.0),
        'bagging_freq': trial.suggest_int('bagging_freq', 0, 15),
        'min_child_samples': trial.suggest_int('min_child_samples', 1, 100),
        'num_threads': trial.suggest_int('num_threads', 1, 10),
    }

    regressor = lgbm.LGBMRegressor(
        metric="RMSE",
        verbosity=-1,
        boosting_type="gbdt",
        device='gpu',
        random_state=42,
        **tuned,
    )
    regressor.fit(X_fit, y_fit, eval_set=[(X_hold, y_hold)], verbose=False)

    hold_preds = regressor.predict(X_hold)
    return np.sqrt(mean_squared_error(y_hold, hold_preds))

In [None]:
# Search for the LightGBM hyperparameters that minimise the value returned by
# `objectivelgbm`, using 50 trials.
study3 = optuna.create_study(study_name='LGBMOptuna', direction='minimize')
study3.optimize(objectivelgbm, n_trials=50)

print(f'Number of finished trials: {len(study3.trials)}')
print(f'Best trial: score {study3.best_trial.value}, params {study3.best_trial.params}')

In [None]:
# Best LightGBM trial parameters (only the values sampled by Optuna).
lgb_params = dict(study3.best_trial.params)

# Tuned parameter results

In [None]:
# Display the XGBoost parameters assembled from the best Optuna trial.
xgb_params

In [None]:
# Display the CatBoost parameters assembled from the best Optuna trial.
catb_params

In [None]:
# Display the LightGBM parameters taken from the best Optuna trial.
lgb_params

In [None]:
# The tuning cells above take many hours to run. The values below are the
# results of such a run; this cell overwrites xgb_params, catb_params and
# lgb_params so the rest of the notebook can be run with them directly.

# NOTE(review): 'eta' and 'learning_rate' are both set; XGBoost documents
# them as aliases of one parameter, so only one of the two values is
# effective. Confirm which one before removing either.
xgb_params = dict(
    max_depth=11,
    subsample=0.6500000000000001,
    n_estimators=1700,
    eta=0.02,
    learning_rate=0.01,
    reg_alpha=7,
    reg_lambda=32,
    min_child_weight=19,
    objective='reg:squarederror',
)

catb_params = dict(
    iterations=8195,
    od_wait=2000,
    learning_rate=0.01039421755643651,
    reg_lambda=95.14582565179668,
    subsample=0.6044381624463067,
    random_strength=15.077418882976177,
    depth=12,
    min_data_in_leaf=5,
    leaf_estimation_iterations=4,
    loss_function='RMSE',
    eval_metric='RMSE',
    leaf_estimation_method='Newton',
    random_state=42,
)

lgb_params = dict(
    lambda_l1=0.19673487505279366,
    lambda_l2=6.205681774095499e-05,
    num_leaves=20,
    learning_rate=0.1229039615047327,
    feature_fraction=0.8566649457461354,
    bagging_fraction=0.9999164419693399,
    bagging_freq=10,
    min_child_samples=92,
    num_threads=5,
)

In [None]:
# Three-stage stacking, repeated independently for each of the K folds:
#   Stage 1: four base regressors are fit on the training part of the fold.
#   Stage 2: three linear meta-estimators are fit on the base models'
#            predictions for the validation part of the fold.
#   Stage 3: a Ridge regressor is fit on the three meta-estimators' outputs.
# Each fold contributes one vector of test predictions to `final_test_preds`.
#
# Note: the stage 2 and stage 3 scores printed below are computed on the same
# validation rows the meta-estimators were fit on, i.e. they are in-sample.

kf = KFold(n_splits=5, shuffle=True, random_state=42)
final_test_preds = []

# Work on local array views instead of rebinding the globals `X` / `y`, so
# re-running this cell (or an earlier one) sees the same objects every time.
# The target is kept 2-D here because the TabNet fit below is given a
# column-shaped target; it is squeezed to 1-D for the other models.
X_arr = np.asarray(X)
y_arr = np.asarray(y).reshape(-1, 1)


def _rmse(y_true, y_pred):
    """Root mean squared error, computed as in the Optuna objectives."""
    return np.sqrt(mean_squared_error(y_true, y_pred))


xgb_base_model = xgb.XGBRegressor(**xgb_params, gpu_id=0, tree_method = 'gpu_hist')
ctb_base_model = catboost.CatBoostRegressor(**catb_params, task_type='GPU')
lgb_base_model = lgbm.LGBMRegressor(**lgb_params, device = 'gpu', gpu_platform_id = 0, gpu_device_id = 0)

meta_estimator1 = LinearRegression()
meta_estimator2 = BayesianRidge()
meta_estimator3 = ElasticNet()
final_estimator = Ridge()

# (printed label, estimator) pairs for stage 2, in blending order.
meta_estimators = [
    ('Meta Estimator 1: Linear Regression', meta_estimator1),
    ('Meta Estimator 2: Bayesian Ridge Regressor', meta_estimator2),
    ('Meta Estimator 3: ElasticNet Regressor', meta_estimator3),
]

for fold, (train_idx, test_idx) in enumerate(kf.split(X_arr, y_arr)):
    # A fresh TabNet model is created for every fold.
    tbn_base_model = TabNetRegressor(verbose=0)
    print('*'*15, f'Fold {fold+1}', '*'*15, '\n')
    print('Stage 1 Training/Predictions', '\n')
    X_train, X_valid = X_arr[train_idx], X_arr[test_idx]
    y_train, y_valid = y_arr[train_idx], y_arr[test_idx]

    # Validation predictions are computed once per model and reused for both
    # the printed score and the stage 2 blend.
    tbn_base_model.fit(X_train, y_train, eval_set=[(X_valid, y_valid)], patience=3)
    tbn_valid = tbn_base_model.predict(X_valid)
    print(f'Stage 1 Model 1: TabNet Regressor | Fold {fold+1} Loss: {_rmse(y_valid, tbn_valid)}')

    # The remaining models are trained on a 1-D target.
    y_train, y_valid = y_train.squeeze(), y_valid.squeeze()

    lgb_base_model.fit(X_train, y_train)
    lgb_valid = lgb_base_model.predict(X_valid)
    print(f'Stage 1 Model 2: LightGBM Regressor | Fold {fold+1} Loss: {_rmse(y_valid, lgb_valid)}')

    ctb_base_model.fit(X_train, y_train, verbose=False)
    ctb_valid = ctb_base_model.predict(X_valid)
    print(f'Stage 1 Model 3: CatBoost Regressor | Fold {fold+1} Loss: {_rmse(y_valid, ctb_valid)}')

    xgb_base_model.fit(X_train, y_train, verbose=False)
    xgb_valid = xgb_base_model.predict(X_valid)
    print(f'Stage 1 Model 4: XGBoost Regressor | Fold {fold+1} Loss: {_rmse(y_valid, xgb_valid)}')

    print('\n', '*'*15, 'Stage 2 Training/Predictions', '*'*15, '\n')

    # One column per base model: LightGBM, CatBoost, XGBoost, TabNet.
    blend_train = np.c_[lgb_valid, ctb_valid, xgb_valid, tbn_valid]
    blend_test = np.c_[lgb_base_model.predict(X_test), ctb_base_model.predict(X_test), xgb_base_model.predict(X_test), tbn_base_model.predict(X_test)]

    meta_valid_preds, meta_test_preds = [], []
    for label, meta_estimator in meta_estimators:
        meta_estimator.fit(blend_train, y_valid)
        meta_valid = meta_estimator.predict(blend_train)
        meta_valid_preds.append(meta_valid)
        meta_test_preds.append(meta_estimator.predict(blend_test))
        print(f'{label} | Score: {_rmse(y_valid, meta_valid)}')

    print('\n', '*'*15, 'Stage 3 Training/Predictions', '*'*15, '\n')

    # One column per meta-estimator, in the order listed in `meta_estimators`.
    blend_train = np.column_stack(meta_valid_preds)
    blend_test = np.column_stack(meta_test_preds)
    final_estimator.fit(blend_train, y_valid)
    print(f'Final Meta Estimator: Ridge Regressor | Score: {_rmse(y_valid, final_estimator.predict(blend_train))}')
    final_test_preds.append(final_estimator.predict(blend_test))
    print('\n')

# Final results

In [None]:
# Average the per-fold test predictions. The mean is taken over however many
# folds were actually run, so changing the number of CV splits cannot silently
# mis-scale the submission.
assert final_test_preds, "no fold predictions found - run the stacking cell first"
sample_submission['loss'] = np.mean(final_test_preds, axis=0)
sample_submission.to_csv('submission.csv', index=False)

In [None]:
# This submission achieved a public leaderboard score of 7.87552.

* Thank you to the author of the notebook below for the valuable stacking ensemble method used here. Please upvote that notebook!

[Two Stage Stacking Ensemble](https://www.kaggle.com/ryanbarretto/two-stage-stacking-ensemble)