 A lot of what I have implemented here is credited to:
 1. https://www.kaggle.com/michael127001/xgbregressor-with-optuna-tuning
 2. https://www.kaggle.com/pranjalverma08/tps-08-cb-lgbm-xgb-starter
 3. https://www.kaggle.com/dmitryuarov/falling-below-7-87-voting-cb-xgb-lgbm

In [None]:
# import libraries
%matplotlib inline
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import optuna
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from optuna.samplers import TPESampler
from sklearn.preprocessing import RobustScaler, StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold, KFold
from xgboost import cv
from sklearn.ensemble import VotingRegressor
import xgboost as xgb
import lightgbm as lgbm
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor


In [None]:
# Load the competition tables: training rows, test rows and the submission template.
train_df = pd.read_csv('../input/tabular-playground-series-aug-2021/train.csv')
test_df = pd.read_csv('../input/tabular-playground-series-aug-2021/test.csv')
submission = pd.read_csv('../input/tabular-playground-series-aug-2021/sample_submission.csv')

# Target is the 'loss' column; features are every remaining column except 'id'.
y = train_df['loss']
X = train_df.drop(columns=['loss', 'id'])
X_test = test_df.drop(columns=['id'])

# Per-model switches: True runs a fresh Optuna search for that model,
# False falls back to the hard-coded parameters further down.
XGB_OPTUNA, CAT_OPTUNA, LGBM_OPTUNA = False, True, False

# Early-stopping rounds used inside the Optuna objectives and in the final
# cross-validated fits, respectively.
EARLY_OPTUNA, EARLY_FIT = 30, 100

In [None]:
# Standardise the features. The scaler statistics are computed on the train and
# test rows stacked together, then the same transform is applied to each set.
scaler = StandardScaler().fit(pd.concat([X, X_test]))
X, X_test = scaler.transform(X), scaler.transform(X_test)

In [None]:
def xgb_objective(trial, data=X, target=y):
    """Optuna objective: hold-out RMSE of a GPU ``XGBRegressor``.

    Args:
        trial: Optuna trial used to sample the hyperparameters.
        data: Feature matrix; defaults to the module-level ``X``.
        target: Regression target; defaults to the module-level ``y``.

    Returns:
        RMSE of the fitted model on the 40% validation split (lower is better).
    """
    # Fixed seed so every trial is scored on the same train/validation split.
    X_train, X_valid, y_train, y_valid = train_test_split(
        data, target, test_size=0.4, random_state=42)

    # suggest_float(step=..., log=...) replaces the deprecated
    # suggest_discrete_uniform / suggest_loguniform helpers with the same
    # sampling ranges; `step` is passed by keyword because positional use is
    # deprecated in newer Optuna releases.
    param_grid = {
        'max_depth': trial.suggest_int('max_depth', 4, 10),
        'n_estimators': trial.suggest_int('n_estimators', 1200, 4800, step=400),
        'eta': trial.suggest_float('eta', 0.006, 0.05),
        'subsample': trial.suggest_float('subsample', 0.3, 0.9, step=0.05),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 1.0, step=0.1),
        'colsample_bylevel': trial.suggest_float('colsample_bylevel', 0.3, 0.7, step=0.1),
        'min_child_weight': trial.suggest_float('min_child_weight', 1e-3, 1e2, log=True),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-3, 1e4, log=True),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-3, 1e4, log=True),
        'gamma': trial.suggest_float('gamma', 1e-6, 1e3, log=True),
    }

    model = xgb.XGBRegressor(tree_method='gpu_hist',
                             predictor='gpu_predictor',
                             n_jobs=4,
                             **param_grid)

    # The validation split doubles as the early-stopping monitor.
    model.fit(X_train, y_train,
              eval_set=[(X_valid, y_valid)],
              eval_metric='rmse',
              early_stopping_rounds=EARLY_OPTUNA,
              verbose=False)

    # sqrt(MSE) instead of mean_squared_error(squared=False): the `squared`
    # keyword is no longer accepted by recent scikit-learn releases.
    return float(np.sqrt(mean_squared_error(y_valid, model.predict(X_valid))))

In [None]:
def cat_objective(trial, data=X, target=y):
    """Optuna objective: hold-out RMSE of a GPU ``CatBoostRegressor``.

    Args:
        trial: Optuna trial used to sample the hyperparameters.
        data: Feature matrix; defaults to the module-level ``X``.
        target: Regression target; defaults to the module-level ``y``.

    Returns:
        RMSE of the fitted model on the 40% validation split (lower is better).
    """
    # Fixed seed so every trial is scored on the same train/validation split.
    X_train, X_valid, y_train, y_valid = train_test_split(
        data, target, test_size=0.4, random_state=42)

    # suggest_float(step=..., log=...) replaces the deprecated
    # suggest_uniform / suggest_loguniform / suggest_discrete_uniform helpers
    # with the same sampling ranges.
    params = {
        'iterations': trial.suggest_int('iterations', 1000, 10000),
        # NOTE(review): fit() below also receives early_stopping_rounds, which
        # appears to take precedence over od_wait — confirm against CatBoost.
        'od_wait': trial.suggest_int('od_wait', 500, 2000),
        'learning_rate': trial.suggest_float('learning_rate', 0.02, 0.5),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-3, 1e2, log=True),
        'subsample': trial.suggest_float('subsample', 0.3, 1.0, step=0.05),
        'random_strength': trial.suggest_float('random_strength', 10, 50),
        'depth': trial.suggest_int('depth', 5, 15),
        'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 5, 35),
        # Upper bound lowered from 300 to 255: training runs with
        # task_type="GPU", where CatBoost limits the number of borders to 255,
        # so larger samples would abort the study.
        'max_bin': trial.suggest_int('max_bin', 1, 255),
        'leaf_estimation_iterations': trial.suggest_int('leaf_estimation_iterations', 1, 15),
    }

    model = CatBoostRegressor(loss_function='RMSE',
                              task_type="GPU",
                              eval_metric='RMSE',
                              leaf_estimation_method='Newton',
                              bootstrap_type='Bernoulli',
                              **params)

    # The validation split doubles as the early-stopping monitor.
    model.fit(X_train, y_train,
              eval_set=[(X_valid, y_valid)],
              early_stopping_rounds=EARLY_OPTUNA,
              verbose=False)

    # sqrt(MSE) instead of mean_squared_error(squared=False): the `squared`
    # keyword is no longer accepted by recent scikit-learn releases.
    return float(np.sqrt(mean_squared_error(y_valid, model.predict(X_valid))))

In [None]:
def lgbm_objective(trial, data=X, target=y):
    """Optuna objective: hold-out RMSE of a GPU ``LGBMRegressor``.

    Args:
        trial: Optuna trial used to sample the hyperparameters.
        data: Feature matrix; defaults to the module-level ``X``.
        target: Regression target; defaults to the module-level ``y``.

    Returns:
        RMSE of the fitted model on the 40% validation split (lower is better).
    """
    # Fixed seed so every trial is scored on the same train/validation split.
    X_train, X_valid, y_train, y_valid = train_test_split(
        data, target, test_size=0.4, random_state=42)

    # suggest_float(log=...) replaces the deprecated suggest_uniform /
    # suggest_loguniform helpers with the same ranges; `step` is passed by
    # keyword because positional use is deprecated in newer Optuna releases.
    params = {
        'reg_alpha': trial.suggest_float("reg_alpha", 1e-3, 1e3, log=True),
        'reg_lambda': trial.suggest_float("reg_lambda", 1e-3, 1e3, log=True),
        'num_leaves': trial.suggest_int("num_leaves", 100, 500),
        'colsample_bytree': trial.suggest_float("colsample_bytree", 0.4, 1.0),
        'subsample': trial.suggest_float("subsample", 0.3, 0.6),
        'subsample_freq': trial.suggest_int("subsample_freq", 0, 5),
        'min_child_samples': trial.suggest_int("min_child_samples", 5, 80),
        'max_depth': trial.suggest_int('max_depth', 4, 10),
        'n_estimators': trial.suggest_int('n_estimators', 1200, 10000, step=400),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.5),
    }

    model = lgbm.LGBMRegressor(device='gpu',
                               boosting_type='gbdt',
                               random_state=42,
                               metric="RMSE",
                               verbosity=-1,
                               n_jobs=-1,
                               **params)

    # The validation split doubles as the early-stopping monitor.
    model.fit(X_train, y_train,
              eval_set=[(X_valid, y_valid)],
              early_stopping_rounds=EARLY_OPTUNA,
              verbose=False)

    # sqrt(MSE) instead of mean_squared_error(squared=False): the `squared`
    # keyword is no longer accepted by recent scikit-learn releases.
    return float(np.sqrt(mean_squared_error(y_valid, model.predict(X_valid))))

In [None]:
def create_optuna_study(objective, study_name, train_time):
    """Run a time-boxed minimisation study and print a summary of its best trial.

    Args:
        objective: Callable that receives an Optuna trial and returns the
            value to minimise.
        study_name: Name given to the created study.
        train_time: Optimisation budget, forwarded to ``study.optimize`` as
            its ``timeout`` argument.

    Returns:
        Tuple ``(best_trial, study)``.
    """
    study = optuna.create_study(study_name=study_name,
                                sampler=TPESampler(),
                                direction='minimize')
    study.optimize(objective, timeout=train_time)

    best = study.best_trial

    # Report how many trials ran and what the winning configuration was.
    print('Number of finished trials: ', len(study.trials))
    print('Best trial:')
    print(f'\tValue: {best.value}')
    print('\tParams: ')
    for param_name, param_value in best.params.items():
        print(f'\t\t{param_name}: {param_value}')

    return best, study

In [None]:
# Time budget handed to each enabled Optuna study as its timeout:
# 10 * 60 = 600, i.e. ten minutes when read as seconds.
train_time = 10 * 60

# --- XGBoost: stored parameters unless a fresh search is requested ---
if not XGB_OPTUNA:
    # Hard-coded configuration; the accompanying note recorded
    # "Value: 7.845515259148512" for it.
    xgb_params = {
        'max_depth': 10,
        'n_estimators': 4800,
        'eta': 0.006004080448420365,
        'subsample': 0.7,
        'colsample_bytree': 0.8,
        'colsample_bylevel': 0.5,
        'min_child_weight': 26.692885575205427,
        'reg_lambda': 30.107616169341984,
        'reg_alpha': 0.055043751121261995,
        'gamma': 1.7568597718693732e-05,
    }
else:
    xgb_trial, xgb_study = create_optuna_study(xgb_objective, 'XGBRegressor', train_time)
    xgb_params = xgb_trial.params
# Fixed (non-tuned) settings shared by both branches.
xgb_params.update(tree_method='gpu_hist',
                  predictor='gpu_predictor',
                  n_jobs=4)

# --- CatBoost: stored parameters unless a fresh search is requested ---
if not CAT_OPTUNA:
    # Hard-coded configuration; the accompanying note recorded
    # "Value: 7.8525427867211395" for it.
    cat_params = {
        'iterations': 5249,
        'od_wait': 1455,
        'learning_rate': 0.027166443415647373,
        'reg_lambda': 0.034058831747453534,
        'subsample': 0.8500000000000001,
        'random_strength': 18.348082769035948,
        'depth': 5,
        'min_data_in_leaf': 13,
        'leaf_estimation_iterations': 7,
    }
else:
    cat_trial, cat_study = create_optuna_study(cat_objective, 'CatRegressor', train_time)
    cat_params = cat_trial.params
# Fixed (non-tuned) settings shared by both branches.
cat_params.update(loss_function='RMSE',
                  eval_metric='RMSE',
                  bootstrap_type='Bernoulli',
                  leaf_estimation_method='Newton',
                  random_state=0,
                  task_type='GPU')

# --- LightGBM: stored parameters unless a fresh search is requested ---
if not LGBM_OPTUNA:
    # Hard-coded configuration; the accompanying note recorded
    # "Value: 7.848009541262737" for it.
    lgbm_params = {
        'reg_alpha': 196.9980774975926,
        'reg_lambda': 1.0086558093083937,
        'num_leaves': 482,
        'colsample_bytree': 0.49585597524837915,
        'subsample': 0.5964181419864539,
        'subsample_freq': 3,
        'min_child_samples': 78,
        'max_depth': 5,
        'n_estimators': 6000,
        'learning_rate': 0.01373955979023822,
    }
else:
    lgbm_trial, lgbm_study = create_optuna_study(lgbm_objective, 'LGBMRegressor', train_time)
    lgbm_params = lgbm_trial.params
# Fixed (non-tuned) settings shared by both branches.
lgbm_params.update(metric='RMSE',
                   random_state=0,
                   device='gpu',
                   n_jobs=-1)



In [None]:
#optuna.visualization.plot_param_importances(xgb_study)

In [None]:
# `cat_study` only exists when the CatBoost search was actually run; without
# the guard this cell raises NameError when CAT_OPTUNA is False. Written as a
# single expression so the notebook still renders the returned figure.
optuna.visualization.plot_param_importances(cat_study) if CAT_OPTUNA else None

In [None]:
#optuna.visualization.plot_param_importances(lgbm_study)

In [None]:
# Disabled plotting calls, kept inside a bare string literal so none of them run.
# NOTE(review): no variable named `study` is defined in the visible cells —
# substitute an actual study object (e.g. cat_study) before enabling these.
'''optuna.visualization.plot_optimization_history(study)
optuna.visualization.plot_param_importances(study)
optuna.visualization.plot_parallel_coordinate(study)
optuna.visualization.plot_slice(study)'''

In [None]:
# Cross-validated training of the three boosters and a weighted blend of their
# test predictions, averaged over the folds and written to submission.csv.
test_preds = np.zeros(len(X_test))
n_splits = 10

# NOTE(review): StratifiedKFold treats `y` as class labels; this presumably
# relies on 'loss' taking a limited set of discrete values — confirm.
kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=0)

for fold, (train_idx, valid_idx) in enumerate(kf.split(X, y), 1):
    # The split yields positional indices. X is indexed positionally, so y is
    # sliced with .iloc as well rather than relying on its index labels
    # happening to coincide with positions.
    X_train, y_train = X[train_idx], y.iloc[train_idx]
    X_valid, y_valid = X[valid_idx], y.iloc[valid_idx]

    # Each model uses the fold's validation part for early stopping.
    xgb_model = xgb.XGBRegressor(**xgb_params)
    xgb_model.fit(X_train, y_train,
                  eval_set=[(X_valid, y_valid)],
                  early_stopping_rounds=EARLY_FIT,
                  verbose=False)

    lgbm_model = LGBMRegressor(**lgbm_params)
    lgbm_model.fit(X_train, y_train,
                   eval_set=[(X_valid, y_valid)],
                   early_stopping_rounds=EARLY_FIT,
                   verbose=False)

    cat_model = CatBoostRegressor(**cat_params)
    cat_model.fit(X_train, y_train,
                  eval_set=[(X_valid, y_valid)],
                  early_stopping_rounds=EARLY_FIT,
                  verbose=False)

    # Accumulate the weighted blend (0.7 + 0.15 + 0.15 = 1) for this fold.
    test_preds += xgb_model.predict(X_test) * 0.7
    test_preds += lgbm_model.predict(X_test) * 0.15
    test_preds += cat_model.predict(X_test) * 0.15

    # sqrt(MSE) instead of mean_squared_error(squared=False): the `squared`
    # keyword is no longer accepted by recent scikit-learn releases.
    xgb_rmse = np.sqrt(mean_squared_error(y_valid, xgb_model.predict(X_valid)))
    lgbm_rmse = np.sqrt(mean_squared_error(y_valid, lgbm_model.predict(X_valid)))
    cat_rmse = np.sqrt(mean_squared_error(y_valid, cat_model.predict(X_valid)))

    print(f'Fold {fold}/{n_splits}\n\txgb: {xgb_rmse}\n\tlbgm: {lgbm_rmse}\n\tcat: {cat_rmse}\n')

# Average the per-fold blended predictions.
test_preds /= n_splits

submission['loss'] = test_preds
submission.to_csv('submission.csv', index=False)