In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
input_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Import Relevant Libraries

In [None]:
# plotting libraries
import matplotlib.pyplot as plt
import seaborn as sns
# ML Libraries
from xgboost import XGBRegressor
# Optimization Library
import optuna
# Sk-Learn metrics and model selection
from sklearn.model_selection import KFold, cross_val_score
from sklearn.metrics import mean_squared_error, make_scorer

## Import Data

In [None]:
# Read the competition's train, test and sample-submission CSVs, in that order.
X_train, X_test, sub = map(
    pd.read_csv,
    (
        "/kaggle/input/tabular-playground-series-aug-2021/train.csv",
        "/kaggle/input/tabular-playground-series-aug-2021/test.csv",
        "/kaggle/input/tabular-playground-series-aug-2021/sample_submission.csv",
    ),
)

## Make train, test DataFrame and target Series

In [None]:
# Split off the target once. The guard keeps this cell re-runnable: on a second
# run 'loss' has already been removed from X_train, so y_train is left untouched
# instead of raising.
if 'loss' in X_train.columns:
    y_train = X_train['loss']

# Drop the identifier and target columns without mutating in place;
# errors='ignore' turns the drop into a no-op for columns already removed.
X_train = X_train.drop(columns=['id', 'loss'], errors='ignore')

X_test = X_test.drop(columns=['id'], errors='ignore')

# Model Creation
## Make Custom Root Mean Squared Error Scorer

In [None]:
def _rmse(y_true, y_pred):
    """Return the root mean squared error between y_true and y_pred."""
    # Take the square root explicitly rather than passing squared=False:
    # that keyword is deprecated in recent scikit-learn and removed in later
    # releases, while the plain MSE call works on every version.
    return np.sqrt(mean_squared_error(y_true, y_pred))


# greater_is_better=False makes the scorer return the negated RMSE, so that
# "higher is better" holds for cross_val_score and for a maximizing search.
root_mean_square_error = make_scorer(_rmse, greater_is_better=False)

## Make Shuffled KFold With Random Seed For Repeatability

In [None]:
# 4-fold splitter with shuffling enabled; the fixed random_state keeps the shuffle repeatable.
sKF = KFold(n_splits=4, shuffle=True, random_state=3141)

## Optimization of XGBRegressor Parameters with Optuna

In [None]:
def Objective(trial):
    """Optuna objective: mean cross-validated score of one XGBRegressor setup.

    Draws a hyper-parameter configuration from ``trial``, builds an
    ``XGBRegressor`` from it and scores it with ``cross_val_score`` on the
    notebook-level ``X_train`` / ``y_train``, using the ``sKF`` splitter and
    the ``root_mean_square_error`` scorer.

    Parameters
    ----------
    trial : object providing ``suggest_int`` and ``suggest_float``
        The trial from which hyper-parameter values are sampled.

    Returns
    -------
    The mean of the per-fold scores. The scorer is created with
    ``greater_is_better=False``, so this is a negated RMSE.
    """
    # Settings that stay the same for every trial.
    fixed_settings = {
        'tree_method': 'gpu_hist',
        'random_state': 3141,
        'verbosity': 1,
    }

    # Settings sampled for this trial (learning_rate is drawn on a log scale).
    tuned_settings = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 5000),
        'max_depth': trial.suggest_int('max_depth', 3, 15),
        'learning_rate': trial.suggest_float('learning_rate', 0.0001, 2, log=True),
        'gamma': trial.suggest_float('gamma', 0, 1),
        'subsample': trial.suggest_float('subsample', 0.2, 1),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.2, 1),
        'reg_alpha': trial.suggest_float('reg_alpha', 0, 100),
        'reg_lambda': trial.suggest_float('reg_lambda', 0, 100),
    }

    model = XGBRegressor(**tuned_settings, **fixed_settings)

    # One score per fold of the shuffled splitter; the trial value is their mean.
    fold_scores = cross_val_score(
        model,
        X_train,
        y_train,
        scoring=root_mean_square_error,
        cv=sKF,
    )
    return fold_scores.mean()

In [None]:
# Create an Optuna study; direction='maximize' because the objective returns a negated RMSE.
# NOTE(review): no study.optimize(...) call is visible in this notebook; the tuned
# values used further down appear to be hard-coded. Confirm the search was run elsewhere.
study = optuna.create_study(direction='maximize')

## Form Model with Optimal Parameters

In [None]:
%%time

# Primary hyper-parameter set: the model below is built from this one.
# Recorded best score: 7.838040400806658
final_param_list = {
    'n_estimators': 3493,
    'max_depth': 7,
    'learning_rate': 0.006848867349350412,
    'gamma': 0.6737971005366712,
    'subsample': 0.6101403456639767,
    'colsample_bytree': 0.6053687986044185,
    'reg_alpha': 87.3846454719566,
    'reg_lambda': 65.78414200721654,
    'tree_method': 'gpu_hist',
    'random_state': 3141,
    'verbosity': 1,
}

# Second hyper-parameter set with a different seed; not used by the fit below.
final_param_list_2 = {
    'n_estimators': 2496,
    'max_depth': 9,
    'learning_rate': 0.01107053835431393,
    'gamma': 0.028684498045221676,
    'subsample': 0.7037432500659588,
    'colsample_bytree': 0.2755633622133354,
    'reg_alpha': 99.26591991981083,
    'reg_lambda': 91.6583006084785,
    'tree_method': 'gpu_hist',
    'random_state': 2021,
    'verbosity': 1,
}

# Build the regressor from the primary set.
regr_model = XGBRegressor(**final_param_list)

# Train on the full training data.
regr_model.fit(X_train, y_train)

In [None]:
%%time

# Predict the target for the test rows with the fitted model
y_pred = regr_model.predict(X_test)

In [None]:
# Write the predictions into the submission frame's 'loss' column.
# Item assignment always targets the column; attribute assignment only does so
# when a column of that name already exists.
sub['loss'] = y_pred

In [None]:
# Save the submission frame to 'submission.csv' without writing the index column.
sub.to_csv('submission.csv', index=False)