In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Enumerate every file under the Kaggle input mount and print its full path.
input_files = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_files:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Read the competition's training and test tables from the input directory.
train_csv = "../input/tabular-playground-series-aug-2021/train.csv"
test_csv = "../input/tabular-playground-series-aug-2021/test.csv"
train, test = pd.read_csv(train_csv), pd.read_csv(test_csv)

In [None]:
# Separate the 'loss' target from the predictors and set the row ids aside;
# 'id' is dropped from both feature tables so it is never used as a feature.
y_train = train['loss']
x_train = train.drop(columns=['id', 'loss'])
test_id = test['id']
x_test = test.drop(columns=['id'])

In [None]:
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from warnings import filterwarnings

# Silence deprecation, future and user warnings for the rest of the notebook.
for _category in (DeprecationWarning, FutureWarning, UserWarning):
    filterwarnings("ignore", category=_category)

In [None]:
def check_rmse(model, x_val, y_val):
    """Return the root-mean-squared error of ``model`` on a labelled set.

    Parameters
    ----------
    model
        Any fitted object exposing ``predict(x_val)``.
    x_val
        Features passed straight to ``model.predict``.
    y_val
        Ground-truth targets compared against the predictions.

    Returns
    -------
    float
        Square root of ``mean_squared_error(y_val, predictions)``.
    """
    predictions = model.predict(x_val)
    mse = mean_squared_error(y_val, predictions)
    return np.sqrt(mse)

In [None]:
# Carve a hold-out validation split off the training data; the fixed seed keeps
# the split reproducible across runs.
X_train, X_valid, Y_train, Y_valid = train_test_split(
    x_train,
    y_train,
    random_state=26,
)

In [None]:
%%time
base_model = XGBRegressor(n_estimators=2000,learning_rate=0.08, colsample_bytree= 0.22, 
                     subsample=0.99, random_state=1, reg_alpha = 19,tree_method = 'gpu_hist')

base_model.fit(X_train, Y_train, early_stopping_rounds = 70, eval_set=[(X_valid, Y_valid)], verbose=False)

In [None]:
# Report the baseline's RMSE on the hold-out split.
base_rmse = check_rmse(base_model, X_valid, Y_valid)
print(f"Base Model rmse : {base_rmse}")

In [None]:
from sklearn.model_selection import GridSearchCV
def Hyperparameter_tuning(params, scoring='neg_mean_absolute_error'):
    """Grid-search XGBoost hyperparameters with cross-validation.

    Runs ``GridSearchCV`` over ``params`` on the notebook-level
    ``X_train``/``Y_train`` split, prints the winning combination and its
    cross-validated score, then prints the RMSE obtained by calling
    ``check_rmse`` with the fitted search object on ``X_valid``/``Y_valid``.

    Parameters
    ----------
    params : dict
        Parameter grid handed to ``GridSearchCV`` as ``param_grid``: maps
        XGBRegressor parameter names to lists of candidate values.
    scoring : str or callable, default 'neg_mean_absolute_error'
        Metric used to rank candidates. NOTE: the default selects on MAE while
        the rest of the notebook reports RMSE; pass
        ``'neg_root_mean_squared_error'`` to select on the reported metric.

    Returns
    -------
    dict
        ``best_params_`` of the fitted search.
    """
    # Fixed settings shared by every candidate; the grid supplies the
    # searched parameters.
    model = XGBRegressor(n_estimators=3000, learning_rate=0.14,
                         colsample_bytree=0.5, subsample=0.99, random_state=1,
                         reg_alpha=19, tree_method='gpu_hist')
    model_cv = GridSearchCV(estimator=model,
                            param_grid=params,
                            scoring=scoring,
                            verbose=False)
    model_cv.fit(X_train, Y_train)
    print("Best parameters:", model_cv.best_params_)
    if scoring == 'neg_mean_absolute_error':
        # The scorer is negated (higher is better), so flip the sign for display.
        print("Lowest MAE: ", (-model_cv.best_score_))
    else:
        print(f"Best CV score ({scoring}):", model_cv.best_score_)
    print(f"RMSE on valid = {check_rmse(model_cv,X_valid,Y_valid)}")
    return model_cv.best_params_

In [None]:
# Search space for the grid search: tree depth, learning rate and per-tree
# column sampling.
# Learning rates 0.070, 0.075, ..., 0.125 are derived from integers so every
# grid value is the exact 3-decimal float; np.arange with a fractional step
# produces values (and potentially a length) that depend on rounding error.
param = {
    'max_depth': [2, 3, 4],
    'learning_rate': [milli / 1000 for milli in range(70, 130, 5)],
    'colsample_bytree': [0.24, 0.2, 0.22],
}

In [None]:
%%time
best_params = Hyperparameter_tuning(param)

In [None]:
# Final model: same fixed settings as the grid-search estimator, with the
# searched parameters replaced by the best combination found.
tuned_model = XGBRegressor(
    tree_method='gpu_hist',
    n_estimators=3000,
    max_depth=best_params['max_depth'],
    learning_rate=best_params['learning_rate'],
    colsample_bytree=best_params['colsample_bytree'],
    subsample=0.99,
    reg_alpha=19,
    random_state=1,
)

# Training monitors the hold-out split with a 50-round early-stopping window.
tuned_model.fit(
    X_train,
    Y_train,
    eval_set=[(X_valid, Y_valid)],
    early_stopping_rounds=50,
    verbose=False,
)

In [None]:
# Hold-out RMSE of the tuned model (displayed as the cell's output).
check_rmse(model=tuned_model, x_val=X_valid, y_val=Y_valid)

In [None]:
def submission(model, filename, features=None, ids=None):
    """Predict with ``model`` and write a two-column submission CSV.

    The file is written to ``Submission_file_{filename}.csv`` in the current
    directory with ``id`` as the index column and ``loss`` as the prediction.

    Parameters
    ----------
    model
        Fitted object exposing ``predict(features)``.
    filename : str
        Suffix inserted into the output file name.
    features : optional
        Feature table to predict on; defaults to the notebook-level ``x_test``.
    ids : optional
        Row identifiers, one per prediction; defaults to the notebook-level
        ``test_id``.

    Raises
    ------
    ValueError
        If the number of predictions differs from the number of ids.
    """
    # Fall back to the notebook's test split when no explicit data is given.
    if features is None:
        features = x_test
    if ids is None:
        ids = test_id

    pred = np.asarray(model.predict(features)).ravel()
    id_values = np.asarray(ids)
    if len(pred) != len(id_values):
        raise ValueError(
            f"got {len(pred)} predictions for {len(id_values)} ids"
        )

    # Pair ids and predictions by position. Concatenating two pandas objects
    # column-wise aligns on their indexes instead, which silently yields NaN
    # rows whenever the id Series does not carry a default 0..n-1 index.
    sub = pd.DataFrame({'loss': pred}, index=pd.Index(id_values, name='id'))
    sub.to_csv(f"Submission_file_{filename}.csv")

In [None]:
# Write the baseline model's predictions to Submission_file_base_xgboost.csv.
submission(model=base_model, filename="base_xgboost")

In [None]:
# Write the tuned model's predictions to Submission_file_tuned_xgboost.csv.
submission(model=tuned_model, filename="tuned_xgboost")