This notebook contains the final submission to my first Kaggle competition, the [Tabular Playground Series - Feb 2021](https://www.kaggle.com/c/tabular-playground-series-feb-2021).

In [None]:
import numpy as np
import pandas as pd
from pathlib import Path

import os
# Print the full path of every file found under the Kaggle input directory.
for root, _, names in os.walk('/kaggle/input'):
    for full_path in (os.path.join(root, name) for name in names):
        print(full_path)
        
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import LabelEncoder

from sklearn.dummy import DummyRegressor
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge, Lasso
from sklearn.ensemble import RandomForestRegressor
        
# Directory holding the competition CSV files; the read_csv cells below
# join 'train.csv', 'test.csv' and 'sample_submission.csv' onto this path.
input_path = Path('/kaggle/input/tabular-playground-series-feb-2021/')

# Read in the data files

In [None]:
# Load the training data, using the `id` column as the DataFrame index,
# and show the first few rows.
train_csv = input_path / 'train.csv'
train = pd.read_csv(train_csv, index_col='id')
display(train.head())

In [None]:
# Load the test data, using the `id` column as the DataFrame index,
# and show the first few rows.
test_csv = input_path / 'test.csv'
test = pd.read_csv(test_csv, index_col='id')
display(test.head())

In [None]:
# Load the sample submission (indexed by `id`); its 'target' column is
# overwritten with model predictions in the final cell.
submission_csv = input_path / 'sample_submission.csv'
submission = pd.read_csv(submission_csv, index_col='id')
display(submission.head())

## Encode the categoricals.


In [None]:
# Replace every object-dtype (string) column with integer codes.
# Each encoder is fitted on the train and test values combined, so a category
# that appears in only one of the two frames still receives a code.
categorical_cols = [col for col in train.columns if train[col].dtype == 'object']

for col in categorical_cols:
    encoder = LabelEncoder()
    all_values = list(train[col].values) + list(test[col].values)
    encoder.fit(all_values)
    train[col] = encoder.transform(train[col].values)
    test[col] = encoder.transform(test[col].values)

display(train.head())

## Pull out the target, and make a validation split

In [None]:
# Separate the label column from the features.
# NOTE: `pop` removes 'target' from `train` in place, so re-running this cell
# without reloading `train` raises KeyError.
target = train.pop('target')

# Keep 60% of the rows for training and hold out the remaining 40% for
# validation. Despite their names, X_test / y_test are this validation split,
# not the competition test set (which lives in `test`).
# A fixed random_state makes the split, and every score computed from it,
# reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    train, target, train_size=0.60, random_state=42
)

# XGBoost model
Optimise the model's hyperparameters using `GridSearchCV`.

In [None]:
from xgboost import XGBRegressor
from xgboost import XGBClassifier
from sklearn.metrics import mean_absolute_error 
from sklearn.model_selection import GridSearchCV
 
# Base regressor; only the parameters listed in `optimization_dict` are
# varied by the grid search.
xgb_model = XGBRegressor(
    n_estimators=1000,
    learning_rate=0.05,
    max_depth=4,
)
optimization_dict = {'min_child_weight': range(1, 6, 2)}  # candidates: 1, 3, 5

# Cross-validated search scored by negative mean squared error, so
# `best_score_` is negative and values closer to zero are better.
model = GridSearchCV(
    estimator=xgb_model,
    param_grid=optimization_dict,
    scoring='neg_mean_squared_error',
    verbose=1,
)
model.fit(X_train, y_train)
print(model.best_score_, model.best_params_, sep='\n')

# Alternative grids kept for reference (swap into `optimization_dict`):
#'n_estimators':[250,300,350]
#'min_child_weight':range(1,6,2)
#'max_depth':[2,4,6]

Fit the final model, evaluate it on the validation split, then run it on the test data and save the results to a file.

In [None]:
from xgboost import XGBRegressor
from xgboost import XGBClassifier
from sklearn.metrics import mean_absolute_error

# Final model: same base settings as the grid-search cell, with
# min_child_weight fixed at 5.
my_model = XGBRegressor(n_estimators=1000, learning_rate=0.05, max_depth=4, min_child_weight=5)

# Stop adding trees once the validation score has not improved for 10 rounds.
# NOTE(review): newer xgboost releases deprecate passing `early_stopping_rounds`
# to fit() in favour of the constructor -- confirm against the installed version.
# NOTE(review): the same split is used for early stopping and for the score
# printed below, so that score is likely somewhat optimistic.
my_model.fit(X_train, y_train,
             early_stopping_rounds=10,
             eval_set=[(X_test, y_test)],
             verbose=False)

# Score on the held-out validation split. mean_squared_error returns the MSE,
# so take the square root to report a true RMSE; arguments are passed in the
# documented (y_true, y_pred) order.
predictions = my_model.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, predictions))
print("RMSE: " + str(rmse))

# Predict on the competition test set and write the submission file
# (the `id` index of `submission` is written as the first CSV column).
submission['target'] = my_model.predict(test)
submission.to_csv('xgbregressor.csv')