In [None]:
# import all libraries 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import explained_variance_score

In [None]:
# Read the train and test CSV files into DataFrames.
df_train = pd.read_csv(
    filepath_or_buffer='../input/tabular-playground-series-aug-2021/train.csv'
)
df_test = pd.read_csv(
    filepath_or_buffer='../input/tabular-playground-series-aug-2021/test.csv'
)

In [None]:
# Preview the first five rows of the training frame.
df_train.head(n=5)

In [None]:
# Preview the first five rows of the test frame.
df_test.head(n=5)

In [None]:
# Separate the predictors from the target: every column except 'id' and 'loss'
# is kept as a feature, and 'loss' is extracted as the array to predict.
X = df_train.drop(columns=['id', 'loss'])
y = df_train['loss'].to_numpy()

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=123,
)

In [None]:
# Standardise the features.
scaler = StandardScaler()

# Fit the scaler on the training split only, then apply those same fitted
# statistics to the held-out split. Re-fitting on X_test would scale it with
# its own mean/variance, so the model would be evaluated on features that were
# transformed differently from the ones it was trained on.
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Inspect the first five values of the first row of the training features.
X_train[0, :5]

In [None]:
# Inspect the first five values of the first row of the held-out features.
X_test[0, :5]

In [None]:
# Configure the XGBoost regressor.
# These hyperparameters are starting values and can be adjusted as required.
xgb = XGBRegressor(
    n_estimators=100,
    learning_rate=0.1,
    n_jobs=4,
)

In [None]:
# Train on the training split while evaluating on the held-out split each round,
# with early stopping configured for 5 rounds.
# NOTE(review): recent xgboost releases expect `early_stopping_rounds` on the
# XGBRegressor constructor rather than on fit() — confirm against the installed version.
# NOTE(review): the same held-out split is used for early stopping and for the
# reported metrics, so those metrics may be optimistic — confirm this is intended.
xgb.fit(
    X_train,
    y_train,
    eval_set=[(X_test, y_test)],
    early_stopping_rounds=5,
    verbose=True,
)

In [None]:
# Predict on the held-out split (X_test from the train/test split, not the
# separate test CSV); the result is compared against y_test below.
y_pred = xgb.predict(X_test)

In [None]:
# Evaluate the held-out predictions with several metrics and print each one.
holdout_metrics = (
    ("mean absolute error: ", mean_absolute_error(y_test, y_pred)),
    ("Root mean squared error: ", np.sqrt(mean_squared_error(y_test, y_pred))),
    ("Explained variance: ", explained_variance_score(y_test, y_pred)),
)
for metric_label, metric_value in holdout_metrics:
    print(metric_label, metric_value)

In [None]:
# Compare the predictions with the actual held-out values.
fig, ax = plt.subplots()

# One point per held-out row: actual value on x, predicted value on y.
ax.scatter(y_test, y_pred)

# Reference line y = x (a perfect prediction). Two endpoints spanning the
# actual values are enough; drawing a segment through every unsorted sample
# produces the same line with far more work.
line_ends = [np.min(y_test), np.max(y_test)]
ax.plot(line_ends, line_ends, 'r')

# Label the figure so it can be read on its own.
ax.set_xlabel("Actual loss")
ax.set_ylabel("Predicted loss")
ax.set_title("Predicted vs. actual loss (held-out split)")
plt.show()

In [None]:
# Build the feature frame for the test table by removing the 'id' column,
# then preview its first five rows.
test = df_test.drop(columns='id')
test.head(n=5)

In [None]:
# Scale the test features with the scaler as it was previously fitted.
# Calling fit_transform here would re-fit the scaler on the test table and
# derive a new mean/variance from the test rows themselves, so the model would
# receive features transformed differently from the data it has already seen.
test_scaled = scaler.transform(test)

In [None]:
# Predict the loss for the scaled test features using the fitted model.
test_pred = xgb.predict(test_scaled)

In [None]:
# Show the first five test predictions.
test_pred[:5]

In [None]:
# Load the sample submission file; it is used as the frame for the output.
sub = pd.read_csv(
    filepath_or_buffer='../input/tabular-playground-series-aug-2021/sample_submission.csv'
)

In [None]:
# Set the 'loss' column of the submission frame to the model's predictions.
# NOTE(review): assumes the sample submission rows are in the same order as
# the test table — confirm.
sub = sub.assign(loss=test_pred)

In [None]:
# Preview the first five rows of the submission frame.
sub.head(n=5)

In [None]:
# Write the submission frame to CSV without the index column.
sub.to_csv(path_or_buf="submission.csv", index=False)