# XGBoost tutorial

This tutorial is based on the blog post "Using XGBoost in Python" by Manish Pathak
https://www.datacamp.com/community/tutorials/xgboost-in-python

In [None]:
# Load the Boston housing data as a Bunch exposing `data`, `target`,
# `feature_names` and `DESCR`, which the cells below rely on.
try:
    from sklearn.datasets import load_boston
    boston = load_boston()
except ImportError:
    # load_boston was deprecated in scikit-learn 1.0 and removed in 1.2, where
    # importing it raises ImportError. Rebuild an equivalent object from the
    # original StatLib source (requires network access).
    import numpy as np
    import pandas as pd
    from sklearn.utils import Bunch

    # Each record in the raw file is wrapped over two physical lines after a
    # 22-line header: 11 values on the first line, then 2 features + target.
    raw_df = pd.read_csv("http://lib.stat.cmu.edu/datasets/boston",
                         sep=r"\s+", skiprows=22, header=None)
    boston = Bunch(
        data=np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]]),
        target=raw_df.values[1::2, 2],
        feature_names=np.array(['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
                                'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT']),
        DESCR=("Boston house prices dataset, reloaded from "
               "http://lib.stat.cmu.edu/datasets/boston because load_boston "
               "is not available in the installed scikit-learn."),
    )

In [None]:
# List the fields available on the loaded dataset object.
print(boston.keys())

In [None]:
# Show the dimensions of the feature array.
print(boston.data.shape)

In [None]:
# Show the feature names; these become the DataFrame column labels below.
print(boston.feature_names)

In [None]:
# Print the dataset's DESCR attribute (its bundled description text).
print(boston.DESCR)

In [None]:
import pandas as pd

# Build the feature table in one step, labelling columns with the dataset's
# feature names.
data = pd.DataFrame(boston.data, columns=boston.feature_names)

In [None]:
# Preview the first rows of the feature table.
data.head()

In [None]:
# Append the regression target as a new 'PRICE' column (it becomes the last
# column, which the X/y split further down relies on).
data['PRICE'] = boston.target

In [None]:
# Summarize the columns: dtypes and non-null counts.
data.info()

In [None]:
# Descriptive statistics for each column.
data.describe()

In [None]:
import xgboost as xgb
from sklearn.metrics import mean_squared_error
import pandas as pd
import numpy as np

In [None]:
# Features are every column except the last; the target is the last column.
X = data.iloc[:, :-1]
y = data.iloc[:, -1]

In [None]:
# Wrap features and target in an xgb.DMatrix; the xgb.cv and xgb.train calls
# below take it as their `dtrain` argument.
data_dmatrix = xgb.DMatrix(data=X,label=y)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

# 'reg:squarederror' replaces the deprecated alias 'reg:linear' (renamed in
# XGBoost 0.90); it is the same squared-error loss, without the warning.
xg_reg = xgb.XGBRegressor(
    objective='reg:squarederror',
    colsample_bytree=0.3,
    learning_rate=0.1,
    max_depth=5,
    alpha=10,
    n_estimators=10,
)
xg_reg.fit(X_train, y_train)

In [None]:
# Score the fitted model on the held-out rows and report the root of the
# mean squared error.
preds = xg_reg.predict(X_test)
mse = mean_squared_error(y_test, preds)
rmse = np.sqrt(mse)
print("RMSE: %f" % rmse)

In [None]:
# Booster parameters for the native API. 'reg:squarederror' replaces the
# deprecated alias 'reg:linear' (renamed in XGBoost 0.90); the loss is the same.
params = {
    'objective': 'reg:squarederror',
    'colsample_bytree': 0.3,
    'learning_rate': 0.1,
    'max_depth': 5,
    'alpha': 10,
}

# 3-fold cross-validation over up to 50 boosting rounds, stopping early when
# the RMSE metric has not improved for 10 consecutive rounds.
cv_results = xgb.cv(
    dtrain=data_dmatrix,
    params=params,
    nfold=3,
    num_boost_round=50,
    early_stopping_rounds=10,
    metrics="rmse",
    as_pandas=True,
    seed=123,
)

cv_results.head()

In [None]:
# Train on the full DMatrix with the native API for 10 boosting rounds.
# NOTE: this rebinds `xg_reg` — from here on it is the object returned by
# xgb.train, not the XGBRegressor fitted earlier; the plotting cells use it.
xg_reg = xgb.train(params=params, dtrain=data_dmatrix, num_boost_round=10)

In [None]:
%matplotlib inline  

import matplotlib.pyplot as plt

xgb.plot_tree(xg_reg,num_trees=0)
plt.rcParams['figure.figsize'] = [5000, 1000]
plt.show()

In [None]:
# Set the figure size BEFORE plotting: rcParams only affects figures created
# afterwards, so setting it after plot_importance left this chart at whatever
# size the previous cell had configured.
plt.rcParams['figure.figsize'] = [5, 5]

# Bar chart of feature importance scores for the trained booster.
xgb.plot_importance(xg_reg)
plt.show()