In [None]:
import pandas as pd
# import time
import numpy as np
# import seaborn as sns
import matplotlib.pyplot as plt

import xgboost as xgb
from sklearn.model_selection import train_test_split

# Base learner used for the hand-built boosting example further down
from sklearn.tree import DecisionTreeRegressor

import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this also hides library deprecation notices; consider
# narrowing the filter to specific categories/modules.
warnings.filterwarnings('ignore')

### Load Boston Dataset

In [None]:
# `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2, where
# importing it raises ImportError. Use it when available; otherwise rebuild an
# equivalent object (`.data`, `.target`, `.feature_names`) from the original
# source, following the recipe in scikit-learn's removal notice.
try:
    from sklearn.datasets import load_boston
    boston = load_boston()
except ImportError:
    from sklearn.utils import Bunch

    # Requires network access. After a 22-line preamble, each record spans two
    # physical lines: 11 values on the first, then 2 more features plus the
    # target on the second.
    raw_df = pd.read_csv(
        "http://lib.stat.cmu.edu/datasets/boston",
        sep=r"\s+",
        skiprows=22,
        header=None,
    )
    boston = Bunch(
        data=np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]]),
        target=raw_df.values[1::2, 2],
        feature_names=np.array([
            'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
            'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT',
        ]),
    )

In [None]:
# Label the feature matrix with the dataset's feature names and append the
# regression target as a PRICE column.
bos = pd.DataFrame(boston.data, columns=boston.feature_names).assign(PRICE=boston.target)
bos.head()

### Train Test Split

In [None]:
# Hold out 25% of the rows as a test set; the fixed seed makes the split repeatable
X_train_2, X_test_2, y_train_2, y_test_2 = train_test_split(
    bos.drop(columns=["PRICE"]),
    bos["PRICE"],
    test_size=0.25,
    random_state=10,
)

In [None]:
# Preview the first rows of the training features
X_train_2.head()

In [None]:
# (rows, columns) of the training feature matrix
X_train_2.shape

### Check for any missing values

In [None]:
# Per-column flag: True if the training features contain at least one missing value
X_train_2.isna().any()

### Tree Ensemble (Boosting)

In [None]:
# Stage 1: fit a shallow tree directly on the target.
# DecisionTreeRegressor comes from the notebook's first (imports) cell.
# random_state is pinned (same seed as the train/test split) because
# scikit-learn permutes features at every split, so ties between equally good
# splits could otherwise be broken differently from run to run.
tree_reg1 = DecisionTreeRegressor(max_depth=2, random_state=10)
tree_reg1.fit(X_train_2, y_train_2)

In [None]:
# Residuals left over after the first tree: actual target minus its prediction
r1 = y_train_2.sub(tree_reg1.predict(X_train_2))

In [None]:
# Stage 2: fit a second shallow tree on the residuals of the first tree, so it
# learns what the first tree got wrong.
# random_state is pinned so that ties between equally good splits are broken
# the same way on every run.
tree_reg2 = DecisionTreeRegressor(max_depth=2, random_state=10)
tree_reg2.fit(X_train_2, r1)

In [None]:
# Residuals still unexplained after the second tree's correction
r2 = r1.sub(tree_reg2.predict(X_train_2))

In [None]:
# Stage 3: fit a third shallow tree on the residuals remaining after stage 2.
# random_state is pinned so that ties between equally good splits are broken
# the same way on every run.
tree_reg3 = DecisionTreeRegressor(max_depth=2, random_state=10)
tree_reg3.fit(X_train_2, r2)

### ENSEMBLE: Combine all three tree predictions

In [None]:
# Ensemble prediction = first tree's prediction plus the two residual corrections
y_pred = (
    tree_reg1.predict(X_train_2)
    + tree_reg2.predict(X_train_2)
    + tree_reg3.predict(X_train_2)
)

In [None]:
# First 10 predictions of the three-tree ensemble on the training set
y_pred[:10]

In [None]:
# Actual target values for the same first 10 training rows
y_train_2.iloc[:10]

In [None]:
# First 10 training-set predictions from the first tree alone (no residual corrections)
tree_reg1.predict(X_train_2)[:10]

In [None]:
# Side-by-side view of the first 10 training rows: each tree's output, the
# summed ensemble prediction, and the true target.
predictions = pd.DataFrame({
    'Model_1': tree_reg1.predict(X_train_2)[:10],
    'Model_2': tree_reg2.predict(X_train_2)[:10],
    'Model_3': tree_reg3.predict(X_train_2)[:10],
    'Ensemble': y_pred[:10],
    # Re-index from 0 so the target lines up with the positional predictions
    'Actual': y_train_2.head(10).reset_index()['PRICE'],
})

# Show the comparison table
predictions

In [None]:
# Learning curve: refit XGBoost with 1..10 boosting rounds (tree depth fixed
# at 2) and record the training error for each ensemble size.
errors = []
for n_estimators in range(1, 11):
    clf = xgb.XGBRegressor(max_depth=2, n_estimators=n_estimators)
    clf.fit(X_train_2, y_train_2, verbose=False)
    errors.append(
        {
            'Tree Count': n_estimators,
            # Mean *absolute* residual: signed residuals cancel each other
            # out, so their plain average measures bias, not error size.
            'Average Error': np.mean(np.abs(y_train_2 - clf.predict(X_train_2))),
        })

# One row per ensemble size, indexed by the number of trees
n_estimators_lr = pd.DataFrame(errors).set_index('Tree Count').sort_index()
n_estimators_lr

In [None]:
# The curve varies the number of boosted trees (n_estimators), not max_depth,
# so the title and axis labels say so.
ax = n_estimators_lr.plot(title="n_estimators Learning Curve")
ax.set_xlabel("Tree Count")
ax.set_ylabel("Average Error");