- Sequential correction of predecessor's errors.
- Unlike AdaBoost, it does not tweak the weights of the training instances.
- Each predictor is trained using its predecessor's residual errors as labels.
- Gradient Boosted Trees: a decision tree is used as a base learner.


### Import modules

In [90]:
# Manipulation
import numpy as np
import pandas as pd
# Visualization
import seaborn as sns
import matplotlib.pyplot as plt
# Selection
from sklearn.model_selection import train_test_split
# Metrics
from sklearn.metrics import mean_squared_error as MSE
# Models
from sklearn.tree import DecisionTreeClassifier
# Ensemble
from sklearn.ensemble import GradientBoostingRegressor

### Load data

In [91]:
# Fixed seed so the train/test partition is reproducible
SEED = 1

# Load the dataset from CSV into a DataFrame
df = pd.read_csv('bikes.csv')

# Target is the 'cnt' column; every other column is used as a feature
y = df['cnt']
X = df.drop(columns='cnt')

# Hold out 20% of the rows for testing, keep 80% for training
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=SEED,
)

### Define the Gradient Boosting regressor

In [92]:
# Gradient boosting ensemble: 200 boosting stages, each tree limited to depth 4.
# NOTE(review): random_state is 2 here while the split uses SEED (1) — kept as-is.
gb = GradientBoostingRegressor(
    max_depth=4,
    n_estimators=200,
    random_state=2,
)

### Train the GB regressor

In [86]:
# Train gb on the training split, then predict target values for the test split
# (fit returns the fitted estimator, so the two calls can be chained)
y_pred = gb.fit(X_train, y_train).predict(X_test)

### Evaluate the GB regressor

In [88]:
# Mean squared error between the held-out targets and the predictions
mse_test = MSE(y_true=y_test, y_pred=y_pred)

# RMSE is the square root of the MSE
rmse_test = mse_test ** 0.5

# Report the test-set RMSE to three decimal places
report = 'Test set RMSE of gb: {:.3f}'.format(rmse_test)
print(report)

Test set RMSE of gb: 43.113
