# An Illustration of Gradient Boosting

In [102]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeRegressor
from sklearn.tree import export_graphviz
import pydotplus
from sklearn.externals.six import StringIO
from IPython.display import Image
%matplotlib inline

In [104]:
# Sample the target curve: 201 evenly spaced inputs on [0, 20] and their sine.
X = np.linspace(start=0, stop=20, num=201)
y = np.sin(X)
plt.plot(X, y);

### Step 1: a constant baseline (the mean)

Here, all we are doing is using the average of our target values as the first model of our data. Note that the "actual function" behind our data is a sine curve. We are going to try to approximate it by way of gradient boosting with a decision tree regressor!

In [106]:
# Baseline model f0: a single constant, the mean of the targets,
# drawn as a horizontal line across the sampled range.
f0 = np.mean(y)
plt.plot(X, y)
plt.hlines(y=f0, xmin=0, xmax=20);

##### OK, so we have our extremely basic, extremely inaccurate model. Let's go ahead and build off of that.

In [108]:
# Residuals of the constant model: what remains of y once f0 is subtracted.
e0 = np.subtract(y, f0)
e0

In [72]:
# Scatter the baseline residuals against the inputs.
plt.scatter(x=X, y=e0);

### Step 2: fitting a "stump" to the residuals

In [110]:
# The regressor expects a 2-D feature matrix, so turn X into a single column.
data = X[:, np.newaxis]

# f1: a depth-1 tree (a "stump") fitted to the residuals of the constant model.
f1 = DecisionTreeRegressor(max_depth=1).fit(data, e0)

# Ensemble so far = stump correction + constant baseline.
ensemble_preds_1 = f1.predict(data) + f0

plt.scatter(X, y)
plt.scatter(X, ensemble_preds_1);

In [74]:
# Render the fitted stump f1 as an image.
# With out_file=None, export_graphviz returns the DOT source as a string, so no
# intermediate StringIO buffer (and no sklearn.externals.six shim) is needed.
dot_source = export_graphviz(f1, out_file=None, filled=True,
                             rounded=True)

image = pydotplus.graph_from_dot_data(dot_source)
Image(image.create_png())

In [75]:
# Mean residual over the first 29 samples.
# NOTE(review): 29 is presumably where the stump's split falls - confirm against the rendered tree.
np.mean(e0[:29])

In [76]:
# Mean residual over the remaining samples (index 29 onward).
# NOTE(review): 29 is presumably where the stump's split falls - confirm against the rendered tree.
np.mean(e0[29:])

In [77]:
# Raw output of the first stump for every input (same column matrix as `data`).
f1.predict(X.reshape(-1, 1))

### Step 3: fitting another "stump" to the residuals of the previous model

In [33]:
# Residuals left after the first stump: y is np.sin(X) and ensemble_preds_1 is
# f1's predictions plus f0, so this is y - (f0 + f1(x)).
e1 = y - ensemble_preds_1

In [78]:
# f2: a second stump, fitted to what f0 + f1 still gets wrong.
f2 = DecisionTreeRegressor(max_depth=1).fit(data, e1)
ensemble_preds_2 = f1.predict(data) + f2.predict(data) + f0

# Residuals first, then the updated ensemble prediction on the same axes.
plt.scatter(X, e1)
plt.scatter(X, ensemble_preds_2)
plt.title('fitting to residuals (y - (f0(x) + f1(x)))');

In [79]:
# Target curve and two-stump ensemble, drawn as two lines by one plot call.
plt.plot(X, y, X, ensemble_preds_2)
plt.title('Model v. our data');

### Step 4: fitting yet another "stump"

In [80]:
# Residuals left by the two-stump ensemble (y is np.sin(X); ensemble_preds_2 is
# f1 + f2 + f0), then a third stump fitted to them.
e2 = y - ensemble_preds_2
f3 = DecisionTreeRegressor(max_depth=1).fit(data, e2)
ensemble_preds_3 = f3.predict(data) + f2.predict(data) + f1.predict(data) + f0

plt.scatter(X, e2)
plt.scatter(X, ensemble_preds_3)
plt.title('fitting to residuals (y - (f0(x) + f1(x) + f2(x)))');

In [81]:
# Target curve (y is np.sin(X)) and three-stump ensemble in one plot call.
plt.plot(X, y, X, ensemble_preds_3)
plt.title('Model v. our data');

### Step 5: another one

In [82]:
# Residuals left by the three-stump ensemble (y is np.sin(X); ensemble_preds_3
# is f3 + f2 + f1 + f0), then a fourth stump fitted to them.
e3 = y - ensemble_preds_3
f4 = DecisionTreeRegressor(max_depth=1).fit(data, e3)
ensemble_preds_4 = f4.predict(data) + f3.predict(data) + f2.predict(data) + f1.predict(data) + f0

plt.scatter(X, e3)
plt.scatter(X, ensemble_preds_4);

In [83]:
# Target curve (y is np.sin(X)) and four-stump ensemble in one plot call.
plt.plot(X, y, X, ensemble_preds_4)
plt.title('Model v. our data');

### Step 6: and another

In [84]:
# Residuals left by the four-stump ensemble (y is np.sin(X); ensemble_preds_4
# is f4 + f3 + f2 + f1 + f0), then a fifth stump fitted to them.
e4 = y - ensemble_preds_4
f5 = DecisionTreeRegressor(max_depth=1).fit(data, e4)
ensemble_preds_5 = f5.predict(data) + f4.predict(data) + f3.predict(data) + f2.predict(data) + f1.predict(data) + f0

plt.scatter(X, e4)
plt.scatter(X, ensemble_preds_5);

In [85]:
# Target curve (y is np.sin(X)) and five-stump ensemble in one plot call.
plt.plot(X, y, X, ensemble_preds_5)
plt.title('Model v. our data');

### Let's make a function already!

In [53]:
def simple_boosting_algorithm(X, y, n_learners, learner, learning_rate, show_each_step=True):
    """Fit a simple additive boosting ensemble to (X, y) and plot it against the data.

    The ensemble starts as the constant ``y.mean()``. On each of ``n_learners``
    rounds, ``learner`` is refit to the current residuals
    (``y - ensemble_predictions``) and ``learning_rate`` times its predictions
    is added to the ensemble.

    params:
        X - 1-D array-like of inputs; reshaped into a single feature column
            before being handed to the learner
        y - 1-D array-like of targets, same length as X
        n_learners - number of boosting rounds
        learner - estimator whose ``fit(features, targets)`` returns the fitted
            estimator and which exposes ``predict(features)``; the same object
            is refit on every round
        learning_rate - factor applied to each round's predictions before they
            are added to the ensemble
        show_each_step - if True, will show the ensemble after each additional
            learner; otherwise only the final ensemble is drawn

    Draws on a new 20x10 matplotlib figure and returns None.
    """
    X = np.asarray(X)
    y = np.asarray(y, dtype=float)
    # The feature matrix does not change between rounds, so build it once.
    features = X.reshape(-1, 1)

    ensemble_predictions = np.full(len(y), fill_value=y.mean())
    plt.figure(figsize=(20, 10))
    # The target curve never changes, so it is drawn a single time.
    plt.plot(X, y)
    for _ in range(n_learners):
        residuals = y - ensemble_predictions
        f = learner.fit(features, residuals)
        ensemble_predictions = learning_rate * f.predict(features) + ensemble_predictions
        if show_each_step:
            plt.plot(X, ensemble_predictions)

    # When every step was shown, the last step already is the final ensemble;
    # otherwise (or when no round ran at all) draw it now.
    if not (show_each_step and n_learners > 0):
        plt.plot(X, ensemble_predictions)

    # Not every learner has a max_depth; report None rather than failing after
    # all the fitting work has been done.
    depth = getattr(learner, 'max_depth', None)
    plt.title('With {} learners with a depth of {} and a learning rate of {}'.format(
        n_learners, depth, learning_rate))
    
   

In [86]:
# A single stump with a tiny learning rate; only the final ensemble is drawn.
simple_boosting_algorithm(
    X, y,
    n_learners=1,
    learner=DecisionTreeRegressor(max_depth=1),
    learning_rate=0.001,
    show_each_step=False,
)

In [87]:
# 100 stumps at learning rate 0.01; only the final ensemble is drawn.
simple_boosting_algorithm(
    X, y,
    n_learners=100,
    learner=DecisionTreeRegressor(max_depth=1),
    learning_rate=0.01,
    show_each_step=False,
)

In [88]:
# 10,000 stumps at learning rate 0.001; only the final ensemble is drawn.
simple_boosting_algorithm(
    X, y,
    n_learners=10000,
    learner=DecisionTreeRegressor(max_depth=1),
    learning_rate=0.001,
    show_each_step=False,
)

In [89]:
# 100,000 stumps at learning rate 0.01; only the final ensemble is drawn.
simple_boosting_algorithm(
    X, y,
    n_learners=100000,
    learner=DecisionTreeRegressor(max_depth=1),
    learning_rate=0.01,
    show_each_step=False,
)

In [63]:
# This takes a while! 10,000 stumps with show_each_step left at its default (True).
simple_boosting_algorithm(
    X, y,
    n_learners=10000,
    learner=DecisionTreeRegressor(max_depth=1),
    learning_rate=0.01,
)

In [90]:
# 20 stumps at learning rate 0.1, showing every intermediate ensemble.
simple_boosting_algorithm(
    X, y,
    n_learners=20,
    learner=DecisionTreeRegressor(max_depth=1),
    learning_rate=0.1,
)

In [91]:
# 60 stumps at learning rate 0.1, showing every intermediate ensemble.
simple_boosting_algorithm(
    X, y,
    n_learners=60,
    learner=DecisionTreeRegressor(max_depth=1),
    learning_rate=0.1,
)

In [92]:
# 80 stumps at learning rate 0.1, showing every intermediate ensemble.
simple_boosting_algorithm(
    X, y,
    n_learners=80,
    learner=DecisionTreeRegressor(max_depth=1),
    learning_rate=0.1,
)

In [93]:
# 200 stumps at learning rate 0.1, showing every intermediate ensemble.
simple_boosting_algorithm(
    X, y,
    n_learners=200,
    learner=DecisionTreeRegressor(max_depth=1),
    learning_rate=0.1,
)