

Source: https://www.linkedin.com/feed/update/urn:li:activity:7174123337449652224/


# What do the training dynamics of Gradient Boosted Models look like?

These models use functional gradient descent: on each iteration a new weak model ($f_k$) is added whose goal is to predict the negative gradient of the loss, such that for the regression case $F_k(X) = F_{k-1}(X) + f_k(X) + \epsilon$, where $F_k(X)$ is the new, improved prediction and $F_{k-1}(X)$ is the current prediction.

If the loss is $L = 0.5 \, (y - F_{k-1}(X))^2$,

then $\large \frac{dL}{dF_{k-1}} = -(y - F_{k-1}(X))$

$\implies f_k(X) = - {\large \frac{dL}{dF_{k-1}}} = y - F_{k-1}(X)$, i.e. for the squared loss the new weak model is fit to the current residuals.

This can now be generalized to other loss functions.
Whatever the loss, the new $f_k(X)$ can simply be trained by regression to predict $\large - \frac{dL}{dF_{k-1}}$, which makes training these models easier.

We can visualize the gradients and the evolution of $F_k(X)$ for both classification and regression.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import HistGradientBoostingRegressor
from matplotlib import rc
# Set Matplotlib's ``animation.html`` rc parameter to "jshtml" so that
# animation objects displayed in the notebook use their JavaScript/HTML
# representation.
rc('animation', html='jshtml')

In [None]:
# Synthetic 1-D regression problem, seeded for reproducibility:
# a linear trend plus a fast sinusoid, perturbed by small Gaussian noise.
n_samples = 500
rng = np.random.RandomState(0)

# Draw order matters for reproducibility: features first, then the noise.
X = rng.standard_normal(size=(n_samples, 1))
noise = rng.normal(0.0, 0.01, n_samples)

y = 5 * X[:, 0] + np.sin(10 * np.pi * X[:, 0])
y = y - noise

In [None]:
# Fit a histogram-based gradient boosting regressor with default settings and
# keep its predictions on the training inputs for plotting.
model = HistGradientBoostingRegressor()
model.fit(X, y)
y_pred = model.predict(X)

# Bare expression: the notebook renders the fitted estimator.
model

In [None]:
# Targets as blue points, final model predictions as red points.
plt.plot(X[:, 0], y, "bo", markersize=1)
plt.plot(X[:, 0], y_pred, "ro", markersize=1)

# gbdt_cst = HistGradientBoostingRegressor(monotonic_cst=[1, 0]).fit(X, y)

In [None]:
# Ask the fitted model for its stage-wise predictions on X; the result is
# consumed by iteration elsewhere in this notebook.
y_pred_gen = model.staged_predict(X)
# Bare expression so the notebook displays the returned object itself.
y_pred_gen

In [None]:
# Overlay the predictions of every boosting stage on top of the targets.
# Each stage is drawn with a low alpha, so regions where many stages agree
# end up as a denser red.
y_pred_gen = model.staged_predict(X)

plt.plot(X[:, 0], y, "bo", ms=1)
# The stage index is not needed here, so iterate the generator directly.
for y_pred_stage in y_pred_gen:
    plt.plot(X[:, 0], y_pred_stage, "ro", ms=1, alpha=0.1)

In [None]:
from matplotlib.animation import FuncAnimation, ArtistAnimation

In [None]:
# Static backdrop: the training targets as blue points.
fig, ax = plt.subplots()
ax.plot(X[:, 0], y, "bo", markersize=1)

# Initially empty red line; the animation fills in its data frame by frame.
(points,) = ax.plot([], [], "ro", markersize=1)
title = ax.set_title("Iteration: 0")

# Stage-wise predictions on X, consumed when the animation frames are built.
y_pred_gen = model.staged_predict(X)
def init():
    """Empty the animated line and return it as a one-element tuple.

    Reads the module-level ``points`` line. It is not wired into the
    animation in this cell (no ``init_func`` is passed to ``FuncAnimation``).
    """
    points.set_data([], [])
    return (points,)

def update(frame):
    """Draw the predictions of a single boosting iteration.

    ``frame`` is an ``(iteration, predictions)`` pair. The module-level
    ``points`` line is moved to ``(X[:, 0], predictions)`` and the module-level
    ``title`` is rewritten with the iteration number. Only ``points`` is
    returned, as a one-element tuple.
    """
    iteration, stage_pred = frame
    points.set_data(X[:, 0], stage_pred)
    title.set_text(f"Iteration: {iteration}")
    return (points,)

# One frame per boosting iteration, numbered from 1. The generator is
# materialised into a list so the animation has a finite frame sequence.
#
# blit=False: ``update`` also rewrites the axes title, which it does not
# return and which sits outside the axes area, so a blitted redraw would
# leave the title stale in interactive backends. Redrawing the whole figure
# on each frame keeps the title in sync with the points.
ani = FuncAnimation(
    fig,
    update,
    frames=list(enumerate(y_pred_gen, start=1)),
    interval=30,
    blit=False,
    repeat_delay=5000,
)

# Needs a movie writer available to Matplotlib.
# NOTE: the other animation cells in this notebook write to the same
# "movie.mp4", so each run overwrites the previous file.
ani.save("movie.mp4")

# Bare expression: the notebook displays the animation inline.
ani

In [None]:
from sklearn.datasets import make_blobs, make_moons
from sklearn.ensemble import HistGradientBoostingClassifier

In [None]:
# Noisy two-moons binary classification dataset, seeded for reproducibility.
# This rebinds X and y, which previously held the regression data.
# (An earlier unseeded make_blobs draw was overwritten immediately by this
# line, so it has been dropped.)
X, y = make_moons(noise=0.3, random_state=0)

In [None]:
# Regular grid covering the data's bounding box, padded by one unit on every
# side, sampled every `plot_step` along both axes.
plot_step = 0.1
x_min, y_min = X[:, 0].min() - 1, X[:, 1].min() - 1
x_max, y_max = X[:, 0].max() + 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(
    np.arange(x_min, x_max, plot_step),
    np.arange(y_min, y_max, plot_step),
)

In [None]:
# Fit a default histogram-based gradient boosting classifier on (X, y).
# This rebinds `model`, which previously held the regressor.
model = HistGradientBoostingClassifier()
model = model.fit(X, y)

In [None]:
cmap = plt.cm.RdYlBu
cmap_listed = plt.matplotlib.colors.ListedColormap(["r", "y", "b"])

# Predict a class for every grid node, then fold the flat result back into
# the grid's 2-D shape so it can be contoured.
Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

# Draw the decision regions once. The contour used to be drawn, removed and
# drawn again with identical arguments, which only repeated work.
cs = plt.contourf(xx, yy, Z, cmap=cmap)

# Training points on top, coloured by their true label.
plt.scatter(
    X[:, 0],
    X[:, 1],
    c=y,
    cmap=cmap_listed,
    edgecolor="k",
    s=20,
)


In [None]:
# Remove the filled-contour set `cs` created by the previous cell.
# NOTE(review): that figure has presumably already been rendered by the time
# this runs, so this looks like a leftover experiment — confirm it is needed.
cs.remove()

In [None]:
fig, ax = plt.subplots()
title = ax.set_title("Iteration: 0")

cmap = plt.cm.RdYlBu
cmap_listed = plt.matplotlib.colors.ListedColormap(["r", "y", "b"])

# Final-model class predictions on the grid, shaped like the grid itself.
y_pred = model.predict(np.column_stack((xx.ravel(), yy.ravel()))).reshape(xx.shape)

# Initial decision regions; `update` replaces this contour set on each frame.
cs = ax.contourf(xx, yy, y_pred, cmap=cmap)

# Training points coloured by their true label.
ax.scatter(
    X[:, 0],
    X[:, 1],
    c=y,
    cmap=cmap_listed,
    edgecolor="k",
    s=20,
)

# Stage-wise class predictions on the flattened grid.
y_pred_gen = model.staged_predict(np.column_stack((xx.ravel(), yy.ravel())))


def update(frame):
    """Replace the filled contours with those of one boosting iteration.

    ``frame`` is an ``(iteration, predictions)`` pair in which ``predictions``
    is flat and gets reshaped to ``xx.shape``. The module-level contour set
    ``cs`` is removed and rebound to a freshly drawn one (alpha 0.1), and the
    module-level ``title`` is rewritten. Returns a list holding only ``title``.
    """
    global cs
    iteration, stage_labels = frame
    grid_labels = stage_labels.reshape(xx.shape)
    cs.remove()
    cs = ax.contourf(xx, yy, grid_labels, cmap=cmap, alpha=0.1)
    title.set_text(f"Iteration: {iteration}")
    return [title]

# One frame per boosting iteration, numbered from 1. The generator is
# materialised into a list so the animation has a finite frame sequence.
#
# blit=False: on every frame ``update`` removes the contour set and draws a
# new one, yet returns only the title. Blitting redraws just the returned
# artists, so it would not show the new contours in interactive backends.
# A full redraw per frame is required here.
ani = FuncAnimation(
    fig,
    update,
    frames=list(enumerate(y_pred_gen, start=1)),
    interval=30,
    blit=False,
    repeat_delay=5000,
)

# Needs a movie writer available to Matplotlib.
# NOTE: the other animation cells in this notebook write to the same
# "movie.mp4", so each run overwrites the previous file.
ani.save("movie.mp4")

# Bare expression: the notebook displays the animation inline.
ani

# Combined plots

In [None]:
# Same seeded synthetic regression problem as earlier in the notebook, kept
# under *_reg names so it can coexist with the classification data.
n_samples = 500
rng = np.random.RandomState(0)

# Draw order matters for reproducibility: features first, then the noise.
X_reg = rng.standard_normal(size=(n_samples, 1))
noise = rng.normal(0.0, 0.01, n_samples)

y_reg = 5 * X_reg[:, 0] + np.sin(10 * np.pi * X_reg[:, 0])
y_reg = y_reg - noise

In [None]:
# Seeded noisy two-moons dataset for the classification panel.
X_clf, y_clf = make_moons(noise=0.3, random_state=0)

# Regular grid over the data's bounding box, padded by one unit on every
# side and sampled every `plot_step` along both axes.
plot_step = 0.1
x_min, y_min = X_clf[:, 0].min() - 1, X_clf[:, 1].min() - 1
x_max, y_max = X_clf[:, 0].max() + 1, X_clf[:, 1].max() + 1
xx, yy = np.meshgrid(
    np.arange(x_min, x_max, plot_step),
    np.arange(y_min, y_max, plot_step),
)

In [None]:
# One default estimator per task, each fitted on its own dataset.
model_reg = HistGradientBoostingRegressor()
model_reg = model_reg.fit(X_reg, y_reg)

model_clf = HistGradientBoostingClassifier()
model_clf = model_clf.fit(X_clf, y_clf)

In [None]:
from itertools import zip_longest

In [None]:
fig, ax = plt.subplots(1, 2, figsize=(10, 5))

# Left panel (regression): targets in blue, plus an initially empty red line
# that the animation fills with the stage-wise predictions.
ax[0].plot(X_reg[:, 0], y_reg, "bo", ms=1)
points, = ax[0].plot([], [], "ro", ms=1)

# Right panel (classification): decision regions of the final classifier on
# the grid, with the training points on top.
cmap = plt.cm.RdYlBu
cmap_listed = plt.matplotlib.colors.ListedColormap(["r", "y", "b"])
# Use this section's own classifier and its own grid predictions. Relying on
# the `model` / `y_pred` globals left over from earlier cells only works if
# those cells happened to run first and left compatible values behind.
y_pred_clf = model_clf.predict(np.c_[xx.ravel(), yy.ravel()])
y_pred_clf = y_pred_clf.reshape(xx.shape)

cs = ax[1].contourf(xx, yy, y_pred_clf, cmap=cmap)
ax[1].scatter(
    X_clf[:, 0],
    X_clf[:, 1],
    c=y_clf,
    cmap=cmap_listed,
    edgecolor="k",
    s=20,
)

# Stage-wise predictions of each model, consumed when the frames are built.
y_pred_gen_reg = model_reg.staged_predict(X_reg)
y_pred_gen_clf = model_clf.staged_predict(np.c_[xx.ravel(), yy.ravel()])

title_reg = ax[0].set_title("Iteration: 0")
title_clf = ax[1].set_title("Iteration: 0")


def update(frame):
    """Advance both panels of the combined figure by one boosting iteration.

    ``frame`` is ``(i, (reg_pred, clf_pred))``. Either prediction may be
    ``None``, in which case the corresponding panel is left untouched.

    Regression panel: the module-level ``points`` line is moved to the new
    predictions and ``title_reg`` is rewritten.
    Classification panel: the flat predictions are reshaped to ``xx.shape``,
    the module-level contour set ``cs`` is removed and rebound to a freshly
    drawn one (alpha 0.1), and ``title_clf`` is rewritten.

    Returns the list ``[points, title_reg, title_clf]``.
    """
    global cs
    i, (y_pred_state_reg, y_pred_state_clf) = frame

    if y_pred_state_reg is not None:
        points.set_xdata(X_reg[:, 0])
        points.set_ydata(y_pred_state_reg)
        title_reg.set_text(f"Iteration: {i:2d}\nRegression")

    if y_pred_state_clf is not None:
        y_pred_state_clf = y_pred_state_clf.reshape(xx.shape)
        cs.remove()
        cs = ax[1].contourf(xx, yy, y_pred_state_clf, cmap=cmap, alpha=0.1)
        title_clf.set_text(f"Iteration: {i:2d}\nClassification")

    # Return this figure's own titles. The bare `title` name belongs to an
    # earlier cell's figure and is undefined if that cell has not been run.
    return [points, title_reg, title_clf]

# Pair the two stage-wise prediction streams frame by frame. zip_longest pads
# the shorter stream with None, so the animation runs for as many frames as
# the longer one; frames are numbered from 1.
#
# blit=False: on every frame ``update`` removes the contour set and draws a
# new one, and rewrites titles that sit outside the axes area. Blitting
# redraws only the returned artists inside each axes, so it would not show
# those changes in interactive backends. A full redraw per frame is required.
ani = FuncAnimation(
    fig,
    update,
    frames=list(enumerate(zip_longest(y_pred_gen_reg, y_pred_gen_clf), start=1)),
    interval=30,
    blit=False,
    repeat_delay=5000,
)

# Needs a movie writer available to Matplotlib.
# NOTE: the other animation cells in this notebook write to the same
# "movie.mp4", so each run overwrites the previous file.
ani.save("movie.mp4")

# Bare expression: the notebook displays the animation inline.
ani