# **Chapter 6 – Decision Trees**

_This notebook contains all the sample code and solutions to the exercises in chapter 6._

# Setup

First, let's import a few common modules, ensure Matplotlib plots figures inline, and prepare a function to save the figures. We also check that Python 3.5 or later is installed, as well as Scikit-Learn ≥0.20.

In [1]:

# NOTE: this cell is disabled. The matplotlib setup below is wrapped in a
# triple-quoted string, so it is evaluated as a plain string expression and
# none of it (the %matplotlib magic, the imports, the rc settings) executes.
'''
# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)
'''

# Where to save the figures

# NOTE: save_fig is also inside a string literal, so the function is never
# defined. This string is the last expression of the cell, which is why the
# notebook echoes it as the cell output.
# If this is ever re-enabled, its body needs `os`, `plt` and IMAGES_PATH in
# scope; none of them is defined in the visible part of this notebook.
'''
def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    path = os.path.join(IMAGES_PATH, fig_id + "." + fig_extension)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format=fig_extension, dpi=resolution)
'''

'\ndef save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):\n    path = os.path.join(IMAGES_PATH, fig_id + "." + fig_extension)\n    print("Saving figure", fig_id)\n    if tight_layout:\n        plt.tight_layout()\n    plt.savefig(path, format=fig_extension, dpi=resolution)\n'

# Training and visualizing

In [2]:
# NOTE: this cell is disabled. Everything below is a single triple-quoted
# string, so plot_decision_boundary is never defined and no figure is drawn;
# the cell only evaluates to (and echoes) the string itself.
# The text inside the string, including "# pintamos las zonas y fronteras"
# (Spanish for "draw the regions and boundaries"), is string content rather
# than a real comment.
# If re-enabled, the code needs np, plt, save_fig, tree_clf, X and y in scope;
# none of them is defined in the visible part of this notebook.
# NOTE(review): custom_cmap lists two colours while three class markers
# (y==0, y==1, y==2) are plotted -- confirm this is intended before re-enabling.
'''
from matplotlib.colors import ListedColormap

def plot_decision_boundary(clf, X, y, axes=[0, 7.5, 0, 3], iris=True, legend=False, plot_training=True):
    x1s = np.linspace(axes[0], axes[1], 100)
    x2s = np.linspace(axes[2], axes[3], 100)
    x1, x2 = np.meshgrid(x1s, x2s)


    
    X_new = np.c_[x1.ravel(), x2.ravel()] 
    

    

    
    y_pred = clf.predict(X_new).reshape(x1.shape)
    

    

    
    # pintamos las zonas y fronteras
    
    custom_cmap = ListedColormap(['#fafab0','#9898ff'])
    plt.contourf(x1, x2, y_pred, alpha=0.3, cmap=custom_cmap)
    if not iris:
        custom_cmap2 = ListedColormap(['#7d7d58','#4c4c7f','#507d50'])
        plt.contour(x1, x2, y_pred, cmap=custom_cmap2, alpha=0.8)
    if plot_training:
        plt.plot(X[:, 0][y==0], X[:, 1][y==0], "yo", label="Iris setosa")
        plt.plot(X[:, 0][y==1], X[:, 1][y==1], "gs", label="Iris versicolor")
        plt.plot(X[:, 0][y==2], X[:, 1][y==2], "b^", label="Iris virginica")
        plt.axis(axes)
    if iris:
        plt.xlabel("Petal length", fontsize=14)
        plt.ylabel("Petal width", fontsize=14)
    else:
        plt.xlabel(r"$x_1$", fontsize=18)
        plt.ylabel(r"$x_2$", fontsize=18, rotation=0)
    if legend:
        plt.legend(loc="lower right", fontsize=14)

plt.figure(figsize=(8, 4))
plot_decision_boundary(tree_clf, X, y)
plt.plot([2.45, 2.45], [0, 3], "k-", linewidth=2)
plt.plot([2.45, 7.5], [1.75, 1.75], "k--", linewidth=2)
plt.plot([4.95, 4.95], [0, 1.75], "k:", linewidth=2)
plt.plot([4.85, 4.85], [1.75, 3], "k:", linewidth=2)
plt.text(1.40, 1.0, "Depth=0", fontsize=15)
plt.text(3.2, 1.80, "Depth=1", fontsize=13)
plt.text(4.05, 0.5, "(Depth=2)", fontsize=11)

save_fig("decision_tree_decision_boundaries_plot")
plt.show();
'''

'\nfrom matplotlib.colors import ListedColormap\n\ndef plot_decision_boundary(clf, X, y, axes=[0, 7.5, 0, 3], iris=True, legend=False, plot_training=True):\n    x1s = np.linspace(axes[0], axes[1], 100)\n    x2s = np.linspace(axes[2], axes[3], 100)\n    x1, x2 = np.meshgrid(x1s, x2s)\n\n\n    \n    X_new = np.c_[x1.ravel(), x2.ravel()] \n    \n\n    \n\n    \n    y_pred = clf.predict(X_new).reshape(x1.shape)\n    \n\n    \n\n    \n    # pintamos las zonas y fronteras\n    \n    custom_cmap = ListedColormap([\'#fafab0\',\'#9898ff\'])\n    plt.contourf(x1, x2, y_pred, alpha=0.3, cmap=custom_cmap)\n    if not iris:\n        custom_cmap2 = ListedColormap([\'#7d7d58\',\'#4c4c7f\',\'#507d50\'])\n        plt.contour(x1, x2, y_pred, cmap=custom_cmap2, alpha=0.8)\n    if plot_training:\n        plt.plot(X[:, 0][y==0], X[:, 1][y==0], "yo", label="Iris setosa")\n        plt.plot(X[:, 0][y==1], X[:, 1][y==1], "gs", label="Iris versicolor")\n        plt.plot(X[:, 0][y==2], X[:, 1][y==2], "b^", label

# Predicting classes and class probabilities

# Sensitivity to training set details
The main issue with Decision Trees is that they are very sensitive to small variations in the training data. For example, if you just remove the widest Iris versicolor from the iris training set (the one with petals 4.8 cm long and 1.8 cm wide) and train a new Decision Tree, you may get the model shown earlier. As you can see, it looks very different from the previous Decision Tree.

On the left, the Decision Tree is trained with the default hyperparameters (i.e., no restrictions), and on the right it’s trained with `min_samples_leaf=4`. It is quite obvious that the model on the left is overfitting, and the model on the right will probably generalize better.

In [3]:
# NOTE: this cell is disabled. The code is wrapped in a triple-quoted string,
# so Xr and rotation_matrix are never created; the cell just echoes the string.
# As written, the code would convert 20 degrees to radians, build the 2x2
# rotation matrix for that angle and right-multiply X by it.
# X and np are not defined in the visible part of this notebook.
'''
angle = np.pi / 180 * 20
rotation_matrix = np.array([[np.cos(angle), -np.sin(angle)], [np.sin(angle), np.cos(angle)]])
Xr = X.dot(rotation_matrix)
'''


'\nangle = np.pi / 180 * 20\nrotation_matrix = np.array([[np.cos(angle), -np.sin(angle)], [np.sin(angle), np.cos(angle)]])\nXr = X.dot(rotation_matrix)\n'

In [4]:
# NOTE: this cell is disabled. The code is wrapped in a triple-quoted string,
# so no data is generated; the cell just echoes the string.
# As written, the code would:
#   - seed NumPy's global RNG with 6,
#   - draw 100 two-feature points and shift them by -0.5,
#   - label each point 2.0 when its first feature is positive and 0.0 otherwise
#     (a float32 array),
#   - right-multiply the points by a rotation matrix for pi/4 (45 degrees).
# np is not imported in the visible part of this notebook.
'''
np.random.seed(6)
Xs = np.random.rand(100, 2) - 0.5
ys = (Xs[:, 0] > 0).astype(np.float32) * 2

angle = np.pi / 4
rotation_matrix = np.array([[np.cos(angle), -np.sin(angle)], [np.sin(angle), np.cos(angle)]])
Xsr = Xs.dot(rotation_matrix)
'''



'\nnp.random.seed(6)\nXs = np.random.rand(100, 2) - 0.5\nys = (Xs[:, 0] > 0).astype(np.float32) * 2\n\nangle = np.pi / 4\nrotation_matrix = np.array([[np.cos(angle), -np.sin(angle)], [np.sin(angle), np.cos(angle)]])\nXsr = Xs.dot(rotation_matrix)\n'

# Regression trees

In [40]:
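# Quadratic training set + noise
# NOTE: the code that built the data and fitted the regressor is no longer in
# this cell; the DecisionTreeRegressor repr shown below is leftover output from
# an earlier run (execution count 40, out of sequence with the other cells).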


DecisionTreeRegressor(criterion='mse', max_depth=2, max_features=None,
                      max_leaf_nodes=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      presort=False, random_state=42, splitter='best')

In [5]:

# NOTE: this cell is disabled. Everything below is a single triple-quoted
# string, so plot_regression_predictions is never defined and no figure is
# drawn; the cell only evaluates to (and echoes) the string itself.
# If re-enabled, the code needs np, plt, save_fig, tree_reg1, tree_reg2, X and
# y in scope; none of them is defined in the visible part of this notebook.
# NOTE(review): the vertical split positions (0.1973, 0.0917, ...) are
# hard-coded literals; presumably they mirror the thresholds of the fitted
# trees -- verify against the models if the data or seed changes.
'''
def plot_regression_predictions(tree_reg, X, y, axes=[0, 1, -0.2, 1], ylabel="$y$"):
    x1 = np.linspace(axes[0], axes[1], 500).reshape(-1, 1)
    y_pred = tree_reg.predict(x1)
    plt.axis(axes)
    plt.xlabel("$x_1$", fontsize=18)
    if ylabel:
        plt.ylabel(ylabel, fontsize=18, rotation=0)
    plt.plot(X, y, "b.")
    plt.plot(x1, y_pred, "r.-", linewidth=2, label=r"$\hat{y}$")

plt.figure(figsize=(11, 4))
plt.subplot(121)
plot_regression_predictions(tree_reg1, X, y)
for split, style in ((0.1973, "k-"), (0.0917, "k--"), (0.7718, "k--")):
    plt.plot([split, split], [-0.2, 1], style, linewidth=2)
plt.text(0.21, 0.65, "Depth=0", fontsize=15)
plt.text(0.01, 0.2, "Depth=1", fontsize=13)
plt.text(0.65, 0.8, "Depth=1", fontsize=13)
plt.legend(loc="upper center", fontsize=18)
plt.title("max_depth=2", fontsize=14)

plt.subplot(122)
plot_regression_predictions(tree_reg2, X, y, ylabel=None)
for split, style in ((0.1973, "k-"), (0.0917, "k--"), (0.7718, "k--")):
    plt.plot([split, split], [-0.2, 1], style, linewidth=2)
for split in (0.0458, 0.1298, 0.2873, 0.9040):
    plt.plot([split, split], [-0.2, 1], "k:", linewidth=1)
plt.text(0.3, 0.5, "Depth=2", fontsize=13)
plt.title("max_depth=3", fontsize=14)

save_fig("tree_regression_plot")
plt.show()
'''

'\ndef plot_regression_predictions(tree_reg, X, y, axes=[0, 1, -0.2, 1], ylabel="$y$"):\n    x1 = np.linspace(axes[0], axes[1], 500).reshape(-1, 1)\n    y_pred = tree_reg.predict(x1)\n    plt.axis(axes)\n    plt.xlabel("$x_1$", fontsize=18)\n    if ylabel:\n        plt.ylabel(ylabel, fontsize=18, rotation=0)\n    plt.plot(X, y, "b.")\n    plt.plot(x1, y_pred, "r.-", linewidth=2, label=r"$\\hat{y}$")\n\nplt.figure(figsize=(11, 4))\nplt.subplot(121)\nplot_regression_predictions(tree_reg1, X, y)\nfor split, style in ((0.1973, "k-"), (0.0917, "k--"), (0.7718, "k--")):\n    plt.plot([split, split], [-0.2, 1], style, linewidth=2)\nplt.text(0.21, 0.65, "Depth=0", fontsize=15)\nplt.text(0.01, 0.2, "Depth=1", fontsize=13)\nplt.text(0.65, 0.8, "Depth=1", fontsize=13)\nplt.legend(loc="upper center", fontsize=18)\nplt.title("max_depth=2", fontsize=14)\n\nplt.subplot(122)\nplot_regression_predictions(tree_reg2, X, y, ylabel=None)\nfor split, style in ((0.1973, "k-"), (0.0917, "k--"), (0.7718, "k--"

In [6]:

# NOTE: this cell is disabled. The plotting code is wrapped in a triple-quoted
# string, so no figure is drawn or saved; the cell just echoes the string.
# As written, the code would draw two side-by-side panels of the training
# points (X, y) with the prediction curves y_pred1 (left, titled
# "No restrictions") and y_pred2 (right, titled with tree_reg2.min_samples_leaf).
# x1, y_pred1, y_pred2, tree_reg2, X, y, plt and save_fig are not defined in
# the visible part of this notebook.
'''
plt.figure(figsize=(11, 4))

plt.subplot(121)
plt.plot(X, y, "b.")
plt.plot(x1, y_pred1, "r.-", linewidth=2, label=r"$\hat{y}$")
plt.axis([0, 1, -0.2, 1.1])
plt.xlabel("$x_1$", fontsize=18)
plt.ylabel("$y$", fontsize=18, rotation=0)
plt.legend(loc="upper center", fontsize=18)
plt.title("No restrictions", fontsize=14)

plt.subplot(122)
plt.plot(X, y, "b.")
plt.plot(x1, y_pred2, "r.-", linewidth=2, label=r"$\hat{y}$")
plt.axis([0, 1, -0.2, 1.1])
plt.xlabel("$x_1$", fontsize=18)
plt.title("min_samples_leaf={}".format(tree_reg2.min_samples_leaf), fontsize=14)

save_fig("tree_regression_regularization_plot")
plt.show()
'''

'\nplt.figure(figsize=(11, 4))\n\nplt.subplot(121)\nplt.plot(X, y, "b.")\nplt.plot(x1, y_pred1, "r.-", linewidth=2, label=r"$\\hat{y}$")\nplt.axis([0, 1, -0.2, 1.1])\nplt.xlabel("$x_1$", fontsize=18)\nplt.ylabel("$y$", fontsize=18, rotation=0)\nplt.legend(loc="upper center", fontsize=18)\nplt.title("No restrictions", fontsize=14)\n\nplt.subplot(122)\nplt.plot(X, y, "b.")\nplt.plot(x1, y_pred2, "r.-", linewidth=2, label=r"$\\hat{y}$")\nplt.axis([0, 1, -0.2, 1.1])\nplt.xlabel("$x_1$", fontsize=18)\nplt.title("min_samples_leaf={}".format(tree_reg2.min_samples_leaf), fontsize=14)\n\nsave_fig("tree_regression_regularization_plot")\nplt.show()\n'