In [None]:
# Copyright 2021 Google LLC
# Use of this source code is governed by an MIT-style
# license that can be found in the LICENSE file or at
# https://opensource.org/licenses/MIT.

# Author(s): Kevin P. Murphy (murphyk@gmail.com) and Mahmoud Soliman (mjs@aucegypt.edu)

<a href="https://opensource.org/licenses/MIT" target="_parent"><img src="https://img.shields.io/github/license/probml/pyprobml"/></a>

<a href="https://colab.research.google.com/github/probml/pyprobml/blob/master/notebooks/figures//chapter18_figures.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Figure 18.1:<a name='18.1'></a> <a name='regtree'></a> 


  A simple regression tree on two inputs. Adapted from Figure 9.2 of <a href='#HastieBook'>[HTF09]</a> .  
Figure(s) generated by [regtreeSurfaceDemo.m](https://github.com/probml/pmtk3/blob/master/demos/regtreeSurfaceDemo.m) 

In [None]:
#@title Setup
%%time
# If you run this for the first time it would take ~25/30 seconds
!git clone https://github.com/probml/pyprobml /pyprobml &> /dev/null && git clone https://github.com/Sekhen/colab_powertoys.git &> /dev/null 
!pip3 install nbimporter -qqq 
%cd -q /content/colab_powertoys 
from colab_powertoys.probml_toys import probml_toys as pmlt
%cd -q /content/


In [None]:
# Figure 18.1 image, read from the /pyprobml checkout created by the Setup cell.
# NOTE(review): pmlt.show_image comes from colab_powertoys; presumably it renders the PNG inline.
pmlt.show_image("/pyprobml/notebooks/figures/images/regtree.png")

In [None]:
# Second Figure 18.1 image, read from the /pyprobml checkout created by the Setup cell.
# NOTE(review): pmlt.show_image comes from colab_powertoys; presumably it renders the PNG inline.
pmlt.show_image("/pyprobml/notebooks/figures/images/regtreeSurfaceB.png")

## Figure 18.2:<a name='18.2'></a> <a name='dtreeClassif'></a> 


  (a) A set of shapes with corresponding binary labels. The features are: color (values "blue", "red", "other"), shape (values "ellipse", "other"), and size (real-valued). (b) A hypothetical classification tree fitted to this data. A leaf labeled as $(n_1,n_0)$ means that there are $n_1$ positive examples that fall into this partition, and $n_0$ negative examples. 

In [None]:
#@title Setup
%%time
# If you run this for the first time it would take ~25/30 seconds
!git clone https://github.com/probml/pyprobml /pyprobml &> /dev/null && git clone https://github.com/Sekhen/colab_powertoys.git &> /dev/null 
!pip3 install nbimporter -qqq 
%cd -q /content/colab_powertoys 
from colab_powertoys.probml_toys import probml_toys as pmlt
%cd -q /content/


In [None]:
# Figure 18.2 image, read from the /pyprobml checkout created by the Setup cell.
# NOTE(review): pmlt.show_image comes from colab_powertoys; presumably it renders the PNG inline.
pmlt.show_image("/pyprobml/notebooks/figures/images/shapeClassificationTrain.png")

In [None]:
# Second Figure 18.2 image, read from the /pyprobml checkout created by the Setup cell.
# NOTE(review): pmlt.show_image comes from colab_powertoys; presumably it renders the PNG inline.
pmlt.show_image("/pyprobml/notebooks/figures/images/dtree.png")

## Figure 18.3:<a name='18.3'></a> <a name='dtreeUnstable'></a> 


  (a) A decision tree of depth 2 fit to the iris data, using just the petal length and petal width features. Leaf nodes are color coded according to the majority class. The number of training samples that pass from the root to each node is shown inside each box, as well as how many of these values fall into each class. This can be normalized to get a distribution over class labels for each node. (b) Decision surface induced by (a). (c) Fit to data where we omit a single data point (shown by red star). (d) Ensemble of the two models in (b) and (c).  
Figure(s) generated by [dree_sensitivity.py](https://github.com/probml/pyprobml/blob/master/scripts/dree_sensitivity.py) 

In [None]:
#@title Setup
%%time
# If you run this for the first time it would take ~25/30 seconds
!git clone https://github.com/probml/pyprobml /pyprobml &> /dev/null && git clone https://github.com/Sekhen/colab_powertoys.git &> /dev/null 
!pip3 install nbimporter -qqq 
%cd -q /content/colab_powertoys 
from colab_powertoys.probml_toys import probml_toys as pmlt
%cd -q /content/


In [None]:
# Figure 18.3: hands the script path (inside the /pyprobml checkout from the Setup cell) to the helper.
# NOTE(review): pmlt.show_and_run comes from colab_powertoys; presumably it displays the source and executes it.
# NOTE(review): "dree_sensitivity.py" looks like a typo for "dtree_sensitivity.py" -- confirm the file name in
# /pyprobml/scripts before relying on this cell.
pmlt.show_and_run("/pyprobml/scripts/dree_sensitivity.py")

## Figure 18.4:<a name='18.4'></a> <a name='bagging'></a> 


  (a) A single decision tree. (b-c) Bagging ensemble of 10 and 50 trees. (d) Random forest of 50 trees. Adapted from Figure 7.5 of <a href='#Geron2019'>[Aur19]</a> .  
Figure(s) generated by [bagging_trees.py](https://github.com/probml/pyprobml/blob/master/scripts/bagging_trees.py) [rf_demo_2d.py](https://github.com/probml/pyprobml/blob/master/scripts/rf_demo_2d.py) 

In [None]:
#@title Setup
%%time
# If you run this for the first time it would take ~25/30 seconds
!git clone https://github.com/probml/pyprobml /pyprobml &> /dev/null && git clone https://github.com/Sekhen/colab_powertoys.git &> /dev/null 
!pip3 install nbimporter -qqq 
%cd -q /content/colab_powertoys 
from colab_powertoys.probml_toys import probml_toys as pmlt
%cd -q /content/


In [None]:
# Figure 18.4: hands the script path (inside the /pyprobml checkout from the Setup cell) to the helper.
# NOTE(review): pmlt.show_and_run comes from colab_powertoys; presumably it displays the source and executes it.
pmlt.show_and_run("/pyprobml/scripts/bagging_trees.py")

In [None]:
# Figure 18.4, second script: path is inside the /pyprobml checkout from the Setup cell.
# NOTE(review): pmlt.show_and_run comes from colab_powertoys; presumably it displays the source and executes it.
pmlt.show_and_run("/pyprobml/scripts/rf_demo_2d.py")

## Figure 18.5:<a name='18.5'></a> <a name='spamCompare'></a> 


  Predictive accuracy vs size of tree ensemble for bagging, random forests and gradient boosting with log loss. Adapted from Figure 15.1 of <a href='#HastieBook'>[HTF09]</a> .  
Figure(s) generated by [spam_tree_ensemble_compare.py](https://github.com/probml/pyprobml/blob/master/scripts/spam_tree_ensemble_compare.py) 

In [None]:
#@title Setup
%%time
# If you run this for the first time it would take ~25/30 seconds
!git clone https://github.com/probml/pyprobml /pyprobml &> /dev/null && git clone https://github.com/Sekhen/colab_powertoys.git &> /dev/null 
!pip3 install nbimporter -qqq 
%cd -q /content/colab_powertoys 
from colab_powertoys.probml_toys import probml_toys as pmlt
%cd -q /content/


In [None]:
# Figure 18.5: hands the script path (inside the /pyprobml checkout from the Setup cell) to the helper.
# NOTE(review): pmlt.show_and_run comes from colab_powertoys; presumably it displays the source and executes it.
pmlt.show_and_run("/pyprobml/scripts/spam_tree_ensemble_compare.py")

## Figure 18.6:<a name='18.6'></a> <a name='boostedRegrTrees'></a> 


  Illustration of boosting using a regression tree of depth 2 applied to a 1d dataset. Adapted from Figure 7.9 of <a href='#Geron2019'>[Aur19]</a> .  
Figure(s) generated by [boosted_regr_trees.py](https://github.com/probml/pyprobml/blob/master/scripts/boosted_regr_trees.py) 

In [None]:
#@title Setup
%%time
# If you run this for the first time it would take ~25/30 seconds
!git clone https://github.com/probml/pyprobml /pyprobml &> /dev/null && git clone https://github.com/Sekhen/colab_powertoys.git &> /dev/null 
!pip3 install nbimporter -qqq 
%cd -q /content/colab_powertoys 
from colab_powertoys.probml_toys import probml_toys as pmlt
%cd -q /content/


In [None]:
# Figure 18.6: hands the script path (inside the /pyprobml checkout from the Setup cell) to the helper.
# NOTE(review): pmlt.show_and_run comes from colab_powertoys; presumably it displays the source and executes it.
pmlt.show_and_run("/pyprobml/scripts/boosted_regr_trees.py")

## Figure 18.7:<a name='18.7'></a> <a name='expLoss'></a> 


  Illustration of various loss functions for binary classification. The horizontal axis is the margin $m(\mathbf{x}) = \tilde{y} F(\mathbf{x})$; the vertical axis is the loss. The log loss uses log base 2.  
Figure(s) generated by [hingeLossPlot.m](https://github.com/probml/pmtk3/blob/master/demos/hingeLossPlot.m) 

In [None]:
#@title Setup
%%time
# If you run this for the first time it would take ~25/30 seconds
!git clone https://github.com/probml/pyprobml /pyprobml &> /dev/null && git clone https://github.com/Sekhen/colab_powertoys.git &> /dev/null 
!pip3 install nbimporter -qqq 
%cd -q /content/colab_powertoys 
from colab_powertoys.probml_toys import probml_toys as pmlt
%cd -q /content/


In [None]:
# Figure 18.7 image, read from the /pyprobml checkout created by the Setup cell.
# NOTE(review): pmlt.show_image comes from colab_powertoys; presumably it renders the PNG inline.
pmlt.show_image("/pyprobml/notebooks/figures/images/expLoss.png")

## Figure 18.8:<a name='18.8'></a> <a name='rfFeatureImportanceMnist'></a> 


  Feature importance of a random forest classifier trained to distinguish MNIST digits from classes 0 and 8. Adapted from Figure 7.6 of <a href='#Geron2019'>[Aur19]</a> .  
Figure(s) generated by [rf_feature_importance_mnist.py](https://github.com/probml/pyprobml/blob/master/scripts/rf_feature_importance_mnist.py) 

In [None]:
#@title Setup
%%time
# If you run this for the first time it would take ~25/30 seconds
!git clone https://github.com/probml/pyprobml /pyprobml &> /dev/null && git clone https://github.com/Sekhen/colab_powertoys.git &> /dev/null 
!pip3 install nbimporter -qqq 
%cd -q /content/colab_powertoys 
from colab_powertoys.probml_toys import probml_toys as pmlt
%cd -q /content/


In [None]:
# Figure 18.8: hands the script path (inside the /pyprobml checkout from the Setup cell) to the helper.
# NOTE(review): pmlt.show_and_run comes from colab_powertoys; presumably it displays the source and executes it.
pmlt.show_and_run("/pyprobml/scripts/rf_feature_importance_mnist.py")

## Figure 18.9:<a name='18.9'></a> <a name='spamPartialJoint'></a> 


  (a) Relative feature importance for the spam classification problem. Adapted from Figure 10.6 of <a href='#HastieBook'>[HTF09]</a> . (b) Partial dependence of log-odds of the spam class for 4 important predictors. The red ticks at the base of the plot are deciles of the empirical distribution for this feature. (c) Joint partial dependence of log-odds on the features hp and !. Adapted from Figure 10.6--10.8 of <a href='#HastieBook'>[HTF09]</a> .  
Figure(s) generated by [spam_tree_ensemble_interpret.py](https://github.com/probml/pyprobml/blob/master/scripts/spam_tree_ensemble_interpret.py) 

In [None]:
#@title Setup
%%time
# If you run this for the first time it would take ~25/30 seconds
!git clone https://github.com/probml/pyprobml /pyprobml &> /dev/null && git clone https://github.com/Sekhen/colab_powertoys.git &> /dev/null 
!pip3 install nbimporter -qqq 
%cd -q /content/colab_powertoys 
from colab_powertoys.probml_toys import probml_toys as pmlt
%cd -q /content/


In [None]:
# Figure 18.9: hands the script path (inside the /pyprobml checkout from the Setup cell) to the helper.
# NOTE(review): pmlt.show_and_run comes from colab_powertoys; presumably it displays the source and executes it.
pmlt.show_and_run("/pyprobml/scripts/spam_tree_ensemble_interpret.py")

## References:
 <a name='Geron2019'>[Aur19]</a> A. Géron. "Hands-On Machine Learning with Scikit-Learn and TensorFlow: Concepts, Tools, and Techniques for Building Intelligent Systems (2nd edition)". (2019). 

<a name='HastieBook'>[HTF09]</a> T. Hastie, R. Tibshirani and J. Friedman. "The Elements of Statistical Learning". (2009). 

