In [None]:
# Copyright 2021 Google LLC
# Use of this source code is governed by an MIT-style
# license that can be found in the LICENSE file or at
# https://opensource.org/licenses/MIT.

# Author(s): Kevin P. Murphy (murphyk@gmail.com) and Mahmoud Soliman (mjs@aucegypt.edu)

<a href="https://opensource.org/licenses/MIT" target="_parent"><img src="https://img.shields.io/github/license/probml/pyprobml"/></a>

<a href="https://colab.research.google.com/github/probml/pyprobml/blob/master/notebooks/figures/chapter4_figures.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Figure 4.1:<a name='4.1'></a> <a name='irisCorr'></a> 


  (a) Covariance matrix for the features in the iris dataset from Section `sec:iris`. (b) Correlation matrix. We only show the lower triangle, since the matrix is symmetric and has a unit diagonal. Compare this to Figure `fig:irisPairs`.  
Figure(s) generated by [iris_cov_mat.py](https://github.com/probml/pyprobml/blob/master/scripts/iris_cov_mat.py) 

In [None]:
#@title Setup
%%time
# If you run this for the first time it would take ~25/30 seconds
!git clone https://github.com/probml/pyprobml /pyprobml &> /dev/null && git clone https://github.com/Sekhen/colab_powertoys.git &> /dev/null 
!pip3 install nbimporter -qqq 
%cd -q /content/colab_powertoys 
from colab_powertoys.probml_toys import probml_toys as pmlt
%cd -q /content/


In [None]:
# Figure 4.1: run the iris_cov_mat.py script from the /pyprobml checkout.
# Requires the Setup cell (it defines `pmlt` and clones /pyprobml).
# NOTE(review): presumably show_and_run displays the script source and executes it — confirm in colab_powertoys.
pmlt.show_and_run("/pyprobml/scripts/iris_cov_mat.py")

## Figure 4.2:<a name='4.2'></a> <a name='hingeLoss'></a> 


  Illustration of various loss functions for binary classification. The horizontal axis is the margin $z=\tilde{y}\eta$, the vertical axis is the loss. The log loss uses log base 2.  
Figure(s) generated by [hinge_loss_plot.py](https://github.com/probml/pyprobml/blob/master/scripts/hinge_loss_plot.py) 

In [None]:
#@title Setup
%%time
# If you run this for the first time it would take ~25/30 seconds
!git clone https://github.com/probml/pyprobml /pyprobml &> /dev/null && git clone https://github.com/Sekhen/colab_powertoys.git &> /dev/null 
!pip3 install nbimporter -qqq 
%cd -q /content/colab_powertoys 
from colab_powertoys.probml_toys import probml_toys as pmlt
%cd -q /content/


In [None]:
# Figure 4.2: run the hinge_loss_plot.py script from the /pyprobml checkout.
# Requires the Setup cell (it defines `pmlt` and clones /pyprobml).
# NOTE(review): presumably show_and_run displays the script source and executes it — confirm in colab_powertoys.
pmlt.show_and_run("/pyprobml/scripts/hinge_loss_plot.py")

## Figure 4.3:<a name='4.3'></a> <a name='covshrinkDemo'></a> 


  Estimating a covariance matrix in $D=50$ dimensions using $N \in \{100, 50, 25\}$ samples. We plot the eigenvalues in descending order for the true covariance matrix (solid black), the MLE (dotted blue) and the MAP estimate (dashed red), using Equation `eqn:covShrinkLedoit` with $\lambda=0.9$. We also list the condition number of each matrix in the legend. We see that the MLE is often poorly conditioned, but the MAP estimate is numerically well behaved. Adapted from Figure 1 of <a href='#Schafer05'>[SS05]</a>.  
Figure(s) generated by [shrinkcov_plots.py](https://github.com/probml/pyprobml/blob/master/scripts/shrinkcov_plots.py) 

In [None]:
#@title Setup
%%time
# If you run this for the first time it would take ~25/30 seconds
!git clone https://github.com/probml/pyprobml /pyprobml &> /dev/null && git clone https://github.com/Sekhen/colab_powertoys.git &> /dev/null 
!pip3 install nbimporter -qqq 
%cd -q /content/colab_powertoys 
from colab_powertoys.probml_toys import probml_toys as pmlt
%cd -q /content/


In [None]:
# Figure 4.3: run the shrinkcov_plots.py script from the /pyprobml checkout.
# Requires the Setup cell (it defines `pmlt` and clones /pyprobml).
# NOTE(review): presumably show_and_run displays the script source and executes it — confirm in colab_powertoys.
pmlt.show_and_run("/pyprobml/scripts/shrinkcov_plots.py")

## Figure 4.4:<a name='4.4'></a> <a name='polyfitRidge'></a> 


  (a-c) Ridge regression applied to a degree 14 polynomial fit to 21 datapoints. (d) MSE vs strength of regularizer. The degree of regularization increases from left to right, so model complexity decreases from left to right.  
Figure(s) generated by [linreg_poly_ridge.py](https://github.com/probml/pyprobml/blob/master/scripts/linreg_poly_ridge.py) 

In [None]:
#@title Setup
%%time
# If you run this for the first time it would take ~25/30 seconds
!git clone https://github.com/probml/pyprobml /pyprobml &> /dev/null && git clone https://github.com/Sekhen/colab_powertoys.git &> /dev/null 
!pip3 install nbimporter -qqq 
%cd -q /content/colab_powertoys 
from colab_powertoys.probml_toys import probml_toys as pmlt
%cd -q /content/


In [None]:
# Figure 4.4: run the linreg_poly_ridge.py script from the /pyprobml checkout.
# Requires the Setup cell (it defines `pmlt` and clones /pyprobml).
# NOTE(review): presumably show_and_run displays the script source and executes it — confirm in colab_powertoys.
pmlt.show_and_run("/pyprobml/scripts/linreg_poly_ridge.py")

## Figure 4.5:<a name='4.5'></a> <a name='cv'></a> 


  Schematic of 5-fold cross validation. 

In [None]:
#@title Setup
%%time
# If you run this for the first time it would take ~25/30 seconds
!git clone https://github.com/probml/pyprobml /pyprobml &> /dev/null && git clone https://github.com/Sekhen/colab_powertoys.git &> /dev/null 
!pip3 install nbimporter -qqq 
%cd -q /content/colab_powertoys 
from colab_powertoys.probml_toys import probml_toys as pmlt
%cd -q /content/


In [None]:
# Figure 4.5: show the stored kfold.png image from the /pyprobml checkout (no generating script is listed).
# Requires the Setup cell (it defines `pmlt` and clones /pyprobml).
# NOTE(review): presumably show_image renders the file inline — confirm in colab_powertoys.
pmlt.show_image("/pyprobml/notebooks/figures/images/kfold.png")

## Figure 4.6:<a name='4.6'></a> <a name='polyfitRidgeVsLambda'></a> 


  Ridge regression is applied to a degree 14 polynomial fit to 21 datapoints shown in Figure `fig:polyfitRidge` for different values of the regularizer $\lambda$. The degree of regularization increases from left to right, so model complexity decreases from left to right. (a) MSE on train (blue) and test (red) vs $\log(\lambda)$. (b) 5-fold cross-validation estimate of test MSE in red; error bars are standard error of the mean. In black we plot the negative log evidence $-\log p(\mathcal{D}|\lambda)$. Both curves are scaled to lie in $[0,1]$.  
Figure(s) generated by [polyfitRidgeModelSel.m](https://github.com/probml/pmtk3/blob/master/demos/polyfitRidgeModelSel.m) 

In [None]:
#@title Setup
%%time
# If you run this for the first time it would take ~25/30 seconds
!git clone https://github.com/probml/pyprobml /pyprobml &> /dev/null && git clone https://github.com/Sekhen/colab_powertoys.git &> /dev/null 
!pip3 install nbimporter -qqq 
%cd -q /content/colab_powertoys 
from colab_powertoys.probml_toys import probml_toys as pmlt
%cd -q /content/


In [None]:
# Figure 4.6: show the stored polyfitVsRidge.png image (the listed generator is a MATLAB script, so it is not run here).
# Requires the Setup cell (it defines `pmlt` and clones /pyprobml).
# NOTE(review): presumably this is panel (a) of the figure — confirm against the image.
pmlt.show_image("/pyprobml/notebooks/figures/images/polyfitVsRidge.png")

In [None]:
# Figure 4.6: show the stored linregPolyVsRegCvEvidence2.png image from the /pyprobml checkout.
# Requires the Setup cell (it defines `pmlt` and clones /pyprobml).
# NOTE(review): presumably this is panel (b) of the figure — confirm against the image.
pmlt.show_image("/pyprobml/notebooks/figures/images/linregPolyVsRegCvEvidence2.png")

## Figure 4.7:<a name='4.7'></a> <a name='imdbperf'></a> 


  Performance of a text classifier (a neural network applied to a bag of word embeddings using average pooling) vs number of training epochs on the IMDB movie sentiment dataset. Blue = train, red = validation. (a) Cross entropy loss. Early stopping is triggered at about epoch 25. (b) Classification accuracy.  
Figure(s) generated by [imdb_mlp_bow_tf.py](https://github.com/probml/pyprobml/blob/master/scripts/imdb_mlp_bow_tf.py) 

In [None]:
#@title Setup
%%time
# If you run this for the first time it would take ~25/30 seconds
!git clone https://github.com/probml/pyprobml /pyprobml &> /dev/null && git clone https://github.com/Sekhen/colab_powertoys.git &> /dev/null 
!pip3 install nbimporter -qqq 
%cd -q /content/colab_powertoys 
from colab_powertoys.probml_toys import probml_toys as pmlt
%cd -q /content/


In [None]:
# Figure 4.7: run the imdb_mlp_bow_tf.py script from the /pyprobml checkout.
# Requires the Setup cell (it defines `pmlt` and clones /pyprobml).
# NOTE(review): presumably show_and_run displays the script source and executes it — confirm in colab_powertoys.
pmlt.show_and_run("/pyprobml/scripts/imdb_mlp_bow_tf.py")

## Figure 4.8:<a name='4.8'></a> <a name='polyfitN'></a> 


  MSE on training and test sets vs size of training set, for data generated from a degree 2 polynomial with Gaussian noise of variance $\sigma ^2=4$. We fit polynomial models of varying degree to this data.  
Figure(s) generated by [linreg_poly_vs_n.py](https://github.com/probml/pyprobml/blob/master/scripts/linreg_poly_vs_n.py) 

In [None]:
#@title Setup
%%time
# If you run this for the first time it would take ~25/30 seconds
!git clone https://github.com/probml/pyprobml /pyprobml &> /dev/null && git clone https://github.com/Sekhen/colab_powertoys.git &> /dev/null 
!pip3 install nbimporter -qqq 
%cd -q /content/colab_powertoys 
from colab_powertoys.probml_toys import probml_toys as pmlt
%cd -q /content/


In [None]:
# Figure 4.8: run the linreg_poly_vs_n.py script from the /pyprobml checkout.
# Requires the Setup cell (it defines `pmlt` and clones /pyprobml).
# NOTE(review): presumably show_and_run displays the script source and executes it — confirm in colab_powertoys.
pmlt.show_and_run("/pyprobml/scripts/linreg_poly_vs_n.py")

## Figure 4.9:<a name='4.9'></a> <a name='EMA'></a> 


  Illustration of exponentially-weighted moving average with and without bias correction. (a) Short memory: $\beta =0.9$. (b) Long memory: $\beta =0.99$.  
Figure(s) generated by [ema_demo.py](https://github.com/probml/pyprobml/blob/master/scripts/ema_demo.py) 

In [None]:
#@title Setup
%%time
# If you run this for the first time it would take ~25/30 seconds
!git clone https://github.com/probml/pyprobml /pyprobml &> /dev/null && git clone https://github.com/Sekhen/colab_powertoys.git &> /dev/null 
!pip3 install nbimporter -qqq 
%cd -q /content/colab_powertoys 
from colab_powertoys.probml_toys import probml_toys as pmlt
%cd -q /content/


In [None]:
# Figure 4.9: run the ema_demo.py script from the /pyprobml checkout.
# Requires the Setup cell (it defines `pmlt` and clones /pyprobml).
# NOTE(review): presumably show_and_run displays the script source and executes it — confirm in colab_powertoys.
pmlt.show_and_run("/pyprobml/scripts/ema_demo.py")

## References:
 <a name='Schafer05'>[SS05]</a> J. Schaefer and K. Strimmer. "A shrinkage approach to large-scale covariance matrix estimation and implications for functional genomics". In: Statist. Appl. Genet. Mol. Biol (2005). 

