In [None]:
# Copyright 2021 Google LLC
# Use of this source code is governed by an MIT-style
# license that can be found in the LICENSE file or at
# https://opensource.org/licenses/MIT.
# Notebook authors: Kevin P. Murphy (murphyk@gmail.com)
# and Mahmoud Soliman (mjs@aucegypt.edu)

# This notebook reproduces figures for chapter 4 from the book
# "Probabilistic Machine Learning: An Introduction"
# by Kevin Murphy (MIT Press, 2021).
# Book pdf is available from http://probml.ai

<a href="https://opensource.org/licenses/MIT" target="_parent"><img src="https://img.shields.io/github/license/probml/pyprobml"/></a>

<a href="https://colab.research.google.com/github/probml/pml-book/blob/main/pml1/figure_notebooks/chapter4_statistics_figures.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Figure 4.1:<a name='4.1'></a> <a name='irisCorr'></a> 


  (a) Covariance matrix for the features in the Iris dataset (book label `sec:iris`). (b) Correlation matrix. We only show the lower triangle, since the matrix is symmetric and has a unit diagonal. Compare this to the book figure labeled `fig:irisPairs`.  
Figure(s) generated by [iris_cov_mat.py](https://github.com/probml/pyprobml/blob/master/scripts/iris_cov_mat.py) 

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone https://github.com/probml/pyprobml /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  import pyprobml_utils as pml
  import colab_utils
  import os
  os.environ["PYPROBML"] = ".." # one above current scripts directory
  import google.colab 
  from google.colab.patches import cv2_imshow
  %reload_ext autoreload 
  %autoreload 2
  def show_image(img_path,size=None,ratio=None):
      img = colab_utils.image_resize(img_path, size)
      cv2_imshow(img)
  print('finished!')

In [None]:
# Regenerate Figure 4.1: execute iris_cov_mat.py from the current working directory.
%run iris_cov_mat.py

## Figure 4.2:<a name='4.2'></a> <a name='hingeLoss'></a> 


  Illustration of various loss functions for binary classification. The horizontal axis is the margin $z=\tilde{y} \eta$, the vertical axis is the loss. The log loss uses log base 2.  
Figure(s) generated by [hinge_loss_plot.py](https://github.com/probml/pyprobml/blob/master/scripts/hinge_loss_plot.py) 

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone https://github.com/probml/pyprobml /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  import pyprobml_utils as pml
  import colab_utils
  import os
  os.environ["PYPROBML"] = ".." # one above current scripts directory
  import google.colab 
  from google.colab.patches import cv2_imshow
  %reload_ext autoreload 
  %autoreload 2
  def show_image(img_path,size=None,ratio=None):
      img = colab_utils.image_resize(img_path, size)
      cv2_imshow(img)
  print('finished!')

In [None]:
# Regenerate Figure 4.2: execute hinge_loss_plot.py from the current working directory.
%run hinge_loss_plot.py

## Figure 4.3:<a name='4.3'></a> <a name='EMA'></a> 


  Illustration of exponentially-weighted moving average with and without bias correction. (a) Short memory: $\beta =0.9$. (b) Long memory: $\beta =0.99$.  
Figure(s) generated by [ema_demo.py](https://github.com/probml/pyprobml/blob/master/scripts/ema_demo.py) 

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone https://github.com/probml/pyprobml /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  import pyprobml_utils as pml
  import colab_utils
  import os
  os.environ["PYPROBML"] = ".." # one above current scripts directory
  import google.colab 
  from google.colab.patches import cv2_imshow
  %reload_ext autoreload 
  %autoreload 2
  def show_image(img_path,size=None,ratio=None):
      img = colab_utils.image_resize(img_path, size)
      cv2_imshow(img)
  print('finished!')

In [None]:
# Regenerate Figure 4.3: execute ema_demo.py from the current working directory.
%run ema_demo.py

## Figure 4.4:<a name='4.4'></a> <a name='covshrinkDemo'></a> 


  Estimating a covariance matrix in $D=50$ dimensions using $N \in \{100, 50, 25\}$ samples. We plot the eigenvalues in descending order for the true covariance matrix (solid black), the MLE (dotted blue) and the MAP estimate (dashed red), using the shrinkage estimator (book label `eqn:covShrinkLedoit`) with $\lambda=0.9$. We also list the condition number of each matrix in the legend. We see that the MLE is often poorly conditioned, but the MAP estimate is numerically well behaved. Adapted from Figure 1 of <a href='#Schafer05'>[SS05]</a> .  
Figure(s) generated by [shrinkcov_plots.py](https://github.com/probml/pyprobml/blob/master/scripts/shrinkcov_plots.py) 

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone https://github.com/probml/pyprobml /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  import pyprobml_utils as pml
  import colab_utils
  import os
  os.environ["PYPROBML"] = ".." # one above current scripts directory
  import google.colab 
  from google.colab.patches import cv2_imshow
  %reload_ext autoreload 
  %autoreload 2
  def show_image(img_path,size=None,ratio=None):
      img = colab_utils.image_resize(img_path, size)
      cv2_imshow(img)
  print('finished!')

In [None]:
# Regenerate Figure 4.4: execute shrinkcov_plots.py from the current working directory.
%run shrinkcov_plots.py

## Figure 4.5:<a name='4.5'></a> <a name='polyfitRidge'></a> 


  (a-c) Ridge regression applied to a degree 14 polynomial fit to 21 datapoints. (d) MSE vs strength of regularizer. The degree of regularization increases from left to right, so model complexity decreases from left to right.  
Figure(s) generated by [linreg_poly_ridge.py](https://github.com/probml/pyprobml/blob/master/scripts/linreg_poly_ridge.py) 

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone https://github.com/probml/pyprobml /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  import pyprobml_utils as pml
  import colab_utils
  import os
  os.environ["PYPROBML"] = ".." # one above current scripts directory
  import google.colab 
  from google.colab.patches import cv2_imshow
  %reload_ext autoreload 
  %autoreload 2
  def show_image(img_path,size=None,ratio=None):
      img = colab_utils.image_resize(img_path, size)
      cv2_imshow(img)
  print('finished!')

In [None]:
# Regenerate Figure 4.5: execute linreg_poly_ridge.py from the current working directory.
%run linreg_poly_ridge.py

## Figure 4.6:<a name='4.6'></a> <a name='cv'></a> 


  Schematic of 5-fold cross validation. 

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone https://github.com/probml/pyprobml /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  import pyprobml_utils as pml
  import colab_utils
  import os
  os.environ["PYPROBML"] = ".." # one above current scripts directory
  import google.colab 
  from google.colab.patches import cv2_imshow
  %reload_ext autoreload 
  %autoreload 2
  def show_image(img_path,size=None,ratio=None):
      img = colab_utils.image_resize(img_path, size)
      cv2_imshow(img)
  print('finished!')

<img src="https://github.com/probml/pml-book/raw/main/pml1/figures/Figure_4.6.png" width="256"/>

## Figure 4.7:<a name='4.7'></a> <a name='polyfitRidgeVsLambda'></a> 


  Ridge regression is applied to a degree 14 polynomial fit to 21 datapoints shown in <a href='#polyfitRidge'>Figure 4.5</a> for different values of the regularizer $\lambda$. The degree of regularization increases from left to right, so model complexity decreases from left to right. (a) MSE on train (blue) and test (red) vs $\log(\lambda)$. (b) 5-fold cross-validation estimate of test MSE; error bars are standard error of the mean.  
Figure(s) generated by [polyfitRidgeCV.py](https://github.com/probml/pyprobml/blob/master/scripts/polyfitRidgeCV.py) 

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone https://github.com/probml/pyprobml /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  import pyprobml_utils as pml
  import colab_utils
  import os
  os.environ["PYPROBML"] = ".." # one above current scripts directory
  import google.colab 
  from google.colab.patches import cv2_imshow
  %reload_ext autoreload 
  %autoreload 2
  def show_image(img_path,size=None,ratio=None):
      img = colab_utils.image_resize(img_path, size)
      cv2_imshow(img)
  print('finished!')

In [None]:
# Regenerate Figure 4.7: execute polyfitRidgeCV.py from the current working directory.
%run polyfitRidgeCV.py

## Figure 4.8:<a name='4.8'></a> <a name='imdbperf'></a> 


  Performance of a text classifier (a neural network applied to a bag of word embeddings using average pooling) vs number of training epochs on the IMDB movie sentiment dataset. Blue = train, red = validation. (a) Cross entropy loss. Early stopping is triggered at about epoch 25. (b) Classification accuracy.  
Figure(s) generated by [imdb_mlp_bow_tf.py](https://github.com/probml/pyprobml/blob/master/scripts/imdb_mlp_bow_tf.py) 

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone https://github.com/probml/pyprobml /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  import pyprobml_utils as pml
  import colab_utils
  import os
  os.environ["PYPROBML"] = ".." # one above current scripts directory
  import google.colab 
  from google.colab.patches import cv2_imshow
  %reload_ext autoreload 
  %autoreload 2
  def show_image(img_path,size=None,ratio=None):
      img = colab_utils.image_resize(img_path, size)
      cv2_imshow(img)
  print('finished!')

In [None]:
# Regenerate Figure 4.8: execute imdb_mlp_bow_tf.py from the current working directory.
%run imdb_mlp_bow_tf.py

## Figure 4.9:<a name='4.9'></a> <a name='polyfitN'></a> 


  MSE on training and test sets vs size of training set, for data generated from a degree 2 polynomial with Gaussian noise of variance $\sigma ^2=4$. We fit polynomial models of varying degree to this data.  
Figure(s) generated by [linreg_poly_vs_n.py](https://github.com/probml/pyprobml/blob/master/scripts/linreg_poly_vs_n.py) 

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone https://github.com/probml/pyprobml /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  import pyprobml_utils as pml
  import colab_utils
  import os
  os.environ["PYPROBML"] = ".." # one above current scripts directory
  import google.colab 
  from google.colab.patches import cv2_imshow
  %reload_ext autoreload 
  %autoreload 2
  def show_image(img_path,size=None,ratio=None):
      img = colab_utils.image_resize(img_path, size)
      cv2_imshow(img)
  print('finished!')

In [None]:
# Regenerate Figure 4.9: execute linreg_poly_vs_n.py from the current working directory.
%run linreg_poly_vs_n.py

## Figure 4.10:<a name='4.10'></a> <a name='betaPost1'></a> 


  Updating a Beta prior with a Bernoulli likelihood with sufficient statistics $N_1=4,N_0=1$. (a) Beta(2,2) prior. (b) Uniform Beta(1,1) prior.  
Figure(s) generated by [beta_binom_post_plot.py](https://github.com/probml/pyprobml/blob/master/scripts/beta_binom_post_plot.py) 

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone https://github.com/probml/pyprobml /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  import pyprobml_utils as pml
  import colab_utils
  import os
  os.environ["PYPROBML"] = ".." # one above current scripts directory
  import google.colab 
  from google.colab.patches import cv2_imshow
  %reload_ext autoreload 
  %autoreload 2
  def show_image(img_path,size=None,ratio=None):
      img = colab_utils.image_resize(img_path, size)
      cv2_imshow(img)
  print('finished!')

In [None]:
# Regenerate Figure 4.10: execute beta_binom_post_plot.py from the current working directory.
%run beta_binom_post_plot.py

## Figure 4.11:<a name='4.11'></a> <a name='sequentialCoinToss'></a> 


  Illustration of sequential Bayesian updating for the beta-Bernoulli model. Each colored box represents the predicted distribution $p(x_t|\mathbf{h}_t)$, where $\mathbf{h}_t=(N_{1,t},N_{0,t})$ is the sufficient statistic derived from the history of observations up until time $t$, namely the total number of heads and tails. The probability of heads (blue bar) is given by $p(x_t=1|\mathbf{h}_t) = (N_{1,t} + 1)/(t+2)$, assuming we start with a uniform $\mathrm{Beta}(\theta|1,1)$ prior. From Figure 3 of <a href='#Ortega2019'>[Ped+19]</a> . Used with kind permission of Pedro Ortega. 

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone https://github.com/probml/pyprobml /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  import pyprobml_utils as pml
  import colab_utils
  import os
  os.environ["PYPROBML"] = ".." # one above current scripts directory
  import google.colab 
  from google.colab.patches import cv2_imshow
  %reload_ext autoreload 
  %autoreload 2
  def show_image(img_path,size=None,ratio=None):
      img = colab_utils.image_resize(img_path, size)
      cv2_imshow(img)
  print('finished!')

<img src="https://github.com/probml/pml-book/raw/main/pml1/figures/Figure_4.11.png" width="256"/>

## Figure 4.12:<a name='4.12'></a> <a name='BBpostpred'></a> 


  (a) Posterior predictive distributions for 10 future trials after seeing $N_1=4$ heads and $N_0=1$ tails. (b) Plug-in approximation based on the same data. In both cases, we use a uniform prior.  
Figure(s) generated by [beta_binom_post_pred_plot.py](https://github.com/probml/pyprobml/blob/master/scripts/beta_binom_post_pred_plot.py) 

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone https://github.com/probml/pyprobml /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  import pyprobml_utils as pml
  import colab_utils
  import os
  os.environ["PYPROBML"] = ".." # one above current scripts directory
  import google.colab 
  from google.colab.patches import cv2_imshow
  %reload_ext autoreload 
  %autoreload 2
  def show_image(img_path,size=None,ratio=None):
      img = colab_utils.image_resize(img_path, size)
      cv2_imshow(img)
  print('finished!')

In [None]:
# Regenerate Figure 4.12: execute beta_binom_post_pred_plot.py from the current working directory.
%run beta_binom_post_pred_plot.py

## Figure 4.13:<a name='4.13'></a> <a name='mixBeta'></a> 


  A mixture of two Beta distributions.  
Figure(s) generated by [mixbetademo.py](https://github.com/probml/pyprobml/blob/master/scripts/mixbetademo.py) 

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone https://github.com/probml/pyprobml /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  import pyprobml_utils as pml
  import colab_utils
  import os
  os.environ["PYPROBML"] = ".." # one above current scripts directory
  import google.colab 
  from google.colab.patches import cv2_imshow
  %reload_ext autoreload 
  %autoreload 2
  def show_image(img_path,size=None,ratio=None):
      img = colab_utils.image_resize(img_path, size)
      cv2_imshow(img)
  print('finished!')

In [None]:
# Regenerate Figure 4.13: execute mixbetademo.py from the current working directory.
%run mixbetademo.py

## Figure 4.14:<a name='4.14'></a> <a name='dirichlet'></a> 


  (a) The Dirichlet distribution when $K=3$ defines a distribution over the simplex, which can be represented by the triangular surface. Points on this surface satisfy $0 \leq \theta_k \leq 1$ and $\sum_{k=1}^3 \theta_k = 1$.  
Figure(s) generated by [dirichlet_3d_triangle_plot.py](https://github.com/probml/pyprobml/blob/master/scripts/dirichlet_3d_triangle_plot.py) [dirichlet_3d_spiky_plot.py](https://github.com/probml/pyprobml/blob/master/scripts/dirichlet_3d_spiky_plot.py) 

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone https://github.com/probml/pyprobml /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  import pyprobml_utils as pml
  import colab_utils
  import os
  os.environ["PYPROBML"] = ".." # one above current scripts directory
  import google.colab 
  from google.colab.patches import cv2_imshow
  %reload_ext autoreload 
  %autoreload 2
  def show_image(img_path,size=None,ratio=None):
      img = colab_utils.image_resize(img_path, size)
      cv2_imshow(img)
  print('finished!')

In [None]:
# Regenerate Figure 4.14 (first of two scripts): execute dirichlet_3d_triangle_plot.py from the current working directory.
%run dirichlet_3d_triangle_plot.py

In [None]:
# Regenerate Figure 4.14 (second of two scripts): execute dirichlet_3d_spiky_plot.py from the current working directory.
%run dirichlet_3d_spiky_plot.py

## Figure 4.15:<a name='4.15'></a> <a name='dirichletSamples'></a> 


  Samples from a 5-dimensional symmetric Dirichlet distribution for different parameter values. (a) $\breve{\boldsymbol{\alpha}} = (0.1,\ldots,0.1)$. This results in very sparse distributions, with many 0s. (b) $\breve{\boldsymbol{\alpha}} = (1,\ldots,1)$. This results in more uniform (and dense) distributions.  
Figure(s) generated by [dirichlet_samples_plot.py](https://github.com/probml/pyprobml/blob/master/scripts/dirichlet_samples_plot.py) 

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone https://github.com/probml/pyprobml /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  import pyprobml_utils as pml
  import colab_utils
  import os
  os.environ["PYPROBML"] = ".." # one above current scripts directory
  import google.colab 
  from google.colab.patches import cv2_imshow
  %reload_ext autoreload 
  %autoreload 2
  def show_image(img_path,size=None,ratio=None):
      img = colab_utils.image_resize(img_path, size)
      cv2_imshow(img)
  print('finished!')

In [None]:
# Regenerate Figure 4.15: execute dirichlet_samples_plot.py from the current working directory.
%run dirichlet_samples_plot.py

## Figure 4.16:<a name='4.16'></a> <a name='gaussInferParamsMean1d'></a> 


  Inferring the mean of a univariate Gaussian with known $\sigma^2$. (a) Using strong prior, $p(\mu) = \mathcal{N}(\mu|0,1)$. (b) Using weak prior, $p(\mu) = \mathcal{N}(\mu|0,5)$.  
Figure(s) generated by [gauss_infer_1d.py](https://github.com/probml/pyprobml/blob/master/scripts/gauss_infer_1d.py) 

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone https://github.com/probml/pyprobml /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  import pyprobml_utils as pml
  import colab_utils
  import os
  os.environ["PYPROBML"] = ".." # one above current scripts directory
  import google.colab 
  from google.colab.patches import cv2_imshow
  %reload_ext autoreload 
  %autoreload 2
  def show_image(img_path,size=None,ratio=None):
      img = colab_utils.image_resize(img_path, size)
      cv2_imshow(img)
  print('finished!')

In [None]:
# Regenerate Figure 4.16: execute gauss_infer_1d.py from the current working directory.
%run gauss_infer_1d.py

## Figure 4.17:<a name='4.17'></a> <a name='gauss2dupdate2'></a> 


  Illustration of Bayesian inference for the mean of a 2d Gaussian. (a) The data is generated from $\mathbf{y}_n \sim \mathcal{N}(\boldsymbol{\mu},\boldsymbol{\Sigma})$, where $\boldsymbol{\mu}=[0.5, 0.5]^{\mathsf{T}}$ and $\boldsymbol{\Sigma}=0.1 [2, 1; 1, 1]$. (b) The prior is $p(\boldsymbol{\mu}) = \mathcal{N}(\boldsymbol{\mu}|\boldsymbol{0},0.1 \mathbf{I}_2)$. (c) We show the posterior after 10 data points have been observed.  
Figure(s) generated by [gaussInferParamsMean2d.py](https://github.com/probml/pyprobml/blob/master/scripts/gaussInferParamsMean2d.py) 

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone https://github.com/probml/pyprobml /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  import pyprobml_utils as pml
  import colab_utils
  import os
  os.environ["PYPROBML"] = ".." # one above current scripts directory
  import google.colab 
  from google.colab.patches import cv2_imshow
  %reload_ext autoreload 
  %autoreload 2
  def show_image(img_path,size=None,ratio=None):
      img = colab_utils.image_resize(img_path, size)
      cv2_imshow(img)
  print('finished!')

In [None]:
# Regenerate Figure 4.17: execute gaussInferParamsMean2d.py from the current working directory.
%run gaussInferParamsMean2d.py

## Figure 4.18:<a name='4.18'></a> <a name='betaCI'></a> 


  (a) Central interval and (b) HPD region for a Beta(3,9) posterior. The CI is (0.06, 0.52) and the HPD is (0.04, 0.48). Adapted from Figure 3.6 of <a href='#Hoff09'>[Hof09]</a> .  
Figure(s) generated by [betaHPD.py](https://github.com/probml/pyprobml/blob/master/scripts/betaHPD.py) 

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone https://github.com/probml/pyprobml /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  import pyprobml_utils as pml
  import colab_utils
  import os
  os.environ["PYPROBML"] = ".." # one above current scripts directory
  import google.colab 
  from google.colab.patches import cv2_imshow
  %reload_ext autoreload 
  %autoreload 2
  def show_image(img_path,size=None,ratio=None):
      img = colab_utils.image_resize(img_path, size)
      cv2_imshow(img)
  print('finished!')

In [None]:
# Regenerate Figure 4.18: execute betaHPD.py from the current working directory.
%run betaHPD.py

## Figure 4.19:<a name='4.19'></a> <a name='CIvsHPD'></a> 


  (a) Central interval and (b) HPD region for a hypothetical multimodal posterior. Adapted from Figure 2.2 of <a href='#Gelman04'>[Gel+04]</a> .  
Figure(s) generated by [postDensityIntervals.py](https://github.com/probml/pyprobml/blob/master/scripts/postDensityIntervals.py) 

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone https://github.com/probml/pyprobml /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  import pyprobml_utils as pml
  import colab_utils
  import os
  os.environ["PYPROBML"] = ".." # one above current scripts directory
  import google.colab 
  from google.colab.patches import cv2_imshow
  %reload_ext autoreload 
  %autoreload 2
  def show_image(img_path,size=None,ratio=None):
      img = colab_utils.image_resize(img_path, size)
      cv2_imshow(img)
  print('finished!')

In [None]:
# Regenerate Figure 4.19: execute postDensityIntervals.py from the current working directory.
%run postDensityIntervals.py

## Figure 4.20:<a name='4.20'></a> <a name='irisSepal'></a> 


  (a) Logistic regression for classifying if an Iris flower is versicolor ($y=1$) or setosa ($y=0$) using a single input feature $x$ corresponding to sepal length. Labeled points have been (vertically) jittered to avoid overlapping too much. Vertical line is the decision boundary.  
Figure(s) generated by [logreg_iris_1d.py](https://github.com/probml/pyprobml/blob/master/scripts/logreg_iris_1d.py) [logreg_iris_bayes_1d_pymc3.py](https://github.com/probml/pyprobml/blob/master/scripts/logreg_iris_bayes_1d_pymc3.py) 

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone https://github.com/probml/pyprobml /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  import pyprobml_utils as pml
  import colab_utils
  import os
  os.environ["PYPROBML"] = ".." # one above current scripts directory
  import google.colab 
  from google.colab.patches import cv2_imshow
  %reload_ext autoreload 
  %autoreload 2
  def show_image(img_path,size=None,ratio=None):
      img = colab_utils.image_resize(img_path, size)
      cv2_imshow(img)
  print('finished!')

In [None]:
# Regenerate Figure 4.20 (first of two scripts): execute logreg_iris_1d.py from the current working directory.
%run logreg_iris_1d.py

In [None]:
# Regenerate Figure 4.20 (second of two scripts): execute logreg_iris_bayes_1d_pymc3.py from the current working directory.
%run logreg_iris_bayes_1d_pymc3.py

## Figure 4.21:<a name='4.21'></a> <a name='uncertaintyArrivalTime'></a> 


  Distribution of arrival times for two different shipping companies. ETA is the expected time of arrival. A's distribution has greater uncertainty, and may be too risky. From   https://bit.ly/39bc4XL . Used with kind permission of Brendan Hasz. 

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone https://github.com/probml/pyprobml /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  import pyprobml_utils as pml
  import colab_utils
  import os
  os.environ["PYPROBML"] = ".." # one above current scripts directory
  import google.colab 
  from google.colab.patches import cv2_imshow
  %reload_ext autoreload 
  %autoreload 2
  def show_image(img_path,size=None,ratio=None):
      img = colab_utils.image_resize(img_path, size)
      cv2_imshow(img)
  print('finished!')

<img src="https://github.com/probml/pml-book/raw/main/pml1/figures/Figure_4.21.png" width="256"/>

## Figure 4.22:<a name='4.22'></a> <a name='bbPost'></a> 


  Approximating the posterior of a beta-Bernoulli model. (a) Grid approximation using 20 grid points. (b) Laplace approximation.  
Figure(s) generated by [beta_binom_approx_post_pymc3.py](https://github.com/probml/pyprobml/blob/master/scripts/beta_binom_approx_post_pymc3.py) 

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone https://github.com/probml/pyprobml /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  import pyprobml_utils as pml
  import colab_utils
  import os
  os.environ["PYPROBML"] = ".." # one above current scripts directory
  import google.colab 
  from google.colab.patches import cv2_imshow
  %reload_ext autoreload 
  %autoreload 2
  def show_image(img_path,size=None,ratio=None):
      img = colab_utils.image_resize(img_path, size)
      cv2_imshow(img)
  print('finished!')

In [None]:
# Regenerate Figure 4.22: execute beta_binom_approx_post_pymc3.py from the current working directory.
%run beta_binom_approx_post_pymc3.py

## Figure 4.23:<a name='4.23'></a> <a name='bootstrapDemoBer'></a> 


  Bootstrap (top row) vs Bayes (bottom row). The $N$ data cases were generated from $\mathrm{Ber}(\theta=0.7)$. Left column: $N=10$. Right column: $N=100$. (a-b) A bootstrap approximation to the sampling distribution of the MLE for a Bernoulli distribution. We show the histogram derived from $B=10,000$ bootstrap samples. (c-d) Histogram of 10,000 samples from the posterior distribution using a uniform prior.  
Figure(s) generated by [bootstrapDemoBer.py](https://github.com/probml/pyprobml/blob/master/scripts/bootstrapDemoBer.py) 

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone https://github.com/probml/pyprobml /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  import pyprobml_utils as pml
  import colab_utils
  import os
  os.environ["PYPROBML"] = ".." # one above current scripts directory
  import google.colab 
  from google.colab.patches import cv2_imshow
  %reload_ext autoreload 
  %autoreload 2
  def show_image(img_path,size=None,ratio=None):
      img = colab_utils.image_resize(img_path, size)
      cv2_imshow(img)
  print('finished!')

In [None]:
# Regenerate Figure 4.23: execute bootstrapDemoBer.py from the current working directory.
%run bootstrapDemoBer.py

## Figure 4.24:<a name='4.24'></a> <a name='samplingDistGaussShrinkage'></a> 


  Left: Sampling distribution of the MAP estimate (equivalent to the posterior mean) under a $\mathcal{N}(\theta_0=0,\sigma^2/\kappa_0)$ prior with different prior strengths $\kappa_0$. (If we set $\kappa_0=0$, the MAP estimate reduces to the MLE.) The data is $n=5$ samples drawn from $\mathcal{N}(\theta^*=1,\sigma^2=1)$. Right: MSE relative to that of the MLE versus sample size. Adapted from Figure 5.6 of <a href='#Hoff09'>[Hof09]</a> .  
Figure(s) generated by [samplingDistributionGaussianShrinkage.py](https://github.com/probml/pyprobml/blob/master/scripts/samplingDistributionGaussianShrinkage.py) 

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone https://github.com/probml/pyprobml /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  import pyprobml_utils as pml
  import colab_utils
  import os
  os.environ["PYPROBML"] = ".." # one above current scripts directory
  import google.colab 
  from google.colab.patches import cv2_imshow
  %reload_ext autoreload 
  %autoreload 2
  def show_image(img_path,size=None,ratio=None):
      img = colab_utils.image_resize(img_path, size)
      cv2_imshow(img)
  print('finished!')

In [None]:
# Regenerate Figure 4.24: execute samplingDistributionGaussianShrinkage.py from the current working directory.
%run samplingDistributionGaussianShrinkage.py

## Figure 4.25:<a name='4.25'></a> <a name='biasVarianceLinReg'></a> 


  Illustration of bias-variance tradeoff for ridge regression. We generate 100 data sets from the true function, shown in solid green. Left: we plot the regularized fit for 20 different data sets. We use linear regression with a Gaussian RBF expansion, with 25 centers evenly spread over the $[0,1]$ interval. Right: we plot the average of the fits, averaged over all 100 datasets. Top row: strongly regularized: we see that the individual fits are similar to each other (low variance), but the average is far from the truth (high bias). Bottom row: lightly regularized: we see that the individual fits are quite different from each other (high variance), but the average is close to the truth (low bias). Adapted from <a href='#BishopBook'>[Bis06]</a>  Figure 3.5.  
Figure(s) generated by [biasVarModelComplexity3.py](https://github.com/probml/pyprobml/blob/master/scripts/biasVarModelComplexity3.py) 

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone https://github.com/probml/pyprobml /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  import pyprobml_utils as pml
  import colab_utils
  import os
  os.environ["PYPROBML"] = ".." # one above current scripts directory
  import google.colab 
  from google.colab.patches import cv2_imshow
  %reload_ext autoreload 
  %autoreload 2
  def show_image(img_path,size=None,ratio=None):
      img = colab_utils.image_resize(img_path, size)
      cv2_imshow(img)
  print('finished!')

In [None]:
# Regenerate Figure 4.25: execute biasVarModelComplexity3.py from the current working directory.
%run biasVarModelComplexity3.py

## Figure 4.26:<a name='4.26'></a> <a name='biasVarianceCartoon'></a> 


  Cartoon illustration of the bias variance tradeoff. From   http://scott.fortmann-roe.com/docs/BiasVariance.html . Used with kind permission of Scott Fortmann-Roe. 

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone https://github.com/probml/pyprobml /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  import pyprobml_utils as pml
  import colab_utils
  import os
  os.environ["PYPROBML"] = ".." # one above current scripts directory
  import google.colab 
  from google.colab.patches import cv2_imshow
  %reload_ext autoreload 
  %autoreload 2
  def show_image(img_path,size=None,ratio=None):
      img = colab_utils.image_resize(img_path, size)
      cv2_imshow(img)
  print('finished!')

<img src="https://github.com/probml/pml-book/raw/main/pml1/figures/Figure_4.26.png" width="256"/>

## References:
 <a name='BishopBook'>[Bis06]</a> C. Bishop "Pattern recognition and machine learning". (2006). 

<a name='Gelman04'>[Gel+04]</a> A. Gelman, J. Carlin, H. Stern and D. Rubin. "Bayesian data analysis". (2004). 

<a name='Hoff09'>[Hof09]</a> P. Hoff "A First Course in Bayesian Statistical Methods". (2009). 

<a name='Ortega2019'>[Ped+19]</a> P. A. Ortega, J. X. Wang, M. Rowland, T. Genewein, Z. Kurth-Nelson, R. Pascanu, N. Heess, J. Veness, A. Pritzel, P. Sprechmann, S. M. Jayakumar, T. McGrath, K. Miller, M. Azar, I. Osband, N. Rabinowitz, A. György, S. Chiappa, S. Osindero, Y. W. Teh, H. van Hasselt, N. de Freitas, M. Botvinick and S. Legg. "Meta-learning of Sequential Strategies". abs/1905.03030 (2019). arXiv: 1905.03030 

<a name='Schafer05'>[SS05]</a> J. Schaefer and K. Strimmer. "A shrinkage approach to large-scale covariance matrix estimation and implications for functional genomics". In: Statist. Appl. Genet. Mol. Biol (2005). 

