In [1]:
import pandas as pd
import numpy as np
from scipy import stats
import xarray

# Seed NumPy's global random state so the simulated coin flips below repeat
# on every fresh run (scipy.stats draws from this state when no random_state
# is passed -- per the SciPy docs; confirm for the installed version).
np.random.seed(1)
### Bokeh Libraries 
from bokeh.io import output_notebook, show
from bokeh.plotting import figure
from bokeh.models import Span,Range1d,Label,LabelSet
# Route Bokeh output to the notebook so show(...) renders figures inline.
output_notebook()

### Figure 4.1 simulations

In [2]:
# Simulate N flips of a fair coin and plot the running proportion of heads.
N = 500          # Number of flips.
pheads = 0.5     # True probability of heads.
flipsequence = stats.bernoulli.rvs(p=pheads, loc=0, size=N)

# Flip numbers 1..N, derived from N so the axis and the divisor always match
# the sample size (a literal 501 breaks as soon as N is changed).
flipnumber = np.arange(1, N + 1)
runProp = np.cumsum(flipsequence) / flipnumber

p = figure(plot_width=600, plot_height=400, toolbar_location=None,tools="",title = 'Running proportion of heads')
p.line(flipnumber, runProp, line_width=1)
p.xaxis.axis_label = "Flip Number"
p.yaxis.axis_label = "Proportions of heads"
p.y_range = Range1d(0, 1)
# Horizontal reference line at the true probability of heads.
hline = Span(location=pheads, dimension='width', line_color='blue')
p.add_layout(hline)
show(p)

In [3]:
## Build the HairEyeColor contingency table as a labelled 3-D array.
_name = 'HairEyeColor'

# Logical axis order, axis lengths and category labels.
_dims = ['Hair', 'Eye', 'Sex']
_dim = (4, 4, 2)
_coords = [
    ['Black', 'Brown', 'Red', 'Blond'],
    ['Brown', 'Blue', 'Hazel', 'Green'],
    ['Male', 'Female'],
]

# The counts are listed with Hair varying fastest, then Eye, then Sex, so the
# row-major reshape uses the reversed shape (Sex, Eye, Hair).
raw_data = np.array([
    32, 53, 10, 3, 11, 50, 10, 30, 10, 25, 7, 5, 3, 15, 7, 8,
    36, 66, 16, 4,  9, 34,  7, 64,  5, 29, 7, 5, 2, 14, 7, 8,
]).reshape(tuple(reversed(_dim)))

# Axis names and labels are reversed the same way to line up with the data.
HairEyeColor_data = xarray.DataArray(
    raw_data,
    coords=list(reversed(_coords)),
    dims=list(reversed(_dims)),
    name=_name,
)


### Exercise 4.1

In [4]:
## Joint, marginal and conditional proportions computed with xarray.
_rule = '--------------------------------------------'

# Joint proportions of eye and hair colour (Table 4.1): collapse over Sex.
EyeHairFreq = HairEyeColor_data.sum(dim='Sex')
EyeHairProp = EyeHairFreq / EyeHairFreq.sum()

# Marginal proportions of hair colour: collapse over Sex and Eye.
HairFreq = HairEyeColor_data.sum(dim=['Sex', 'Eye'])
HairProp = HairFreq / HairFreq.sum()

# Marginal proportions of eye colour (Table 4.1): collapse over Hair and Sex.
EyeFreq = HairEyeColor_data.sum(dim=['Hair', 'Sex'])
EyeProp = EyeFreq / EyeFreq.sum()

# Conditional probability of hair colour given blue eyes (Table 4.2).
HairGivenBlueEyes = EyeHairProp.sel(Eye='Blue') / EyeProp.sel(Eye='Blue')

# Print the raw counts, then each rounded table, with a rule between them.
print(HairEyeColor_data)
for _table in (EyeHairProp, HairProp, EyeProp):
    print(_rule)
    print(np.around(_table, 2))
print(_rule)
print(HairGivenBlueEyes)

<xarray.DataArray 'HairEyeColor' (Sex: 2, Eye: 4, Hair: 4)>
array([[[32, 53, 10,  3],
        [11, 50, 10, 30],
        [10, 25,  7,  5],
        [ 3, 15,  7,  8]],

       [[36, 66, 16,  4],
        [ 9, 34,  7, 64],
        [ 5, 29,  7,  5],
        [ 2, 14,  7,  8]]])
Coordinates:
  * Sex      (Sex) <U6 'Male' 'Female'
  * Eye      (Eye) <U5 'Brown' 'Blue' 'Hazel' 'Green'
  * Hair     (Hair) <U5 'Black' 'Brown' 'Red' 'Blond'
--------------------------------------------
<xarray.DataArray 'HairEyeColor' (Eye: 4, Hair: 4)>
array([[0.11, 0.2 , 0.04, 0.01],
       [0.03, 0.14, 0.03, 0.16],
       [0.03, 0.09, 0.02, 0.02],
       [0.01, 0.05, 0.02, 0.03]])
Coordinates:
  * Eye      (Eye) <U5 'Brown' 'Blue' 'Hazel' 'Green'
  * Hair     (Hair) <U5 'Black' 'Brown' 'Red' 'Blond'
--------------------------------------------
<xarray.DataArray 'HairEyeColor' (Hair: 4)>
array([0.18, 0.48, 0.12, 0.21])
Coordinates:
  * Hair     (Hair) <U5 'Black' 'Brown' 'Red' 'Blond'
-----------------------------

### Exercise 4.2

In [5]:
# Simulate N flips of a biased coin and plot the running proportion of heads.
N = 500          # Number of flips.
pheads = 0.8     # True probability of heads.
flipsequence = stats.bernoulli.rvs(p=pheads, loc=0, size=N)

# Flip numbers 1..N, derived from N so the axis and the divisor always match
# the sample size (a literal 501 breaks as soon as N is changed).
flipnumber = np.arange(1, N + 1)
runProp = np.cumsum(flipsequence) / flipnumber

p = figure(plot_width=600, plot_height=400, toolbar_location=None,tools="",title = 'Running proportion of heads')
p.line(flipnumber, runProp, line_width=1)
p.xaxis.axis_label = "Flip Number"
p.yaxis.axis_label = "Proportions of heads"
p.y_range = Range1d(0, 1)
# Horizontal reference line at the true probability of heads.
hline = Span(location=pheads, dimension='width', line_color='blue')
p.add_layout(hline)
show(p)

### Exercise 4.4

In [6]:
# Approximate the integral of the density p(x) = 6x(1-x) over [xlow, xhigh]
# by summing dx * p(x) over a comb of points spaced dx apart.
xlow = 0
xhigh = 1
dx = 0.01
diff = xhigh - xlow
# Use round(diff/dx) + 1 points so that both ends are included and the spacing
# is exactly dx. With only diff/dx points the spacing is diff/(n-1), which no
# longer matches the interval width dx used in the sum and biases it low.
x = np.linspace(xlow, xhigh, int(round(diff/dx)) + 1)
y = 6*x*(1-x)
p = figure(plot_width=600, plot_height=400, toolbar_location=None,tools="",title = 'Integral density')
p.line(x,y ,line_width=1)
# Thin bars mark the comb points at which the density is evaluated.
p.vbar(x=x, width=0.001, bottom=0,top=y)
# Riemann-sum approximation of the area under the density.
text_to_print = 'Σₓ Δxp(x) = '+ '{:0.2f}'.format(dx*y.sum())

area = Label(x=0.7, y=1.4, x_units='data', y_units='data',
                 text= text_to_print, render_mode='css',
                 border_line_color='black', border_line_alpha=1.0,
                 background_fill_color='white', background_fill_alpha=1.0)

p.add_layout(area)
p.xaxis.axis_label = 'x'
p.yaxis.axis_label = 'y'

show(p)

### Exercise 4.5

In [7]:
# Graph of normal probability density function, with comb of intervals, plus
# the approximate probability mass within one standard deviation of the mean.
meanval = 0.0               # Specify mean of distribution.
sdval = 0.2                 # Specify standard deviation of distribution.
dx = sdval/10               # Specify interval width on x-axis.

# Comb of points for the plot: mean +/- 3.5 sd, both ends included.
# linspace with an explicit point count is used instead of a float-step
# arange, whose length depends on rounding and needed a fudge term added to
# the upper limit to reach it.
plot_low = meanval - 3.5*sdval
plot_high = meanval + 3.5*sdval
x_plot = np.linspace(plot_low, plot_high, int(round((plot_high - plot_low)/dx)) + 1)
y_plot = stats.norm.pdf(x_plot, loc=meanval, scale=sdval)
p = figure(plot_width=600, plot_height=400, toolbar_location=None,tools="",title = 'Integral density')
p.line(x_plot, y_plot, line_width=1)
p.vbar(x=x_plot, width=0.001, bottom=0, top=y_plot)

# Comb of points for the mass: from mean - 1 sd up to, but not including,
# mean + 1 sd. Each point stands for the interval [x, x + dx), so the upper
# limit is left out to keep the summed intervals inside [xlow, xhigh].
xlow  = meanval - 1*sdval   # Specify low end of the summed interval.
xhigh = meanval + 1*sdval   # Specify high end of the summed interval.
x = np.linspace(xlow, xhigh, int(round((xhigh - xlow)/dx)), endpoint=False)
y = stats.norm.pdf(x, loc=meanval, scale=sdval)

# Riemann-sum approximation of the probability mass between xlow and xhigh.
text_to_print = 'Σₓ Δxp(x) = '+ '{:0.2f}'.format(dx*y.sum())

area = Label(x=0.2, y=1.4, x_units='data', y_units='data',
                 text= text_to_print, render_mode='css',
                 border_line_color='black', border_line_alpha=1.0,
                 background_fill_color='white', background_fill_alpha=1.0)

p.add_layout(area)
p.xaxis.axis_label = 'x'
p.yaxis.axis_label = 'y'

show(p)

(b) mu = 162 cm, sigma = 15 cm