# Week 1
## Implement random walk experiment to demonstrate CLT and a Gamma-Poisson model in Stan estimated on just one point


## load libraries

In [None]:
# numerical libraries
import numpy as np
import scipy.stats as st
import pandas as pd

# stan!
import pystan

# plotting libraries
import matplotlib.pyplot as plt
import seaborn as sns
%pylab inline

## Friends flip out and get the CLT

Suppose you and a thousand of your closest friends line up on the halfway line of a soccer field. Each of you has a coin in your hand. At the sound of a whistle, you begin flipping the coins. Each time a coin comes up heads, that person moves one step towards the left-hand goal. Each time a coin comes up tails, that person moves one step towards the right-hand goal.

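Each person flips the coin 16 times, follows the implied moves, and then stands still. Now we measure the distance of each person from the halfway line. Can you predict the proportion of people who are standing on the halfway line? How about the proportion 5 yards away? We claim that the distribution of people around the halfway line will be approximately normal. The simulation below generalizes the coin flip: each of `n_friends` walkers takes `n_steps` steps, every step is a random distance drawn uniformly between -1 and 1, and the figure follows the first 16 steps.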

In [None]:
# size of the simulation: number of walkers (friends) and number of steps
# each of them takes
n_friends, n_steps = 500, 32

In [None]:
# draw every step of every walker in one call: one row per friend, one
# column per step, each entry sampled uniformly between -1 and 1
step_shape = (n_friends, n_steps)
walks = np.random.uniform(-1, 1, size=step_shape)

In [None]:
# the running total of the steps along each row is the walker's position
# after every step
pos = walks.cumsum(axis=1)

In [None]:
# prepend a column of zeros (everyone starts on the halfway line) so that
# column k of pos holds the position after k steps
start = np.zeros((n_friends, 1))
pos = np.concatenate([start, pos], axis=1)

In [None]:
# number of walking paths to draw, and the row indices of the walkers
# picked for drawing (sampled without repeats)
n_lines = 50
idx = np.random.choice(n_friends, size=n_lines, replace=False)

In [None]:
# Figure: sampled walking paths on top; below them the distribution of
# positions after 4, 8 and 16 steps, each compared with a normal density.

# grid of positions at which the normal density is evaluated
x = np.linspace(-6,6,200)

# define figure
fig = plt.figure(figsize=(12,8))

# steps at which the distribution of positions is inspected
xcoords = [4,8,16]

# shared look of the dashed reference lines
dash_style = dict(c='black', alpha=0.5, dashes=(2,1,2,1))

# top row of the 2 x 3 grid, spanning all three columns: paths of the sampled
# walkers over columns 0..16 of pos (the start plus the first 16 steps)
ax1 = plt.subplot2grid((2, 3), (0, 0), rowspan=1, colspan=3)
plt.plot(pos[idx,:17].T, alpha=0.5, lw=1)
for xc in xcoords:
    plt.axvline(xc, **dash_style)
plt.title('Walking paths of some of your friends')
plt.xticks([0,4,8,16])
plt.ylabel('position')

# bottom row: one panel per inspected step (4, 8 and 16 steps)
for panel, step in enumerate(xcoords):
    ax2 = plt.subplot2grid((2, 3), (1, panel), rowspan=1, colspan=1)
    positions = pos[:, step]
    # red curve: normal density with mean 0 and the sample standard deviation
    plt.plot(x, st.norm.pdf(x, loc=0, scale=np.std(positions)), color='red')
    # kernel density estimate of the simulated positions
    sns.kdeplot(positions, bw=0.075, lw=2)
    plt.axvline(0, **dash_style)
    plt.title('{} steps'.format(step))
    plt.xlim(-6,6)
    plt.ylim(0,0.35)
    plt.xlabel('position')
    if panel == 0:
        # only the left-most panel carries the y-axis label
        plt.ylabel('density')

plt.savefig('random_walkers.jpg',dpi=300);

## some binomials

In [None]:
# Figure: PMFs of a Binomial with 10 trials for p = 0.25, 0.5 and 0.75,
# drawn side by side (left to right) and saved to 'binomial_pmfs.jpg'.

# define figure
fig = plt.figure(figsize=(16,4))

# number of trials, shared by all three panels
n = 10

# full support of the binomial: 0, 1, ..., n successes; k = 0 is included so
# that the plotted masses cover the whole distribution
x = np.arange(0, n + 1)

# (success probability, grid column) of each panel, in drawing order
panels = [(0.5, 1), (0.25, 0), (0.75, 2)]

axes = []
for p, col in panels:
    axes.append(plt.subplot2grid((1,3), (0, col), rowspan=1, colspan=1))

    # evaluate the PMF once and reuse it for the markers and the stems
    binom_pmf = st.binom.pmf(x, p=p, n=n)
    plt.plot(x, binom_pmf, 'bo', ms=8, label='binom pmf')
    plt.vlines(x, 0, binom_pmf, colors='b', lw=4, alpha=0.5)
    plt.title('PMF for Binomial, {} trials, p = {}'.format(n, p))
    plt.xlabel('Number of successes')
    plt.xticks(x)
    plt.ylabel('PMF')

# keep the axis handles under the names used before: ax1 is the p = 0.5
# panel, ax2 the p = 0.25 panel and ax3 the p = 0.75 panel
ax1, ax2, ax3 = axes

plt.savefig('binomial_pmfs.jpg',dpi=300);


In [None]:
# plot PMF and CDF of a Binomial(10, 0.5) side-by-side
# define figure
fig = plt.figure(figsize=(18,6))

# number of trials and success probability
n = 10
p = 0.5

# full support of the binomial: 0, 1, ..., n successes (k = 0 included, so
# the CDF staircase starts at the first point of the support)
x = np.arange(0, n + 1)

# evaluate both functions once and reuse them below
binom_pmf = st.binom.pmf(x, p=p, n=n)
binom_cdf = st.binom.cdf(x, p=p, n=n)

# plot PMF: 1 x 2 grid, left panel
ax1 = plt.subplot2grid((1,2), (0, 0), rowspan=1, colspan=1)
plt.plot(x, binom_pmf, 'bo', ms=8, label='binom pmf')
plt.vlines(x, 0, binom_pmf, colors='b', lw=4, alpha=0.5)
plt.title('PMF for Binomial, 10 trials, p = 0.5')
plt.xlabel('Number of successes')
plt.xticks(x)
plt.ylabel('PMF');

# plot CDF: 1 x 2 grid, right panel, drawn as a step function
ax2 = plt.subplot2grid((1, 2), (0, 1), rowspan=1, colspan=1)
# horizontal portions: the CDF stays at its value at k until k + 1
plt.hlines(binom_cdf[:-1], x[:-1], x[1:], colors='black', lw=4)
# vertical portions: dashed jump from the previous value to the new one
plt.vlines(x[1:], binom_cdf[:-1], binom_cdf[1:], colors='black', lw=4, linestyle='dashed')
# filled marker at the left end of each step (the value attained at k)
plt.plot(x, binom_cdf, 'o', color='black', markersize=8)
# hollow marker at the right end of each step (value not attained at k + 1)
plt.plot(x[1:], binom_cdf[:-1], 'o', color='black', markerfacecolor='white', markersize=8)
plt.title('CDF for Binomial, 10 trials, p = 0.5')
plt.xticks(x)
plt.xlabel('Number of successes')
plt.ylabel('CDF');

plt.savefig('binomial_pmf_cdf.jpg',dpi=300);

In [None]:
# Figure: PMFs of a Binomial with 10 trials for p = 0.25, 0.5 and 0.75,
# drawn side by side (left to right).
# NOTE(review): this cell saves to 'binomial_pmfs.jpg', the same file name an
# earlier cell of this notebook writes, so that file is overwritten here.

# define figure
fig = plt.figure(figsize=(16,4))

# number of trials, shared by all three panels
n = 10

# full support of the binomial: 0, 1, ..., n successes; k = 0 is included so
# that the plotted masses cover the whole distribution
x = np.arange(0, n + 1)

# (success probability, grid column) of each panel, in drawing order
panels = [(0.5, 1), (0.25, 0), (0.75, 2)]

axes = []
for p, col in panels:
    axes.append(plt.subplot2grid((1,3), (0, col), rowspan=1, colspan=1))

    # evaluate the PMF once and reuse it for the markers and the stems
    binom_pmf = st.binom.pmf(x, p=p, n=n)
    plt.plot(x, binom_pmf, 'bo', ms=8, label='binom pmf')
    plt.vlines(x, 0, binom_pmf, colors='b', lw=4, alpha=0.5)
    plt.title('PMF for Binomial, {} trials, p = {}'.format(n, p))
    plt.xlabel('Number of successes')
    plt.xticks(x)
    plt.ylabel('PMF')

# keep the axis handles under the names used before: ax1 is the p = 0.5
# panel, ax2 the p = 0.25 panel and ax3 the p = 0.75 panel
ax1, ax2, ax3 = axes

plt.savefig('binomial_pmfs.jpg',dpi=300);

## poisson pmf & cdf

The Poisson distribution is a discrete probability distribution that expresses the probability of a given number of events occurring in a fixed interval of time or space if these events occur with a known constant rate and independently of the time since the last event.

In [None]:
# largest event count shown on the plots
n = 10

# Poisson mean (expected number of events)
p = 1.5

# event counts 0, 1, ..., n
x = np.arange(n + 1)

# PMF at each count, rounded to three decimals (shown as the cell output)
st.poisson.pmf(x, p).round(3)

In [None]:
# plot the Poisson PMF and CDF side-by-side, using the counts x and the
# mean p defined in the preceding cell
# define figure
fig = plt.figure(figsize=(12,4))

# evaluate both functions once and reuse them below
poisson_pmf = st.poisson.pmf(x, p)
poisson_cdf = st.poisson.cdf(x, p)

# plot PMF: 1 x 2 grid, left panel
ax1 = plt.subplot2grid((1, 2), (0, 0), rowspan=1, colspan=1)
plt.plot(x, poisson_pmf, 'bo', ms=8, label='poisson pmf')
plt.vlines(x, 0, poisson_pmf, colors='b', lw=4, alpha=0.5)
# raw string so that the backslash in \lambda reaches matplotlib's mathtext
# unchanged instead of being read as a (invalid) Python escape sequence
plt.title(r'PMF for Poisson with $\lambda$ = '+str(p))
plt.xlabel('Asthma mortality rate')
plt.xticks(x)
plt.ylabel('PMF');

# plot CDF: 1 x 2 grid, right panel, drawn as a step function
ax2 = plt.subplot2grid((1, 2), (0, 1), rowspan=1, colspan=1)
# horizontal portions: the CDF stays at its value at k until k + 1
plt.hlines(poisson_cdf[:-1], x[:-1], x[1:], colors='black', lw=4)
# vertical portions: dashed jump from the previous value to the new one
plt.vlines(x[1:], poisson_cdf[:-1], poisson_cdf[1:], colors='black', lw=4, linestyle='dashed')
# filled marker at the left end of each step (the value attained at k)
plt.plot(x, poisson_cdf, 'o', color='black', markersize=8)
# hollow marker at the right end of each step (value not attained at k + 1)
plt.plot(x[1:], poisson_cdf[:-1], 'o', color='black', markerfacecolor='white', markersize=8)
plt.title(r'CDF for Poisson with $\lambda$ = '+str(p))
plt.xticks(x)
plt.xlabel('Asthma mortality rate')
plt.ylabel('CDF');

# NOTE(review): 'pfm' in the file name looks like a typo for 'pmf'; it is
# left unchanged in case other material refers to this file name
plt.savefig('poisson_pfm_cdf.jpg',dpi=300);

In [None]:
# histogram and density estimate of 5000 draws from a gamma distribution
# with shape 3 and scale 1/5 (i.e. rate 5), saved to 'gamma_3_5_pdf.jpg'
prior_draws = np.random.gamma(shape=3, scale=1/5, size=5000)
sns.distplot(prior_draws)
plt.title('Gamma(3,5) Distribution')
plt.xlabel('Asthma mortality rate')
plt.savefig('gamma_3_5_pdf.jpg',dpi=300);

In [None]:
# overlay 5000 draws from the posterior (gamma with shape 6, scale 1/7) on
# 5000 draws from the prior (gamma with shape 3, scale 1/5)
posterior_draws = np.random.gamma(shape=6, scale=1/7, size=5000)
sns.distplot(posterior_draws, label='posterior')

prior_draws = np.random.gamma(shape=3, scale=1/5, size=5000)
sns.distplot(prior_draws, label='prior')

plt.title('Posterior and Prior Distributions')
plt.xlabel('Asthma mortality rate')
plt.legend()
plt.savefig('asthma_posterior.jpg',dpi=300);