# Week 1
## Examples of distributions of discrete random variables

Experiment with the Binomial and Poisson probability mass functions and cumulative distribution functions.

## load libraries

In [None]:
# numerical libraries
import numpy as np
import scipy.stats as st

# plotting libraries
import matplotlib.pyplot as plt
%pylab inline

## binomial pmf & cdf

The binomial distribution with parameters n and p is the discrete probability distribution of the number of successes in a sequence of n independent experiments, each asking a yes–no question, and each with its own boolean-valued outcome: success/yes (with probability p) or failure/no (with probability q = 1 − p).

In [None]:
# Binomial parameters: number of trials (n) and per-trial success probability (p)
n, p = 4, 1/2

# support of the distribution: every possible success count 0, 1, ..., n
x = np.arange(n + 1)

# pmf evaluated over the whole support, rounded to 3 decimals for display
st.binom.pmf(x, n, p).round(3)

In [None]:
# cumulative distribution function over the same support, rounded to 3 decimals
st.binom.cdf(x, n, p).round(3)

In [None]:
# stem plot of the pmf: a dot at each probability on top of a translucent stem
binom_pmf = st.binom.pmf(x, n, p)  # evaluate once, reuse for dots and stems
plt.plot(x, binom_pmf, 'bo', ms=8, label='binom pmf')
plt.vlines(x, 0, binom_pmf, colors='b', lw=4, alpha=0.5)
plt.title('PMF for Binomial with (n, p) = ({}, {})'.format(int(n), p))
plt.xlabel('Number of successes in {} trials'.format(int(n)))
plt.xticks(x)  # one tick per possible success count
plt.ylabel('PMF')

In [None]:
# step plot of the CDF: flat between integers, jumping at each point of the support
binom_cdf = st.binom.cdf(x, n, p)  # evaluate once, slice below
below, above = binom_cdf[:-1], binom_cdf[1:]  # CDF value just before / at each jump
plt.hlines(below, x[:-1], x[1:], colors='black', lw=4)  # horizontal portions
plt.vlines(x[1:], below, above, colors='black', lw=4, linestyle='dashed')  # vertical portions
plt.plot(x, binom_cdf, 'o', color='black', markersize=8)  # filled marker: value attained at x
plt.plot(x[1:], below, 'o', color='black', markerfacecolor='white', markersize=8)  # open marker: value just before the jump
plt.title('CDF for Binomial with (n, p) = ({}, {})'.format(int(n), p))
plt.xticks(x)
plt.xlabel('Number of successes in {} trials'.format(int(n)))
plt.ylabel('CDF')

In [None]:
# PMF and CDF of the binomial distribution in a single two-panel figure
fig = plt.figure(figsize=(12,4))

# evaluate both functions once over the support
binom_pmf = st.binom.pmf(x, n, p)
binom_cdf = st.binom.cdf(x, n, p)
params_text = '({}, {})'.format(int(n), p)
trials_text = 'Number of successes in {} trials'.format(int(n))

# left panel: PMF as a stem plot (1 x 2 grid, first column)
ax1 = plt.subplot2grid((1, 2), (0, 0))
ax1.plot(x, binom_pmf, 'bo', ms=8)
ax1.vlines(x, 0, binom_pmf, colors='b', lw=3, alpha=0.5)
ax1.set_title('PMF for Binomial with (n, p) = ' + params_text)
ax1.set_xlabel(trials_text)
ax1.set_xticks(x)
ax1.set_ylabel('PMF')

# right panel: CDF as a step plot (1 x 2 grid, second column)
ax2 = plt.subplot2grid((1, 2), (0, 1))
below, above = binom_cdf[:-1], binom_cdf[1:]  # CDF value just before / at each jump
ax2.hlines(below, x[:-1], x[1:], colors='black', lw=3)  # horizontal portions
ax2.vlines(x[1:], below, above, colors='black', lw=3, linestyle='dashed')  # vertical portions
ax2.plot(x, binom_cdf, 'o', color='black', markersize=8)  # filled marker: value attained at x
ax2.plot(x[1:], below, 'o', color='black', markerfacecolor='white', markersize=8)  # open marker: value just before the jump
ax2.set_title('CDF for Binomial with (n, p) = ' + params_text)
ax2.set_xticks(x)
ax2.set_xlabel(trials_text)
ax2.set_ylabel('CDF')

## poisson pmf & cdf

The Poisson distribution is a discrete probability distribution that expresses the probability of a given number of events occurring in a fixed interval of time or space if these events occur with a known constant rate and independently of the time since the last event.

In [None]:
# NOTE: n and p are reused here with new meanings and overwrite the binomial
# values set earlier; re-run the binomial setup cell before re-running its plots.
n = 20  # largest event count to tabulate and plot
p = 6   # mean number of events (the Poisson rate, lambda)

# event counts 0, 1, ..., n at which the distribution is evaluated
x = np.arange(n + 1)

# pmf at each of those counts, rounded to 3 decimals for display
st.poisson.pmf(x, p).round(3)

In [None]:
# stem plot of the Poisson pmf: a dot at each probability on top of a translucent stem
pois_pmf = st.poisson.pmf(x, p)  # evaluate once, reuse for dots and stems
plt.plot(x, pois_pmf, 'bo', ms=8, label='poisson pmf')  # label now names the right distribution
plt.vlines(x, 0, pois_pmf, colors='b', lw=4, alpha=0.5)
# raw string: '\l' is not a valid Python escape, so '$\lambda$' must not be a plain literal;
# str(p) rather than str(int(p)) so a non-integer mean is not truncated in the title
plt.title(r'PMF for Poisson with $\lambda$ = ' + str(p))
plt.xlabel('Number of events')
plt.xticks(x)  # one tick per event count
plt.ylabel('PMF')

In [None]:
# step plot of the Poisson CDF: flat between integers, jumping at each event count
pois_cdf = st.poisson.cdf(x, p)  # evaluate once, slice below
below, above = pois_cdf[:-1], pois_cdf[1:]  # CDF value just before / at each jump
plt.hlines(below, x[:-1], x[1:], colors='black', lw=4)  # horizontal portions
plt.vlines(x[1:], below, above, colors='black', lw=4, linestyle='dashed')  # vertical portions
plt.plot(x, pois_cdf, 'o', color='black', markersize=8)  # filled marker: value attained at x
plt.plot(x[1:], below, 'o', color='black', markerfacecolor='white', markersize=8)  # open marker: value just before the jump
# raw string: '\l' is not a valid Python escape, so '$\lambda$' must not be a plain literal;
# str(p) rather than str(int(p)) so a non-integer mean is not truncated in the title
plt.title(r'CDF for Poisson with $\lambda$ = ' + str(p))
plt.xticks(x)
plt.xlabel('Number of events')
plt.ylabel('CDF')

In [None]:
# PMF and CDF of the Poisson distribution in a single two-panel figure
fig = plt.figure(figsize=(12,4))

# evaluate both functions once over the plotted event counts
pois_pmf = st.poisson.pmf(x, p)
pois_cdf = st.poisson.cdf(x, p)
# raw string: '\l' is not a valid Python escape, so '$\lambda$' must not be a plain literal;
# str(p) rather than str(int(p)) so a non-integer mean is not truncated in the titles
lambda_text = r'$\lambda$ = ' + str(p)

# left panel: PMF as a stem plot (1 x 2 grid, first column)
ax1 = plt.subplot2grid((1, 2), (0, 0))
ax1.plot(x, pois_pmf, 'bo', ms=8, label='poisson pmf')  # label now names the right distribution
ax1.vlines(x, 0, pois_pmf, colors='b', lw=4, alpha=0.5)
ax1.set_title('PMF for Poisson with ' + lambda_text)
ax1.set_xlabel('Number of events')
ax1.set_xticks(x)
ax1.set_ylabel('PMF')

# right panel: CDF as a step plot (1 x 2 grid, second column)
ax2 = plt.subplot2grid((1, 2), (0, 1))
below, above = pois_cdf[:-1], pois_cdf[1:]  # CDF value just before / at each jump
ax2.hlines(below, x[:-1], x[1:], colors='black', lw=4)  # horizontal portions
ax2.vlines(x[1:], below, above, colors='black', lw=4, linestyle='dashed')  # vertical portions
ax2.plot(x, pois_cdf, 'o', color='black', markersize=8)  # filled marker: value attained at x
ax2.plot(x[1:], below, 'o', color='black', markerfacecolor='white', markersize=8)  # open marker: value just before the jump
ax2.set_title('CDF for Poisson with ' + lambda_text)
ax2.set_xticks(x)
ax2.set_xlabel('Number of events')
ax2.set_ylabel('CDF')