# Code Chapter 01

## Typical imports

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import scipy

In [None]:
import arviz
import pymc3

## Thinking probabilistically

### Probability distributions

#### Investigating the normal distribution with Python

In [None]:
# Parameters of the standard normal distribution: mean and standard deviation.
µ = 0.0
σ = 1.0
# Frozen distribution object; it exposes pdf/cdf/rvs with the parameters fixed.
X = scipy.stats.norm(loc=µ, scale=σ)
# Draw 1000 random variates (unseeded, so the values differ between runs).
x = X.rvs(size=1000)

In [None]:
# Compute the summary statistics of the sample `x`, then display only its mean.
summary = scipy.stats.describe(x)
summary.mean

In [None]:
# Number of draws falling in [-1.96, 1.96), the central ~95% interval of a
# standard normal. The bounds must be symmetric: the count below the upper
# bound minus the count below the lower bound leaves the draws in between.
# With 1000 standard-normal draws in `x` the result should be close to 950.
(x < 1.96).sum() - (x < -1.96).sum()

#### The Gaussian (Normal) distribution

In [None]:
# Grid of Gaussian densities: one row per mean, one column per standard deviation.
mu_params = [-1, 0, 1]
sd_params = [0.5, 1, 1.5]
x = np.linspace(-7, 7, 100)

n_rows, n_cols = len(mu_params), len(sd_params)
_, ax = plt.subplots(n_rows, n_cols,
                     sharex='all', sharey='all',
                     figsize=(9, 7), constrained_layout=True)
for i, mu in enumerate(mu_params):
    for j, sd in enumerate(sd_params):
        panel = ax[i, j]
        # Evaluate the normal probability density function at every point of x
        y = scipy.stats.norm(mu, sd).pdf(x)
        panel.plot(x, y)
        # Empty, fully transparent line: it only carries the parameter values
        # into the legend.
        panel.plot([], label=f'μ={mu:3.2f}\nσ={sd:3.2f}', alpha=0)
        panel.legend(loc=1)
# Label one panel per direction (bottom-middle for x, middle-left for y).
ax[2, 1].set_xlabel('x')
ax[1, 0].set_ylabel('p(x)', rotation=0, labelpad=20)
# Remove the y-ticks; the density values are not needed to compare the shapes
ax[1, 0].set_yticks([])

### Independently and identically distributed variables

In [None]:
# Load the comma-separated Mauna Loa file into a 2-D array and plot its
# second column against its first (labelled as CO2 in ppmv versus year).
data = np.genfromtxt('./data/mauna_loa_CO2.csv', delimiter=',')
year, co2 = data[:, 0], data[:, 1]
plt.plot(year, co2)
plt.xlabel('year')
plt.ylabel('$CO_2$ (ppmv)')

## The "mandatory" coin flipping problem

### Choosing the likelihood (for the coin flipping problem)

In [None]:
# Grid of binomial probability mass functions: one row per number of trials N,
# one column per success probability θ.
n_params = [1, 2, 4]  # Number of trials
p_params = [0.25, 0.5, 0.75]  # Probability of success

# A binomial with N trials has outcomes 0..N inclusive. `np.arange` excludes
# its stop value, so add 1 to keep the outcome y = N for the largest N.
x = np.arange(0, max(n_params) + 1)
f, ax = plt.subplots(len(n_params), len(p_params),
                     sharex='all', sharey='all',
                     figsize=(8, 7), constrained_layout=True)

for i, n in enumerate(n_params):
    for j, p in enumerate(p_params):
        # Probability of each possible number of successes
        y = scipy.stats.binom(n=n, p=p).pmf(x)

        ax[i, j].vlines(x, 0, y, colors='C0', lw=5)
        ax[i, j].set_ylim(0, 1)
        # Invisible point: it only carries the parameter values into the legend
        ax[i, j].plot(0, 0, label=f'N = {n:3.2f}\nθ = {p:3.2f}', alpha=0)
        ax[i, j].legend()

# Figure-level labelling, done once after all panels are drawn:
# the number of successes y is on the horizontal axis, its probability on the
# vertical axis. Axes are shared, so ticks set on one panel apply to all.
ax[2, 1].set_xlabel('y')
ax[1, 0].set_ylabel('p(y | Θ, N)')
ax[0, 0].set_xticks(x)

### Choosing the prior (for the coin flipping problem)

In [None]:
# Grid of Beta densities: rows vary α, columns vary β, over the same values.
params = [0.5, 1, 2, 3]
x = np.linspace(0, 1, 100)
f, ax = plt.subplots(len(params), len(params),
                     sharex='all', sharey='all',
                     figsize=(8, 7), constrained_layout=True)

for i, a in enumerate(params):
    for j, b in enumerate(params):
        y = scipy.stats.beta(a, b).pdf(x)

        ax[i, j].plot(x, y)
        # Invisible point: it only carries the parameter values into the legend
        ax[i, j].plot(0, 0, label=f'α = {a:2.1f}\nβ = {b:2.1f}', alpha=0)
        ax[i, j].legend()
ax[1, 0].set_yticks([])  # no y-ticks - visual relative appearance is sufficient
ax[1, 0].set_xticks([0, 0.5, 1])  # Beta distribution has domain of [0, 1]
# Label the axes of the whole **figure** (coordinates are figure fractions)
f.text(0.5, 0.05, 'Θ', ha='center')  # x-axis label, centred along the bottom
# y-axis label in the left margin; rotation=0 keeps the text horizontal
f.text(0.07, 0.5, 'p(Θ)', va='center', rotation=0)

### Computing and plotting the posterior

In [None]:
plt.figure(figsize=(10, 8))

# Observed data: data[k] heads were seen after n_trials[k] coin flips
n_trials = [0, 1, 2, 3, 4, 8, 16, 32, 50, 150]
data = [0, 1, 1, 1, 1, 4, 6, 9, 13, 48]
theta_real = 0.35

# (α, β) pairs of the three Beta priors being compared
beta_params = [(1, 1), (20, 20), (1, 4)]
dist = scipy.stats.beta
x = np.linspace(0, 1, 200)  # Evaluation grid over [0, 1]

for idx, (N, y) in enumerate(zip(n_trials, data)):
    # The zero-trials panel sits alone in the middle of the first row;
    # every other panel is shifted by 3 so it starts on the second row.
    is_first_panel = idx == 0
    plt.subplot(4, 3, 2 if is_first_panel else idx + 3)
    if is_first_panel:
        plt.xlabel('Θ')
    else:
        plt.xticks([])

    for a_prior, b_prior in beta_params:
        # The Beta distribution is the conjugate prior of the binomial
        # likelihood: a Beta(α, β) prior updated with y heads in N trials
        # gives a Beta(α + y, β + N - y) posterior, so no numerical
        # integration is needed. With N = 0 this is the prior itself.
        p_theta_given_y = dist.pdf(x, a_prior + y, b_prior + N - y)
        plt.fill_between(x, 0, p_theta_given_y, alpha=0.7)

    # Short vertical marker at the value stored in `theta_real`
    plt.axvline(theta_real, ymax=0.3, color='k')
    # Invisible point: it only carries the trial/heads counts into the legend
    plt.plot(0, 0, label=f'{N:4d} trials\n{y:4d} heads', alpha=0)
    plt.xlim(0, 1)  # Domain of the Beta distribution
    plt.ylim(0, 12)
    plt.legend()
    plt.yticks([])
plt.tight_layout()