**Problem 4.2**
First we load in our workhorses and our data.

In [2]:
import collections
import itertools

# Our numerical workhorses
import numpy as np
import pandas as pd
import scipy.optimize
import scipy.stats as st

# Numerical differentiation packages
import numdifftools as ndt

# Our main MCMC package
import emcee

# Import pyplot for plotting
import matplotlib.pyplot as plt

# Seaborn, useful for graphics
import seaborn as sns

# Corner is useful for displaying MCMC results
import corner

# JB's favorite Seaborn settings for notebooks; the same overrides are
# handed to both the plotting context and the style.
rc = {
    'lines.linewidth': 2,
    'axes.labelsize': 18,
    'axes.titlesize': 18,
    'axes.facecolor': 'DFDFE5',
}
sns.set_context('notebook', rc=rc)
sns.set_style('darkgrid', rc=rc)

# Load the viral load data; lines starting with '#' in the CSV are comments
df = pd.read_csv('./data/hiv_data.csv', comment='#')

OSError: File b'./data/hiv_data.csv' does not exist

The Perelson model for the viral load data is given to us as:
\begin{align}
V(t) = V_0 e^{-ct} + \frac{cV_0}{c - \delta}\left[\frac{c}{c - \delta}\left(e^{-\delta t} - e^{-ct}\right) - \delta t e^{-ct}\right]
\end{align}
So we define the Perelson model as our viral load model.

In [3]:
# Viral load V(t), following the equation given in the problem
def ViralLoad(params, t):
    '''
    Evaluate the viral load V(t).

    params : sequence unpacked as (c, delta, V_0)
    t      : time (scalar or array-like) at which to evaluate V

    When c and delta differ by less than 1e-9 the general expression
    divides by (c - delta) ~ 0, so the c -> delta limit of the formula
    is returned instead.
    '''
    # Unpack parameters
    c, delta, V_0 = params
    rate_gap = c - delta

    # Limiting form of V(t) as c -> delta
    if abs(rate_gap) < 1e-9:
        x = delta * t
        return V_0 * (1 + x + x**2 / 2) * np.exp(-x)

    # The two exponential decays that appear throughout the expression
    decay_c = np.exp(-c * t)
    decay_delta = np.exp(-delta * t)

    # Term in square brackets of V(t)
    bracket = c / rate_gap * (decay_delta - decay_c) - delta * t * decay_c

    return V_0 * decay_c + c * V_0 / rate_gap * bracket


We work with the logarithms of the parameters, so we define a version of the model that takes log parameters and exponentiates them before evaluating $V(t)$.

In [4]:
# Wrapper so the model can be evaluated from log-transformed parameters
def ViralLoadParam(params, t):
    '''
    Viral load evaluated from log parameters.

    params is exponentiated element-wise and the result is handed to
    ViralLoad together with t.
    '''
    linear_params = np.exp(params)
    return ViralLoad(linear_params, t)

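Our posterior is a Student-t distribution: the variance of the measurement errors is unknown, so we marginalize over it, which leaves $P(c, \delta, V_0 \mid D) \propto \left[\sum_i \left(V_i - V(t_i)\right)^2\right]^{-n/2}$, where $n$ is the number of data points. **TODO: figure out what the Student-t distribution is.**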

In [5]:
# Residuals feeding the Student-t posterior (unknown variance marginalized)
def resid(params, t, V):
    '''
    Residuals for viral load: measured V minus the model prediction.

    params is passed unchanged to ViralLoadParam, which exponentiates it
    before evaluating the model at times t.
    '''
    predicted = ViralLoadParam(params, t)
    return V - predicted

# Define our posterior
def log_posterior(params, t, V):
    '''
    Log posterior of our HIV data, up to an additive constant.

    With the unknown variance marginalized out, the log posterior is
    -n/2 * log(sum of squared residuals), where n = len(t).

    params : log-transformed model parameters (log c, log delta, log V_0)
    t      : measurement times
    V      : measured viral loads at those times

    Returns -inf when the sum of squared residuals is not finite (e.g.
    the exponentials overflowed), so a sampler rejects that point
    instead of receiving NaN.
    '''
    # resid() evaluates the model through ViralLoadParam, which already
    # exponentiates the parameters.  Exponentiating here as well would
    # evaluate the model at exp(exp(params)), so pass params through as-is.
    sum_sq = np.sum(resid(params, t, V)**2)

    if not np.isfinite(sum_sq):
        return -np.inf

    return -len(t) / 2 * np.log(sum_sq)

Now we package the measurement times and viral loads as the extra arguments that are passed to the log posterior.

In [6]:
# Extra arguments for log_posterior, in (t, V) order: times, then viral loads
args = tuple(df[column] for column in ('Days after administration',
                                       'RNA copies per mL'))

NameError: name 'df' is not defined

Now we define the specifications to use MCMC.

In [9]:
# Dimensions of the problem: number of walkers in the ensemble and
# number of parameters each walker carries
nwalkers, ndim = 50, 3

# Number of burn-in steps, and number of steps MCMC takes after the burn-in
nburn, nsteps = 500, 1000

# Seed the random number generator so the run is reproducible
np.random.seed(42)

We need to tell the walkers where to start. $p_0[i,j]$ is the starting point of walker $i$ for parameter $j$. For $c$ and $\delta$ we draw `nwalkers` random numbers uniformly from the interval (0, 10); for $V_0$ we draw `nwalkers` random numbers from an exponential distribution with mean 0.1.

In [10]:
# Starting positions of the walkers: p0[i, j] is walker i, parameter j.
# Columns 0 and 1 (c and delta) are uniform on [0, 10); column 2 is drawn
# from an exponential distribution with scale 0.1.
p0 = np.empty((nwalkers, ndim))
p0[:, 0] = np.random.uniform(low=0, high=10, size=nwalkers)
p0[:, 1] = np.random.uniform(low=0, high=10, size=nwalkers)
p0[:, 2] = np.random.exponential(scale=0.1, size=nwalkers)

Next we instantiate our sampler.

In [11]:
# Instantiate the ensemble sampler; `args` holds the extra arguments
# supplied to log_posterior
sampler = emcee.EnsembleSampler(
    nwalkers, ndim, log_posterior, args=args
)

NameError: name 'args' is not defined

Now we have to do a burn-in.

In [12]:
# Burn-in: advance the walkers nburn steps starting from p0.
# NOTE(review): `storechain` looks like the emcee 2.x keyword -- confirm
# against the installed emcee version.
pos, prob, state = sampler.run_mcmc(
    p0, nburn, storechain=False
)

NameError: name 'sampler' is not defined

And lastly we run the sampler.

In [13]:
# Production run: continue from the positions `pos` for nsteps steps;
# the return value is bound to `_` because it is not used here.
_ = sampler.run_mcmc(pos, nsteps)

NameError: name 'sampler' is not defined