In [12]:
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import pystan

# 1. Call Center Data
Data: waiting times for the 13th hour of a day in a call center<br>
Prior distribution: Gamma distribution with $\alpha = 1$ and $\beta = 0.25$<br>
Likelihood function: exponential with parameter $\lambda$<br>
Parameters: rate $\lambda$<br>
Posterior: Gamma distribution over $\lambda$<br>

In [13]:
## import the dataset (code from call_center_solution.ipynb)
waiting_times_day = np.loadtxt('call_center.csv')

# Split the data into 24 separate series, one for each hour of the day
current_time = 0
waiting_times_per_hour = [[] for _ in range(24)]  # Make 24 empty lists, one per hour
for t in waiting_times_day:
    current_hour = int(current_time // 60)
    current_time += t
    waiting_times_per_hour[current_hour].append(t)
    
# use just the 13th hour of the day
waiting_times_hour = waiting_times_per_hour[13]

In [15]:
call_center_data = {
    '13': {
        'alpha': 1,  # fixed prior hyperparameters for the
        'beta': 0.25,   # gamma distribution
        'num_calls': len(waiting_times_hour),  # number of calls coming in
        'waiting_times': waiting_times_hour} # data set on waiting times
    }

In [24]:
calls_stan_code = """

// The data block contains all known quantities - typically the observed
// data and any constant hyperparameters.
data {  
    int<lower=1> num_calls;  // number of calls
    real<lower=0> waiting_times[num_calls];  // waiting times
    real<lower=0> alpha;  // fixed prior hyperparameter
    real<lower=0> beta;   // fixed prior hyperparameter
}

// All unknown quantities, in this case the waiting time lambda
parameters {
    real<lower=0,upper=1> lambd;  // rate lambda for the exponential
}

// The model block contains all probability distributions in the model.
model {
    lambd ~ gamma(alpha, beta);  // prior over p
    for(i in 1:num_calls) {
        waiting_times[i] ~ exponential(lambd);  // likelihood function
    }
}

"""

In [25]:
calls_stan_model = pystan.StanModel(model_code=calls_stan_code)

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_49cc200ef4ffd7cdd272093cb453bd5d NOW.


In [26]:
calls_stan_results = calls_stan_model.sampling(data=call_center_data['13'])

In [28]:
print(calls_stan_results)

Inference for Stan model: anon_model_49cc200ef4ffd7cdd272093cb453bd5d.
4 chains, each with iter=2000; warmup=1000; thin=1; 
post-warmup draws per chain=1000, total post-warmup draws=4000.

        mean se_mean     sd   2.5%    25%    50%    75%  97.5%  n_eff   Rhat
lambd    1.0  8.8e-5 3.1e-3   0.99    1.0    1.0    1.0    1.0   1281    1.0
lp__  -67.72    0.03   0.81 -70.07 -67.93 -67.39 -67.17  -67.1   1034    1.0

Samples were drawn using NUTS at Wed Oct  7 11:23:40 2020.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).


# 2. Normal likelihood with normal-inverse-gamma prior

# 3. Log-normal HRTEM data