# Import packages
Use kernel "ABM_env" -- see README.

In [None]:
from __future__ import division
import numpy as np
import scipy.stats
import matplotlib.pyplot as plt
import math
import time
from tqdm import tqdm
import cProfile
import pickle
from scipy.stats import gaussian_kde
from scipy import interpolate
import pandas as pd
from math import sqrt
from multiprocessing import Pool
import multiprocessing as mp
import time
import math
import os
from amcmc import ammcmc

from ABM import SEIR_multiple_pops
from run_simulations import simulate_epidemic_1d
from CalibrationMethod1_methods import *

In [None]:
# ----- Model / simulation parameters -----
# Entries without a description below carry none in the original notes;
# their meaning is defined by the ABM code that consumes them.

# Sub-population geometry
m = 1  # number of populations
centers = np.array([[0, 0]])  # theta, phi or x, y
spread = np.array([0.1])  # standard deviation of normal distribution
pop = np.array([100])  # population
R = 1  # radius

# Mobility
A_1 = 0.01  # theta or x mobility (azimuth mobility)
A_2 = 0.01  # phi or y mobility (inclination mobility)
d_IU = 0.005

# Initial compartment fractions
S_0 = np.array([0.99])  # fraction of initially susceptible
E_0 = np.array([0])  # fraction of initially exposed
I_0 = np.array([0.01])  # fraction of initially infected

# Disease time scales
T_E = 11.6  # time from exposure to infectious
T_E_stdev = 1.9  # standard deviation of exposure time
T_I = 18.49  # incubation time
T_I_stdev = 3.71  # standard deviation of infection time
dist = 'Gamma'

# Time stepping
T = 300
del_t = 0.1  # time step
verlet_iter = 300  # number of steps between updating verlet list
rand_seed = 1

# Remaining switches
g = None
al = None
spherical = None
jump_prob = 0.5

# Number of points on the time grid (both end points included); shared by
# the two arrays below so they always have the same length.
n_time_points = int(T / del_t) + 1
jumping_times = np.zeros(n_time_points)
time_vec = np.linspace(0, T, n_time_points)
    

In [None]:
# Load in training data.
# NOTE(security): pickle.load can execute arbitrary code contained in the
# file -- only load data files that come from a trusted source.
with open('./Data/Training Data/One-parameter case/Calibration method 1/new_I_data_One-Pop-Disc.pickle', "rb") as data_file:
    data = pickle.load(data_file)

# Load in training data mobility, jumping probability and random seed values
# (columns 0, 1 and 2 of the CSV once its index column is dropped):
parameter_matrix = pd.read_csv('./Data/Training Data/One-parameter case/Calibration method 1/variable_parameter_values_One-Pop-Disc.csv', index_col=0).to_numpy()
mobilities = parameter_matrix[:,0]
jumping_probs = parameter_matrix[:,1]
random_seeds = parameter_matrix[:,2]

# Reshape to sort by mobility.
# The code below reads the number of time steps from data.shape[1] and
# reshapes to three axes, so `data` is presumably stored as
# (num_of_sims, num_of_time_steps) -- TODO confirm against the data file.
# It also assumes that, within the list of simulations, the mobility is the
# "outer" index (changes more slowly) and the random seed the "inner" one.

unique_mobilities = np.unique(mobilities)
unique_jumping_probs = np.unique(jumping_probs)

num_of_mobilities = unique_mobilities.shape[0]
num_of_jumping_probs = unique_jumping_probs.shape[0]

# Assumes an equal number of random seeds was run for each parameter set.
num_of_random_seeds_per_param_set = round(data.shape[0]/(num_of_mobilities*num_of_jumping_probs))
num_of_time_steps = data.shape[1]
# num_of_subpops = 2 #assumes 2 sub-populations

# Target layout before the axis swap: (mobility, random seed, time step).
# np.reshape raises ValueError if the element count does not match, which
# catches a violated "equal number of seeds" assumption early.
sorted_shape = (num_of_mobilities, num_of_random_seeds_per_param_set, num_of_time_steps)
data = np.reshape(data, sorted_shape)  # C-order, same as ravel('C') + reshape

# Final layout: (time step, random seed, mobility).
data = np.swapaxes(data, 0, 2)

## Data processing: Sum training dataset along time segments

In [None]:
num_of_intervals = 5 #number of time intervals to split up for likelihood calculation

time_steps = data.shape[0]

# Segment edges along the time axis: num_of_intervals + 1 indices, equally
# spaced by the integer segment length, with the final edge moved to
# time_steps so the last segment absorbs any remainder.
# Building the edges explicitly (rather than with np.arange(0, time_steps,
# step)) guarantees the right number of edges even when time_steps is an
# exact multiple of num_of_intervals.
intervals = np.arange(num_of_intervals + 1) * (time_steps // num_of_intervals)
intervals[-1] = time_steps

# new_I_per_interval[k] holds data summed over the time steps of segment k;
# the two trailing axes of `data` are preserved.
new_I_per_interval = np.zeros((num_of_intervals,data.shape[1],data.shape[2]))

for i, (start, stop) in enumerate(zip(intervals[:-1], intervals[1:])):
    new_I_per_interval[i,:,:] = np.sum(data[start:stop,:,:], axis = 0)

In [None]:
#checking that data makes sense:
# Overlay one run's per-time-step series (left axis) with its per-segment
# sums (right axis). The trailing indices 10 and 15 pick a single example
# run from the last two axes of `data`.
fig, ax = plt.subplots(constrained_layout=True, dpi = 200)
ax.plot(time_vec,data[:,10,15], zorder = 1, label = 'New infections at each time step')
ax.set_ylim(0)
ax.set_ylabel('Number of new infections at time step')

# `intervals` holds time-step indices; multiply by the step size to get the
# segment edges in the same units as time_vec (instead of hard-coding the
# number of steps per day).
segment_edges = np.asarray(intervals) * del_t

ax2 = ax.twinx()
ax2.stairs(new_I_per_interval[:,10,15], segment_edges, fill = True, color = 'grey', alpha = 0.5, zorder = -1, label = 'Sum of infections during time segment')
ax2.set_ylabel('Sum of infections during time segment')
fig.legend(bbox_to_anchor=(0.9, 0.95))
ax.set_xlabel('Time (days)')
ax.set_title('Example of initial and segmented dataset for a single run')
plt.show()

# Generate expanded kernel density estimate PDFs for each time segment and mobility value

To correctly interpolate between probability distributions, we may require KDE values from outside the desired final probability distribution range (0-100 agents). This means we need to calculate KDE values on an extended grid to set up the interpolation.

In [None]:
#-------------------------PARAMETER DEFINITIONS----------------------

# Passed to return_zero_stdev_pdf when a KDE cannot be fitted.
shape_param = 0.5
type_ = "linear"

#--------------------------KDE & MEANS, VARIANCES, STDEVS------------------------------

# Summary statistics and a density function for every
# (time segment, mobility value) pair.
means = np.zeros((num_of_intervals, num_of_mobilities))
variances = np.zeros((num_of_intervals, num_of_mobilities))
stdevs = np.zeros((num_of_intervals, num_of_mobilities))
stdevs_approx = np.zeros((num_of_intervals, num_of_mobilities)) #approximate stdev for 0-stdev pdfs based on replacement approximation function

# Object array of callables; every entry is filled in the loop below.
KDE_fns = np.empty((num_of_intervals, num_of_mobilities), dtype=object)

for interval in tqdm(range(num_of_intervals)):
    for mob_ind in range(num_of_mobilities):
        # All samples (second axis) for this segment and mobility value.
        segment_counts = new_I_per_interval[interval, :, mob_ind]
        sample_stdev = np.std(segment_counts)

        means[interval, mob_ind] = np.mean(segment_counts)
        variances[interval, mob_ind] = np.var(segment_counts)
        stdevs[interval, mob_ind] = sample_stdev
        # Overwritten below when the replacement density is used instead.
        stdevs_approx[interval, mob_ind] = sample_stdev

        try:
            # Fails for degenerate samples, e.g. when all values are 0's
            # (singular covariance) or there are too few points.
            kde = gaussian_kde(segment_counts)
        except (np.linalg.LinAlgError, ValueError):
            # Fall back to the replacement density centred on the sample mean.
            # NOTE: `mean` is deliberately left as a module-level name because
            # f_pdf passes it on to interp_KDE_1d.
            mean = means[interval, mob_ind]
            KDE_fns[interval, mob_ind], stdev_of_approx_fn = return_zero_stdev_pdf(mean, shape_param, type_)
            stdevs_approx[interval, mob_ind] = stdev_of_approx_fn
        else:
            KDE_fns[interval, mob_ind] = kde.pdf
            

In [None]:
def f_pdf(sample_point, new_I_per_interval_ref):
    """Return the summed log-density of the observed counts for one mobility.

    Parameters
    ----------
    sample_point : sequence
        Only the first element is used; it is the candidate mobility value.
    new_I_per_interval_ref : sequence
        Indexed by time-segment number; entry ``k`` is evaluated with the
        interpolated density of segment ``k``.

    Returns
    -------
    list
        ``[log_probability, 0]``. The first entry is ``-1e100`` when the
        mobility lies outside ``[0.005, 0.025]`` or when the summed
        log-density equals ``-inf``. The second entry is always 0
        (presumably the slot the sampler expects for a log-prior -- TODO
        confirm against the ammcmc interface).
    """
    mobility = sample_point[0]

    # Hard-coded support of the prior: outside it the interpolation does not
    # work and the prior would be 0 anyway, so skip the computation.
    if not (0.005 <= mobility <= 0.025):
        return [-1e100, 0]

    # NOTE(review): `mean` is a module-level name that is only assigned in the
    # fallback branch of the KDE-fitting cell; confirm that this is the value
    # interp_KDE_1d is meant to receive.
    _x_grid, KDE, _interp_mean, _interp_var, _interp_stdev_approx = interp_KDE_1d(
        mobility, unique_mobilities,
        means, variances, stdevs, stdevs_approx, KDE_fns,
        mesh_number=201, mesh_min=0, mesh_max=100,
        mesh_min_extended=0, mesh_max_extended=100,
        add_to_stdev=0, shape_param=0.5, type_="linear",
        mean=mean, renormalize=True)

    # One log-density per time segment, evaluated at the observed count.
    log_probs = np.zeros_like(KDE)
    for segment in range(num_of_intervals):
        segment_density = KDE[segment]
        log_probs[segment] = np.log(segment_density(new_I_per_interval_ref[segment]))

    total_log_prob = np.sum(log_probs)
    if total_log_prob == -np.inf:
        # Zero density in at least one segment: report a large finite penalty.
        return [-1e100, 0]
    return [total_log_prob, 0]

### Import test/sample data:

In [None]:
# Combined batch of MCMC test data.
# NOTE(security): pickle.load can execute arbitrary code contained in the
# file -- only load data files that come from a trusted source.
# NOTE: this cell rebinds `data`, `parameter_matrix`, `mobilities`,
# `random_seeds`, `intervals` and `new_I_per_interval`, replacing the
# training-data values of the same names.
with open('./Data/Test Data/One-parameter case/new_I_data_One-Pop-NEW-COMBINED-TEST.pickle', "rb") as data_file:
    data = pickle.load(data_file)

parameter_matrix = pd.read_csv('./Data/Test Data/One-parameter case/variable_parameter_values_One-Pop-NEW-COMBINED-TEST.csv', index_col=0).to_numpy() 

# Load in mobility and random seed values (columns 0 and 1):
mobilities = parameter_matrix[:,0]
random_seeds = parameter_matrix[:,1]
num_of_samples = mobilities.shape[0]

# Segment edges along the time axis, built from the number of time steps of
# the training data (num_of_time_steps) so the test segments line up with
# the ones the densities were fitted on. The final edge is moved to
# num_of_time_steps so the last segment absorbs any remainder; constructing
# num_of_intervals + 1 edges explicitly also works when the step count is an
# exact multiple of num_of_intervals.
intervals = np.arange(num_of_intervals + 1) * (num_of_time_steps // num_of_intervals)
intervals[-1] = num_of_time_steps

# new_I_per_interval[s, k]: data of sample s summed over time segment k.
new_I_per_interval = np.zeros((num_of_samples, num_of_intervals))

for i in range(num_of_intervals):
    data_ = data[:,intervals[i]:intervals[i+1]]
    print(data_.shape)
    new_I_per_interval[:,i] = np.sum(data_, axis = 1)

In [None]:
import os
from pathlib import Path

import matplotlib.pyplot as plt

#------------------Choose Trial dataset (to try to calibrate to):---------------------
# Row of the test data set that the sampler is calibrated against.
sample_data_set_ind = 1
new_I_per_interval_ref = new_I_per_interval[sample_data_set_ind]
mob_value = mobilities[sample_data_set_ind]
RUN_NAME = "One-Pop"
print('actual mobility:', mob_value)

#-----------------Run Trial MCMC---------------------

# Short demonstration run.
nsamples = 500
nburn = 50
seed = 100
end = nsamples

# Alternative settings kept from the original notes:
# nsamples = 75000
# seed=100
# nburn = 5000
# end = nsamples

# Sub-sampling applied to the chain after the run (start offset, stride).
nskip = 0
nthin = 1

# Output locations for the sampler's chain file and log file.
tmpchn_dir = f"./MCMC_example_results/{RUN_NAME}"
logfile_dir = f"./MCMC_example_results/{RUN_NAME}"
for out_dir in (tmpchn_dir, logfile_dir):
    os.makedirs(out_dir, exist_ok = True)

tmpchn = f"{tmpchn_dir}/amcmc_TMP_ABM_sample_ind_{sample_data_set_ind}.dat"
logfile = f"{logfile_dir}/amcmc_LOG_ABM_sample_ind_{sample_data_set_ind}.dat"

# Remove the files of a previous run if they exist.
for stale_file in (tmpchn, logfile):
    if os.path.isfile(stale_file):
        os.remove(stale_file)

# Options handed to ammcmc; the initial state and the spllo/splhi bounds
# match the mobility range accepted by f_pdf.
opts = dict(
    nsteps=nsamples,
    nfinal=10000000,
    gamma=1,
    inicov=np.array([0.001]),
    inistate=np.array([0.0151]),
    spllo=np.array([0.005]),
    splhi=np.array([0.025]),
    logfile=logfile,
    burnsc=5,
    nburn=nburn,
    nadapt=100,
    coveps=1.e-10,
    ofreq=50,
    tmpchn=tmpchn,
    rnseed=sample_data_set_ind,
)

ndim = 1
np.random.seed(seed)

print('Sampling f_pdf function with AMCMC ...')
start_time = time.time()
sol = ammcmc(opts, f_pdf, new_I_per_interval_ref)
end_time = time.time()
elapsed_time = end_time - start_time
print(f"Elapsed AMCMC Run Time: {elapsed_time} seconds")

# Chain values and the second column of the sampler's 'minfo' output,
# sub-sampled with nskip / nthin.
samples = sol['chain'][nskip::nthin]
logprob = sol['minfo'][:,1][nskip::nthin]

#save data:
with open(f"{tmpchn_dir}/AMCMC_sample_ind_{int(sample_data_set_ind)}.pickle", 'wb') as handle:
    pickle.dump(sol, handle, protocol=pickle.HIGHEST_PROTOCOL)

print('Acceptance rate',sol['accr'])
print('Mean:',np.mean(samples, axis=0))
print('Var:',np.var(samples, axis=0))
print('Cov:',np.cov(samples.T)) 

# Percentiles of the chain with its first nburn entries dropped.
post_burn_chain = sol['chain'][nburn:]
upper_percentile = np.percentile(post_burn_chain, 97.5)
lower_percentile = np.percentile(post_burn_chain, 2.5)
upper_percentile_50 = np.percentile(post_burn_chain, 75)
lower_percentile_50 = np.percentile(post_burn_chain, 25)

print('95% credible interval upper bound:', upper_percentile) 
print('95% credible interval lower bound:',lower_percentile)

#----------Brute force posterior sampling----------
# Evaluate f_pdf on a regular grid of mobility values and store the results
# both as a text file (written incrementally) and as a pickle.

save_dir = './Brute_force_posterior_estimation_example/'+RUN_NAME
os.makedirs(save_dir, exist_ok = True)
save_file = save_dir+'/new_I_data_'+str(int(sample_data_set_ind))+'LOG_PROBS.txt'

if os.path.isfile(save_file): #remove previous run if it exists
    os.remove(save_file)


n_m = 15
# n_m = 200
# Grid strictly inside the range of training mobilities (nudged inwards by
# 1E-10 at both ends).
test_mobilities = np.linspace(unique_mobilities[0]+1E-10, unique_mobilities[-1]-1E-10, n_m)

log_probs = np.zeros((test_mobilities.shape[0]))
start_time = time.time()
for j, test_mobility in enumerate(test_mobilities):
    log_probs[j] = f_pdf([test_mobility], new_I_per_interval_ref)[0]

    # Append one "mobility log_prob" row per evaluation so partial results
    # are on disk if the loop is interrupted; the context manager closes the
    # file even if writing fails.
    dataout = np.array([[test_mobility,log_probs[j]]])
    with open(save_file, 'ab') as fout:
        np.savetxt(fout, dataout, fmt='%.8e',delimiter=' ', newline='\n')
end_time = time.time()
elapsed_time = end_time - start_time
print(f"Elapsed brute force posterior estimation run time: {elapsed_time} seconds")

with open(save_dir+'/new_I_data_'+str(int(sample_data_set_ind))+'LOG_PROBS.pickle', 'wb') as handle:
    pickle.dump(log_probs, handle, protocol=pickle.HIGHEST_PROTOCOL)
    


In [None]:
#----------------Plotting-------------------
# Uses `sol`, `nburn`, `end`, the percentile bounds, `mob_value`,
# `test_mobilities` and `log_probs` from the MCMC / brute-force cell.
# `nburn` is intentionally NOT redefined here, so the histogram discards the
# same burn-in as the percentile lines drawn on top of it.

#Plot chain values:
fig = plt.figure(dpi = 300)
plt.plot(sol['chain'])
plt.xlabel('Iterations')
plt.ylabel('Mobility')
plt.title('MCMC chain')
plt.show()

#Plot histogram, 95% credible interval bounds, true mobility value
post_burn_chain = sol['chain'][nburn:end]
bins = np.linspace(0.005,0.025,20)
# Bin counts are only needed to set the upper y-limit of the histogram.
counts, bin_edges = np.histogram(post_burn_chain, bins = bins)

fig, ax1 = plt.subplots(dpi=300, figsize = (4,3))

ax1.hist(post_burn_chain, bins = bins, label = 'MCMC results')

ax1.set_ylim(0,max(counts))
ax1.set_ylabel('Frequency in MCMC chain')
ax1.set_xlabel('Mobility')

# Only the upper bound of each interval carries a label so the legend shows
# one entry per interval.
ax1.axvline(upper_percentile, color = 'red', label = '95% CI')
ax1.axvline(lower_percentile, color = 'red')
ax1.axvline(upper_percentile_50, color = 'gold', label = '50% CI')
ax1.axvline(lower_percentile_50, color = 'gold')

ax1.axvline(mob_value, color = 'limegreen', label = 'True mobility', linestyle = '--')

fig.legend(bbox_to_anchor = (1.3,0.85), borderaxespad=0.)
ax1.set_xticks(unique_mobilities[::4])

# plt.savefig('figs_final/1d_MCMC_results/histogram-plot-sample'+str(sample_id)+'.png',bbox_inches='tight')

plt.show()

#Plot brute force posterior sampling results
fig = plt.figure(dpi = 300)
plt.plot(test_mobilities, np.exp(log_probs))
plt.xlabel('Mobility')
plt.ylabel('Probability density')
plt.title('Brute force sampled posterior')
plt.show()
