# Import packages
Use kernel "ABM_env" -- see README.

In [None]:
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import math
import time
from tqdm import tqdm
import cProfile
import pickle
import scipy as sci
import pandas as pd

import importlib
from ABC_2D_case_class import *
from ABC_2D_data_set_class import * 
%run ABC_2D_data_set_class.py
%run ABC_2D_case_class.py
%run ABC_2D_weight_functions.py

# Guard flag for pickling the case set: the save step further down only runs
# while this equals 1, and it is reset to 0 again right after saving.
case_set_allow_save=0 # Protection from accidental saving

# Example usage

In [None]:
# Build a small case set: test data to calibrate against plus a slice of the training data.

# --- Sample/test data (the calibration targets) ---
data_file = './Data/Test Data/Two-parameter case/new_I_data_Two-Pop-NEW-COMBINED-TEST.pickle'
params_file = './Data/Test Data/Two-parameter case/variable_parameter_values_Two-Pop-NEW-COMBINED-TEST.csv'
sample_data = data_set(
    data_file=data_file,
    params_file=params_file,
    T=300,
    mob_range=[0.005, 0.025],
    jp_range=[0., 0.001],
)

# --- Training data: only rows segment_start..segment_end are requested,
#     because the full training set is very large ---
segment_start = 0
segment_end = 10000

data_file = './Data/Training Data/Two-parameter case/Calibration method 2/new_I_data_Two-Pop-Cont.pickle'
params_file = './Data/Training Data/Two-parameter case/Calibration method 2/variable_parameter_values_Two-Pop-Cont.csv'
training_data = data_set(
    data_file=data_file,
    params_file=params_file,
    T=300,
    mob_range=[0.005, 0.025],
    jp_range=[0., 0.001],
    limit_data_range=True,
    data_range_start=segment_start,
    data_range_end=segment_end,
)

# --- Case set pairing the test data with the training data ---
cases = case_set(sample_data, training_data)
# mod=100 initializes only every 100th test case; use mod=1 to initialize every test data set.
cases.initialize_all_cases(mod=100)

# Score the cases (ABC "distances" between test data and training data).
cases.run_scoring()

In [None]:
# Index (into cases.case_list) of the test data set to calibrate on.
case_num = 5
selected_case = cases.case_list[case_num]

# Single analysis with the epanechnikov weight function; 0.02 and 0.2 are passed positionally
# (presumably centroid and estimator bandwidth, going by the hyperparameter lists used in the
# reproduction section — confirm against make_single_analysis).
selected_case.make_single_analysis(epanechnikov, 0.02, 0.2, res=200)
# cases.run_single_analysis(step,0.02,0.2,res=200) # run analysis on all cases

# Plot the inferred posterior (opacity of plotted matches corresponds to weight).
selected_case.analysis_single.make_kde_plot_2D(plot_matches=True)


# Reproduction usage
The code below reproduces the calibration and simulation-based calibration tests run for the two-parameter case using calibration method 2. Full analysis may be slow.

In [None]:
# Build the full sample/test data set (1666 sets).
data_file = './Data/Test Data/Two-parameter case/new_I_data_Two-Pop-NEW-COMBINED-TEST.pickle'
params_file = './Data/Test Data/Two-parameter case/variable_parameter_values_Two-Pop-NEW-COMBINED-TEST.csv'

sample_data = data_set(
    data_file=data_file,
    params_file=params_file,
    T=300,
    mob_range=[0.005, 0.025],
    jp_range=[0., 0.001],
)


In [None]:
# Build the full training data set (85000 sets) and cache its I_ABM_ALL attribute on disk.
# Skip this cell if training_data_I_ABM_ALL.pickle already exists; the next cell loads it.

# Output path for the cached array.
filename = './Data/Training Data/Two-parameter case/Calibration method 2/training_data_I_ABM_ALL.pickle' 

# Raw training data inputs.
data_file = './Data/Training Data/Two-parameter case/Calibration method 2/new_I_data_Two-Pop-Cont.pickle'
params_file = './Data/Training Data/Two-parameter case/Calibration method 2/variable_parameter_values_Two-Pop-Cont.csv'

training_data = data_set(
    data_file=data_file,
    params_file=params_file,
    T=300,
    mob_range=[0.005, 0.025],
    jp_range=[0., 0.001],
    limit_data_range=False,
)

# Persist only the I_ABM_ALL array, not the whole data_set object.
with open(filename, 'wb') as handle:
    pickle.dump(training_data.I_ABM_ALL, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# Rebuild the training data set from the raw files plus the cached I_ABM_ALL array.

# Raw training data files.
data_file = './Data/Training Data/Two-parameter case/Calibration method 2/new_I_data_Two-Pop-Cont.pickle'
params_file = './Data/Training Data/Two-parameter case/Calibration method 2/variable_parameter_values_Two-Pop-Cont.csv'

# Cached array written by the previous cell.
filename = './Data/Training Data/Two-parameter case/Calibration method 2/training_data_I_ABM_ALL.pickle' 

with open(filename, 'rb') as handle:
    loaded_variable = pickle.load(handle)

# Down-cast to 8-bit integers.
# NOTE(review): astype(np.int8) wraps silently for values outside [-128, 127] — confirm the
# stored values always fit.
I_ABM_ALL = loaded_variable.astype(np.int8)

# Assemble the data set, handing over the pre-computed array via accumulated_data / I_ABM_ALL.
training_data = data_set(
    data_file=data_file,
    params_file=params_file,
    T=300,
    mob_range=[0.005, 0.025],
    jp_range=[0., 0.001],
    limit_data_range=False,
    accumulated_data=True,
    I_ABM_ALL=I_ABM_ALL,
)


In [None]:
# Score every test data set against each training data set, then pickle the scored case set.

# Build and score the case set (mod=1 initializes every test case).
cases = case_set(sample_data, training_data)
cases.initialize_all_cases(mod=1)
cases.run_scoring(style="concat")

# Arm the save guard for this cell only.
case_set_allow_save = 1

case_set_path = './Data/Training Data/Two-parameter case/Calibration method 2/case_set.pickle'
if case_set_allow_save == 1:  # guard so the stored data is not overwritten by accident
    print('Saving...')
    with open(case_set_path, 'wb') as handle:
        pickle.dump(cases, handle, protocol=pickle.HIGHEST_PROTOCOL)
    print('Done')

# Disarm the guard again.
case_set_allow_save = 0

In [None]:
# Do analysis, saving results along the way.
#
# The test cases are processed in consecutive segments; after each segment the requested
# result attributes are pickled to ./Data/ABC_2D_results/<variable>_<segment index>.pickle.

# Segment boundaries: np.arange(0, 1700, 50) gives 0, 50, ..., 1650 (34 values); the last one is
# replaced by 1666 so the final segment runs from 1600 to 1666. 34 boundaries = 33 segments.
segments = np.arange(0,1700,50)
segments[-1] = 1666
print(segments.shape)

# ABC hyperparameters to run (the same for every segment, so defined once):
# weight_function_vect=[step, neg_exp, linear, epanechnikov] # i
# centriod_vect=[0.002,0.02,0.2,2,20] # j
# estimator_bw_vect=[0.1,0.3,1,3,10] # k
weight_function_vect=[epanechnikov] # i <-- to run only one option, still input option in a list []
centriod_vect=[0.002] # j
estimator_bw_vect=[0.1] # k
res=500

# Result attributes written to disk for every segment.
variables_to_save = ['KDE', 'KDE_mob', 'KDE_jp', 'mean_mob', 'mean_jp', 'continous_rank_mob',
                     'continous_rank_jp', 'crps_mob', 'crps_jp', 'weights', 'inside_95',
                     'inside_50', 'inside_hollow_95', 'inside_hollow_50', 'mob_grid', 'jp_grid']

# Iterate over segments, i.e. over consecutive boundary pairs. The upper limit is one less than
# the number of boundaries; looping over every boundary index would read segments[i+1] past the
# end of the array on the final pass.
for i in range(segments.shape[0] - 1):
    seg_start, seg_end = segments[i], segments[i+1]

    # The case set (with pre-calculated scores) is reloaded from disk at the start of every
    # segment, so each segment starts from the saved state.
    with open('./Data/Training Data/Two-parameter case/Calibration method 2/case_set.pickle', 'rb') as handle:
        cases = pickle.load(handle)

    print('Starting analysis on segment ', i, '\n')

    # Analysis for the cases in this segment only:
    start_time = time.time()
    cases.run_partial_analysis_array(weight_function_vect,centriod_vect,estimator_bw_vect,start = seg_start, end = seg_end, res=res)
    end_time = time.time()
    elapsed_time = end_time - start_time
    print(f"Elapsed Run Time: {elapsed_time} seconds")

    # Save each requested attribute for this segment to its own pickle file:
    for variable_name in variables_to_save:

        variable_data = cases.get_attribute_from_partial_case_list_analysis_array(variable_name, start = seg_start, end = seg_end)
        filename = f'./Data/ABC_2D_results/{variable_name}_{i}.pickle'

        with open(filename, 'wb') as handle:
            pickle.dump(variable_data, handle, protocol=pickle.HIGHEST_PROTOCOL)

        print(f'Saved {variable_name} to {filename}')

In [None]:
# Load in ABC results.
#
# Reads the per-segment pickle files written by the analysis cell and joins them, per variable,
# with `+` in segment order, so that loaded_data[name] covers all segments.

variables_to_load = ['continous_rank_mob', 'continous_rank_jp', 'crps_mob', 'crps_jp', 'KDE_mob', 'KDE_jp',
                    'mob_grid', 'jp_grid', 'KDE', 'inside_95',
                    'inside_50', 'inside_hollow_95', 'inside_hollow_50']

loaded_data = {}

for i in range(33):  # one file per variable for each of the 33 analysis segments
    print(i)
    for variable_name in variables_to_load:
        filename = f'./Data/ABC_2D_results/{variable_name}_{i}.pickle'

        # Loading stays best-effort for files that do not exist, but the gap is reported rather
        # than silently ignored. Any other failure (e.g. a corrupt pickle) is allowed to surface.
        try:
            with open(filename, 'rb') as handle:
                loaded_variable = pickle.load(handle)
        except FileNotFoundError:
            print(f'Warning: {filename} not found, skipping; '
                  f'indices into loaded_data[{variable_name!r}] may not line up with sample ids')
            continue

        # Start the entry with the first segment found, then append later segments to it.
        if variable_name in loaded_data:
            loaded_data[variable_name] = loaded_data[variable_name]+loaded_variable
        else:
            loaded_data[variable_name] = loaded_variable

        # print(f'Loaded {variable_name} from {filename}')

## Plotting SBC results
Reproduces two-parameter case results in Fig. 15.

In [None]:
# Plotting SBC results

# ---------------------- Mobility: ------------------------

# One continuous rank (quantile) per test case, taken at index [0, 0, 0] of the trailing axes.
continuous_rank = np.array(loaded_data['continous_rank_mob'])[:,0,0,0]

# Collect the ranks of the 1666 test cases.
rank_test = [continuous_rank[i] for i in range(0, 1666)]

# Optional diagnostics:
# plt.plot(np.sort(rank_test))
# plt.xlabel('Index sorted by quantile')
# plt.ylabel('Quantile')
# plt.plot([0,len(rank_test)],[0,1])
# plt.show()

# plt.scatter(np.array(range(len(rank_test))),rank_test)
# plt.xlabel('Index')
# plt.ylabel('Quantile')
# plt.show()

# Histogram of the ranks; bin edges span the observed min..max of the values.
vals = rank_test
n_bins = 51
counts, bins = np.histogram(vals, bins=n_bins)

plt.figure(figsize=(3.5, 1.75), dpi=500)
plt.stairs(counts, bins, fill=True)
plt.xlabel('Mobility quantile')
plt.ylabel('Frequency')
# plt.savefig('figs_final/mob_ABC_SBC_histogram_2d.png',bbox_inches='tight')
plt.savefig('mob_ABC_SBC_histogram_2d.png', bbox_inches='tight')

# Chi-square test of the bin counts against equal expected frequencies.
print('p-value:', sci.stats.chisquare(counts).pvalue)
plt.show()

# ------------------- Jumping probability: --------------------

# One continuous rank (quantile) per test case, taken at index [0, 0, 0] of the trailing axes.
continuous_rank = np.array(loaded_data['continous_rank_jp'])[:,0,0,0]

# Collect the ranks of the 1666 test cases.
rank_test = [continuous_rank[i] for i in range(0, 1666)]

# Optional diagnostics:
# plt.plot(np.sort(rank_test))
# plt.xlabel('Index sorted by quantile')
# plt.ylabel('Quantile')
# plt.plot([0,len(rank_test)],[0,1])
# plt.show()

# plt.scatter(np.array(range(len(rank_test))),rank_test)
# plt.xlabel('Index')
# plt.ylabel('Quantile')
# plt.show()

# Histogram of the ranks; bin edges span the observed min..max of the values.
vals = rank_test
n_bins = 51
counts, bins = np.histogram(vals, bins=n_bins)

plt.figure(figsize=(3.5, 1.75), dpi=500)
plt.stairs(counts, bins, fill=True)
plt.xlabel('Jumping probability quantile')
plt.ylabel('Frequency')
# plt.savefig('figs_final/JP_ABC_SBC_histogram_2d.png',bbox_inches='tight')
plt.savefig('JP_ABC_SBC_histogram_2d.png', bbox_inches='tight')

# Chi-square test of the bin counts against equal expected frequencies.
print('p-value:', sci.stats.chisquare(counts).pvalue)

## Plotting KDE and marginals from loaded results data
Set `sample_id = 1627` to recreate the example in Fig. 12.

Set `sample_id = 114` to recreate the example in Fig. 16.

In [None]:
# Extract one test case's posterior (KDE) from the loaded results, renormalize it, and derive
# the marginals and the marginal 95% / 50% credible-interval bounds for both parameters.
sample_id = 1627

# Per-sample 2D fields at index [0, 0, 0] of the leading axes (presumably the first weight
# function / centroid / bandwidth combination — confirm against the analysis array layout).
KDE, inside_95, inside_50, inside_hollow_95, inside_hollow_50 = (
    loaded_data[key][sample_id][0, 0, 0, :, :]
    for key in ('KDE', 'inside_95', 'inside_50', 'inside_hollow_95', 'inside_hollow_50')
)

# The evaluation grids are read from the first entry only.
y_flat = loaded_data['jp_grid'][0][0, 0, 0, :, :]
x_flat = loaded_data['mob_grid'][0][0, 0, 0, :, :]
res = 500

# True parameter values of this test case.
actual_mob = cases.sample_data.mobilities[sample_id]
actual_jp = cases.sample_data.jps[sample_id]

# 1D coordinate vectors: mobility varies along axis 0, jumping probability along axis 1.
mob_1d_mids = x_flat[:, 0]
jp_1d_mids = y_flat[0, :]

# Renormalize the KDE so that it integrates to one over both axes.
integral = np.trapz(np.trapz(KDE, jp_1d_mids, axis=1), mob_1d_mids)
Z = KDE / integral

# Grids the CDFs are tabulated on. The jp grid deliberately has one more point than the mob
# grid so that a mixed-up axis shows up as a shape error.
# NOTE(review): CDF[1:] below is filled from the marginals, so each grid must have exactly one
# more point than the matching marginal — check this before changing either size.
mob_1d_grid = np.linspace(0.005, 0.025, res)
jp_1d_grid = np.linspace(0, 0.001, res + 1)
print(mob_1d_grid.shape, jp_1d_grid.shape)

# Marginals: integrate the joint density over the other parameter.
mob_marginal = np.trapz(Z, jp_1d_mids, axis=1)
jp_marginal = np.trapz(Z, mob_1d_mids, axis=0)

# --------- Marginal mobility credible interval bounds ---------
# Discrete CDF of the mobility marginal; the first entry stays zero.
CDF = np.zeros_like(mob_1d_grid)
CDF[1:] = np.cumsum(mob_marginal / np.sum(mob_marginal))
CDF_interp = sci.interpolate.interp1d(mob_1d_grid, CDF, kind='linear')      # mobility -> probability
inv_CDF_interp = sci.interpolate.interp1d(CDF, mob_1d_grid, kind='linear')  # probability (0..1) -> mobility

# Bounds of the central 95% and 50% intervals.
upper_95_CI_mob, lower_95_CI_mob, upper_50_CI_mob, lower_50_CI_mob = (
    inv_CDF_interp(q) for q in (1-0.025, 0.025, 1-0.25, 0.25)
)

# --------- Marginal jumping probability credible interval bounds ---------
# Discrete CDF of the jp marginal; the first entry stays zero.
CDF = np.zeros_like(jp_1d_grid)
CDF[1:] = np.cumsum(jp_marginal / np.sum(jp_marginal))
CDF_interp = sci.interpolate.interp1d(jp_1d_grid, CDF, kind='linear')      # jp -> probability
inv_CDF_interp = sci.interpolate.interp1d(CDF, jp_1d_grid, kind='linear')  # probability (0..1) -> jp

# Bounds of the central 95% and 50% intervals.
upper_95_CI_jp, lower_95_CI_jp, upper_50_CI_jp, lower_50_CI_jp = (
    inv_CDF_interp(q) for q in (1-0.025, 0.025, 1-0.25, 0.25)
)


In [None]:
#---------------- Posterior plot (WITH colorbar) ------------------
# Draws the renormalized posterior Z on the (jumping probability, mobility) grid, marks the true
# parameter, and adds a colorbar whose tick labels are rescaled by a manually chosen power of ten.

# Imported here explicitly so the cell does not rely on these names leaking into the namespace
# from the %run / star-imports at the top of the notebook.
import matplotlib.colors as mcolors
import matplotlib.ticker as ticker

# Two-level transparent-to-opaque colormaps. They are not used by the plot below
# (presumably intended for credible-region overlays — confirm before removing).
colors = [(0.9,0,0,c) for c in np.linspace(0,1,100)]
cmapred = mcolors.LinearSegmentedColormap.from_list('mycmap', colors, N=2)
colors = [(0.9,0.6,0.6,c) for c in np.linspace(0,1,100)]
cmapblue = mcolors.LinearSegmentedColormap.from_list('mycmap', colors, N=2)

# Colormap for the density: interpolates linearly between color_1 and color_2.
color_1 = (237/255,248/255,251/255)
color_2 = (35/255,139/255,69/255)

colors = [color_1, color_2]

my_colormap = mcolors.LinearSegmentedColormap.from_list("CustomColormap", colors)

# One figure with a single axes; all drawing below targets that axes directly.
fig, ax = plt.subplots(dpi = 500, figsize = (4, 3.5))
ax_main = ax
pcm = ax_main.pcolormesh(y_flat, x_flat, Z, cmap=my_colormap)

ax_main.set_xlabel('Jumping probability')
ax_main.set_ylabel('Mobility')
ax_main.scatter(actual_jp, actual_mob, c='blue', label = 'True parameter', marker ='*')
ax_main.ticklabel_format(style='sci', axis='x', scilimits=(0,0))
ax_main.set_yticks(np.linspace(0.005, 0.025, 5))
#     ax_main.ticklabel_format(style='sci', axis='y', scilimits=(0,0))
ax_main.legend()

exp_value = 5 #MANUALLY SET THE EXP VALUE FOR SCI NOTATION ON COLORBAR

def fmt(x, pos):
    """Format colorbar tick value *x* as its mantissa relative to 10**exp_value.

    *pos* is the tick position passed in by matplotlib's FuncFormatter; it is unused.
    """
    return str(round(x/(10**exp_value), 2))

# Colorbar attached to the main axes, with tick labels produced by fmt.
cbar = fig.colorbar(pcm, ax=ax_main, format=ticker.FuncFormatter(fmt))
cbar.set_label('Probability density')

# Adding exponent value at the top
cbar.ax.text(1.05, 1, f"1e{exp_value}", transform=cbar.ax.transAxes, va='bottom', ha='left')

#     cbar.set_ticks([])  # Remove tick labels from the colorbar

# plt.savefig('figs_final/ABC_results_2D_posterior-with-colorbar-sample'+str(sample_id)+'.png',bbox_inches='tight')

plt.show()

# ----------------- Marginal plots --------------------
# Sanity check: each marginal's integral over its own axis is printed.
print('mob marginal integral', np.trapz(mob_marginal, mob_1d_mids))
print('jp marginal integral', np.trapz(jp_marginal, jp_1d_mids))

# Line colors for the credible-interval bounds.
_ci95_color = (0.9,0,0)
_ci50_color = (0.9,0.6,0.6)

# --- Jumping probability marginal ---
plt.figure(figsize=(3.5, 1), dpi=500)
plt.plot(jp_1d_mids, jp_marginal, linewidth=2, color=color_2)
plt.xlim(0,0.001)
plt.axvline(actual_jp, linewidth=2, color="blue", linestyle="--")  # true parameter
for _bound in (upper_95_CI_jp, lower_95_CI_jp):
    plt.axvline(_bound, linewidth=2, color=_ci95_color)
for _bound in (upper_50_CI_jp, lower_50_CI_jp):
    plt.axvline(_bound, linewidth=2, color=_ci50_color)
plt.xticks([])  # no tick marks on this panel
# plt.savefig('figs_final/ABC_results_JP_marginal_posterior-sample'+str(sample_id)+'.png',bbox_inches='tight')
plt.show()

# --- Mobility marginal ---
plt.figure(figsize=(3.5, 1), dpi=500)
# Empty scatter: contributes only a star-marker legend entry.
plt.scatter([], [], c='blue', marker="*", label='True parameter')
plt.axvline(actual_mob, linewidth=2, color="blue", label='True parameter', linestyle="--")  # true parameter
plt.plot(mob_1d_mids, mob_marginal, linewidth=2, color=color_2, label='Marginal posterior')
plt.xlim(0.025,0.005)  # limits given high-to-low, so the x axis is reversed
plt.axvline(upper_95_CI_mob, linewidth=2, color=_ci95_color, label='Marginal 95% CI')
plt.axvline(lower_95_CI_mob, linewidth=2, color=_ci95_color)
plt.axvline(upper_50_CI_mob, linewidth=2, color=_ci50_color, label='Marginal 50% CI')
plt.axvline(lower_50_CI_mob, linewidth=2, color=_ci50_color)
plt.xticks(np.linspace(0.0050,0.025,9))
plt.xticks([])  # overrides the ticks set on the previous line: the panel ends up without ticks
# plt.legend(bbox_to_anchor=(1.3, 0.95))
# plt.savefig('figs_final/ABC_results_MOB_marginal_posterior-sample'+str(sample_id)+'.png',bbox_inches='tight')

plt.show()