# Import packages
Use kernel "ABM_env" -- see README.

In [None]:
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import math
import time
from tqdm import tqdm
import cProfile
import pickle
import scipy as sci
import pandas as pd
import seaborn
import matplotlib.ticker as tkr

import importlib
from ABC_case_class import *
from ABC_data_set_class import * 
%run ABC_data_set_class.py 
%run ABC_case_class.py
%run ABC_weight_functions.py

# Guard flag against accidental saving; disabled (0) by default.
# NOTE(review): none of the cells visible here read this flag -- presumably it is
# consumed by the code loaded through the %run lines above. TODO confirm.
case_set_allow_save=0 # Protection from accidental saving

# Example usage

In [None]:
# Build the training data set from the pickled data file and the CSV of
# parameter values (one-parameter case, calibration method 2).
data_file = './Data/Training Data/One-parameter case/Calibration method 2/new_I_data_One-Pop-Cont.pickle'
params_file = './Data/Training Data/One-parameter case/Calibration method 2/variable_parameter_values_One-Pop-Cont.csv'

training_data = data_set(
    data_file=data_file,
    params_file=params_file,
    T=300,
    steps_to_acc=3001,
    mob_range=[0.005, 0.025],
)

In [None]:
# Build the test (sample) data set; both paths follow the "Maya file structure".
data_file = './Data/Test Data/One-parameter case/new_I_data_One-Pop-NEW-COMBINED-TEST.pickle'
params_file = './Data/Test Data/One-parameter case/variable_parameter_values_One-Pop-NEW-COMBINED-TEST.csv'

# limit_data_range is switched off here, so no start/end range is passed.
sample_data = data_set(
    data_file=data_file,
    params_file=params_file,
    T=300,
    steps_to_acc=3001,
    mob_range=[0.005, 0.025],
    limit_data_range=False,
)


In [None]:
# Pair the test samples with the training data in a case set.
cases = case_set(sample_data, training_data)

# Change `mod` to include more data; mod=1 uses all data.
cases.initialize_all_cases(mod=100)

# Score each sample.
cases.run_scoring()

In [None]:
# Index of the test data set to run the calibration on.
case_num = 9
selected_case = cases.case_list[case_num]

# Analyse only the selected case.
selected_case.make_single_analysis(epanechnikov, 0.02, 0.2, res=200)
# To run the analysis on all cases instead:
# cases.run_single_analysis(step,0.02,0.2,res=200)

# Plot the inferred posterior (opacity of plotted matches corresponds to weight).
selected_case.analysis_single.make_kde_plot(legend=True, plot_matches=True)

# Reproduction usage
The code below reproduces the calibration and simulation-based calibration (SBC) tests that were run for the one-parameter case using calibration method 2. The full analysis may be slow.

In [None]:
# Do analysis, saving results along the way.
#
# The test data is processed in segments; for every segment the case set is
# built and scored, the full hyperparameter sweep is run, and each result
# variable is pickled to ./Data/ABC_Results/<variable>_<segment index>.pickle.
import os  # only needed in this cell, to create the results folder

# Training data set:
data_file='./Data/Training Data/One-parameter case/Calibration method 2/new_I_data_One-Pop-Cont.pickle'
params_file='./Data/Training Data/One-parameter case/Calibration method 2/variable_parameter_values_One-Pop-Cont.csv'
training_data=data_set(data_file=data_file,params_file=params_file,T=300,steps_to_acc=3001,mob_range=[0.005,0.025])

# Test/sample data files:
data_file='./Data/Test Data/One-parameter case/new_I_data_One-Pop-NEW-COMBINED-TEST.pickle'
params_file='./Data/Test Data/One-parameter case/variable_parameter_values_One-Pop-NEW-COMBINED-TEST.csv'

# Segment boundaries: do analysis in segments of 50 samples.
segments = np.arange(0,1001,50)

# ABC hyperparameters. They do not change between segments, so they are
# defined once here instead of on every loop iteration.
weight_function_vect=[step, neg_exp, linear, epanechnikov] # i
centriod_vect=[0.002,0.02,0.2, 2, 20] # j
estimator_bw_vect=[0.1, 0.3, 1, 3, 10] # k
res=500

# Result attributes collected from every case after the sweep.
variables_to_save = ['range_95', 'range_50', 'mean', 'range_size_95', 'range_size_50',
                     'continous_rank', 'rank', 'non_zero_matches', 'crps', 'KDE',
                     'range_pass_95', 'range_pass_50']

# Make sure the output folder exists before the first pickle is written.
results_dir = './Data/ABC_Results'
os.makedirs(results_dir, exist_ok=True)

for i, (segment_start, segment_end) in enumerate(zip(segments[:-1], segments[1:])):
    print('Starting analysis on segment ', i, '\n')
    # Choose the sample data for this set of analysis
    sample_data=data_set(data_file=data_file,params_file=params_file,T=300,steps_to_acc=3001,
                         mob_range=[0.005,0.025],limit_data_range = True,
                         data_range_start = segment_start, data_range_end = segment_end)

    # perf_counter is a monotonic clock intended for measuring elapsed intervals.
    start_time = time.perf_counter()
    # Set up the case set
    cases=case_set(sample_data,training_data)
    cases.initialize_all_cases(mod=1)

    # Score each sample
    cases.run_scoring()
    end_time = time.perf_counter()
    elapsed_time = end_time - start_time
    print(f"Case set up time: {elapsed_time} seconds")

    # Analysis for all the cases in the set
    start_time = time.perf_counter()
    cases.run_analysis_array(weight_function_vect,centriod_vect,estimator_bw_vect,res=res)
    end_time = time.perf_counter()
    elapsed_time = end_time - start_time
    print(f"Run time: {elapsed_time} seconds")

    # Save data: one pickle per variable and segment.
    for variable_name in variables_to_save:
        variable_data = cases.get_attribute_from_case_list_analysis_array(variable_name)
        filename = f'{results_dir}/{variable_name}_{i}.pickle'

        with open(filename, 'wb') as handle:
            pickle.dump(variable_data, handle, protocol=pickle.HIGHEST_PROTOCOL)

        print(f'Saved {variable_name} to {filename}')


In [None]:
# Load analysis results
#
# Reads the per-segment pickles from ./Data/ABC_Results and joins them into one
# entry per variable in `loaded_data`.
# NOTE: unpickling can execute arbitrary code -- only load result files that
# you generated yourself.

variables_to_load = ['range_95', 'range_50', 'mean', 'range_size_95', 'range_size_50',
                     'continous_rank', 'rank', 'non_zero_matches', 'crps', 'KDE',
                     'range_pass_95', 'range_pass_50']

# Number of saved segments (change if you have changed the segment sizes).
n_segments = 20

loaded_data = {}

for i in range(n_segments):
    print(i)  # progress: printed once per segment
    for variable_name in variables_to_load:
        filename = f'./Data/ABC_Results/{variable_name}_{i}.pickle'

        with open(filename, 'rb') as handle:
            loaded_variable = pickle.load(handle)

        if variable_name in loaded_data:
            # Segments are joined with `+`; this assumes each pickle holds a
            # list with one entry per sample -- TODO confirm.
            loaded_data[variable_name] = loaded_data[variable_name]+loaded_variable
        else:
            loaded_data[variable_name] = loaded_variable

        # print(f'Loaded {variable_name} from {filename}')

#load the sample test data (for mobility values):
data_file='./Data/Test Data/One-parameter case/new_I_data_One-Pop-NEW-COMBINED-TEST.pickle'
params_file='./Data/Test Data/One-parameter case/variable_parameter_values_One-Pop-NEW-COMBINED-TEST.csv'
sample_data=data_set(data_file=data_file,params_file=params_file,T=300,steps_to_acc=3001,mob_range=[0.005,0.025],limit_data_range = True, data_range_start = 0, data_range_end = 1000)


## Plotting hyperparameter sweep
Reproduces Fig. 13 and Fig. 14.

In [None]:
# Indices of the test samples included in the summary statistics below.
ind_to_include = np.arange(0, 1000)
n = ind_to_include.shape[0]

#For each hyperparameter combination, check what fraction of the "true" mobilities fell within 95% CI
range_pass_95 = np.array(loaded_data['range_pass_95'])[ind_to_include, :, :]
rate_pass_95 = np.sum(range_pass_95, axis = 0)/n

#For each hyperparameter combination, check what fraction of the "true" mobilities fell within 50% CI
range_pass_50 = np.array(loaded_data['range_pass_50'])[ind_to_include, :, :]
rate_pass_50 = np.sum(range_pass_50, axis = 0)/n

#Load continuous ranked probability score.
# Restricted to the same samples as the pass rates, so that dividing by n gives
# a true average even when ind_to_include is changed to a subset.
crps = np.array(loaded_data['crps'])[ind_to_include]
avg_crps = np.sum(crps, axis = 0)/n

# Kept for all samples; later cells index into it.
continuous_rank = np.array(loaded_data['continous_rank'])

seaborn.set(rc={'figure.dpi': 500,'figure.figsize':(4,3)})
seaborn.set(font="DejaVu Sans")

shape_fn_names = ['Step', 'Negative exponential', 'Linear', 'Epanechnikov']

# Colour-bar tick formatter: scientific notation outside 1e-2..1e2.
formatter = tkr.ScalarFormatter(useMathText=True)
formatter.set_scientific(True)
formatter.set_powerlimits((-2, 2))

# Axis tick labels shared by all heatmaps.
bw_tick_labels = [0.1, 0.3, 1, 3, 10]
# The rows are flipped with np.flipud before plotting, so the labels run from
# the last centroid index to the first.
# NOTE(review): the reproduction cell sweeps centriod_vect=[0.002,0.02,0.2, 2, 20],
# which does not match these labels -- confirm which values are intended.
centroid_tick_labels = [10, 1, 0.1, 0.01, 0.001]

#calculate absolute difference between x% and rate of test samples being in x% CI:
abs_diff_rate_pass_95 = np.abs(rate_pass_95-0.95)
abs_diff_rate_pass_50 = np.abs(rate_pass_50-0.50)

#Heatmap plot of absolute differences for 50% CI (one figure per weight function):
for shape_ind, shape_name in enumerate(shape_fn_names):
    ax = seaborn.heatmap(np.flipud(abs_diff_rate_pass_50[shape_ind, :, :]),
                         xticklabels = bw_tick_labels, yticklabels = centroid_tick_labels,
                         cbar_kws={"format": formatter})
    ax.set(xlabel='KDE bandwidth', ylabel='Centroid')
    ax.set_title(shape_name)
    figure = ax.get_figure()
    # figure.savefig('figs_final/'+shape_fn_names[shape_ind]+'_abs_diff_rate_pass_50.png', dpi=500, bbox_inches='tight')
    plt.show()

#Heatmap plot of CRPS (one figure per weight function):
for shape_ind, shape_name in enumerate(shape_fn_names):
    ax = seaborn.heatmap(np.flipud(avg_crps[shape_ind, :, :]),
                         xticklabels = bw_tick_labels, yticklabels = centroid_tick_labels,
                         cbar_kws={"format": formatter, 'label': 'CRPS'})
    ax.set(xlabel='KDE bandwidth', ylabel='Centroid')
    ax.set_title(shape_name)
    figure = ax.get_figure()
    # figure.savefig('figs_final/'+shape_fn_names[shape_ind]+'_crps.png', dpi=500, bbox_inches='tight')
    plt.show()

# Restore the default matplotlib settings changed by seaborn.set above.
seaborn.reset_defaults()


## Plotting SBC results
With `shape_ind = 3`, `centroid_ind = 0`, `bw_ind = 0`, this cell reproduces the one-parameter case in Fig. 15.

In [None]:
# Hyperparameter combination to inspect (indices into the weight-function,
# centroid and bandwidth axes of the result arrays).
shape_ind = 3
centroid_ind = 0
bw_ind = 0

#Collect the "continuous ranks" of the true mobility wrt the estimated posterior.
# Uses the same sample subset (ind_to_include) as the coverage statistics
# instead of a hard-coded sample count.
rank_test = continuous_rank[ind_to_include, shape_ind, centroid_ind, bw_ind]

# # --------------- Plot index sorted by rank -------------
# plt.plot(np.sort(rank_test))
# plt.xlabel('Index sorted by rank')
# plt.ylabel('Rank')
# plt.plot([0,len(rank_test)],[0,1])
# plt.show()

# # -------------- plot scatter plot of rank and test sample index ----------
# plt.scatter(np.array(range(len(rank_test))),rank_test)
# plt.xlabel('Index')
# plt.ylabel('Rank')
# plt.show()

# -------------- plot rank histogram, calculate p-value ------------
vals = rank_test
n_bins = 51
# NOTE(review): with no explicit range the bin edges span the observed min/max
# of the ranks, not [0, 1] -- confirm that this is intended.
counts, bins = np.histogram(vals, bins=n_bins)
# Chi-square test of the bin counts against equal expected frequencies.
print('p-value:', sci.stats.chisquare(counts).pvalue)
plt.figure(dpi = 500, figsize = (3,3))
plt.stairs(counts, bins, fill = True)
plt.xlabel('Mobility quantile')
plt.ylabel('Frequency')
# plt.savefig('figs_final/mob_ABC_SBC_histogram_1d.png',bbox_inches='tight')
plt.show()

## Plotting KDE and marginals from loaded results data
Set `sample_ind = 955` to recreate the one-parameter case example in Fig. 12.

In [None]:
#------------------ Plot sample KDE w CIs, true mobility -----------------
# Draws the posterior of one test sample for the hyperparameter combination
# (shape_ind, centroid_ind, bw_ind) chosen earlier, together with its 95% / 50%
# interval bounds and the true mobility.
KDE = np.array(loaded_data['KDE'])
sample_ind = 955
hyper_ind = (shape_ind, centroid_ind, bw_ind)

mob_value = sample_data.mobilities[sample_ind]

# Interval bounds are stored along the last axis (entries 0 and 1).
# NOTE(review): which entry is the upper and which the lower bound is not
# visible here -- the names follow the original notebook; confirm.
sample_range_95 = loaded_data['range_95'][sample_ind]
sample_range_50 = loaded_data['range_50'][sample_ind]
upper_percentile_95 = sample_range_95[hyper_ind + (0,)]
lower_percentile_95 = sample_range_95[hyper_ind + (1,)]
upper_percentile_50 = sample_range_50[hyper_ind + (0,)]
lower_percentile_50 = sample_range_50[hyper_ind + (1,)]

grid = np.linspace(0.005, 0.025, 500)
grid_mids = mids(grid)  # grid is at midpoints

posterior = KDE[(sample_ind,) + hyper_ind]

plt.figure(dpi=500, figsize=(4, 3))
plt.plot(grid_mids, posterior, label='ABC Posterior')

# Two vertical lines per interval; only the first one carries the legend label.
interval_lines = (
    ('95% CI', 'red', upper_percentile_95, lower_percentile_95),
    ('50% CI', 'gold', upper_percentile_50, lower_percentile_50),
)
for ci_label, ci_color, first_bound, second_bound in interval_lines:
    plt.axvline(first_bound, color=ci_color, label=ci_label)
    plt.axvline(second_bound, color=ci_color)
plt.axvline(mob_value, color='limegreen', label='True mobility', linestyle='--')

plt.xlabel('Mobility')
plt.ylabel('Posterior density')
plt.legend()
# Leave 20% headroom above the peak of the posterior.
plt.ylim(0, max(posterior) * 1.2)
plt.show()