In [1]:
# Move the kernel's working directory up one level so the relative paths used
# below ("config/...", "output/...") resolve from there.
# NOTE(review): not idempotent -- re-running this cell moves up another level.
%cd ..

/home/prasanna/Projects/CS_4501_Fall22_RL_Project


In [2]:
import configparser
import pandas as pd
import numpy as np
from my_scripts.optimization import vacc
#from scripts.optimization import vacc_bayes_opt
#from ConstrainedLaMCTS.LAMCTS import bayes_opt
import multiprocess
import torch
import argparse
import os.path
import sys
from ConstrainedLaMCTS.LAMCTS.lamcts import MCTS
from collections import namedtuple

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Path template for this scenario's input files; the "{}" placeholder is filled
# with a file name through str.format (e.g. config_dir.format("params.cfg")).
config_dir = 'config/va_dulles_county_0_10/{}'

In [4]:
# Immutable record of run settings, standing in for parsed command-line arguments.
Args = namedtuple(
    "Args",
    [
        "threads",
        "agg_size",
        "agg_mapping",
        "threads_per_sim",
        "sim_draws",
        "out_dir",
        "name",
        "dims",
        "sim_workers",
    ],
)
args = Args(
    threads=80,            # passed to torch.set_num_threads
    agg_size=128,          # number of aggregated units
    agg_mapping="config/va_dulles_county_0_10/agg_mapping.csv",
    threads_per_sim=16,    # `cores` of the problem wrapper and size of each worker pool
    sim_draws=10500,       # `n_sim` of the problem wrapper
    out_dir="output/baseline_computation/",
    name="0_05_baseline",
    dims=128,              # length of the lower-bound vector
    sim_workers=5,         # number of worker pools created for the batch evaluation
)

In [5]:
# Parse the scenario's parameter file (params.cfg) into typed dictionaries.
sim_param_config = configparser.ConfigParser()
sim_param_config.optionxform = str  # retain case sensitivity

# load simulation files
with open(config_dir.format("params.cfg")) as config_file:
    sim_param_config.read_file(config_file)

# [tsir_config]: 'iters' becomes an int, 'grav_variant' stays a string,
# every other option is converted to float.
tsir_config_str = dict(sim_param_config['tsir_config'])
tsir_config = {
    option: (
        int(raw) if option == 'iters'
        else (raw if option == 'grav_variant' else float(raw))
    )
    for option, raw in tsir_config_str.items()
}

# [opt_config]: only the constraint bound is made numeric; the remaining
# options (including 'aggregate') are left as the strings read from the file.
opt_config = dict(sim_param_config['opt_config'])
opt_config['constraint_bnd'] = float(opt_config['constraint_bnd'])

print(tsir_config)

print(opt_config)

{'iters': 20, 'tau1': 2.05129243, 'tau2': 1.00658961, 'rho': 1.39756446, 'theta': 1.66575258e-10, 'alpha': 0.97, 'beta': 7.5}
{'obj': 'attacksize', 'V_repr': 'ratio', 'constraint_bnd': 0.1, 'constraint_type': 'ineq', 'aggregate': 'True'}


In [6]:
# Load the per-zipcode population/vaccination table and the pairwise distance list.
vacc_df = pd.read_csv(config_dir.format("pop.csv"))
dist_list = pd.read_csv(config_dir.format("dist.csv"))

# Reshape the long-form distance list into a zipcode1 x zipcode2 matrix;
# pairs absent from the list are filled with 0.
dist_mat = dist_list.pivot(
    index='zipcode1',
    columns='zipcode2',
    values='distKM',
).fillna(0)

# The seed file name is taken from the [seed] section of params.cfg; the file is
# read without a header row and flattened to a 1-D array.
seed = np.array(
    pd.read_csv(config_dir.format(sim_param_config['seed']['seed']), header=None)
).flatten()


# Set the number of threads torch is allowed to use.
torch.set_num_threads(args.threads)

# TODO (original author's note): should use aggregate in opt_config

# Aggregation is on only when both an aggregate size and a mapping file are configured.
do_aggregate = not (args.agg_size is None or args.agg_mapping is None)

# The aggregated and non-aggregated problems are built with the same arguments,
# except that the aggregated one additionally receives the two agg_* keywords.
agg_kwargs = {}
if do_aggregate:
    agg_mapping = np.array(pd.read_csv(args.agg_mapping)['mapping'])
    agg_kwargs = {'agg_vector': agg_mapping, 'agg_size': args.agg_size}

v = vacc.VaccProblemLAMCTSWrapper(
    opt_config=opt_config,
    V_0=vacc_df['vacc'],
    seed=seed,
    sim_config=tsir_config,
    pop=vacc_df,
    distances=np.array(dist_mat),
    negate=True,
    scale=True,
    cores=args.threads_per_sim,
    n_sim=args.sim_draws,
    output_dir=args.out_dir,
    name=args.name,
    **agg_kwargs,
    save_memory=False,
)

# Constraint bound read from the [opt_config] section.
c = opt_config['constraint_bnd']

if not do_aggregate:
    # No aggregation: population and upper bound are taken per zipcode as-is.
    P = np.array(vacc_df['pop'])
    ub = np.array(vacc_df['vacc'])
else:
    # P: total population per aggregated unit.
    # ub: upper bound per aggregated unit = the lowest 'vacc' value among the
    #     zipcodes mapped to it.
    P = np.zeros(args.agg_size)
    ub = np.full(args.agg_size, np.inf)
    for zipcode_index, county_index in enumerate(agg_mapping):
        P[county_index] += vacc_df['pop'][zipcode_index]
        ub[county_index] = min(ub[county_index], vacc_df.loc[zipcode_index, 'vacc'])
    # Units with no zipcode assigned keep an upper bound of inf; an earlier
    # workaround that reset those entries to 1 (dummy variables) is disabled.
    v.ub = ub
    # NOTE(review): lb is sized by args.dims while ub is sized by args.agg_size;
    # both are 128 in this notebook -- confirm they are meant to always agree.
    v.lb = np.zeros(args.dims)

print(P)
print(c)
print(ub)

[  32402.   78036.  112210.   19961.   21830.   27460.   10572.  227888.
   54418.    4581.   81783.    3393.   25785.   14352.   13012.   20187.
    6765.    6191.   26666.   25998.   17343.    3824.   12523.   55021.
  220796.  330514.   16014.    3292.   47508.    5631.   55825.   10713.
   16321.   11161.    9959. 1005627.   53194.   14748.   55553.   11900.
   18507.   47846.   11865.   74306.   22584.   16069.   16614.   22696.
   20340.    9471.   13308.    2912.   31870.  128662.  133233.   61177.
  312955.   39397.    1710.   32829.   50021.    6368.   21127.    8945.
    5577.   14750.  348091.   38549.    8731.   88741.   10361.   44006.
   31925.   28017.    6128.   25175.    7542.   63095.   17679.   22202.
  150652.  204584.   10357.   12177.    3933.   12176.   28246.   21671.
   14753.   33955.   41875.   11122.   84394.   25175.   17105.   63574.
  376473.   30421.   20416.    8742.    4777.  123499.   63332.   95775.
   23299.   38599.   17848.   34222.   19469.   401

In [7]:
# Uniform candidate vectors to evaluate: six evenly spaced levels from 0.05 to
# 0.10, each repeated across all 128 entries.
to_eval = [np.full(128, level) for level in np.linspace(0.05, 0.10, 6)]

In [None]:
from multiprocess import Pool

# One process pool per simulation worker, each with threads_per_sim processes.
pools = [Pool(args.threads_per_sim) for _worker in range(args.sim_workers)]

# Evaluate every candidate in a single batch call and also ask for the
# simulation pools (presumably one per evaluated candidate -- they are paired
# with the six constraint levels further down).
results, sim_pools = v(
    to_eval,
    pool=pools,
    batch=True,
    return_sim_pool=True,
)

In [None]:
# Imported here so this cell runs on a fresh kernel; the only other import of
# `bootstrap` sits in a later cell.
from scipy.stats import bootstrap

# For every evaluated candidate, print the mean of its recorded input, the mean
# of its recorded outputs, and bootstrap results for the mean and the median
# of the outputs.
for i in range(len(sim_pools)):
    inputs_i = v.engine.eval_history['input'][i]
    outputs_i = v.engine.eval_history['output'][i]
    print('-----{}------'.format(i))
    print(np.mean(inputs_i), np.mean(outputs_i))
    print(bootstrap(data=(outputs_i,), statistic=np.mean))
    print(bootstrap(data=(outputs_i,), statistic=np.median))

In [None]:
# Persist the evaluation history with the CSV option on and the serial option off.
v.engine.save_eval_history(
    path="output/baseline_computation/baseline_results",
    as_csv=True,
    as_serial=False,
)

In [None]:
# Get the average attack size for each zipcode
from scipy.stats import bootstrap
from statsmodels.stats.proportion import proportion_confint
def compute_zip_summary_dataframe(pool):
    """Summarise the simulated attack sizes of every zipcode in ``vacc_df``.

    For each row position ``zip_i`` of the notebook-level ``vacc_df``, the
    samples returned by ``pool.get_samples(zip_i)`` are summed along axis 1 to
    give one attack size per row (presumably one row per simulation draw --
    confirm against the pool implementation).

    Parameters
    ----------
    pool : object exposing ``get_samples(zip_i)`` that returns a 2-D array-like.

    Returns
    -------
    pandas.DataFrame
        One row per zipcode with the zipcode id, the mean and median attack
        size with bootstrap confidence bounds, and the fraction of attack
        sizes greater than 1 ("prob_outbreak") with a ``method='beta'``
        proportion confidence interval.
    """
    columns = [
        'zip',
        'avg_att_size', 'avg_att_size_err_low', 'avg_att_size_err_hi',
        'med_att_size', 'med_att_size_err_low', 'med_att_size_err_hi',
        'prob_outbreak', 'prob_outbreak_err_low', 'prob_outbreak_err_hi',
    ]
    rows = []
    for zip_i in range(len(vacc_df.index)):
        zipcode = vacc_df.loc[zip_i, 'id']
        AS_samples = np.sum(pool.get_samples(zip_i), axis=1)

        # Mean first, then median, each followed by its bootstrap interval.
        mean_size = np.mean(AS_samples)
        mean_ci = bootstrap(data=(AS_samples,), statistic=np.mean).confidence_interval
        median_size = np.median(AS_samples)
        median_ci = bootstrap(data=(AS_samples,), statistic=np.median).confidence_interval

        # An "outbreak" is an attack size strictly greater than 1.
        n_samples = len(AS_samples)
        prob_outbreak = np.sum(AS_samples > 1) / n_samples
        ci_low, ci_up = proportion_confint(
            count=np.sum(AS_samples > 1), nobs=n_samples, method='beta'
        )

        rows.append({
            'zip': zipcode,
            'avg_att_size': mean_size,
            'avg_att_size_err_low': mean_ci.low,
            'avg_att_size_err_hi': mean_ci.high,
            'med_att_size': median_size,
            'med_att_size_err_low': median_ci.low,
            'med_att_size_err_hi': median_ci.high,
            'prob_outbreak': prob_outbreak,
            'prob_outbreak_err_low': ci_low,
            'prob_outbreak_err_hi': ci_up,
        })
    # Explicit column list keeps the column order (and the columns themselves
    # when there are no rows).
    return pd.DataFrame(rows, columns=columns)
#compute_zip_summary_dataframe(pool).sort_values(by='prob_outbreak',ascending=False).head(20)

In [None]:
# Write one per-zipcode summary CSV per simulation pool. The file-name prefix is
# the paired value from np.linspace(5, 10, 6), i.e. 5.0, 6.0, ..., 10.0
# (presumably the 0.05-0.10 candidate levels expressed in percent).
constraint_levels = np.linspace(5, 10, 6)
for pool, constr in zip(sim_pools, constraint_levels):
    df = compute_zip_summary_dataframe(pool)
    out_path = 'output/baseline_computation/{}_baseline_zipcode_summary.csv'.format(constr)
    df.to_csv(out_path)