In [44]:
import pandas as pd
import numpy as np
from tqdm.notebook import trange, tqdm
from itertools import islice, chain
import hashlib
import os
import math
from SALib.sample import saltelli
from SALib.analyze import sobol
from SALib.test_functions import Ishigami
from SALib.sample import morris as ms
from SALib.analyze import morris as ma
from SALib.plotting import morris as mp
from simulation import Simulation

In [2]:
variables_all = {
    'proportion_wearing_masks': (0.1, 0.95),
    'test_proportion_to_start': (0.01, 0.1),
    'self_isolate_proportion': (0.1, 0.95),
    'min_tests_daily_proportion': (0.001, 0.01),
    'max_tests_daily_proportion': (0.01, 0.1),
    'desired_positive_proportion': (0.01, 0.5),
    'healthcare_capacity': (50, 150),
    'speed': (0.005, 0.04),
}

In [3]:
def build_problem(variables):
    return {
        'num_vars': len(variables),
        'names': variables.keys(),
        'bounds': [list(x) for x in variables.values()]
    }

In [4]:
def compute_row_hash(row):
    return hashlib.sha1(np.array(row)).hexdigest()

In [5]:
def generate_samples_df(variables, sampling_fun):
    problem = build_problem(variables)
    samples = sampling_fun(problem)
    
    df = pd.DataFrame(samples, columns=variables.keys())
    
    missing_cols = set(variables_all.keys()) - set(df.columns)
    for m in missing_cols:
        df[m] = ""
    
    df['hash'] = df.apply(compute_row_hash, axis=1)
    df.set_index('hash', inplace=True)
    return df

In [6]:
def spit_params_dicts(samples_df):
    for idx, row in df.iterrows():
        params = dict(row)
        params['run_id'] = idx
        yield params

In [7]:
paramsets_file = 'paramsets.csv'
def load_paramsets_if_present():
    return pd.read_csv(paramsets_file, index_col=0) if os.path.isfile(paramsets_file) else None

In [8]:
def load_results_if_present(result_id):
    result_file = f'results/r_{result_id}.csv'
    return pd.read_csv(result_file, index_col=0) if os.path.isfile(result_file) else None

In [9]:
def generate_persistent_samples(variables, sampling_fun):
    df = generate_samples_df(variables, sampling_fun)
    pdf = load_paramsets_if_present()
    if pdf is None:
        df.to_csv(paramsets_file)
    else:
        xdf = pdf.append(df)
        udf = xdf.reset_index().drop_duplicates('hash').set_index('hash')
        udf.to_csv(paramsets_file)
    
    return list(df.index)

In [10]:
def select_variables(*names):
    names = set(names)
    if 'all' in names or len(names) == 0:
        return variables_all
    
    return dict(filter(lambda x: x[0] in names, variables_all.items()))

In [16]:
# def compute_sensitivity(variables, sampling_fun):
#     setids = generate_persistent_samples(variables, sampling_fun)
    
#     paramsets = load_paramsets_if_present()
#     assert paramsets is not None
    
#     for setid in tqdm(setids):
#         results = load_results_if_present(setid)
#         if results is None:
#             param_dict = paramsets.loc[setid].dropna().to_dict()
#             param_dict['quiet'] = True
#             param_dict['visualize'] = False
#             param_dict['verbose'] = False
#             param_dict['print_sum'] = False
#             param_dict['run_id'] = setid

#             sim = Simulation(**param_dict)
#             sim.run()
            
#             results = load_results_if_present(setid)
#             assert results is not None

In [34]:
def batches(iterable, n=1):
    l = len(iterable)
    for ndx in range(0, l, n):
        yield iterable[ndx:min(ndx + n, l)]

In [52]:
def generate_paramsets(variables, sampling_fun, batch_size=None, no_batches=None):
    setids = generate_persistent_samples(variables, sampling_fun)
    
    if batch_size is None:
        batch_size = int(math.ceil(len(setids) / no_batches))
    
    for i, b in enumerate(batches(setids, batch_size)):
        print(f'---|{i}|-')
        print(' '.join(b))
    
    return setids

In [53]:
exp1 = generate_paramsets(select_variables('all'), lambda x: ms.sample(x, 100, num_levels=2, optimal_trajectories=4, seed=1234), no_batches=2)

---|0|-
a92141d3cd2f5ee38e8e2fa1e5849420ac6cf7fd e02c4d13d64cdda9c08b29b7efb9e9277794641f 60e047e16bb7d3ad810160871f872097ecb67b28 61f961e0f783aa518c5b043c71f4e04d35a32298 38b44c89bbfaa5ede8d6af4d95384d14659b3b5c 3796515b5205ccb3b5fe8d1d7a34a59d4e918e13 467dcd108563bb286188f5c8ea825d62343980c0 bb361b804f1df4e74f5946fff75207308f588919 f112db8474d79d7f2a2331089331f898601ac388 0caf876de4262ddbcaef2fe967124873edeeeaa3 f779c44701960ae5879c2109811d6362a7893ba8 b9ba83e0360f6a663e75a431f112aa75842ad39c 8f214c5445e5347511363341ce2a456a74612d23 83b364a8e94f2c5cc3a493137cb60290d71fc34c 6d86ac165b9a5da18508639cb36af8e4492608b9 9964a9b3a240ee160b4a74ab5f7b9263f4c79bcf 435b8494c6d9f0c883c18fb64c385dde6206d125 b9e49eb3792e47cc466c1b06692f89e399b02f26
---|1|-
2751eefd6e6ffdbd3ece8e630c559adcc3d4d0bd ccb0e340de1c1bdb264cbfff55e73f55307b1b0e c4a5a972115ab03d28fdb0b89cfb70e245dd6ff6 3c04efe0696c73abd211e3fa89d19088f3537098 cc0c9f8cf6e69eac5ec3446ee9d53c5c1482dbac 3599b3cedb5db48b9b312e327d1f2c9dfdd7665b 

In [78]:
def compute_metrics(exp):
    metrics = []
    for rid in exp1:
        rdf = load_results_if_present(rid)
        fatalities = rdf['fatalities'].iloc[-1]
        unaffected = rdf['susceptible'].iloc[-1]
        sev3 = rdf['severity2'].iloc[-1]
        metrics.append((fatalities, unaffected, sev3))
    return np.array(metrics, dtype=np.float64)

In [79]:
pdf = load_paramsets_if_present()

In [80]:
metrics1 = compute_metrics(exp1)

In [81]:
sample1 =  np.array([np.array(x) for i, x in pdf.loc[exp1].iterrows()])

In [82]:
ma.analyze(build_problem(select_variables('all')), sample1, metrics1, num_levels=2)

ValueError: cannot reshape array of size 108 into shape (4,9)