In [33]:
import pandas as pd
import numpy as np
from tqdm.notebook import trange, tqdm
from itertools import islice, chain
import hashlib
import os
from SALib.sample import saltelli
from SALib.analyze import sobol
from SALib.test_functions import Ishigami
from SALib.sample import morris as ms
from SALib.analyze import morris as ma
from SALib.plotting import morris as mp
from simulation import Simulation

In [2]:
variables_all = {
    'proportion_wearing_masks': (0.1, 0.95),
    'test_proportion_to_start': (0.01, 0.1),
    'self_isolate_proportion': (0.1, 0.95),
    'min_tests_daily_proportion': (0.001, 0.01),
    'max_tests_daily_proportion': (0.01, 0.1),
    'desired_positive_proportion': (0.01, 0.5),
    'healthcare_capacity': (50, 150),
    'speed': (0.005, 0.04),
}

In [3]:
def build_problem(variables):
    return {
        'num_vars': len(variables),
        'names': variables.keys(),
        'bounds': [list(x) for x in variables.values()]
    }

In [4]:
def compute_row_hash(row):
    return hashlib.sha1(np.array(row)).hexdigest()

In [5]:
def generate_samples_df(variables, sampling_fun):
    problem = build_problem(variables)
    samples = sampling_fun(problem)
    
    df = pd.DataFrame(samples, columns=variables.keys())
    
    missing_cols = set(variables_all.keys()) - set(df.columns)
    for m in missing_cols:
        df[m] = ""
    
    df['hash'] = df.apply(compute_row_hash, axis=1)
    df.set_index('hash', inplace=True)
    return df

In [6]:
def spit_params_dicts(samples_df):
    for idx, row in df.iterrows():
        params = dict(row)
        params['run_id'] = idx
        yield params

In [7]:
paramsets_file = 'paramsets.csv'
def load_paramsets_if_present():
    return pd.read_csv(paramsets_file, index_col=0) if os.path.isfile(paramsets_file) else None

In [8]:
def load_results_if_present(result_id):
    result_file = f'results/r_{result_id}.csv'
    return pd.read_csv(result_file, index_col=0) if os.path.isfile(result_file) else None

In [9]:
def generate_persistent_samples(variables, sampling_fun):
    df = generate_samples_df(variables, sampling_fun)
    pdf = load_paramsets_if_present()
    if pdf is None:
        df.to_csv(paramsets_file)
    else:
        xdf = pdf.append(df)
        udf = xdf.reset_index().drop_duplicates('hash').set_index('hash')
        udf.to_csv(paramsets_file)
    
    return list(df.index)

In [10]:
def select_variables(*names):
    names = set(names)
    if 'all' in names or len(names) == 0:
        return variables_all
    
    return dict(filter(lambda x: x[0] in names, variables_all.items()))

In [16]:
# def compute_sensitivity(variables, sampling_fun):
#     setids = generate_persistent_samples(variables, sampling_fun)
    
#     paramsets = load_paramsets_if_present()
#     assert paramsets is not None
    
#     for setid in tqdm(setids):
#         results = load_results_if_present(setid)
#         if results is None:
#             param_dict = paramsets.loc[setid].dropna().to_dict()
#             param_dict['quiet'] = True
#             param_dict['visualize'] = False
#             param_dict['verbose'] = False
#             param_dict['print_sum'] = False
#             param_dict['run_id'] = setid

#             sim = Simulation(**param_dict)
#             sim.run()
            
#             results = load_results_if_present(setid)
#             assert results is not None

In [34]:
def batches(iterable, n=1):
    l = len(iterable)
    for ndx in range(0, l, n):
        yield iterable[ndx:min(ndx + n, l)]

In [39]:
def generate_paramsets(variables, sampling_fun, batch_size):
    setids = generate_persistent_samples(variables, sampling_fun)
    
    for i, b in enumerate(batches(setids, batch_size)):
        print(f'---|{i}|-')
        print(' '.join(b))

In [40]:
generate_paramsets(select_variables('speed'), lambda x: saltelli.sample(x, 10), 10)

---|0|-
87ba5323e3047041c7a79118caba858ddea28974 f3f25677cdad10fbedf8c3057f77c0a7772a279b f8f70767a2c3dc046d2a6a523fedac1a68433f52 cdeb727ee9e029b07a2ccfcb2caa2dcf5db71783 cafe0b92537659f6409b8a583e599e2f0ff2841e b79a8efaacefadaaafd9be54b7e4a660cd62b438 c26b7a45f00ae9fd22d85a957a6fcdbb7ac5ca46 2181cfd9c2872d7528bb50263b7b5d22a18b2d1d 6d3789f14ccd1857064e10999bdfcd8f3fc16758 cf987237929abd61cca3883c16fe06f122a1eab9
---|1|-
69254aea42ab581af9f7f1b9bcaf0cae306865b8 af04dd29969788a64a758a4c774fc3ad32d82c05 2f4767a19e3fbc4a4fbae67833ff950c239ad966 3b7c92595bf3ee161b70803abf500e4e06d94414 c2281491d81ee81fae01d7055a47882e1cf07956 e1720a4598910e202f007887095543bbea590bbc 809af73011554af0b92cea0cc55d11627d212afe 7413ab5d6e914d8746de46369fd8c3a356adf8bf e09b3f3f3ccd600b37fdd4b1e65e366f4d2f502b c0b88cc798d2b21831ffa1275579e52141fd1625
---|2|-
216d6e50e85d83678d49321c4d4038a7493c66e7 c3341d2c239cd0271331853391ae80135782da17 e38bdcd24c3143fd66924b9e974f2587024f8620 47929de529e42e785b0efe7062945c88d