In [1]:
import jn_setup
from tools.paths import *
from simulation_procedure import make_model
import pandas as pd, pyabc, hydroeval as he, numpy as np, uuid

In [2]:
# Run mode used throughout the notebook. "debug" is active here; "test" is the
# alternative that was previously toggled in this cell.
mode = "debug"

# Build the simulation model handed to pyabc further down.
model = make_model(
    mode=mode,
    swmm_cleanup='full',
    vvwm_cleanup='some',
)

C:\Users\jstelman\Git\stelman_urban_pesticides\master_debug\NPlesantCreek.inp


In [3]:
"""
Priors. Get the values from that csv. Just for SWMM at first.
"""
def _read_prior_ranges(csv_name):
    """Load one prior-bounds CSV from master_path.

    Only the Parameter, Min and Range columns are read; the first of those
    selected columns becomes the index.
    """
    return pd.read_csv(
        os.path.join(master_path, csv_name),
        index_col=0,
        usecols=["Parameter", "Min", "Range"],
    )


# SWMM bounds first, then the VVWM bounds, stacked into one table of parameters.
swmm_ranges = _read_prior_ranges("swmm_param_priors.csv")
vvwm_ranges = _read_prior_ranges("vvwm_param_priors.csv")
param_ranges = pd.concat([swmm_ranges, vvwm_ranges], axis=0)

# Debug aid kept for reference: restrict the run to a subset of parameters to
# test how many is too many ('rflat' and 'Rough' were left out of this list).
# if mode == "debug":
#     param_ranges = param_ranges.loc[[
#         'NImperv', 'NPerv', 'WCoeff2', 'BCoeff2', 'MaxRate', 'MinRate', 'PctZero',
#         'SImperv', 'SPerv', 'Decay', 'DryTime', 'Por', 'FC', 'WP', 'Ksat', 'Kdecay',
#         'kd', 'porosity', 'bulk_density', 'chl', 'froc1', 'doc1', 'froc2', 'doc2',
#         'bnmas', 'sused', 'sol', 'benthic_depth', 'hydro', 'photo',
#         'aer_aq', 'aer_aq_temp', 'anae_aq', 'anae_aq_temp',
#     ]]

# One {"Min": ..., "Range": ...} entry per parameter name.
priors = param_ranges.to_dict("index")

# One uniform random variable per parameter, with loc=Min and scale=Range.
# Pattern borrowed from Jeff:
# <https://github.com/JeffreyMinucci/bee_neonic_abc/blob/master/pyabc_run.ipynb>
prior = pyabc.Distribution(**{
    name: pyabc.RV("uniform", loc=bounds['Min'], scale=bounds['Range'])
    for name, bounds in priors.items()
})

In [4]:
# Show the per-parameter {'Min': ..., 'Range': ...} mapping built from param_ranges.
priors

{'NImperv': {'Min': 0.01, 'Range': 0.015},
 'NPerv': {'Min': 0.05, 'Range': 0.45},
 'WCoeff2': {'Min': 0.066, 'Range': 0.148},
 'BCoeff2': {'Min': 0.5, 'Range': 1.5},
 'MaxRate': {'Min': 8.46, 'Range': 118.54},
 'MinRate': {'Min': 0.254, 'Range': 10.666},
 'PctZero': {'Min': 0.01, 'Range': 99.99},
 'SImperv': {'Min': 1.27, 'Range': 1.27},
 'SPerv': {'Min': 2.54, 'Range': 2.54},
 'Decay': {'Min': 2.0, 'Range': 5.0},
 'DryTime': {'Min': 2.0, 'Range': 12.0},
 'Por': {'Min': 0.4, 'Range': 0.1},
 'FC': {'Min': 0.06, 'Range': 0.32},
 'WP': {'Min': 0.024, 'Range': 0.241},
 'Ksat': {'Min': 0.25, 'Range': 12.45},
 'Kdecay': {'Min': 0.002739726, 'Range': 0.197260274},
 'kd': {'Min': 882.0, 'Range': 5028.0},
 'porosity': {'Min': 0.1, 'Range': 0.7},
 'bulk_density': {'Min': 0.86, 'Range': 0.9},
 'chl': {'Min': 0.001, 'Range': 1.499},
 'froc1': {'Min': 0.001, 'Range': 0.139},
 'doc1': {'Min': 0.1, 'Range': 14.9},
 'froc2': {'Min': 0.001, 'Range': 0.029},
 'doc2': {'Min': 0.01, 'Range': 59.99},
 'bn

In [4]:
# Show the pyabc.Distribution assembled from the uniform priors above.
prior

<Distribution 'BCoeff2', 'Decay', 'DryTime', 'FC', 'Kdecay', 'Ksat', 'MaxRate', 'MinRate', 'NImperv', 'NPerv', 'PctZero', 'Por', 'SImperv', 'SPerv', 'WCoeff2', 'WP', 'aer_aq', 'aer_aq_temp', 'anae_aq', 'anae_aq_temp', 'benthic_depth', 'bnmas', 'bulk_density', 'chl', 'doc1', 'doc2', 'froc1', 'froc2', 'hydro', 'kd', 'photo', 'porosity', 'sol', 'sused'>

# Make the .new object
### 1. Import observed data

In [5]:
# Load the observed data for the active `mode` so it can be inspected here
# and later passed to abc.new().
# Each mode keeps its observations in its own folder under main_path.
obs_data_files = {
    'debug': ('master_debug', 'debug_obs_data.txt'),
    'test': ('master_test', 'test_obs_data.txt'),
    'run': ('master', 'obs_data.txt'),
}

if mode not in obs_data_files:
    # An unrecognised mode used to fall through every branch and only show up
    # as a NameError on obs_dict; fail with an explicit message instead.
    raise ValueError(
        "Unknown mode {!r}; expected one of {}".format(mode, sorted(obs_data_files))
    )

with open(os.path.join(main_path, *obs_data_files[mode]), 'r') as read_file:
    # NOTE(review): eval() executes whatever the file contains. That is only
    # acceptable while these files are produced by this project; if the file
    # is always a plain dict literal, ast.literal_eval would be the safer parser.
    obs_dict = eval(read_file.read())
obs_dict

{'2009-02-13_28': 0.0485, '2009-04-07_28': 0.0192, '2009-04-13_28': 0.00858}

### 2. Initialize dask client for dask distributed sampler

In [6]:
# Dask distributed sampling is parked for now. The earlier attempts are kept
# here for reference:
#
#     from dask.distributed import Client  # , LocalCluster
#     cluster = LocalCluster()  # n_workers=(90/2), threads_per_worker = 2)  # Set for 96 vCPU compute instance
#     client = Client(cluster)  # ,timeout=400)
#
# Simpler variant:
#     # if __name__ == "__main__":
#     client = Client()
#     sampler = pyabc.sampler.DaskDistributedSampler(dask_client = client)

# Use the single-core sampler to see whether that takes those errors out.
sampler = pyabc.sampler.SingleCoreSampler()

# Make the process more transparent: show progress and record rejected samples.
sampler.show_progress = True
sampler.sample_factory.record_rejected = True

### 3. Set up a sqlite db directory

In [7]:
# Initialize a new ABC inference run
dbid = uuid.uuid4().hex[0:8]
print(dbid)
database_dir = os.path.join(temp_path, 'results_db')  
if not os.path.exists(database_dir):
    os.mkdir(database_dir)
db_path = ("sqlite:///" +
           os.path.join(database_dir, "test_pyabc_" + dbid + ".db"))

f3e83b6a


### 4. Defining a Distance function

The NSE (Nash–Sutcliffe efficiency) distance function needs to be refactored to use the pyabc.Distance class.
This requires the hydroeval library and pyabc.SimpleFunctionDistance.

In [8]:
# make a file to hold onto these NSEs for our own record
with open(os.path.join(temp_path, "NSEs_" + dbid + ".txt"), "w") as nse_file:
    nse_file.write("NSEs\n")

In [9]:
def nse(x, x_0):
    """Compute the Nash-Sutcliffe efficiency of simulated vs. observed data.

    Parameters
    ----------
    x : dict
        Simulated values, keyed the same way as ``x_0``.
    x_0 : dict
        Observed values.

    Returns
    -------
    The first element of the array returned by ``he.evaluator(he.nse, ...)``.

    Raises
    ------
    KeyError
        If ``x`` is missing a key that is present in ``x_0``.

    Side effects
    ------------
    Prints the score and appends it to the ``NSEs_<dbid>.txt`` file in
    ``temp_path``.
    """
    # Pair simulated and observed values by key rather than by dict iteration
    # order, so a differently ordered simulation dict cannot silently
    # misalign the two series.
    observed_keys = list(x_0)
    simulated = np.array([x[key] for key in observed_keys])
    observed = np.array([x_0[key] for key in observed_keys])

    # The simulations argument is passed positionally because its keyword
    # name differs between hydroeval releases.
    score = he.evaluator(he.nse, simulated, evaluation=observed)[0]
    print("nse ", score)
    # make record
    with open(os.path.join(temp_path, "NSEs_" + dbid + ".txt"), "a") as nse_file:
        nse_file.write(str(score) + "\n")
    return score


# Raw NSE wrapped as a pyabc distance.
NSE = pyabc.SimpleFunctionDistance(fun=nse)

# The best possible NSE is 1, so use the gap to 1 as the distance
# (0 means a perfect match).
NSED = pyabc.SimpleFunctionDistance(fun=lambda x, x_0: (1 - nse(x, x_0)))

### 5. Define ABCSMC object

In [10]:
# A constant population of 4 particles, just to shorten the run
# (noted by the author as something that might fix the dask problem too).
population_strategy = pyabc.ConstantPopulationSize(4)

# ABC-SMC driver: model + priors, scored with the (1 - NSE) distance.
abc = pyabc.ABCSMC(
    model,
    prior,
    distance_function=NSED,
    population_size=population_strategy,
    sampler=sampler,
)

In [11]:
# Show the configured ABCSMC object.
abc

<pyabc.inference.smc.ABCSMC at 0x2441cf92580>

### 6. Initialize a new abc run

In [12]:
# Start a new run stored in the database at db_path, with obs_dict as the
# observed data; the returned History object is displayed as the cell output.
abc.new(db_path, obs_dict)

<pyabc.storage.history.History at 0x2441cf04e50>

In [13]:
# Run the inference with max_nr_populations=2 and minimum_epsilon=0.2
# as the stopping settings.
# NOTE(review): an earlier comment here said "Back to 1 gen", which does not
# match max_nr_populations=2 — confirm which is intended.
history = abc.run(max_nr_populations=2, minimum_epsilon=0.2)

Folder  b0602853  created 

second path removal successful
Folder  b0602853 outfall_31_28  created 

second path removal successful
second path removal successful
second path removal successful
second path removal successful
second path removal successful
 25% |██████                  | 1/4 Folder  b64dc431  created 

second path removal successful
Folder  b64dc431 outfall_31_28  created 

second path removal successful
second path removal successful
second path removal successful
second path removal successful
second path removal successful
 50% |████████████            | 2/4 Folder  4d234a96  created 

second path removal successful
Folder  4d234a96 outfall_31_28  created 

second path removal successful
second path removal successful
second path removal successful
second path removal successful
second path removal successful
 75% |██████████████████      | 3/4 Folder  8b55a285  created 

second path removal successful
Folder  8b55a285 outfall_31_28  created 

second path removal suc