From 1e92cf3b379909a737d50b68506f0ca2fb5f5f07 Mon Sep 17 00:00:00 2001 From: jwhite Date: Tue, 16 Apr 2024 12:47:47 -0600 Subject: [PATCH] testing thresholding --- autotest/pst_from_tests.py | 56 ++++++++++++++++++++------------------ pyemu/utils/helpers.py | 50 ++++++++++++++++++++++++---------- pyemu/utils/os_utils.py | 2 +- 3 files changed, 65 insertions(+), 43 deletions(-) diff --git a/autotest/pst_from_tests.py b/autotest/pst_from_tests.py index 391de843..81440ad0 100644 --- a/autotest/pst_from_tests.py +++ b/autotest/pst_from_tests.py @@ -4886,17 +4886,17 @@ def list_float_int_index_test(tmp_path): assert np.isclose(diff,bparval1).all(), diff.loc[~np.isclose(diff,bparval1)] -def mf6_freyberg_thresh_invest(tmp_path): +def mf6_freyberg_thresh_test(tmp_path): import numpy as np import pandas as pd - pd.set_option('display.max_rows', 500) - pd.set_option('display.max_columns', 500) - pd.set_option('display.width', 1000) - try: - import flopy - except: - return + # pd.set_option('display.max_rows', 500) + # pd.set_option('display.max_columns', 500) + # pd.set_option('display.width', 1000) + # try: + import flopy + # except: + # return org_model_ws = os.path.join('..', 'examples', 'freyberg_mf6') tmp_model_ws = setup_tmp(org_model_ws, tmp_path) @@ -5096,10 +5096,7 @@ def mf6_freyberg_thresh_invest(tmp_path): print(pst.npar,pst.npar_adj) org_par = par.copy() - - num_reals = 100 - pe = pf.draw(num_reals, use_specsim=False) pe.enforce() print(pe.shape) @@ -5153,8 +5150,9 @@ def mf6_freyberg_thresh_invest(tmp_path): # reset away from the truth... pst.parameter_data.loc[:,"parval1"] = org_par.parval1.values.copy() - pst.control_data.noptmax=6 + pst.control_data.noptmax = 2 pst.pestpp_options["ies_par_en"] = "prior.jcb" + pst.pestpp_options["ies_num_reals"] = 30 pst.pestpp_options["ies_subset_size"] = -10 pst.pestpp_options["ies_no_noise"] = True #pst.pestpp_options["ies_bad_phi_sigma"] = 2.0 @@ -5171,21 +5169,20 @@ def mf6_freyberg_thresh_invest(tmp_path): #pst.pestpp_options["ies_par_en"] = "prior.jcb" pst.write(os.path.join(pf.new_d, "freyberg.pst"), version=2) + m_d = "master_thresh" + pyemu.os_utils.start_workers(pf.new_d, ies_exe_path, "freyberg.pst", worker_root=".", master_dir=m_d, + num_workers=10) + phidf = pd.read_csv(os.path.join(m_d,"freyberg.phi.actual.csv")) + print(phidf["mean"]) + assert phidf["mean"].min() < 1000 - pyemu.os_utils.start_workers(pf.new_d, ies_exe_path, "freyberg.pst", worker_root=".", master_dir="master_thresh", - num_workers=40) - - pst.pestpp_options["ies_multimodal_alpha"] = 0.99 + #pst.pestpp_options["ies_multimodal_alpha"] = 0.99 #pst.pestpp_options["ies_num_threads"] = 6 - pst.write(os.path.join(pf.new_d, "freyberg.pst"),version=2) + #pst.write(os.path.join(pf.new_d, "freyberg.pst"),version=2) - pyemu.os_utils.start_workers(pf.new_d, ies_exe_path, "freyberg.pst", worker_root=".", master_dir="master_thresh_mm", - num_workers=40) - # if os.path.exists(m_d): - # shutil.rmtree(m_d) - # shutil.copytree(pf.new_d,m_d) - # pyemu.os_utils.run("{0} freyberg.pst".format(ies_exe_path),cwd=m_d) + #pyemu.os_utils.start_workers(pf.new_d, ies_exe_path, "freyberg.pst", worker_root=".", master_dir="master_thresh_mm", + # num_workers=40) def plot_thresh(m_d): import flopy @@ -5219,6 +5216,10 @@ def plot_thresh(m_d): pr_pv = pr_oe.phi_vector pr_pv.sort_values(inplace=True,ascending=False) pr_oe = pr_oe.loc[pr_pv.index,:] + + reals_to_plot = pr_pv.index[:19].tolist() + if "base" in pr_pv.index: + reals_to_plot.append("base") for iiter in range(1,mxiter+1): pt_oe = pyemu.ObservationEnsemble.from_csv(pst=pst,filename=os.path.join(m_d, "freyberg.{0}.obs.csv".format(iiter))) pt_oe.index = pt_oe.index.map(str) @@ -5280,7 +5281,8 @@ def plot_thresh(m_d): with PdfPages(os.path.join(m_d,"results_{0}_hk_layer_{1}.pdf".format(iiter,k+1))) as pdf: ireal = 0 - for real in pr_oe.index: + #for real in pr_oe.index: + for real in reals_to_plot: if real not in pt_oe.index: continue prarr = np.zeros((nrow,ncol)) - 1 @@ -5345,10 +5347,10 @@ def plot_thresh(m_d): #mf6_freyberg_shortnames_test() #mf6_freyberg_direct_test() - mf6_freyberg_thresh_invest(".") + mf6_freyberg_thresh_test(".") - #plot_thresh("master_thresh") - plot_thresh("master_thresh_mm") + plot_thresh("master_thresh") + #plot_thresh("master_thresh_mm") #mf6_freyberg_varying_idomain() # xsec_test() # mf6_freyberg_short_direct_test() diff --git a/pyemu/utils/helpers.py b/pyemu/utils/helpers.py index 8cfb2803..4376d740 100644 --- a/pyemu/utils/helpers.py +++ b/pyemu/utils/helpers.py @@ -3691,10 +3691,6 @@ def setup_threshold_pars(orgarr_file,cat_dict,testing_workspace=".",inact_arr=No raise Exception("only two categories currently supported, {0} found in target_proportions_dict".\ format(len(cat_dict))) - tol = 1.0e-10 - if org_arr.std() < tol * 2.0: - print("WARNING: org_arr has very low standard deviation, adding some noise for now...") - org_arr += np.random.normal(0,1e-6,org_arr.shape) prop_tags,prop_vals,fill_vals = [],[],[] for key,(proportion,fill_val) in cat_dict.items(): if int(key) not in cat_dict: @@ -3729,6 +3725,11 @@ def setup_threshold_pars(orgarr_file,cat_dict,testing_workspace=".",inact_arr=No def apply_threshold_pars(csv_file): """apply the thresholding process. everything keys off of csv_file name... + Note: if the standard deviation of the continous thresholding array is too low, + the line search will fail. Currently, if this stdev is less than 1.e-10, + then a homogenous array of the first category fill value will be created. User + beware! + """ df = pd.read_csv(csv_file,index_col=0) thresarr_file = csv_file.replace("props.csv","arr.dat") @@ -3745,6 +3746,11 @@ def apply_threshold_pars(csv_file): tvals = df.threshproportion.astype(float).values.copy() #ttags = df.threshcat.astype(int).values.copy() tfill = df.threshfill.astype(float).values.copy() + + iarr = None + if os.path.exists(inactarr_file): + iarr = np.loadtxt(inactarr_file, dtype=int) + # for now... assert len(tvals) == 2 if np.any(tvals <= 0.0): @@ -3755,11 +3761,26 @@ def apply_threshold_pars(csv_file): target_prop = tvals[0] tol = 1.0e-10 - if tarr.std() < tol * 2.0: - #raise Exception("thresholding array {0} has very low standard deviation".format(thresarr_file)) - - print("WARNING: thresholding array {0} has very low standard deviation, adding noise".format(thresarr_file)) - tarr += np.random.normal(0, tol*2.0, tarr.shape) + if tarr.std() < tol: + print("WARNING: thresholding array {0} has very low standard deviation".format(thresarr_file)) + print(" using a homogenous array with first category fill value {0}".format(tfill[0])) + + farr = np.zeros_like(tarr) + tfill[0] + if iarr is not None: + farr[iarr == 0] = -1.0e+30 + tarr[iarr == 0] = -1.0e+30 + df.loc[tcat[0], "threshold"] = tarr.mean() + df.loc[tcat[1], "threshold"] = tarr.mean() + df.loc[tcat[0], "proportion"] = 1 + df.loc[tcat[1], "proportion"] = 0 + + df.to_csv(csv_file.replace(".csv", "_results.csv")) + np.savetxt(orgarr_file, farr, fmt="%15.6E") + np.savetxt(tarr_file, tarr, fmt="%15.6E") + return tarr.mean(), 1.0 + + # print("WARNING: thresholding array {0} has very low standard deviation, adding noise".format(thresarr_file)) + # tarr += np.random.normal(0, tol*2.0, tarr.shape) # a classic: gr = (np.sqrt(5.) + 1.) / 2. @@ -3768,10 +3789,10 @@ def apply_threshold_pars(csv_file): c = b - ((b - a) / gr) d = a + ((b - a) / gr) - iarr = None + tot_shape = tarr.shape[0] * tarr.shape[1] - if os.path.exists(inactarr_file): - iarr = np.loadtxt(inactarr_file, dtype=int) + if iarr is not None: + # this keeps inact from interfering with calcs later... tarr[iarr == 0] = 1.0e+30 tiarr = iarr.copy() @@ -3805,12 +3826,9 @@ def get_current_prop(_cur_thresh): d = a + ((b - a) / gr) numiter += 1 - #print(a,b,c,d) thresh = (a+b) / 2.0 prop = get_current_prop(thresh) - #print(thresh,prop) farr = np.zeros_like(tarr) - 1 - farr[tarr>thresh] = tfill[1] farr[tarr<=thresh] = tfill[0] tarr[tarr>thresh] = tcat[0] @@ -3821,6 +3839,8 @@ def get_current_prop(_cur_thresh): tarr[iarr == 0] = -1.0e+30 df.loc[tcat[0],"threshold"] = thresh df.loc[tcat[1], "threshold"] = 1.0 - thresh + df.loc[tcat[0], "proportion"] = prop + df.loc[tcat[1], "proportion"] = 1.0 - prop df.to_csv(csv_file.replace(".csv","_results.csv")) np.savetxt(orgarr_file,farr,fmt="%15.6E") np.savetxt(tarr_file, tarr, fmt="%15.6E") diff --git a/pyemu/utils/os_utils.py b/pyemu/utils/os_utils.py index 21513165..38aee062 100644 --- a/pyemu/utils/os_utils.py +++ b/pyemu/utils/os_utils.py @@ -132,7 +132,7 @@ def run(cmd_str, cwd=".", verbose=False): def _try_remove_existing(d, forgive=False): try: - shutil.rmtree(d, onexc=_remove_readonly) # , onerror=del_rw) + shutil.rmtree(d, onerror=_remove_readonly) # , onerror=del_rw) return True except Exception as e: if not forgive: