From 1e92cf3b379909a737d50b68506f0ca2fb5f5f07 Mon Sep 17 00:00:00 2001
From: jwhite <jtwhite1000@gmail.com>
Date: Tue, 16 Apr 2024 12:47:47 -0600
Subject: [PATCH] testing thresholding

---
 autotest/pst_from_tests.py | 56 ++++++++++++++++++++------------------
 pyemu/utils/helpers.py     | 50 ++++++++++++++++++++++++----------
 pyemu/utils/os_utils.py    |  2 +-
 3 files changed, 65 insertions(+), 43 deletions(-)

diff --git a/autotest/pst_from_tests.py b/autotest/pst_from_tests.py
index 391de843..81440ad0 100644
--- a/autotest/pst_from_tests.py
+++ b/autotest/pst_from_tests.py
@@ -4886,17 +4886,17 @@ def list_float_int_index_test(tmp_path):
     assert np.isclose(diff,bparval1).all(), diff.loc[~np.isclose(diff,bparval1)]
 
 
-def mf6_freyberg_thresh_invest(tmp_path):
+def mf6_freyberg_thresh_test(tmp_path):
 
     import numpy as np
     import pandas as pd
-    pd.set_option('display.max_rows', 500)
-    pd.set_option('display.max_columns', 500)
-    pd.set_option('display.width', 1000)
-    try:
-        import flopy
-    except:
-        return
+    # pd.set_option('display.max_rows', 500)
+    # pd.set_option('display.max_columns', 500)
+    # pd.set_option('display.width', 1000)
+    # try:
+    import flopy
+    # except:
+    #     return
 
     org_model_ws = os.path.join('..', 'examples', 'freyberg_mf6')
     tmp_model_ws = setup_tmp(org_model_ws, tmp_path)
@@ -5096,10 +5096,7 @@ def mf6_freyberg_thresh_invest(tmp_path):
     print(pst.npar,pst.npar_adj)
 
     org_par = par.copy()
-
-
     num_reals = 100
-
     pe = pf.draw(num_reals, use_specsim=False)
     pe.enforce()
     print(pe.shape)
@@ -5153,8 +5150,9 @@ def mf6_freyberg_thresh_invest(tmp_path):
     # reset away from the truth...
     pst.parameter_data.loc[:,"parval1"] = org_par.parval1.values.copy()
 
-    pst.control_data.noptmax=6
+    pst.control_data.noptmax = 2
     pst.pestpp_options["ies_par_en"] = "prior.jcb"
+    pst.pestpp_options["ies_num_reals"] = 30
     pst.pestpp_options["ies_subset_size"] = -10
     pst.pestpp_options["ies_no_noise"] = True
     #pst.pestpp_options["ies_bad_phi_sigma"] = 2.0
@@ -5171,21 +5169,20 @@ def mf6_freyberg_thresh_invest(tmp_path):
     #pst.pestpp_options["ies_par_en"] = "prior.jcb"
     
     pst.write(os.path.join(pf.new_d, "freyberg.pst"), version=2)
+    m_d = "master_thresh"
+    pyemu.os_utils.start_workers(pf.new_d, ies_exe_path, "freyberg.pst", worker_root=".", master_dir=m_d,
+                                 num_workers=10)
+    phidf = pd.read_csv(os.path.join(m_d,"freyberg.phi.actual.csv"))
+    print(phidf["mean"])
+    assert phidf["mean"].min() < 1000
 
-    pyemu.os_utils.start_workers(pf.new_d, ies_exe_path, "freyberg.pst", worker_root=".", master_dir="master_thresh",
-                                 num_workers=40)
-
-    pst.pestpp_options["ies_multimodal_alpha"] = 0.99
+    #pst.pestpp_options["ies_multimodal_alpha"] = 0.99
     
     #pst.pestpp_options["ies_num_threads"] = 6
-    pst.write(os.path.join(pf.new_d, "freyberg.pst"),version=2)
+    #pst.write(os.path.join(pf.new_d, "freyberg.pst"),version=2)
 
-    pyemu.os_utils.start_workers(pf.new_d, ies_exe_path, "freyberg.pst", worker_root=".", master_dir="master_thresh_mm",
-                                 num_workers=40)
-    # if os.path.exists(m_d):
-    #     shutil.rmtree(m_d)
-    # shutil.copytree(pf.new_d,m_d)
-    # pyemu.os_utils.run("{0} freyberg.pst".format(ies_exe_path),cwd=m_d)
+    #pyemu.os_utils.start_workers(pf.new_d, ies_exe_path, "freyberg.pst", worker_root=".", master_dir="master_thresh_mm",
+    #                             num_workers=40)
 
 def plot_thresh(m_d):
     import flopy
@@ -5219,6 +5216,10 @@ def plot_thresh(m_d):
     pr_pv = pr_oe.phi_vector
     pr_pv.sort_values(inplace=True,ascending=False)
     pr_oe = pr_oe.loc[pr_pv.index,:]
+
+    reals_to_plot = pr_pv.index[:19].tolist()
+    if "base" in pr_pv.index:
+        reals_to_plot.append("base")
     for iiter in range(1,mxiter+1):
         pt_oe = pyemu.ObservationEnsemble.from_csv(pst=pst,filename=os.path.join(m_d, "freyberg.{0}.obs.csv".format(iiter)))
         pt_oe.index = pt_oe.index.map(str)
@@ -5280,7 +5281,8 @@ def plot_thresh(m_d):
 
             with PdfPages(os.path.join(m_d,"results_{0}_hk_layer_{1}.pdf".format(iiter,k+1))) as pdf:
                 ireal = 0
-                for real in pr_oe.index:
+                #for real in pr_oe.index:
+                for real in reals_to_plot:
                     if real not in pt_oe.index:
                         continue
                     prarr = np.zeros((nrow,ncol)) - 1
@@ -5345,10 +5347,10 @@ def plot_thresh(m_d):
     #mf6_freyberg_shortnames_test()
     #mf6_freyberg_direct_test()
 
-    mf6_freyberg_thresh_invest(".")
+    mf6_freyberg_thresh_test(".")
 
-    #plot_thresh("master_thresh")
-    plot_thresh("master_thresh_mm")
+    plot_thresh("master_thresh")
+    #plot_thresh("master_thresh_mm")
     #mf6_freyberg_varying_idomain()
     # xsec_test()
     # mf6_freyberg_short_direct_test()
diff --git a/pyemu/utils/helpers.py b/pyemu/utils/helpers.py
index 8cfb2803..4376d740 100644
--- a/pyemu/utils/helpers.py
+++ b/pyemu/utils/helpers.py
@@ -3691,10 +3691,6 @@ def setup_threshold_pars(orgarr_file,cat_dict,testing_workspace=".",inact_arr=No
         raise Exception("only two categories currently supported, {0} found in target_proportions_dict".\
                         format(len(cat_dict)))
 
-    tol = 1.0e-10
-    if org_arr.std() < tol * 2.0:
-        print("WARNING: org_arr has very low standard deviation, adding some noise for now...")
-        org_arr += np.random.normal(0,1e-6,org_arr.shape)
     prop_tags,prop_vals,fill_vals = [],[],[]
     for key,(proportion,fill_val) in cat_dict.items():
         if int(key) not in cat_dict:
@@ -3729,6 +3725,11 @@ def setup_threshold_pars(orgarr_file,cat_dict,testing_workspace=".",inact_arr=No
 def apply_threshold_pars(csv_file):
     """apply the thresholding process.  everything keys off of csv_file name...
 
+    Note: if the standard deviation of the continous thresholding array is too low,
+    the line search will fail.  Currently, if this stdev is less than 1.e-10,
+    then a homogenous array of the first category fill value will be created.  User
+    beware!
+
     """
     df = pd.read_csv(csv_file,index_col=0)
     thresarr_file = csv_file.replace("props.csv","arr.dat")
@@ -3745,6 +3746,11 @@ def apply_threshold_pars(csv_file):
     tvals = df.threshproportion.astype(float).values.copy()
     #ttags = df.threshcat.astype(int).values.copy()
     tfill = df.threshfill.astype(float).values.copy()
+
+    iarr = None
+    if os.path.exists(inactarr_file):
+        iarr = np.loadtxt(inactarr_file, dtype=int)
+
     # for now...
     assert len(tvals) == 2
     if np.any(tvals <= 0.0):
@@ -3755,11 +3761,26 @@ def apply_threshold_pars(csv_file):
     target_prop = tvals[0]
 
     tol = 1.0e-10
-    if tarr.std() < tol * 2.0:
-        #raise Exception("thresholding array {0} has very low standard deviation".format(thresarr_file))
-
-        print("WARNING: thresholding array {0} has very low standard deviation, adding noise".format(thresarr_file))
-        tarr += np.random.normal(0, tol*2.0, tarr.shape)
+    if tarr.std() < tol:
+        print("WARNING: thresholding array {0} has very low standard deviation".format(thresarr_file))
+        print("         using a homogenous array with first category fill value {0}".format(tfill[0]))
+
+        farr = np.zeros_like(tarr) + tfill[0]
+        if iarr is not None:
+            farr[iarr == 0] = -1.0e+30
+            tarr[iarr == 0] = -1.0e+30
+        df.loc[tcat[0], "threshold"] = tarr.mean()
+        df.loc[tcat[1], "threshold"] = tarr.mean()
+        df.loc[tcat[0], "proportion"] = 1
+        df.loc[tcat[1], "proportion"] = 0
+
+        df.to_csv(csv_file.replace(".csv", "_results.csv"))
+        np.savetxt(orgarr_file, farr, fmt="%15.6E")
+        np.savetxt(tarr_file, tarr, fmt="%15.6E")
+        return tarr.mean(), 1.0
+
+        #    print("WARNING: thresholding array {0} has very low standard deviation, adding noise".format(thresarr_file))
+        #    tarr += np.random.normal(0, tol*2.0, tarr.shape)
 
     # a classic:
     gr = (np.sqrt(5.) + 1.) / 2.
@@ -3768,10 +3789,10 @@ def apply_threshold_pars(csv_file):
     c = b - ((b - a) / gr)
     d = a + ((b - a) / gr)
 
-    iarr = None
+
     tot_shape = tarr.shape[0] * tarr.shape[1]
-    if os.path.exists(inactarr_file):
-        iarr = np.loadtxt(inactarr_file, dtype=int)
+    if iarr is not None:
+
         # this keeps inact from interfering with calcs later...
         tarr[iarr == 0] = 1.0e+30
         tiarr = iarr.copy()
@@ -3805,12 +3826,9 @@ def get_current_prop(_cur_thresh):
         d = a + ((b - a) / gr)
         numiter += 1
 
-    #print(a,b,c,d)
     thresh = (a+b) / 2.0
     prop = get_current_prop(thresh)
-    #print(thresh,prop)
     farr = np.zeros_like(tarr) - 1
-
     farr[tarr>thresh] = tfill[1]
     farr[tarr<=thresh] = tfill[0]
     tarr[tarr>thresh] = tcat[0]
@@ -3821,6 +3839,8 @@ def get_current_prop(_cur_thresh):
         tarr[iarr == 0] = -1.0e+30
     df.loc[tcat[0],"threshold"] = thresh
     df.loc[tcat[1], "threshold"] = 1.0 - thresh
+    df.loc[tcat[0], "proportion"] = prop
+    df.loc[tcat[1], "proportion"] = 1.0 - prop
     df.to_csv(csv_file.replace(".csv","_results.csv"))
     np.savetxt(orgarr_file,farr,fmt="%15.6E")
     np.savetxt(tarr_file, tarr, fmt="%15.6E")
diff --git a/pyemu/utils/os_utils.py b/pyemu/utils/os_utils.py
index 21513165..38aee062 100644
--- a/pyemu/utils/os_utils.py
+++ b/pyemu/utils/os_utils.py
@@ -132,7 +132,7 @@ def run(cmd_str, cwd=".", verbose=False):
 
 def _try_remove_existing(d, forgive=False):
     try:
-        shutil.rmtree(d, onexc=_remove_readonly)  # , onerror=del_rw)
+        shutil.rmtree(d, onerror=_remove_readonly)  # , onerror=del_rw)
         return True
     except Exception as e:
         if not forgive: