From 6a66358ab9583ec706f636e2f91022b25f5c5c1f Mon Sep 17 00:00:00 2001 From: Jeremy White Date: Mon, 15 Mar 2021 16:52:25 -0600 Subject: [PATCH 1/4] first impl of arr par summary --- pyemu/utils/helpers.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/pyemu/utils/helpers.py b/pyemu/utils/helpers.py index f1363bdd0..66e377ffc 100644 --- a/pyemu/utils/helpers.py +++ b/pyemu/utils/helpers.py @@ -3912,6 +3912,33 @@ def apply_list_pars(): ) +def process_arr_par_summary_stats(arr_par_file="mult2model_info.csv",abs_mask=1.0e+20): + df = pd.read_csv(arr_par_file, index_col=0) + arr_pars = df.loc[df.index_cols.isna()].copy() + model_input_files = df.model_file.unique() + model_input_files.sort() + records = dict() + stat_dict = {"mean":np.nanmean,"stdev":np.nanstd,"median":np.nanmedian} + quantiles = [0.05,0.25,0.75,0.95] + for stat in stat_dict.keys(): + records[stat] = [] + for q in quantiles: + records["quantile_{0}".format(q)] = [] + + for model_input_file in model_input_files: + + arr = np.loadtxt(model_input_file) + arr[np.abs(arr) > abs_mask] = np.nan + for stat,func in stat_dict.items(): + records[stat].append(func(arr)) + for q in quantiles: + records["quantile_{0}".format(q)].append(np.nanquantile(arr,q)) + #scrub model input files + model_input_files = [f.replace(".","_").replace("\\","_").replace("/","_") for f in model_input_files] + df = pd.DataFrame(records,index=model_input_files) + df.index.name = "model_file" + df.to_csv("arr_par_summary.csv") + def apply_genericlist_pars(df,chunk_len=50): """a function to apply list style mult parameters From a7cf958a6a3571df5fea9a17d7b033a9e9c758fa Mon Sep 17 00:00:00 2001 From: Jeremy White Date: Tue, 16 Mar 2021 11:34:06 -0600 Subject: [PATCH 2/4] more work on arr par summary --- autotest/pst_from_tests.py | 21 ++++++++++---- pyemu/utils/helpers.py | 57 +++++++++++++++++++++++++++++++++++--- pyemu/utils/pst_from.py | 10 +++++++ 3 files changed, 79 insertions(+), 9 deletions(-) diff 
--git a/autotest/pst_from_tests.py b/autotest/pst_from_tests.py index bedc6eafc..d3f4aa82f 100644 --- a/autotest/pst_from_tests.py +++ b/autotest/pst_from_tests.py @@ -1618,8 +1618,6 @@ def mf6_freyberg_varying_idomain(): a = m.dis.idomain.array[k,:,:].copy() print(a) ib[k] = a - #return - #ib[0][:2,:] = 0 tags = {"npf_k_": [0.1, 10.]}#, "npf_k33_": [.1, 10], "sto_ss": [.1, 10], "sto_sy": [.9, 1.1]} dts = pd.to_datetime("1-1-2018") + pd.to_timedelta(np.cumsum(sim.tdis.perioddata.array["perlen"]), unit="d") @@ -1644,7 +1642,14 @@ def mf6_freyberg_varying_idomain(): df = pf.add_observations("heads.csv", insfile="heads.csv.ins", index_cols="time", use_cols=list(df.columns.values), prefix="hds", ofile_sep=",") - # build pest + + pst = pf.build_pst('freyberg.pst') + os.chdir(pf.new_d) + df = pyemu.helpers.calc_arr_par_summary_stats() + os.chdir("..") + pf.post_py_cmds.append("pyemu.helpers.calc_arr_par_summary_stats()") + pf.add_observations("arr_par_summary.csv",index_cols=["model_file"],use_cols=df.columns.tolist(), + obsgp=["arr_par_summary" for _ in df.columns],prefix=["arr_par_summary" for _ in df.columns]) pst = pf.build_pst('freyberg.pst') pst.control_data.noptmax = 0 pst.write(os.path.join(pf.new_d, "freyberg.pst")) @@ -1657,6 +1662,12 @@ def mf6_freyberg_varying_idomain(): assert pst.phi < 1.0e-6 + + + + + + def xsec_test(): import numpy as np import pandas as pd @@ -2669,12 +2680,12 @@ def mf6_freyberg_arr_obs_and_headerless_test(): if __name__ == "__main__": #invest() - freyberg_test() + #freyberg_test() #freyberg_prior_build_test() #mf6_freyberg_test() #mf6_freyberg_shortnames_test() #mf6_freyberg_direct_test() - #mf6_freyberg_varying_idomain() + mf6_freyberg_varying_idomain() #xsec_test() #mf6_freyberg_short_direct_test() #tpf = TestPstFrom() diff --git a/pyemu/utils/helpers.py b/pyemu/utils/helpers.py index 66e377ffc..e14fe261b 100644 --- a/pyemu/utils/helpers.py +++ b/pyemu/utils/helpers.py @@ -3912,7 +3912,23 @@ def apply_list_pars(): ) -def 
process_arr_par_summary_stats(arr_par_file="mult2model_info.csv",abs_mask=1.0e+20): +def calc_arr_par_summary_stats(arr_par_file="mult2model_info.csv",abs_mask=1.0e+20): + """read and generate summary statistics for the resulting model input arrays from + applying array par multipliers + + Args: + arr_par_file (`str`): the array multiplier key file. + abs_mask (`float`): value above which the abs of values + in the model input array is masked + + Returns: + pd.DataFrame: dataframe of summary stats for each model_file entry + + Note: + this function uses an optional "zone_file" column. If multiple zones + files are used, then zone arrays are aggregated to a single array + + """ df = pd.read_csv(arr_par_file, index_col=0) arr_pars = df.loc[df.index_cols.isna()].copy() model_input_files = df.model_file.unique() @@ -3922,22 +3938,55 @@ def process_arr_par_summary_stats(arr_par_file="mult2model_info.csv",abs_mask=1. quantiles = [0.05,0.25,0.75,0.95] for stat in stat_dict.keys(): records[stat] = [] + records[stat+"_org"] = [] + records[stat + "_dif"] = [] + for q in quantiles: records["quantile_{0}".format(q)] = [] + records["quantile_{0}_org".format(q)] = [] + records["quantile_{0}_dif".format(q)] = [] for model_input_file in model_input_files: arr = np.loadtxt(model_input_file) - arr[np.abs(arr) > abs_mask] = np.nan + org_file = df.loc[df.model_file==model_input_file,"org_file"].values + + print(org_file) + org_file = org_file[0] + org_arr = np.loadtxt(org_file) + if "zone_file" in df.columns: + zone_file = df.loc[df.model_file == model_input_file,"zone_file"].dropna().unique() + if len(zone_file) > 1: + zone_arr = np.zeros_like(arr) + for zf in zone_file: + za = np.loadtxt(zf) + zone_arr[za!=0] = 1 + else: + zone_arr = np.loadtxt(zone_file[0]) + arr[zone_arr==0] = np.NaN + org_arr[zone_arr==0] = np.NaN + else: + arr[np.abs(arr) > abs_mask] = np.nan + org_arr[np.abs(arr) > abs_mask] = np.nan for stat,func in stat_dict.items(): - records[stat].append(func(arr)) + v = 
func(arr) + records[stat].append(v) + ov = func(org_arr) + records[stat+"_org"].append(ov) + records[stat+"_dif"].append(v-ov) for q in quantiles: - records["quantile_{0}".format(q)].append(np.nanquantile(arr,q)) + v = np.nanquantile(arr,q) + ov = np.nanquantile(org_arr,q) + records["quantile_{0}".format(q)].append(v) + records["quantile_{0}_org".format(q)].append(ov) + records["quantile_{0}_dif".format(q)].append(v-ov) + #scrub model input files model_input_files = [f.replace(".","_").replace("\\","_").replace("/","_") for f in model_input_files] df = pd.DataFrame(records,index=model_input_files) df.index.name = "model_file" df.to_csv("arr_par_summary.csv") + return df def apply_genericlist_pars(df,chunk_len=50): """a function to apply list style mult parameters diff --git a/pyemu/utils/pst_from.py b/pyemu/utils/pst_from.py index 6c3615f78..9af77a65c 100644 --- a/pyemu/utils/pst_from.py +++ b/pyemu/utils/pst_from.py @@ -1994,6 +1994,14 @@ def add_parameters( # eventually filled by PEST) to actual model files so that actual # model input file can be generated # (using helpers.apply_list_and_array_pars()) + zone_filename = None + if zone_array is not None: + #zone_filename = tpl_filename.replace(".tpl",".zone") + zone_filename = Path(str(tpl_filename).replace(".tpl", ".zone")) + self.logger.statement("saving zone array {0} for tpl file {1}".format(zone_filename,tpl_filename)) + np.savetxt(zone_filename,zone_array,fmt="%4d") + zone_filename = zone_filename.name + relate_parfiles = [] for mod_file in file_dict.keys(): mult_dict = { @@ -2018,6 +2026,8 @@ def add_parameters( mult_dict["pp_fill_value"] = 1.0 mult_dict["pp_lower_limit"] = 1.0e-10 mult_dict["pp_upper_limit"] = 1.0e+10 + if zone_filename is not None: + mult_dict["zone_file"] = zone_filename relate_parfiles.append(mult_dict) relate_pars_df = pd.DataFrame(relate_parfiles) # store on self for use in pest build etc From 9dda2a0baaab2fe22e6ef846ed8d23132e0f2a28 Mon Sep 17 00:00:00 2001 From: Jeremy White Date: 
Tue, 16 Mar 2021 12:06:21 -0600 Subject: [PATCH 3/4] fix for 3d zone array --- pyemu/utils/pst_from.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyemu/utils/pst_from.py b/pyemu/utils/pst_from.py index 9af77a65c..3ddd43cc7 100644 --- a/pyemu/utils/pst_from.py +++ b/pyemu/utils/pst_from.py @@ -1995,7 +1995,7 @@ def add_parameters( # model input file can be generated # (using helpers.apply_list_and_array_pars()) zone_filename = None - if zone_array is not None: + if zone_array is not None and zone_array.ndim < 3: #zone_filename = tpl_filename.replace(".tpl",".zone") zone_filename = Path(str(tpl_filename).replace(".tpl", ".zone")) self.logger.statement("saving zone array {0} for tpl file {1}".format(zone_filename,tpl_filename)) From ab36b62b17080e36b0692371dc960c80daf7b4e3 Mon Sep 17 00:00:00 2001 From: Jeremy White Date: Tue, 16 Mar 2021 14:25:31 -0600 Subject: [PATCH 4/4] tweaks to docstring --- autotest/pst_from_tests.py | 18 +++++++++++++++--- pyemu/utils/helpers.py | 10 ++++++---- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/autotest/pst_from_tests.py b/autotest/pst_from_tests.py index d3f4aa82f..140154ebc 100644 --- a/autotest/pst_from_tests.py +++ b/autotest/pst_from_tests.py @@ -1643,11 +1643,12 @@ def mf6_freyberg_varying_idomain(): prefix="hds", ofile_sep=",") - pst = pf.build_pst('freyberg.pst') + #pst = pf.build_pst('freyberg.pst') + pf.parfile_relations.to_csv(os.path.join(pf.new_d, "mult2model_info.csv")) os.chdir(pf.new_d) - df = pyemu.helpers.calc_arr_par_summary_stats() + df = pyemu.helpers.calc_array_par_summary_stats() os.chdir("..") - pf.post_py_cmds.append("pyemu.helpers.calc_arr_par_summary_stats()") + pf.post_py_cmds.append("pyemu.helpers.calc_array_par_summary_stats()") pf.add_observations("arr_par_summary.csv",index_cols=["model_file"],use_cols=df.columns.tolist(), obsgp=["arr_par_summary" for _ in df.columns],prefix=["arr_par_summary" for _ in df.columns]) pst = pf.build_pst('freyberg.pst') @@ -1661,6 
+1662,17 @@ def mf6_freyberg_varying_idomain(): print(pst.phi) assert pst.phi < 1.0e-6 + df = pd.read_csv(os.path.join(pf.new_d,"mult2model_info.csv"), index_col=0) + arr_pars = df.loc[df.index_cols.isna()].copy() + model_files = arr_pars.model_file.unique() + pst.try_parse_name_metadata() + for model_file in model_files: + arr = np.loadtxt(os.path.join(pf.new_d,model_file)) + clean_name = model_file.replace(".","_").replace("\\","_").replace("/","_") + sim_val = pst.res.loc[pst.res.name.apply(lambda x: clean_name in x ),"modelled"] + sim_val = sim_val.loc[sim_val.index.map(lambda x: "mean_model_file" in x)] + print(model_file,sim_val,arr.mean()) + diff --git a/pyemu/utils/helpers.py b/pyemu/utils/helpers.py index e14fe261b..fa5d40dcb 100644 --- a/pyemu/utils/helpers.py +++ b/pyemu/utils/helpers.py @@ -3912,14 +3912,15 @@ def apply_list_pars(): ) -def calc_arr_par_summary_stats(arr_par_file="mult2model_info.csv",abs_mask=1.0e+20): +def calc_array_par_summary_stats(arr_par_file="mult2model_info.csv", abs_mask=1.0e+20): """read and generate summary statistics for the resulting model input arrays from applying array par multipliers Args: arr_par_file (`str`): the array multiplier key file. abs_mask (`float`): value above which the abs of values - in the model input array is masked + in the model input array is masked. 
The mask is only + applied if the `zone_file` column is nan and `abs_mask` is not `None` Returns: pd.DataFrame: dataframe of summary stats for each model_file entry @@ -3951,7 +3952,7 @@ def calc_arr_par_summary_stats(arr_par_file="mult2model_info.csv",abs_mask=1.0e+ arr = np.loadtxt(model_input_file) org_file = df.loc[df.model_file==model_input_file,"org_file"].values - print(org_file) + #print(org_file) org_file = org_file[0] org_arr = np.loadtxt(org_file) if "zone_file" in df.columns: @@ -3965,7 +3966,7 @@ def calc_arr_par_summary_stats(arr_par_file="mult2model_info.csv",abs_mask=1.0e+ zone_arr = np.loadtxt(zone_file[0]) arr[zone_arr==0] = np.NaN org_arr[zone_arr==0] = np.NaN - else: + elif abs_mask is not None: arr[np.abs(arr) > abs_mask] = np.nan org_arr[np.abs(arr) > abs_mask] = np.nan for stat,func in stat_dict.items(): @@ -4125,6 +4126,7 @@ def _process_list_file(model_file,df): else: mlts = pd.read_csv(mlt.mlt_file) # get mult index to align with org_data, + # get mult index to align with org_data, # mult idxs will always be written zero based if int # if original model files is not zero based need to add 1 add1 = int(mlt.zero_based == False)