From 6a66358ab9583ec706f636e2f91022b25f5c5c1f Mon Sep 17 00:00:00 2001 From: Jeremy White Date: Mon, 15 Mar 2021 16:52:25 -0600 Subject: [PATCH 1/4] first impl of arr par summary --- pyemu/utils/helpers.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/pyemu/utils/helpers.py b/pyemu/utils/helpers.py index f1363bdd0..66e377ffc 100644 --- a/pyemu/utils/helpers.py +++ b/pyemu/utils/helpers.py @@ -3912,6 +3912,33 @@ def apply_list_pars(): ) +def process_arr_par_summary_stats(arr_par_file="mult2model_info.csv",abs_mask=1.0e+20): + df = pd.read_csv(arr_par_file, index_col=0) + arr_pars = df.loc[df.index_cols.isna()].copy() + model_input_files = df.model_file.unique() + model_input_files.sort() + records = dict() + stat_dict = {"mean":np.nanmean,"stdev":np.nanstd,"median":np.nanmedian} + quantiles = [0.05,0.25,0.75,0.95] + for stat in stat_dict.keys(): + records[stat] = [] + for q in quantiles: + records["quantile_{0}".format(q)] = [] + + for model_input_file in model_input_files: + + arr = np.loadtxt(model_input_file) + arr[np.abs(arr) > abs_mask] = np.nan + for stat,func in stat_dict.items(): + records[stat].append(func(arr)) + for q in quantiles: + records["quantile_{0}".format(q)].append(np.nanquantile(arr,q)) + #scrub model input files + model_input_files = [f.replace(".","_").replace("\\","_").replace("/","_") for f in model_input_files] + df = pd.DataFrame(records,index=model_input_files) + df.index.name = "model_file" + df.to_csv("arr_par_summary.csv") + def apply_genericlist_pars(df,chunk_len=50): """a function to apply list style mult parameters From a7cf958a6a3571df5fea9a17d7b033a9e9c758fa Mon Sep 17 00:00:00 2001 From: Jeremy White Date: Tue, 16 Mar 2021 11:34:06 -0600 Subject: [PATCH 2/4] more work on arr par summary --- autotest/pst_from_tests.py | 21 ++++++++++---- pyemu/utils/helpers.py | 57 +++++++++++++++++++++++++++++++++++--- pyemu/utils/pst_from.py | 10 +++++++ 3 files changed, 79 insertions(+), 9 deletions(-) diff 
--git a/autotest/pst_from_tests.py b/autotest/pst_from_tests.py index bedc6eafc..d3f4aa82f 100644 --- a/autotest/pst_from_tests.py +++ b/autotest/pst_from_tests.py @@ -1618,8 +1618,6 @@ def mf6_freyberg_varying_idomain(): a = m.dis.idomain.array[k,:,:].copy() print(a) ib[k] = a - #return - #ib[0][:2,:] = 0 tags = {"npf_k_": [0.1, 10.]}#, "npf_k33_": [.1, 10], "sto_ss": [.1, 10], "sto_sy": [.9, 1.1]} dts = pd.to_datetime("1-1-2018") + pd.to_timedelta(np.cumsum(sim.tdis.perioddata.array["perlen"]), unit="d") @@ -1644,7 +1642,14 @@ def mf6_freyberg_varying_idomain(): df = pf.add_observations("heads.csv", insfile="heads.csv.ins", index_cols="time", use_cols=list(df.columns.values), prefix="hds", ofile_sep=",") - # build pest + + pst = pf.build_pst('freyberg.pst') + os.chdir(pf.new_d) + df = pyemu.helpers.calc_arr_par_summary_stats() + os.chdir("..") + pf.post_py_cmds.append("pyemu.helpers.calc_arr_par_summary_stats()") + pf.add_observations("arr_par_summary.csv",index_cols=["model_file"],use_cols=df.columns.tolist(), + obsgp=["arr_par_summary" for _ in df.columns],prefix=["arr_par_summary" for _ in df.columns]) pst = pf.build_pst('freyberg.pst') pst.control_data.noptmax = 0 pst.write(os.path.join(pf.new_d, "freyberg.pst")) @@ -1657,6 +1662,12 @@ def mf6_freyberg_varying_idomain(): assert pst.phi < 1.0e-6 + + + + + + def xsec_test(): import numpy as np import pandas as pd @@ -2669,12 +2680,12 @@ def mf6_freyberg_arr_obs_and_headerless_test(): if __name__ == "__main__": #invest() - freyberg_test() + #freyberg_test() #freyberg_prior_build_test() #mf6_freyberg_test() #mf6_freyberg_shortnames_test() #mf6_freyberg_direct_test() - #mf6_freyberg_varying_idomain() + mf6_freyberg_varying_idomain() #xsec_test() #mf6_freyberg_short_direct_test() #tpf = TestPstFrom() diff --git a/pyemu/utils/helpers.py b/pyemu/utils/helpers.py index 66e377ffc..e14fe261b 100644 --- a/pyemu/utils/helpers.py +++ b/pyemu/utils/helpers.py @@ -3912,7 +3912,23 @@ def apply_list_pars(): ) -def 
process_arr_par_summary_stats(arr_par_file="mult2model_info.csv",abs_mask=1.0e+20): +def calc_arr_par_summary_stats(arr_par_file="mult2model_info.csv",abs_mask=1.0e+20): + """read and generate summary statistics for the resulting model input arrays from + applying array par multipliers + + Args: + arr_par_file (`str`): the array multiplier key file. + abs_mask (`float`): value above which the abs of values + in the model input array is masked + + Returns: + pd.DataFrame: dataframe of summary stats for each model_file entry + + Note: + this function uses an optional "zone_file" column. If multiple zones + files are used, then zone arrays are aggregated to a single array + + """ df = pd.read_csv(arr_par_file, index_col=0) arr_pars = df.loc[df.index_cols.isna()].copy() model_input_files = df.model_file.unique() @@ -3922,22 +3938,55 @@ def process_arr_par_summary_stats(arr_par_file="mult2model_info.csv",abs_mask=1. quantiles = [0.05,0.25,0.75,0.95] for stat in stat_dict.keys(): records[stat] = [] + records[stat+"_org"] = [] + records[stat + "_dif"] = [] + for q in quantiles: records["quantile_{0}".format(q)] = [] + records["quantile_{0}_org".format(q)] = [] + records["quantile_{0}_dif".format(q)] = [] for model_input_file in model_input_files: arr = np.loadtxt(model_input_file) - arr[np.abs(arr) > abs_mask] = np.nan + org_file = df.loc[df.model_file==model_input_file,"org_file"].values + + print(org_file) + org_file = org_file[0] + org_arr = np.loadtxt(org_file) + if "zone_file" in df.columns: + zone_file = df.loc[df.model_file == model_input_file,"zone_file"].dropna().unique() + if len(zone_file) > 1: + zone_arr = np.zeros_like(arr) + for zf in zone_file: + za = np.loadtxt(zf) + zone_arr[za!=0] = 1 + else: + zone_arr = np.loadtxt(zone_file[0]) + arr[zone_arr==0] = np.NaN + org_arr[zone_arr==0] = np.NaN + else: + arr[np.abs(arr) > abs_mask] = np.nan + org_arr[np.abs(arr) > abs_mask] = np.nan for stat,func in stat_dict.items(): - records[stat].append(func(arr)) + v = 
func(arr) + records[stat].append(v) + ov = func(org_arr) + records[stat+"_org"].append(ov) + records[stat+"_dif"].append(v-ov) for q in quantiles: - records["quantile_{0}".format(q)].append(np.nanquantile(arr,q)) + v = np.nanquantile(arr,q) + ov = np.nanquantile(org_arr,q) + records["quantile_{0}".format(q)].append(v) + records["quantile_{0}_org".format(q)].append(ov) + records["quantile_{0}_dif".format(q)].append(v-ov) + #scrub model input files model_input_files = [f.replace(".","_").replace("\\","_").replace("/","_") for f in model_input_files] df = pd.DataFrame(records,index=model_input_files) df.index.name = "model_file" df.to_csv("arr_par_summary.csv") + return df def apply_genericlist_pars(df,chunk_len=50): """a function to apply list style mult parameters diff --git a/pyemu/utils/pst_from.py b/pyemu/utils/pst_from.py index 6c3615f78..9af77a65c 100644 --- a/pyemu/utils/pst_from.py +++ b/pyemu/utils/pst_from.py @@ -1994,6 +1994,14 @@ def add_parameters( # eventually filled by PEST) to actual model files so that actual # model input file can be generated # (using helpers.apply_list_and_array_pars()) + zone_filename = None + if zone_array is not None: + #zone_filename = tpl_filename.replace(".tpl",".zone") + zone_filename = Path(str(tpl_filename).replace(".tpl", ".zone")) + self.logger.statement("saving zone array {0} for tpl file {1}".format(zone_filename,tpl_filename)) + np.savetxt(zone_filename,zone_array,fmt="%4d") + zone_filename = zone_filename.name + relate_parfiles = [] for mod_file in file_dict.keys(): mult_dict = { @@ -2018,6 +2026,8 @@ def add_parameters( mult_dict["pp_fill_value"] = 1.0 mult_dict["pp_lower_limit"] = 1.0e-10 mult_dict["pp_upper_limit"] = 1.0e+10 + if zone_filename is not None: + mult_dict["zone_file"] = zone_filename relate_parfiles.append(mult_dict) relate_pars_df = pd.DataFrame(relate_parfiles) # store on self for use in pest build etc From 9dda2a0baaab2fe22e6ef846ed8d23132e0f2a28 Mon Sep 17 00:00:00 2001 From: Jeremy White Date: 
Tue, 16 Mar 2021 12:06:21 -0600 Subject: [PATCH 3/4] fix for 3d zone array --- pyemu/utils/pst_from.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyemu/utils/pst_from.py b/pyemu/utils/pst_from.py index 9af77a65c..3ddd43cc7 100644 --- a/pyemu/utils/pst_from.py +++ b/pyemu/utils/pst_from.py @@ -1995,7 +1995,7 @@ def add_parameters( # model input file can be generated # (using helpers.apply_list_and_array_pars()) zone_filename = None - if zone_array is not None: + if zone_array is not None and zone_array.ndim < 3: #zone_filename = tpl_filename.replace(".tpl",".zone") zone_filename = Path(str(tpl_filename).replace(".tpl", ".zone")) self.logger.statement("saving zone array {0} for tpl file {1}".format(zone_filename,tpl_filename)) From ab36b62b17080e36b0692371dc960c80daf7b4e3 Mon Sep 17 00:00:00 2001 From: Jeremy White Date: Tue, 16 Mar 2021 14:25:31 -0600 Subject: [PATCH 4/4] tweaks to docstring --- autotest/pst_from_tests.py | 18 +++++++++++++++--- pyemu/utils/helpers.py | 10 ++++++---- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/autotest/pst_from_tests.py b/autotest/pst_from_tests.py index d3f4aa82f..140154ebc 100644 --- a/autotest/pst_from_tests.py +++ b/autotest/pst_from_tests.py @@ -1643,11 +1643,12 @@ def mf6_freyberg_varying_idomain(): prefix="hds", ofile_sep=",") - pst = pf.build_pst('freyberg.pst') + #pst = pf.build_pst('freyberg.pst') + pf.parfile_relations.to_csv(os.path.join(pf.new_d, "mult2model_info.csv")) os.chdir(pf.new_d) - df = pyemu.helpers.calc_arr_par_summary_stats() + df = pyemu.helpers.calc_array_par_summary_stats() os.chdir("..") - pf.post_py_cmds.append("pyemu.helpers.calc_arr_par_summary_stats()") + pf.post_py_cmds.append("pyemu.helpers.calc_array_par_summary_stats()") pf.add_observations("arr_par_summary.csv",index_cols=["model_file"],use_cols=df.columns.tolist(), obsgp=["arr_par_summary" for _ in df.columns],prefix=["arr_par_summary" for _ in df.columns]) pst = pf.build_pst('freyberg.pst') @@ -1661,6 
+1662,17 @@ def mf6_freyberg_varying_idomain(): print(pst.phi) assert pst.phi < 1.0e-6 + df = pd.read_csv(os.path.join(pf.new_d,"mult2model_info.csv"), index_col=0) + arr_pars = df.loc[df.index_cols.isna()].copy() + model_files = arr_pars.model_file.unique() + pst.try_parse_name_metadata() + for model_file in model_files: + arr = np.loadtxt(os.path.join(pf.new_d,model_file)) + clean_name = model_file.replace(".","_").replace("\\","_").replace("/","_") + sim_val = pst.res.loc[pst.res.name.apply(lambda x: clean_name in x ),"modelled"] + sim_val = sim_val.loc[sim_val.index.map(lambda x: "mean_model_file" in x)] + print(model_file,sim_val,arr.mean()) + diff --git a/pyemu/utils/helpers.py b/pyemu/utils/helpers.py index e14fe261b..fa5d40dcb 100644 --- a/pyemu/utils/helpers.py +++ b/pyemu/utils/helpers.py @@ -3912,14 +3912,15 @@ def apply_list_pars(): ) -def calc_arr_par_summary_stats(arr_par_file="mult2model_info.csv",abs_mask=1.0e+20): +def calc_array_par_summary_stats(arr_par_file="mult2model_info.csv", abs_mask=1.0e+20): """read and generate summary statistics for the resulting model input arrays from applying array par multipliers Args: arr_par_file (`str`): the array multiplier key file. abs_mask (`float`): value above which the abs of values - in the model input array is masked + in the model input array is masked. 
The mask is only + applied if the `zone_file` column is nan and `abs_mask` is not `None` Returns: pd.DataFrame: dataframe of summary stats for each model_file entry @@ -3951,7 +3952,7 @@ def calc_arr_par_summary_stats(arr_par_file="mult2model_info.csv",abs_mask=1.0e+ arr = np.loadtxt(model_input_file) org_file = df.loc[df.model_file==model_input_file,"org_file"].values - print(org_file) + #print(org_file) org_file = org_file[0] org_arr = np.loadtxt(org_file) if "zone_file" in df.columns: @@ -3965,7 +3966,7 @@ def calc_arr_par_summary_stats(arr_par_file="mult2model_info.csv",abs_mask=1.0e+ zone_arr = np.loadtxt(zone_file[0]) arr[zone_arr==0] = np.NaN org_arr[zone_arr==0] = np.NaN - else: + elif abs_mask is not None: arr[np.abs(arr) > abs_mask] = np.nan org_arr[np.abs(arr) > abs_mask] = np.nan for stat,func in stat_dict.items(): @@ -4125,6 +4126,7 @@ def _process_list_file(model_file,df): else: mlts = pd.read_csv(mlt.mlt_file) # get mult index to align with org_data, + # get mult index to align with org_data, # mult idxs will always be written zero based if int # if original model files is not zero based need to add 1 add1 = int(mlt.zero_based == False)