diff --git a/autotest/pst_from_tests.py b/autotest/pst_from_tests.py
index 6ea088d9a..fb01bad71 100644
--- a/autotest/pst_from_tests.py
+++ b/autotest/pst_from_tests.py
@@ -726,6 +726,9 @@ def mf6_freyberg_test():
         pf.new_d, "freyberg6.sfr_packagedata_test.txt"),
         delim_whitespace=True, index_col=0)
     df.index = df.index - 1
+    print(df.rhk)
+    print((sfr_pkgdf.set_index('rno').loc[df.index, 'rhk'] *
+           sfr_pars.set_index('#rno').loc[df.index, 'parval1']))
     assert np.isclose(
         df.rhk, (sfr_pkgdf.set_index('rno').loc[df.index, 'rhk'] *
                  sfr_pars.set_index('#rno').loc[df.index, 'parval1'])).all()
@@ -1118,9 +1121,183 @@ def mf6_freyberg_da_test():
     assert len(mults_not_linked_to_pst) == 0, print(mults_not_linked_to_pst)
 
 
+def mf6_freyberg_direct_test():
+
+    import numpy as np
+    import pandas as pd
+    pd.set_option('display.max_rows', 500)
+    pd.set_option('display.max_columns', 500)
+    pd.set_option('display.width', 1000)
+    try:
+        import flopy
+    except:
+        return
+
+    org_model_ws = os.path.join('..', 'examples', 'freyberg_mf6')
+    tmp_model_ws = "temp_pst_from_direct"
+    if os.path.exists(tmp_model_ws):
+        shutil.rmtree(tmp_model_ws)
+    os.mkdir(tmp_model_ws)
+    sim = flopy.mf6.MFSimulation.load(sim_ws=org_model_ws)
+    # sim.set_all_data_external()
+    sim.simulation_data.mfpath.set_sim_path(tmp_model_ws)
+    # sim.set_all_data_external()
+    m = sim.get_model("freyberg6")
+    sim.set_all_data_external()
+    sim.write_simulation()
+
+    # to bypass the issues with flopy
+    # shutil.copytree(org_model_ws,tmp_model_ws)
+    # sim = flopy.mf6.MFSimulation.load(sim_ws=org_model_ws)
+    # m = sim.get_model("freyberg6")
+
+    # SETUP pest stuff...
+    os_utils.run("mf6", cwd=tmp_model_ws)
+
+    template_ws = "new_temp_direct"
+    sr = m.modelgrid
+    # set up PstFrom object
+    pf = PstFrom(original_d=tmp_model_ws, new_d=template_ws,
+                 remove_existing=True,
+                 longnames=True, spatial_reference=sr,
+                 zero_based=False, start_datetime="1-1-2018")
+    # obs
+    # using tabular style model output
+    # (generated by pyemu.gw_utils.setup_hds_obs())
+    # pf.add_observations('freyberg.hds.dat', insfile='freyberg.hds.dat.ins2',
+    #                     index_cols='obsnme', use_cols='obsval', prefix='hds')
+
+    df = pd.read_csv(os.path.join(tmp_model_ws, "sfr.csv"), index_col=0)
+    pf.add_observations("sfr.csv", insfile="sfr.csv.ins", index_cols="time",
+                        use_cols=list(df.columns.values))
+    v = pyemu.geostats.ExpVario(contribution=1.0, a=1000)
+    gr_gs = pyemu.geostats.GeoStruct(variograms=v, transform="log")
+    rch_temporal_gs = pyemu.geostats.GeoStruct(
+        variograms=pyemu.geostats.ExpVario(contribution=1.0, a=60))
+    pf.extra_py_imports.append('flopy')
+    ib = m.dis.idomain[0].array
+    tags = {"npf_k_": [0.1, 10.], "npf_k33_": [.1, 10], "sto_ss": [.1, 10],
+            "sto_sy": [.9, 1.1], "rch_recharge": [.5, 1.5]}
+    dts = pd.to_datetime("1-1-2018") + pd.to_timedelta(
+        np.cumsum(sim.tdis.perioddata.array["perlen"]), unit="d")
+    print(dts)
+    for tag, bnd in tags.items():
+        lb, ub = bnd[0], bnd[1]
+        arr_files = [f for f in os.listdir(tmp_model_ws)
+                     if tag in f and f.endswith(".txt")]
+        if "rch" in tag:
+            for arr_file in arr_files:
+                pf.add_parameters(filenames=arr_file, par_type="grid",
+                                  par_name_base="rch_gr", pargp="rch_gr",
+                                  zone_array=ib, upper_bound=1.0e-3,
+                                  lower_bound=1.0e-7, geostruct=gr_gs,
+                                  par_style="direct")
+
+            for arr_file in arr_files:
+                kper = int(arr_file.split('.')[1].split('_')[-1]) - 1
+                pf.add_parameters(filenames=arr_file, par_type="constant",
+                                  par_name_base=arr_file.split('.')[1] + "_cn",
+                                  pargp="rch_const", zone_array=ib,
+                                  upper_bound=ub, lower_bound=lb,
+                                  geostruct=rch_temporal_gs,
+                                  datetime=dts[kper])
+        else:
+            for arr_file in arr_files:
+                pf.add_parameters(filenames=arr_file, par_type="grid",
+                                  par_name_base=arr_file.split('.')[1] + "_gr",
+                                  pargp=arr_file.split('.')[1] + "_gr",
+                                  zone_array=ib, upper_bound=ub,
+                                  lower_bound=lb, geostruct=gr_gs)
+
+
+    list_files = ["freyberg6.wel_stress_period_data_{0}.txt".format(t)
+                  for t in range(1, m.nper + 1)]
+    list_files.sort()
+    for list_file in list_files:
+        kper = int(list_file.split(".")[1].split('_')[-1]) - 1
+        # add spatially constant, but temporally correlated wel flux pars
+        pf.add_parameters(filenames=list_file, par_type="constant",
+                          par_name_base="twel_mlt_{0}".format(kper),
+                          pargp="twel_mlt", index_cols=[0, 1, 2], use_cols=[3],
+                          upper_bound=1.5, lower_bound=0.5,
+                          datetime=dts[kper], geostruct=rch_temporal_gs)
+
+        # add temporally indep, but spatially correlated wel flux pars
+        pf.add_parameters(filenames=list_file, par_type="grid",
+                          par_name_base="wel_grid_{0}".format(kper),
+                          pargp="wel_{0}".format(kper), index_cols=[0, 1, 2],
+                          use_cols=[3], upper_bound=0.0, lower_bound=-1000,
+                          geostruct=gr_gs, par_style="direct",
+                          transform="none")
+
+    list_file = "freyberg6.ghb_stress_period_data_1.txt"
+    pf.add_parameters(filenames=list_file, par_type="grid",
+                      par_name_base=["ghb_stage", "ghb_cond"],
+                      pargp=["ghb_stage", "ghb_cond"], index_cols=[0, 1, 2],
+                      use_cols=[3, 4], upper_bound=[35, 150],
+                      lower_bound=[32, 50], geostruct=gr_gs,
+                      par_style="direct", transform="none")
+
+
+    # add model run command
+    pf.mod_sys_cmds.append("mf6")
+    print(pf.mult_files)
+    print(pf.org_files)
+
+    # build pest
+    pst = pf.build_pst('freyberg.pst')
+
+    df = pd.read_csv(os.path.join(tmp_model_ws, "heads.csv"), index_col=0)
+    pf.add_observations("heads.csv", insfile="heads.csv.ins",
+                        index_cols="time", use_cols=list(df.columns.values),
+                        prefix="hds", rebuild_pst=True)
+
+    # test par mults are working
+    b_d = os.getcwd()
+    os.chdir(pf.new_d)
+    try:
+        pyemu.helpers.apply_list_and_array_pars(
+            arr_par_file="mult2model_info.csv", chunk_len=1)
+    except Exception as e:
+        os.chdir(b_d)
+        raise Exception(str(e))
+    os.chdir(b_d)
+
+    num_reals = 100
+    pe = pf.draw(num_reals, use_specsim=True)
+    pe.to_binary(os.path.join(template_ws, "prior.jcb"))
+    assert pe.shape[1] == pst.npar_adj, "{0} vs {1}".format(pe.shape[1],
+                                                            pst.npar_adj)
+    assert pe.shape[0] == num_reals
+
+    pst.control_data.noptmax = 0
+    pst.pestpp_options["additional_ins_delimiters"] = ","
+
+    pst.write(os.path.join(pf.new_d, "freyberg.pst"))
+    pyemu.os_utils.run("pestpp-ies freyberg.pst", cwd=pf.new_d)
+
+    res_file = os.path.join(pf.new_d, "freyberg.base.rei")
+    assert os.path.exists(res_file), res_file
+    pst.set_res(res_file)
+    print(pst.phi)
+    assert pst.phi < 0.1, pst.phi
+
+    # turn the direct recharge pars to their lower bound and the direct
+    # wel pars to their upper bound (zero flux, since the wel rates are
+    # negative values in the control file) and check that the model
+    # results are consistent
+    par = pst.parameter_data
+    rch_par = par.loc[par.parnme.apply(
+        lambda x: "rch_gr" in x and "direct" in x), "parnme"]
+    wel_par = par.loc[par.parnme.apply(
+        lambda x: "wel_grid" in x and "direct" in x), "parnme"]
+    par.loc[rch_par, "parval1"] = par.loc[rch_par, "parlbnd"]
+    par.loc[wel_par, "parval1"] = par.loc[wel_par, "parubnd"]
+    pst.write(os.path.join(pf.new_d, "freyberg.pst"))
+    pyemu.os_utils.run("pestpp-ies freyberg.pst", cwd=pf.new_d)
+    lst = flopy.utils.Mf6ListBudget(os.path.join(pf.new_d, "freyberg6.lst"))
+    flx, cum = lst.get_dataframes(diff=True)
+    wel_tot = flx.wel.apply(np.abs).sum()
+    print(flx.wel)
+    assert wel_tot < 1.0e-6, wel_tot
+
+    rch_files = [f for f in os.listdir(pf.new_d)
+                 if ".rch_recharge" in f and f.endswith(".txt")]
+    rch_val = par.loc[rch_par, "parval1"].iloc[0]
+    for rch_file in rch_files:
+        arr = np.loadtxt(os.path.join(pf.new_d, rch_file))
+        print(rch_file, rch_val, arr.mean(), arr.max(), arr.min())
+        if np.abs(arr.max() - rch_val) > 1.0e-6 or \
+                np.abs(arr.min() - rch_val) > 1.0e-6:
+            raise Exception("recharge too diff")
+
 if __name__ == "__main__":
     #freyberg_test()
     #freyberg_prior_build_test()
-    mf6_freyberg_test()
+    #mf6_freyberg_test()
     #mf6_freyberg_shortnames_test()
-    #mf6_freyberg_da_test()
\ No newline at end of file
+    #mf6_freyberg_da_test()
+    mf6_freyberg_direct_test()
\ No newline at end of file
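The `pst.phi < 0.1` check above works because direct parameters are seeded with `parval1` values read from the original model input files, so a `noptmax=0` run should reproduce the outputs the observations were harvested from. A minimal sketch of that round-trip idea, assuming the template directory built by the test above (path illustrative):

    import os
    import pyemu

    pst = pyemu.Pst(os.path.join("new_temp_direct", "freyberg.pst"))
    par = pst.parameter_data
    # direct pars carry model-input values, not 1.0 multiplier values
    direct_pars = par.loc[par.parnme.str.contains("direct"), :]
    print(direct_pars.parval1.describe())
    # with unperturbed parval1 and noptmax = 0, phi should be ~0.0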
diff --git a/examples/pstfrom_mf6.ipynb b/examples/pstfrom_mf6.ipynb
index 794c8b678..f6c5ad748 100644
--- a/examples/pstfrom_mf6.ipynb
+++ b/examples/pstfrom_mf6.ipynb
@@ -623,8 +623,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "print(pe.loc[:,'hk_layer_1_inst:0_i:0_j:1_x:375.00_y:9875.00_zone:1'])\n",
-    "pe.loc[:,'hk_layer_1_inst:0_i:0_j:1_x:375.00_y:9875.00_zone:1']._df.hist()"
+    "print(pe.loc[:,pst.adj_par_names[0]])\n",
+    "pe.loc[:,pst.adj_par_names[0]]._df.hist()"
    ]
   },
   {
@@ -757,7 +757,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.4"
+   "version": "3.7.3"
   }
  },
  "nbformat": 4,
diff --git a/pyemu/prototypes/pst_from.py b/pyemu/prototypes/pst_from.py
index fcc1333a4..24b44d8b3 100644
--- a/pyemu/prototypes/pst_from.py
+++ b/pyemu/prototypes/pst_from.py
@@ -13,6 +13,12 @@ import copy
 
 
+# the tolerable percent difference (100 * (max - min) / mean) used when
+# checking that constant and zone type direct parameters are in fact
+# constant (within a given zone)
+DIRECT_PAR_PERCENT_DIFF_TOL = 1.0
+
+
 def _get_datetime_from_str(sdt):
     # could be expanded if someone is feeling clever.
     if isinstance(sdt, str):
@@ -49,12 +55,14 @@ class PstFrom(object):
         spatial_reference:
         zero_based:
         start_datetime:
+
     """
     # TODO: doc strings!!!
 
     def __init__(self, original_d, new_d, longnames=True,
                  remove_existing=False, spatial_reference=None,
-                 zero_based=True, start_datetime=None):  # TODO geostruct?
+                 zero_based=True, start_datetime=None):
+        # TODO geostruct?
 
         self.original_d = original_d
         self.new_d = new_d
@@ -110,6 +118,8 @@ def __init__(self, original_d, new_d, longnames=True,
         self._pp_facs = {}
         self.pst = None
 
+        self.direct_org_files = []
+
     @property
     def parfile_relations(self):
         if isinstance(self._parfile_relations, list):
@@ -248,8 +258,12 @@ def write_forward_run(self):
             if not isinstance(ilist, list):
                 ilist = [ilist]
             for cmd in ilist:
-                self.logger.statement("forward_run line:{0}".format(cmd))
-                alist.append("pyemu.os_utils.run(r'{0}')\n".format(cmd))
+                new_sys_cmd = "pyemu.os_utils.run(r'{0}')\n".format(cmd)
+                if new_sys_cmd in alist:
+                    self.logger.warn("sys_cmd '{0}' already in sys cmds, skipping...".format(new_sys_cmd))
+                else:
+                    self.logger.statement("forward_run line:{0}".format(new_sys_cmd))
+                    alist.append(new_sys_cmd)
 
         with open(os.path.join(self.new_d, self.py_run_file),
                   'w') as f:
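The change above de-duplicates system commands, so the same command appended to `mod_sys_cmds` more than once yields a single run line in the forward run script. A standalone sketch of just the guard logic:

    # duplicate commands collapse to a single forward-run line
    alist = []
    for cmd in ["mf6", "mf6"]:
        new_sys_cmd = "pyemu.os_utils.run(r'{0}')\n".format(cmd)
        if new_sys_cmd in alist:
            print("sys_cmd already in sys cmds, skipping...")
        else:
            alist.append(new_sys_cmd)
    assert len(alist) == 1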
@@ -726,7 +740,7 @@ def add_observations(self, filename, insfile=None,
         for filename, sep in zip(filenames, seps):
             # should only ever be one but hey...
             self.logger.log("building insfile for tabular output file {0}"
                             "".format(filename))
-            df_temp = _get_tpl_or_ins_df(df, prefix, typ='obs',
+            df_temp = _get_tpl_or_ins_df(filename, df, prefix, typ='obs',
                                          index_cols=index_cols,
                                          use_cols=use_cols,
                                          longnames=self.longnames)
@@ -765,7 +779,7 @@
         self.logger.log("adding observations from tabular output file")
         if rebuild_pst:
             if self.pst is not None:
-                self.logger.log("Adding pars to control file "
+                self.logger.log("Adding obs to control file "
                                 "and rewriting pst")
                 self.build_pst(filename=self.pst.filename, update='obs')
             else:
@@ -888,7 +902,8 @@
     def add_parameters(self, filenames, par_type, zone_array=None,
                        spatial_reference=None, geostruct=None, datetime=None,
                        mfile_fmt='free', mfile_skip=None,
                        ult_ubound=None, ult_lbound=None, rebuild_pst=False,
-                       alt_inst_str='inst', comment_char=None):
+                       alt_inst_str='inst', comment_char=None,
+                       par_style="multiplier"):
         """
         Add list or array style model input files to PstFrom object.
         This method
@@ -952,17 +967,37 @@
                 This is not additive with `mfile_skip` option.
                 Warning: currently comment lines within list-like
                 tabular data will be lost.
+            par_style (`str`): either "multiplier" or "direct", where the
+                former sets up a multiplier parameter process against the
+                existing model input array and the latter sets up a template
+                file to write the model input file directly. Default is
+                "multiplier".
 
         """
         # TODO need more support for temporal pars?
         #  - As another partype using index_cols or an additional time_cols
         # TODO support passing par_file (i,j)/(x,y) directly where information
         #  is not contained in model parameter file - e.g. no i,j columns
+
+        # some checks for direct parameters
+        par_style = par_style.lower()
+        if par_style not in ["multiplier", "direct"]:
+            self.logger.lraise("add_parameters(): unrecognized 'par_style': "
+                               "{0}, should be either 'multiplier' or "
+                               "'direct'".format(par_style))
+        if isinstance(filenames, str):
+            filenames = [filenames]
+        if par_style == "direct":
+            if len(filenames) != 1:
+                self.logger.lraise("add_parameters(): 'filenames' arg for "
+                                   "'direct' style must contain one and only "
+                                   "one filename, not {0} "
+                                   "files".format(len(filenames)))
+            if filenames[0] in self.direct_org_files:
+                self.logger.lraise("add_parameters(): original model input "
+                                   "file '{0}' already used for 'direct' "
+                                   "parameterization".format(filenames[0]))
         # Default par data columns used for pst
         par_data_cols = pyemu.pst_utils.pst_config["par_fieldnames"]
-        self.logger.log("adding parameters for file(s) "
-                        "{0}".format(str(filenames)))
+        self.logger.log("adding {0} type {1} style parameters for file(s) "
+                        "{2}".format(par_type, par_style, str(filenames)))
         if geostruct is not None:
+            if geostruct.sill != 1.0 and par_style != "multiplier":
+                self.logger.warn("geostruct sill != 1.0 for 'direct' "
+                                 "style parameters")
             if geostruct.transform != transform:
                 self.logger.warn("0) Inconsistency between "
                                  "geostruct transform and partrans.")
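For reference, the two styles differ only in the `par_style` argument and in how the bounds are interpreted: multiplier bounds are dimensionless factors applied to the existing inputs, while direct bounds (and initial values) are in model units, as in the recharge and wel calls in the test above. A hedged sketch against a hypothetical `PstFrom` instance `pf` (filenames illustrative):

    # multiplier style (the default): pars start at 1.0 and scale the
    # original array values, so bounds are dimensionless factors
    pf.add_parameters(filenames="freyberg6.npf_k_layer1.txt", par_type="grid",
                      par_name_base="hk", pargp="hk",
                      upper_bound=10.0, lower_bound=0.1)

    # direct style: the template writes the model input file itself, so
    # bounds and parval1 are in model units (recharge rates here)
    pf.add_parameters(filenames="freyberg6.rch_recharge_1.txt", par_type="grid",
                      par_name_base="rch", pargp="rch",
                      upper_bound=1.0e-3, lower_bound=1.0e-7,
                      par_style="direct")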
@@ -1048,11 +1083,20 @@
         par_name_store = par_name_base[0].replace(':', '')  # for os filename
 
         # Define requisite filenames
-        mlt_filename = "{0}_{1}.csv".format(par_name_store, par_type)
-        # pst input file (for tpl->in pair) is multfile (in mult dir)
-        in_filepst = os.path.relpath(os.path.join(
-            self.mult_file_d, mlt_filename), self.new_d)
-        tpl_filename = mlt_filename + ".tpl"
+        if par_style == "multiplier":
+            mlt_filename = "{0}_{1}.csv".format(par_name_store, par_type)
+            # pst input file (for tpl->in pair) is multfile (in mult dir)
+            in_filepst = os.path.relpath(os.path.join(
+                self.mult_file_d, mlt_filename), self.new_d)
+            tpl_filename = mlt_filename + ".tpl"
+            in_fileabs = os.path.join(self.mult_file_d, mlt_filename)
+        else:
+            mlt_filename = np.NaN
+            # pst input file (for tpl->in pair) is the original model
+            # input file (in the org dir)
+            in_filepst = os.path.relpath(os.path.join(
+                self.original_file_d, filenames[0]), self.new_d)
+            tpl_filename = filenames[0] + ".tpl"
+            in_fileabs = os.path.join(self.new_d, in_filepst)
 
         pp_filename = None  # setup placeholder variables
         fac_filename = None
@@ -1075,15 +1119,17 @@
             self.logger.log(
                 "writing list-based template file '{0}'".format(tpl_filename))
             # Generate tabular type template - also returns par data
-            df = write_list_tpl(
-                file_dict.values(), par_name_base,
+            dfs = [file_dict[filename] for filename in filenames]
+            df = write_list_tpl(
+                filenames, dfs, par_name_base,
                 tpl_filename=os.path.join(self.new_d, tpl_filename),
                 par_type=par_type, suffix='', index_cols=index_cols,
                 use_cols=use_cols, zone_array=zone_array, gpname=pargp,
                 longnames=self.longnames, ij_in_idx=ij_in_idx,
                 xy_in_idx=xy_in_idx,
                 get_xy=self.get_xy, zero_based=self.zero_based,
-                input_filename=os.path.join(self.mult_file_d, mlt_filename))
+                input_filename=in_fileabs,
+                par_style=par_style)
             assert np.mod(len(df), len(use_cols)) == 0., (
                 "Parameter dataframe wrong shape for number of cols {0}"
                 "".format(use_cols))
@@ -1108,8 +1154,7 @@
                 suffix='', par_type=par_type, zone_array=zone_array,
                 shape=shp, longnames=self.longnames, get_xy=self.get_xy,
                 fill_value=1.0, gpname=pargp,
-                input_filename=os.path.join(self.mult_file_d,
-                                            mlt_filename))
+                input_filename=in_fileabs, par_style=par_style)
             self.logger.log(
                 "writing template file"
                 " {0} for {1}".format(tpl_filename, par_name_base))
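The branching above changes what the template file writes: for multipliers, the tpl/in pair points at a multiplier file in the mult directory, while for direct parameters the tpl writes the copy of the original model input file itself, which is then carried through to the model file without multiplication. Roughly (filenames and directory names illustrative):

    # multiplier:  hk_grid.csv.tpl  writes  mult/hk_grid.csv
    #              (applied to org/<model file> at runtime)
    # direct:      freyberg6.rch_recharge_1.txt.tpl  writes
    #              org/freyberg6.rch_recharge_1.txt
    #              (carried through to the model file, no multiplication)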
@@ -1117,6 +1162,8 @@
         elif par_type in {"pilotpoints", "pilot_points", "pilotpoint",
                           "pilot_point", "pilot-point", "pilot-points"}:
+            if par_style == "direct":
+                self.logger.lraise("pilot points not supported for "
+                                   "'direct' par_style")
             # Setup pilotpoints for array type par files
             self.logger.log("setting up pilot point parameters")
             # finding spatial references for for setting up pilot points
@@ -1271,8 +1318,7 @@
             mult_dict = {
                 "org_file": os.path.join(
                     *os.path.split(self.original_file_d)[1:], mod_file),
-                "mlt_file": os.path.join(
-                    *os.path.split(self.mult_file_d)[1:], mlt_filename),
+
                 "model_file": mod_file,
                 "use_cols": use_cols,
                 "index_cols": index_cols,
@@ -1281,6 +1327,10 @@
                 "head_rows": skip_dict[mod_file],
                 "upper_bound": ult_ubound,
                 "lower_bound": ult_lbound}
+            if par_style == "multiplier":
+                mult_dict["mlt_file"] = os.path.join(
+                    *os.path.split(self.mult_file_d)[1:], mlt_filename)
+
             if pp_filename is not None:
                 # if pilotpoint need to store more info
                 assert fac_filename is not None, (
@@ -1568,14 +1618,14 @@ def _prep_arg_list_lengths(self, filenames, fmts=None, seps=None,
     return filenames, fmts, seps, skip_rows, index_cols, use_cols
 
 
-def write_list_tpl(dfs, name, tpl_filename, index_cols, par_type,
+def write_list_tpl(filenames, dfs, name, tpl_filename, index_cols, par_type,
                    use_cols=None, suffix='', zone_array=None, gpname=None,
                    longnames=False, get_xy=None, ij_in_idx=None, xy_in_idx=None,
-                   zero_based=True,
-                   input_filename=None):
+                   zero_based=True, input_filename=None,
+                   par_style="multiplier"):
     """
     Write template files for a list style input.
     Args:
+        filenames (`str` or `container` of `str`): original input filenames
        dfs (`pandas.DataFrame` or `container` of pandas.DataFrames): pandas
            representations of input file.
        name (`str` or container of str): parameter name prefixes.
@@ -1609,17 +1659,25 @@
            are NOT zero-based indicies (e.g. MODFLOW row/cols).
            If False 1 with be subtracted from `index_cols`.
        input_filename (`str`): Path to input file (paired with tpl file)
+       par_style (`str`): either 'direct' or 'multiplier'
 
     Returns:
 
     """
     # get dataframe with autogenerated parnames based on `name`, `index_cols`,
     # `use_cols`, `suffix` and `par_type`
-    df_tpl = _get_tpl_or_ins_df(dfs, name, index_cols, par_type,
-                                use_cols=use_cols, suffix=suffix, gpname=gpname,
-                                zone_array=zone_array, longnames=longnames,
-                                get_xy=get_xy, ij_in_idx=ij_in_idx,
-                                xy_in_idx=xy_in_idx, zero_based=zero_based)
+    if par_style == "direct":
+        df_tpl = _write_direct_df_tpl(filenames[0], tpl_filename, dfs[0], name,
+                                      index_cols, par_type, use_cols=use_cols,
+                                      suffix=suffix, gpname=gpname,
+                                      zone_array=zone_array,
+                                      longnames=longnames, get_xy=get_xy,
+                                      ij_in_idx=ij_in_idx, xy_in_idx=xy_in_idx,
+                                      zero_based=zero_based)
+    else:
+        df_tpl = _get_tpl_or_ins_df(filenames, dfs, name, index_cols, par_type,
+                                    use_cols=use_cols, suffix=suffix,
+                                    gpname=gpname, zone_array=zone_array,
+                                    longnames=longnames, get_xy=get_xy,
+                                    ij_in_idx=ij_in_idx, xy_in_idx=xy_in_idx,
+                                    zero_based=zero_based)
     for col in use_cols:
         # corellations flagged using pargp
         df_tpl["covgp{0}".format(col)] = df_tpl.loc[:,
@@ -1665,6 +1723,7 @@
     pargp = list(
         df_tpl.loc[:, ["pargp{0}".format(col)
                        for col in use_cols]].values.flatten())
+
     covgp = list(
         df_tpl.loc[:, ["covgp{0}".format(col)
                        for col in use_cols]].values.flatten())
@@ -1673,6 +1732,10 @@
     df_par = pd.DataFrame({"parnme": parnme, "pargp": pargp,
                            'covgp': covgp}, index=parnme)
+    parval_cols = [c for c in df_tpl.columns if "parval1" in str(c)]
+    parval = list(
+        df_tpl.loc[:, parval_cols].values.flatten())
+
     if par_type == 'grid' and 'x' in df_tpl.columns:
         # TODO work out if x,y needed for constant and zone pars too
         df_par['x'], df_par['y'] = np.concatenate(
             df_tpl.apply(lambda r: [[r.x, r.y] for _ in use_cols],
@@ -1687,19 +1750,197 @@
     for use_col in use_cols:
         df_tpl.loc[:, use_col] = df_tpl.loc[:, use_col].apply(
             lambda x: "~ {0} ~".format(x))
-    pyemu.helpers._write_df_tpl(filename=tpl_filename, df=df_tpl, sep=',',
-                                tpl_marker='~')
-
-    if input_filename is not None:
-        df_in = df_tpl.copy()
-        df_in.loc[:, use_cols] = 1.0
-        df_in.to_csv(input_filename)
+    if par_style == "multiplier":
+        pyemu.helpers._write_df_tpl(filename=tpl_filename, df=df_tpl, sep=',',
+                                    tpl_marker='~')
+
+        if input_filename is not None:
+            df_in = df_tpl.copy()
+            df_in.loc[:, use_cols] = 1.0
+            df_in.to_csv(input_filename)
     df_par.loc[:, "tpl_filename"] = tpl_filename
     df_par.loc[:, "input_filename"] = input_filename
+    df_par.loc[:, "parval1"] = parval
     return df_par
 
 
-def _get_tpl_or_ins_df(dfs, name, index_cols, typ, use_cols=None,
+def _write_direct_df_tpl(in_filename, tpl_filename, df, name, index_cols, typ,
+                         use_cols=None, suffix='', zone_array=None,
+                         longnames=False, get_xy=None, ij_in_idx=None,
+                         xy_in_idx=None, zero_based=True, gpname=None):
+
+    """
+    Private method to write a direct-style template file from a tabular
+    model input file read into a pandas dataframe
+    Args:
+        in_filename (`str`): original model input filename
+        tpl_filename (`str`): template filename
+        df (`pandas.DataFrame`): DataFrame of list type input file
+        name (`str`): Parameter name prefix
+        index_cols (`str` or `list`): columns of dataframes to use as indices
+        typ (`str`): 'constant','zone', or 'grid' used in parname generation.
+            If `constant`, one par is set up for each `use_cols`.
+            If `zone`, one par is set up for each zone for each `use_cols`.
+            If `grid`, one par is set up for every unique index combination
+            (from `index_cols`) for each `use_cols`.
+        use_cols (`list`): Columns to parameterise. If None, pars are set up
+            for all columns apart from index cols
+        suffix (`str`): Optional par name suffix.
+        zone_array (`np.ndarray`): Array defining zone divisions.
+            If not None and `par_type` is `grid` or `zone` it is expected that
+            `index_cols` provide the indices for querying `zone_array`.
+            Therefore, array dimension should equal `len(index_cols)`.
+        longnames (`boolean`): Specify if pars will be specified without the
+            12 char restriction - recommended if using PEST++.
+        get_xy (`pyemu.PstFrom` method): Can be specified to get real-world xy
+            from `index_cols` passed (to include in par name)
+        ij_in_idx (`list` or `array`): defining which `index_cols` contain i,j
+        xy_in_idx (`list` or `array`): defining which `index_cols` contain x,y
+        zero_based (`boolean`): IMPORTANT - pass as False if `index_cols`
+            are NOT zero-based indices (e.g. MODFLOW row/cols).
+            If False, 1 will be subtracted from `index_cols`.
+
+    Returns:
+        pandas.DataFrame with parnme and pargp defined for each `use_col`
+
+    """
+    # work out the unique index entries, preserving the order they appear
+    # in the model file so par names stay aligned with file rows
+    sidx = df.loc[:, index_cols].apply(lambda x: tuple(x), axis=1).unique()
+
+    df_ti = pd.DataFrame({"sidx": list(sidx)}, columns=["sidx"])
+    # get some index strings for naming
+    if longnames:
+        j = '_'
+        fmt = "{0}:{1}"
+        if isinstance(index_cols[0], str):
+            inames = index_cols
+        else:
+            inames = ["idx{0}".format(i) for i in range(len(index_cols))]
+    else:
+        fmt = "{1:3}"
+        j = ''
+        if isinstance(index_cols[0], str):
+            inames = index_cols
+        else:
+            inames = ["{0}".format(i) for i in range(len(index_cols))]
+
+    if not zero_based:
+        df_ti.loc[:, "sidx"] = df_ti.sidx.apply(
+            lambda x: tuple(xx - 1 for xx in x))
+    df_ti.loc[:, "idx_strs"] = df_ti.sidx.apply(
+        lambda x: j.join([fmt.format(iname, xx)
+                          for xx, iname in zip(x, inames)])).str.replace(' ', '')
+
+    if get_xy is not None:
+        if xy_in_idx is not None:
+            # x and y already in index cols
+            df_ti[['x', 'y']] = pd.DataFrame(
+                df_ti.sidx.to_list()).iloc[:, xy_in_idx]
+        else:
+            df_ti.loc[:, 'xy'] = df_ti.sidx.apply(get_xy, ij_id=ij_in_idx)
+            df_ti.loc[:, 'x'] = df_ti.xy.apply(lambda x: x[0])
+            df_ti.loc[:, 'y'] = df_ti.xy.apply(lambda x: x[1])
+
+    if use_cols is None:
+        use_cols = [c for c in df.columns if c not in index_cols]
+
+    direct_tpl_df = df.copy()
+    for iuc, use_col in enumerate(use_cols):
+        if not isinstance(name, str):
+            nname = name[iuc]
+        else:
+            nname = name
+        # if zone type, find the zones for each index position
+        if zone_array is not None and typ in ["zone", "grid"]:
+            if zone_array.ndim != len(index_cols):
+                raise Exception("_write_direct_df_tpl() error: "
+                                "zone_array.ndim "
+                                "({0}) != len(index_cols)({1})"
+                                "".format(zone_array.ndim,
                                          len(index_cols)))
+            df_ti.loc[:, "zval"] = df_ti.sidx.apply(
+                lambda x: zone_array[x])
+
+        if gpname is None or gpname[iuc] is None:
+            ngpname = nname
+        else:
+            if not isinstance(gpname, str):
+                ngpname = gpname[iuc]
+            else:
+                ngpname = gpname
+        df_ti.loc[:, "pargp{}".format(use_col)] = ngpname
+        if typ == "constant":
+            # one par for entire use_col column
+            if longnames:
+                df_ti.loc[:, use_col] = "{0}_use_col:{1}_{2}".format(
+                    nname, use_col, "direct")
+                if suffix != '':
+                    df_ti.loc[:, use_col] += "_{0}".format(suffix)
+            else:
+                df_ti.loc[:, use_col] = "{0}{1}".format(nname, use_col)
+                if suffix != '':
+                    df_ti.loc[:, use_col] += suffix
+
+            _check_diff(df.loc[:, use_col].values, in_filename)
+            df_ti.loc[:, "parval1_{0}".format(use_col)] = \
+                df.loc[:, use_col].values[0]
+
+        elif typ == "zone":
+            # one par for each zone
+            if longnames:
+                df_ti.loc[:, use_col] = "{0}_use_col:{1}_{2}".format(
+                    nname, use_col, "direct")
+                if zone_array is not None:
+                    df_ti.loc[:, use_col] += df_ti.zval.apply(
+                        lambda x: "_zone:{0}".format(x))
+                if suffix != '':
+                    df_ti.loc[:, use_col] += "_{0}".format(suffix)
+            else:
+                df_ti.loc[:, use_col] = "{0}{1}".format(nname, use_col)
+                if suffix != '':
+                    df_ti.loc[:, use_col] += suffix
+
+            # todo: check that values are constant within zones and
+            # assign parval1
+            raise NotImplementedError("list-based direct zone-type "
+                                      "parameters not implemented")
+
+        elif typ == "grid":
+            # one par for each index
+            if longnames:
+                df_ti.loc[:, use_col] = "{0}_use_col:{1}_{2}".format(
+                    nname, use_col, "direct")
+                if zone_array is not None:
+                    df_ti.loc[:, use_col] += df_ti.zval.apply(
+                        lambda x: "_zone:{0}".format(x))
+                df_ti.loc[:, use_col] += '_' + df_ti.idx_strs
+                if suffix != '':
+                    df_ti.loc[:, use_col] += "_{0}".format(suffix)
+            else:
+                df_ti.loc[:, use_col] = "{0}{1}".format(nname, use_col)
+                df_ti.loc[:, use_col] += df_ti.idx_strs
+                if suffix != '':
+                    df_ti.loc[:, use_col] += suffix
+
+            df_ti.loc[:, "parval1_{0}".format(use_col)] = \
+                df.loc[:, use_col].values
+
+        else:
+            raise Exception("_write_direct_df_tpl() error: "
+                            "unrecognized 'typ', should be "
+                            "'constant', 'zone', or 'grid', "
+                            "not '{0}'".format(typ))
+        direct_tpl_df.loc[:, use_col] = df_ti.loc[:, use_col].apply(
+            lambda x: "~ {0} ~".format(x)).values
+
+    pyemu.helpers._write_df_tpl(tpl_filename, direct_tpl_df,
+                                index=False, header=False)
+
+    return df_ti
+
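`_write_direct_df_tpl` writes the model input file itself as the template: the parameterized columns are replaced with `~ parname ~` markers and the original column values are captured as `parval1`. A hedged before/after sketch for a grid-type wel file with `zero_based=False` (so the name carries zero-based indices; the name prefix is illustrative):

    # original model input row (layer, row, col, flux):
    #   1  34  16  -150.0
    # row written to the template file:
    #   1  34  16  ~ welflx_use_col:3_direct_idx0:0_idx1:33_idx2:15 ~
    # the parameter is seeded with parval1 = -150.0 in the control file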
+def _get_tpl_or_ins_df(filenames, dfs, name, index_cols, typ, use_cols=None,
                        suffix='', zone_array=None, longnames=False,
                        get_xy=None, ij_in_idx=None, xy_in_idx=None,
                        zero_based=True, gpname=None):
@@ -1707,6 +1948,7 @@
     Private method to auto-generate parameter or obs names from tabular
     model files (input or output) read into pandas dataframes
     Args:
+        filenames (`str` or `list` of `str`): filenames
         dfs (`pandas.DataFrame` or `list`): DataFrames (can be list of
             DataFrames) to set up parameters or observations
         name (`str`): Parameter name or Observation name prefix
@@ -1733,7 +1975,7 @@
         xy_in_idx (`list` or `array`): defining which `index_cols` contain x,y
         zero_based (`boolean`): IMPORTANT - pass as False if `index_cols`
             are NOT zero-based indicies (e.g. MODFLOW row/cols).
-            If False 1 with be subtracted from `index_cols`.
+            If False, 1 will be subtracted from `index_cols`.
 
     Returns:
         if `typ`==`obs`: pandas.DataFrame with index strings for setting up obs
@@ -1800,11 +2042,12 @@
     if typ == 'obs':
         return df_ti  #################### RETURN if OBS
-    # else
-    # use all non-index columns if use_cols not passed
+
     if use_cols is None:
         use_cols = [c for c in df_ti.columns if c not in index_cols]
+    # if direct, we have more to deal with...
     for iuc, use_col in enumerate(use_cols):
+
         if not isinstance(name, str):
             nname = name[iuc]
         # if zone type, find the zones for each index position
         else:
@@ -1819,6 +2062,7 @@
                                       len(index_cols)))
             df_ti.loc[:, "zval"] = df_ti.sidx.apply(
                 lambda x: zone_array[x])
+
         if gpname is None or gpname[iuc] is None:
             ngpname = nname
         else:
@@ -1827,6 +2071,7 @@
             else:
                 ngpname = gpname
         df_ti.loc[:, "pargp{}".format(use_col)] = ngpname
+        df_ti.loc[:, "parval1_{0}".format(use_col)] = 1.0
         if typ == "constant":
             # one par for entire use_col column
             if longnames:
"{0}{1}".format(nname, use_col) + if suffix != '': + df_ti.loc[:, use_col] += suffix + + _check_diff(df.loc[:, use_col].values, in_filename) + df_ti.loc[:, "parval1_{0}".format(use_col)] = df.loc[:, use_col][0] + + elif typ == "zone": + # one par for each zone + if longnames: + df_ti.loc[:, use_col] = "{0}_use_col:{1}_{2}".format( + nname, use_col, "direct") + if zone_array is not None: + df_ti.loc[:, use_col] += df_ti.zval.apply( + lambda x: "_zone:{0}".format(x)) + if suffix != '': + df_ti.loc[:, use_col] += "_{0}".format(suffix) + else: + df_ti.loc[:, use_col] = "{0}{1}".format(nname, use_col) + if suffix != '': + df_ti.loc[:, use_col] += suffix + + #todo check that values are constant within zones and + #assign parval1 + raise NotImplementedError("list-based direct zone-type parameters not implemented") + + elif typ == "grid": + # one par for each index + if longnames: + df_ti.loc[:, use_col] = "{0}_use_col:{1}_{2}".format( + nname, use_col, "direct") + if zone_array is not None: + df_ti.loc[:, use_col] += df_ti.zval.apply( + lambda x: "_zone:{0}".format(x)) + df_ti.loc[:, use_col] += '_' + df_ti.idx_strs + if suffix != '': + df_ti.loc[:, use_col] += "_{0}".format(suffix) + + else: + df_ti.loc[:, use_col] = "{0}{1}".format(nname, use_col) + df_ti.loc[:, use_col] += df_ti.idx_strs + if suffix != '': + df_ti.loc[:, use_col] += suffix + + df_ti.loc[:, "parval1_{0}".format(use_col)] = df.loc[:, use_col].values + + else: + raise Exception("get_tpl_or_ins_df() error: " + "unrecognized 'typ', if not 'obs', " + "should be 'constant','zone', " + "or 'grid', not '{0}'".format(typ)) + direct_tpl_df.loc[:, use_col] = df_ti.loc[:, use_col].apply(lambda x: "~ {0} ~".format(x)).values + + pyemu.helpers._write_df_tpl(tpl_filename,direct_tpl_df,index=False,header=False) + + return df_ti + +def _get_tpl_or_ins_df(filenames, dfs, name, index_cols, typ, use_cols=None, suffix='', zone_array=None, longnames=False, get_xy=None, ij_in_idx=None, xy_in_idx=None, zero_based=True, gpname=None): @@ -1707,6 +1948,7 @@ def _get_tpl_or_ins_df(dfs, name, index_cols, typ, use_cols=None, Private method to auto-generate parameter or obs names from tabular model files (input or output) read into pandas dataframes Args: + filenames (`str` or `list` of str`): filenames dfs (`pandas.DataFrame` or `list`): DataFrames (can be list of DataFrames) to set up parameters or observations name (`str`): Parameter name or Observation name prefix @@ -1733,7 +1975,7 @@ def _get_tpl_or_ins_df(dfs, name, index_cols, typ, use_cols=None, xy_in_idx (`list` or `array`): defining which `index_cols` contain x,y zero_based (`boolean`): IMPORTANT - pass as False if `index_cols` are NOT zero-based indicies (e.g. MODFLOW row/cols). - If False 1 with be subtracted from `index_cols`. + If False 1 with be subtracted from `index_cols`.= Returns: if `typ`==`obs`: pandas.DataFrame with index strings for setting up obs @@ -1800,11 +2042,12 @@ def _get_tpl_or_ins_df(dfs, name, index_cols, typ, use_cols=None, if typ == 'obs': return df_ti #################### RETURN if OBS - # else - # use all non-index columns if use_cols not passed + if use_cols is None: use_cols = [c for c in df_ti.columns if c not in index_cols] + # if direct, we have more to deal with... 
@@ -1918,15 +2164,39 @@
         raise Exception("write_array_tpl() error: shape '{0}' not 2D"
                         "".format(str(shape)))
 
+    par_style = par_style.lower()
+    if par_style == "direct":
+        if not os.path.exists(input_filename):
+            raise Exception("write_array_tpl() error: couldn't find input "
+                            "file {0}, which is required for 'direct' "
+                            "par_style".format(input_filename))
+        org_arr = np.loadtxt(input_filename)
+        if par_type == "grid":
+            pass
+        elif par_type == "constant":
+            _check_diff(org_arr, input_filename)
+        elif par_type == "zone":
+            for zval in np.unique(zone_array):
+                if zval < 1:
+                    continue
+                zone_org_arr = org_arr.copy()
+                zone_org_arr[zone_array != zval] = np.NaN
+                _check_diff(zone_org_arr, input_filename, zval)
+    elif par_style == "multiplier":
+        org_arr = np.ones(shape)
+    else:
+        raise Exception("write_array_tpl() error: unrecognized 'par_style' "
+                        "{0}, should be 'direct' or "
+                        "'multiplier'".format(par_style))
+
     def constant_namer(i, j):
         if longnames:
-            pname = "const_{0}".format(name)
+            pname = "{0}_const_{1}".format(par_style, name)
             if suffix != '':
                 pname += "_{0}".format(suffix)
         else:
-            pname = "{0}{1}".format(name, suffix)
+            pname = "{0}{1}{2}".format(par_style[0], name, suffix)
             if len(pname) > 12:
-                raise ("constant par name too long:"
-                       "{0}".format(pname))
+                raise Exception("constant par name too long:"
+                                "{0}".format(pname))
         return pname
 
@@ -1935,19 +2205,18 @@ def zone_namer(i, j):
         if zone_array is not None:
             zval = zone_array[i, j]
         if longnames:
-            pname = "{0}_zone:{1}".format(name, zval)
+            pname = "{0}_{1}_zone:{2}".format(par_style, name, zval)
             if suffix != '':
                 pname += "_{0}".format(suffix)
         else:
-
-            pname = "{0}_zn{1}".format(name, zval)
+            pname = "{0}{1}_zn{2}".format(par_style[0], name, zval)
             if len(pname) > 12:
-                raise ("zone par name too long:{0}".format(pname))
+                raise Exception("zone par name too long:{0}".format(pname))
         return pname
 
     def grid_namer(i, j):
         if longnames:
-            pname = "{0}_i:{1}_j:{2}".format(name, i, j)
+            pname = "{0}_{1}_i:{2}_j:{3}".format(par_style, name, i, j)
             if get_xy is not None:
                 pname += "_x:{0:0.2f}_y:{1:0.2f}".format(*get_xy([i, j]))
             if zone_array is not None:
@@ -1955,9 +2224,9 @@
             if suffix != '':
                 pname += "_{0}".format(suffix)
         else:
-            pname = "{0}{1:03d}{2:03d}".format(name, i, j)
+            pname = "{0}{1}{2:03d}{3:03d}".format(par_style[0], name, i, j)
         if len(pname) > 12:
-            raise ("grid pname too long:{0}".format(pname))
+            raise Exception("grid pname too long:{0}".format(pname))
         return pname
 
     if par_type == "constant":
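With the style prefix added to the namers above, array parameter names now encode how the parameter is applied. Roughly, for a grid-type par named "hk" in zone 1 (indices illustrative):

    # longnames:
    #   multiplier_hk_i:3_j:4_zone:1
    #   direct_hk_i:3_j:4_zone:1
    # short names (12-char limit) keep only the style initial:
    #   mhk003004  /  dhk003004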
@@ -1972,6 +2241,7 @@
                         "'{0}'".format(par_type))
 
     parnme = []
+    org_par_val_dict = {}
     xx, yy, ii, jj = [], [], [], []
     with open(tpl_filename, 'w') as f:
         f.write("ptf ~\n")
@@ -1989,10 +2259,13 @@
 
                 pname = namer(i, j)
                 parnme.append(pname)
+                org_par_val_dict[pname] = org_arr[i, j]
                 pname = " ~ {0} ~".format(pname)
+
                 f.write(pname)
             f.write("\n")
     df = pd.DataFrame({"parnme": parnme}, index=parnme)
+    df.loc[:, "parval1"] = df.parnme.apply(lambda x: org_par_val_dict[x])
     if par_type == 'grid':
         df.loc[:, 'i'] = ii
         df.loc[:, 'j'] = jj
@@ -2001,8 +2274,8 @@
         df.loc[:, 'y'] = yy
     if gpname is None:
         gpname = name
-    df.loc[:, "pargp"] = "{0}_{1}".format(
-        gpname, suffix.replace('_', '')).rstrip('_')
+    df.loc[:, "pargp"] = "{0}_{1}_{2}".format(
+        gpname, suffix.replace('_', ''), par_style).rstrip('_')
     df.loc[:, "tpl_filename"] = tpl_filename
     df.loc[:, "input_filename"] = input_filename
     if input_filename is not None:
@@ -2010,3 +2283,13 @@
         np.savetxt(input_filename, arr, fmt="%2.1f")
 
     return df
+
+
+def _check_diff(org_arr, input_filename, zval=None):
+    percent_diff = 100. * np.abs((np.nanmax(org_arr) - np.nanmin(org_arr)) /
+                                 np.nanmean(org_arr))
+    if percent_diff > DIRECT_PAR_PERCENT_DIFF_TOL:
+        message = ("_check_diff() error: direct par for file '{0}' exceeds "
+                   "tolerance for percent difference: {1} > {2}"
+                   "".format(input_filename, percent_diff,
+                             DIRECT_PAR_PERCENT_DIFF_TOL))
+        if zval is not None:
+            message += " in zone {0}".format(zval)
+        raise Exception(message)
\ No newline at end of file
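`_check_diff` guards direct constant- and zone-type array parameters: a single parameter will write one value everywhere (or everywhere within a zone), so the existing array must already be effectively uniform. With the parenthesization fixed above, the metric is 100 * |max - min| / mean, compared against `DIRECT_PAR_PERCENT_DIFF_TOL`. A quick numeric check of that formula:

    import numpy as np

    arr = np.array([1.000, 1.005, 0.995])  # nearly uniform array
    percent_diff = 100. * np.abs((np.nanmax(arr) - np.nanmin(arr)) /
                                 np.nanmean(arr))
    print(percent_diff)  # ~1.0, right at the default tolerance of 1.0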
diff --git a/pyemu/utils/helpers.py b/pyemu/utils/helpers.py
index 480286ca1..92d4defdc 100644
--- a/pyemu/utils/helpers.py
+++ b/pyemu/utils/helpers.py
@@ -3022,6 +3022,8 @@ def _process_model_file(model_file, df):
     org_arr = np.loadtxt(org_file[0])
 
     for mlt in df_mf.mlt_file:
+        if pd.isna(mlt):
+            continue
         mlt_data = np.loadtxt(mlt)
         if org_arr.shape != mlt_data.shape:
             raise Exception(
                 "shape of org file {}:{} differs from mlt file {}:{}".format(
                     org_file, org_arr.shape, mlt, mlt_data.shape))
@@ -3343,6 +3345,7 @@
         print("org_data shape:", org_data.shape)
         new_df = org_data.copy()
         for mlt in df_mf.itertuples():
+
             try:
                 new_df = new_df.reset_index().rename(
                     columns={'index': 'oidx'}).set_index(mlt.index_cols)
@@ -3351,21 +3354,24 @@
                 print("error setting mlt index_cols: ", str(mlt.index_cols),
                       " for new_df with cols: ", list(new_df.columns))
                 raise Exception("error setting mlt index_cols: " + str(e))
-            mlts = pd.read_csv(mlt.mlt_file)
-            # get mult index to align with org_data,
-            # mult idxs will always be written zero based
-            # if original model files is not zero based need to add 1
-            add1 = int(mlt.zero_based==False)
-            mlts.index = pd.MultiIndex.from_tuples(mlts.sidx.apply(
-                lambda x: tuple(add1+np.array(literal_eval(x)))),
-                names=mlt.index_cols)
-            if mlts.index.nlevels < 2:  # just in case only one index col is used
-                mlts.index = mlts.index.get_level_values(0)
-            common_idx = new_df.index.intersection(mlts.index).sort_values()
-            mlt_cols = [str(col) for col in mlt.use_cols]
-            new_df.loc[common_idx, mlt_cols] = (new_df.loc[common_idx, mlt_cols]
-                                                * mlts.loc[common_idx, mlt_cols]
-                                                ).values
+            if not hasattr(mlt, "mlt_file") or pd.isna(mlt.mlt_file):
+                print("null mlt file for org_file '{0}', "
+                      "continuing...".format(org_file))
+            else:
+                mlts = pd.read_csv(mlt.mlt_file)
+                # get mult index to align with org_data,
+                # mult idxs will always be written zero based
+                # if original model files is not zero based need to add 1
+                add1 = int(mlt.zero_based == False)
+                mlts.index = pd.MultiIndex.from_tuples(mlts.sidx.apply(
+                    lambda x: tuple(add1 + np.array(literal_eval(x)))),
+                    names=mlt.index_cols)
+                if mlts.index.nlevels < 2:  # just in case only one index col is used
+                    mlts.index = mlts.index.get_level_values(0)
+                common_idx = new_df.index.intersection(mlts.index).sort_values()
+                mlt_cols = [str(col) for col in mlt.use_cols]
+                new_df.loc[common_idx, mlt_cols] = (new_df.loc[common_idx, mlt_cols]
+                                                    * mlts.loc[common_idx, mlt_cols]
+                                                    ).values
             # bring mult index back to columns AND re-order
             new_df = new_df.reset_index().set_index(
                 'oidx')[org_data.columns].sort_index()
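With these guards in place, a direct parameter appears in mult2model_info.csv as a row with an empty/NaN mlt_file: the runtime helpers skip the multiplication and carry the template-written org file values straight through to the model file, subject only to the ult_ubound/ult_lbound enforcement. A hedged sketch for inspecting that bookkeeping in a template directory built by PstFrom:

    import pandas as pd

    df = pd.read_csv("mult2model_info.csv")
    direct_rows = df.loc[df.mlt_file.isna(), :]
    # direct pars: org file -> model file, no multiplier applied
    print(direct_rows.loc[:, ["org_file", "model_file"]])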