From 7838612dafa99098a665a9712ee692c80d9436fd Mon Sep 17 00:00:00 2001 From: jwhite Date: Sun, 5 Oct 2025 14:14:59 -0600 Subject: [PATCH 01/10] starting on bound dialator --- pyemu/pst/pst_handler.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/pyemu/pst/pst_handler.py b/pyemu/pst/pst_handler.py index 7be7609c..a6b1ef6d 100644 --- a/pyemu/pst/pst_handler.py +++ b/pyemu/pst/pst_handler.py @@ -3985,6 +3985,42 @@ def add_pars_as_obs(self,pst_path='.'): self.add_parameters(tpl_fname,in_fname,pst_path='.') self.add_observations(ins_fname,in_fname,pst_path='.') + def dialate_par_bounds(self,dialate_factor,center=True): + """ increase the distance between the parameter bounds while respecting the + log transformation status + + Args: + dialate_factor (varies): a factor to increase the distance between parameter + bounds. Can be a float or a dict of str-float pars. + center (bool): flag to dialate from the center point between the bounds. If + False, then the dialation is WRT the `parval1` values + """ + + if isinstance(dialate_factor,float): + temp = {} + for name in self.par_names: + temp[name] = dialate_factor + dialate_factor = temp + temp = None + self.add_transform_columns() + + par = self.parameter_data + par['dialat_factor'] = [dialate_factor.get(name,1.0) for name in par.parnme.values] + log_idx = par.partrans == "log" + if center: + par["center_point"] = ((par.parubnd_trans - par.parlbnd_trans) / 2.0) + else: + par["center_point"] = par.parval1_trans.copy() + + par["parubnd_org"] = par.parubnd.copy() + par["ubdist"] = par.parubnd_trans - par.center_point + par["parubnd"] = par.center_point + (par.ubdist * par.dialate_factor) + par["parlbnd_org"] = par.parlbnd.copy() + par["lbdist"] = par.center_point - par.parlbnd_trans + par["parubnd"] = par.center_point - (par.lbdist * par.dialate_factor) + + + def _replace_str_in_files(filelist, name_dict, file_obsparmap=None, pst_path='.'): import multiprocessing as mp From 2a6b2f34290c601cc4d1cd15cb2910ddf1e5de8d Mon Sep 17 00:00:00 2001 From: jwhite Date: Mon, 6 Oct 2025 18:59:22 +0200 Subject: [PATCH 02/10] added range obs --- pyemu/pst/pst_handler.py | 40 +++++++++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/pyemu/pst/pst_handler.py b/pyemu/pst/pst_handler.py index a6b1ef6d..a29d1182 100644 --- a/pyemu/pst/pst_handler.py +++ b/pyemu/pst/pst_handler.py @@ -3952,7 +3952,7 @@ def rename_observations(self, name_dict, pst_path=".", insmap=None): file_obsparmap=insmap, pst_path=pst_path) - def add_pars_as_obs(self,pst_path='.'): + def add_pars_as_obs(self,pst_path='.',par_sigma_range=4): """add all parameter values as observation values by creating a new template and instruction file and adding them to the control file @@ -3960,6 +3960,9 @@ def add_pars_as_obs(self,pst_path='.'): pst_path (str): the path to the control file from where python is running. Default is "." (python is running in the same directory as the control file) + par_sigma_range (int): number of standard deviations implied by the + distance between the parameter bounds. Used to set the weights + for the range observations """ @@ -3983,7 +3986,23 @@ def add_pars_as_obs(self,pst_path='.'): for name in parval1.index: f.write("l1 ~,~ !{0}!\n".format(name)) self.add_parameters(tpl_fname,in_fname,pst_path='.') - self.add_observations(ins_fname,in_fname,pst_path='.') + df = self.add_observations(ins_fname,in_fname,pst_path='.') + self.add_transform_columns() + obs = self.observation_data + par = self.parameter_data + if "greater_than" not in obs.columns: + obs["greater_than"] = np.nan + if "less_than" not in obs.columns: + obs["less_than"] = np.nan + + obs.loc[df.obsnme,"greater_than"] = par.loc[df.obsnme,"parlbnd"] + obs.loc[df.obsnme,"less_than"] = par.loc[df.obsnme,"parubnd"] + + log_idx = par.loc[df.obsnme,"partrans"] == "log" + stdev = (par.loc[df.obsnme,"parubnd_trans"] - par.loc[df.obsnme,"parlbnd_trans"]) / par_sigma_range + stdev.loc[log_idx] = 10**stdev.loc[log_idx] + obs.loc[df.obsnme,"weight"] = 1.0 / stdev.values + def dialate_par_bounds(self,dialate_factor,center=True): """ increase the distance between the parameter bounds while respecting the @@ -3993,7 +4012,7 @@ def dialate_par_bounds(self,dialate_factor,center=True): dialate_factor (varies): a factor to increase the distance between parameter bounds. Can be a float or a dict of str-float pars. center (bool): flag to dialate from the center point between the bounds. If - False, then the dialation is WRT the `parval1` values + False, then the dialation is from the `parval1` values """ if isinstance(dialate_factor,float): @@ -4007,19 +4026,22 @@ def dialate_par_bounds(self,dialate_factor,center=True): par = self.parameter_data par['dialat_factor'] = [dialate_factor.get(name,1.0) for name in par.parnme.values] log_idx = par.partrans == "log" + par["bnd_center"] = ((par.parubnd_trans - par.parlbnd_trans) / 2.0) if center: - par["center_point"] = ((par.parubnd_trans - par.parlbnd_trans) / 2.0) + par["center_point"] = par["bnd_center"] else: par["center_point"] = par.parval1_trans.copy() par["parubnd_org"] = par.parubnd.copy() - par["ubdist"] = par.parubnd_trans - par.center_point + par["ubdist"] = par.parubnd_trans - par.bnd_center par["parubnd"] = par.center_point + (par.ubdist * par.dialate_factor) - par["parlbnd_org"] = par.parlbnd.copy() - par["lbdist"] = par.center_point - par.parlbnd_trans - par["parubnd"] = par.center_point - (par.lbdist * par.dialate_factor) - + par["parlbnd_org"] = par.parlbnd.copy() + par["lbdist"] = par.bnd_center - par.parlbnd_trans + par["parlbnd"] = par.center_point - (par.lbdist * par.dialate_factor) + + par.loc[log_idx,"parubnd"] = 10**par.loc[log_idx,"parubnd"] + par.loc[log_idx,"parlbnd"] = 10**par.loc[log_idx,"parlbnd"] def _replace_str_in_files(filelist, name_dict, file_obsparmap=None, pst_path='.'): From 7e15b8ba556414eb5e54a3d9ba90ed0cd2ce30c6 Mon Sep 17 00:00:00 2001 From: jwhite Date: Tue, 7 Oct 2025 06:03:57 +0200 Subject: [PATCH 03/10] more work on and testing for relaxed par bound formulation --- autotest/pst_from_tests.py | 28 ++++++++++++++++--- autotest/pst_tests.py | 55 ++++++++++++++++++++++++++++++++++++-- autotest/pst_tests_2.py | 5 ++++ pyemu/pst/pst_handler.py | 4 +-- 4 files changed, 84 insertions(+), 8 deletions(-) diff --git a/autotest/pst_from_tests.py b/autotest/pst_from_tests.py index dc916a9b..4a5910eb 100644 --- a/autotest/pst_from_tests.py +++ b/autotest/pst_from_tests.py @@ -6385,8 +6385,8 @@ def xsec_pars_as_obs_test(tmp_path): sr = m.modelgrid t_d = "template_xsec" pf = pyemu.utils.PstFrom(tmp_model_ws,t_d,remove_existing=True,spatial_reference=sr) - pf.add_parameters("hk_Layer_1.ref",par_type="grid",par_style="direct",upper_bound=25, - lower_bound=0.25) + pf.add_parameters("hk_Layer_1.ref",par_type="grid",par_style="direct",upper_bound=3, + lower_bound=2,transform="none") pf.add_parameters("hk_Layer_1.ref", par_type="grid", par_style="multiplier", upper_bound=10.0, lower_bound=0.1) @@ -6404,17 +6404,37 @@ def xsec_pars_as_obs_test(tmp_path): pf.mod_sys_cmds.append("mfnwt {0}".format(nam_file)) pst = pf.build_pst(None) - pst.add_pars_as_obs(pst_path=t_d) + par_sigma_range = 1 + pst.add_pars_as_obs(pst_path=t_d,par_sigma_range=par_sigma_range) diff = set(pst.par_names) - set(pst.obs_names) print(diff) assert len(diff) == 0 - pst.write(os.path.join(t_d,"pest.pst")) + pst.write(os.path.join(t_d,"pest.pst"),version=2) pyemu.os_utils.run("{0} {1}".format(ies_exe_path,"pest.pst"),cwd=t_d) pst = pyemu.Pst(os.path.join(t_d,"pest.pst")) print(pst.phi) assert np.isclose(pst.phi, 0), pst.phi + par = pst.parameter_data + obs = pst.observation_data.loc[par.parnme,:] + assert len(par) == len(obs) + print(obs) + assert np.isclose(np.abs((obs["less_than"] - par["parubnd"]).sum()),0) + assert np.isclose(np.abs((obs["greater_than"] - par["parlbnd"]).sum()), 0) + + logidx = par.partrans == "log" + obs.loc[logidx,"greater_than"] = np.log10(obs.loc[logidx,"greater_than"]) + obs.loc[logidx, "less_than"] = np.log10(obs.loc[logidx, "less_than"]) + obs["dist"] = obs.less_than - obs.greater_than + print(obs.dist) + obs.dist / par_sigma_range + obs.loc[logidx,"dist"] = 10**(obs.loc[logidx,"dist"]) + weight = 1/(obs.dist/par_sigma_range) + print(weight) + print(obs.weight) + assert np.isclose(np.abs(weight-obs.weight).sum(),0) + if __name__ == "__main__": diff --git a/autotest/pst_tests.py b/autotest/pst_tests.py index f0df4bcb..2b56d5dd 100644 --- a/autotest/pst_tests.py +++ b/autotest/pst_tests.py @@ -112,6 +112,10 @@ def res_test(tmp_path): def pst_manip_test(tmp_path): import os + + if os.path.exists(tmp_path): + shutil.rmtree(tmp_path) + os.makedirs(tmp_path) from pyemu import Pst pst_dir = os.path.join("pst") org_path = os.path.join(pst_dir, "pest.pst") @@ -127,9 +131,46 @@ def pst_manip_test(tmp_path): new_pst = Pst(new_path) assert all(new_pst.observation_data.obsval == pst.observation_data.obsval) assert all(new_pst.parameter_data.parval1 == pst.parameter_data.parval1) - + org_par = pst.parameter_data.copy() + pst.dialate_par_bounds(1.0) + diff = np.abs(org_par.parubnd - pst.parameter_data.parubnd).sum() + assert diff < 1e-7 + diff = np.abs(org_par.parlbnd - pst.parameter_data.parlbnd).sum() + assert diff < 1e-7 + pst.dialate_par_bounds(1.0) + diff = np.abs(org_par.parubnd - pst.parameter_data.parubnd).sum() + assert diff < 1e-7 + diff = np.abs(org_par.parlbnd - pst.parameter_data.parlbnd).sum() + assert diff < 1e-7 + pst.dialate_par_bounds(2.0) + print(pst.parameter_data.loc[:,["parubnd","parubnd_org"]]) + pst.write(new_path) + pst = Pst(new_path) + pst.write(new_path, version=2) + new_pst = Pst(new_path) + new_par = new_pst.parameter_data + assert np.all(new_par.parubnd.values > org_par.parubnd.values) + assert np.all(new_par.parlbnd.values < org_par.parlbnd.values) + + pst.dialate_par_bounds(0.5) + new_par = pst.parameter_data + #print(new_par.parubnd) + #print(org_par.parubnd) + assert np.isclose(np.abs(new_par.parubnd - org_par.parubnd).sum(),0) + assert np.isclose(np.abs(new_par.parlbnd - org_par.parlbnd).sum(), 0) + + pst.parameter_data["parval1"] = pst.parameter_data.parubnd + pst.dialate_par_bounds(2.0) + pst.dialate_par_bounds(0.5) + new_par = pst.parameter_data + assert np.isclose(np.abs(new_par.parubnd - org_par.parubnd).sum(), 0) + assert np.isclose(np.abs(new_par.parlbnd - org_par.parlbnd).sum(), 0) def load_test(tmp_path): + import os + if os.path.exists(tmp_path): + shutil.rmtree(tmp_path) + os.makedirs(tmp_path) from pyemu import Pst pst_dir = setup_tmp("pst", tmp_path) # just testing all sorts of different pst files @@ -177,6 +218,14 @@ def load_test(tmp_path): exceptions.append(pst_file + " v2 reload fail: " + str(e)) continue + org_par = p.parameter_data.copy() + p.dialate_par_bounds(1.0) + diff = np.abs(org_par.parubnd - p.parameter_data.parubnd).sum() + assert diff < 1e-5 + diff = np.abs(org_par.parlbnd - p.parameter_data.parlbnd).sum() + print(diff) + assert diff < 1e-5 + # with open("load_fails.txt",'w') as f: # [f.write(pst_file+'\n') for pst_file in load_fails] @@ -1533,7 +1582,9 @@ def interface_check_test(): with this. """ d = 'temp' - parrep_test(d) + #load_test(d) + pst_manip_test(d) + #parrep_test(d) #interface_check_test() # new_format_test_2() #write2_nan_test() diff --git a/autotest/pst_tests_2.py b/autotest/pst_tests_2.py index de115f1e..e7a5d19c 100644 --- a/autotest/pst_tests_2.py +++ b/autotest/pst_tests_2.py @@ -1098,6 +1098,11 @@ def results_mou_1_test(): #print(df) assert df is not None +def dialate_bound_test(): + import pyemu + + + if __name__ == "__main__": results_ies_3_test() results_ies_1_test() diff --git a/pyemu/pst/pst_handler.py b/pyemu/pst/pst_handler.py index a29d1182..92cc6ef8 100644 --- a/pyemu/pst/pst_handler.py +++ b/pyemu/pst/pst_handler.py @@ -4024,9 +4024,9 @@ def dialate_par_bounds(self,dialate_factor,center=True): self.add_transform_columns() par = self.parameter_data - par['dialat_factor'] = [dialate_factor.get(name,1.0) for name in par.parnme.values] + par['dialate_factor'] = [dialate_factor.get(name,1.0) for name in par.parnme.values] log_idx = par.partrans == "log" - par["bnd_center"] = ((par.parubnd_trans - par.parlbnd_trans) / 2.0) + par["bnd_center"] = par.parlbnd_trans + ((par.parubnd_trans - par.parlbnd_trans) / 2.0) if center: par["center_point"] = par["bnd_center"] else: From ab385a280c714c471fd91882c88571b1e4b4248e Mon Sep 17 00:00:00 2001 From: jwhite Date: Tue, 7 Oct 2025 09:45:22 +0200 Subject: [PATCH 04/10] check in --- pyemu/pst/pst_handler.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyemu/pst/pst_handler.py b/pyemu/pst/pst_handler.py index 92cc6ef8..c00dffe2 100644 --- a/pyemu/pst/pst_handler.py +++ b/pyemu/pst/pst_handler.py @@ -4002,6 +4002,7 @@ def add_pars_as_obs(self,pst_path='.',par_sigma_range=4): stdev = (par.loc[df.obsnme,"parubnd_trans"] - par.loc[df.obsnme,"parlbnd_trans"]) / par_sigma_range stdev.loc[log_idx] = 10**stdev.loc[log_idx] obs.loc[df.obsnme,"weight"] = 1.0 / stdev.values + obs.loc[df.obsnme,"obgnme"] = "parbounds" def dialate_par_bounds(self,dialate_factor,center=True): @@ -4012,7 +4013,8 @@ def dialate_par_bounds(self,dialate_factor,center=True): dialate_factor (varies): a factor to increase the distance between parameter bounds. Can be a float or a dict of str-float pars. center (bool): flag to dialate from the center point between the bounds. If - False, then the dialation is from the `parval1` values + False, then the dialation is from the `parval1` values. Beware that using + center False can have produce the some strange results... """ if isinstance(dialate_factor,float): From d38ba05a990f644d3771b641c448552f366c689e Mon Sep 17 00:00:00 2001 From: jwhite Date: Tue, 7 Oct 2025 10:28:21 +0200 Subject: [PATCH 05/10] added notebook to demo how to relax the parameter bound constraints --- examples/pstfrom_relaxed.ipynb | 595 +++++++++++++++++++++++++++++++++ 1 file changed, 595 insertions(+) create mode 100644 examples/pstfrom_relaxed.ipynb diff --git a/examples/pstfrom_relaxed.ipynb b/examples/pstfrom_relaxed.ipynb new file mode 100644 index 00000000..efef8bf6 --- /dev/null +++ b/examples/pstfrom_relaxed.ipynb @@ -0,0 +1,595 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Setting up a PEST interface from MODFLOW6 using the `PstFrom` class with `PyPestUtils` for advanced pilot point parameterization" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import shutil\n", + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import pyemu\n", + "import flopy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "sys.path.append(os.path.join(\"..\",\"..\",\"pypestutils\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pypestutils as ppu" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "An existing MODFLOW6 model is in the directory `freyberg_mf6`. Lets check it out:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "org_model_ws = os.path.join('freyberg_mf6')\n", + "os.listdir(org_model_ws)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can see that all the input array and list data for this model have been written \"externally\" - this is key to using the `PstFrom` class. \n", + "\n", + "Let's quickly viz the model top just to remind us of what we are dealing with:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "id_arr = np.loadtxt(os.path.join(org_model_ws,\"freyberg6.dis_idomain_layer3.txt\"))\n", + "top_arr = np.loadtxt(os.path.join(org_model_ws,\"freyberg6.dis_top.txt\"))\n", + "top_arr[id_arr==0] = np.nan\n", + "plt.imshow(top_arr)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's copy those files to a temporary location just to make sure we don't goof up those original files:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tmp_model_ws = \"temp_pst_from_ppu\"\n", + "if os.path.exists(tmp_model_ws):\n", + " shutil.rmtree(tmp_model_ws)\n", + "shutil.copytree(org_model_ws,tmp_model_ws)\n", + "os.listdir(tmp_model_ws)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we need just a tiny bit of info about the spatial discretization of the model - this is needed to work out separation distances between parameters for build a geostatistical prior covariance matrix later.\n", + "\n", + "Here we will load the flopy sim and model instance just to help us define some quantities later - flopy is not required to use the `PstFrom` class." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sim = flopy.mf6.MFSimulation.load(sim_ws=tmp_model_ws)\n", + "m = sim.get_model(\"freyberg6\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here we use the simple `SpatialReference` pyemu implements to help us spatially locate parameters" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sr = pyemu.helpers.SpatialReference.from_namfile(\n", + " os.path.join(tmp_model_ws, \"freyberg6.nam\"),\n", + " delr=m.dis.delr.array, delc=m.dis.delc.array)\n", + "sr" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we can instantiate a `PstFrom` class instance" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "template_ws = \"freyberg6_template\"\n", + "pf = pyemu.utils.PstFrom(original_d=tmp_model_ws, new_d=template_ws,\n", + " remove_existing=True,\n", + " longnames=True, spatial_reference=sr,\n", + " zero_based=False,start_datetime=\"1-1-2018\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Observations\n", + "\n", + "So now that we have a `PstFrom` instance, but its just an empty container at this point, so we need to add some PEST interface \"observations\" and \"parameters\". Let's start with observations using MODFLOW6 head. These are stored in `heads.csv`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv(os.path.join(tmp_model_ws,\"heads.csv\"),index_col=0)\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The main entry point for adding observations is (surprise) `PstFrom.add_observations()`. This method works on the list-type observation output file. We need to tell it what column is the index column (can be string if there is a header or int if no header) and then what columns contain quantities we want to monitor (e.g. \"observe\") in the control file - in this case we want to monitor all columns except the index column:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "hds_df = pf.add_observations(\"heads.csv\",insfile=\"heads.csv.ins\",index_cols=\"time\",\n", + " use_cols=list(df.columns.values),prefix=\"hds\",)\n", + "hds_df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can see that it returned a dataframe with lots of useful info: the observation names that were formed (`obsnme`), the values that were read from `heads.csv` (`obsval`) and also some generic weights and group names. At this point, no control file has been created, we have simply prepared to add this observations to the control file later. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "[f for f in os.listdir(template_ws) if f.endswith(\".ins\")]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Nice! We also have a PEST-style instruction file for those obs.\n", + "\n", + "Now lets do the same for SFR observations:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv(os.path.join(tmp_model_ws, \"sfr.csv\"), index_col=0)\n", + "sfr_df = pf.add_observations(\"sfr.csv\", insfile=\"sfr.csv.ins\", index_cols=\"time\", use_cols=list(df.columns.values))\n", + "sfr_df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Sweet as! Now that we have some observations, let's add parameters!\n", + "\n", + "## Pilot points and `PyPestUtils`\n", + "\n", + "This notebook is mostly meant to demonstrate some advanced pilot point parameterization that is possible with `PyPestUtils`, so we will only focus on HK and VK pilot point parameters. This is just to keep the example short. In practice, please please please parameterize boundary conditions too!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "v = pyemu.geostats.ExpVario(contribution=1.0,a=5000,bearing=0,anisotropy=5)\n", + "pp_gs = pyemu.geostats.GeoStruct(variograms=v, transform='log')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pp_gs.plot()\n", + "print(\"spatial variogram\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's get the idomain array to use as a zone array - this keeps us from setting up parameters in inactive model cells:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ib = m.dis.idomain[0].array" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Find HK files for the upper and lower model layers (assuming model layer 2 is a semi-confining unit)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "hk_arr_files = [f for f in os.listdir(tmp_model_ws) if \"npf_k_\" in f and f.endswith(\".txt\") and \"layer2\" not in f]\n", + "hk_arr_files" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for arr_file in hk_arr_files:\n", + " tag = arr_file.split('.')[1].replace(\"_\",\"-\")\n", + " pf.add_parameters(filenames=arr_file,par_type=\"pilotpoints\",\n", + " par_name_base=tag,pargp=tag,zone_array=ib,\n", + " upper_bound=10.,lower_bound=0.1,ult_ubound=100,ult_lbound=0.01,\n", + " pp_options={\"pp_space\":3},geostruct=pp_gs)\n", + " #let's also add the resulting hk array that modflow sees as observations\n", + " # so we can make easy plots later...\n", + " pf.add_observations(arr_file,prefix=tag,\n", + " obsgp=tag,zone_array=ib)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you are familiar with how `PstFrom` has worked historically, we handed off the process to solve for the factor file (which requires solving the kriging equations for each active node) to a pure python (well, with pandas and numpy). This was ok for toy models, but hella slow for big ugly models. If you look at the log entries above, you should see that the instead, `PstFrom` successfully handed off the solve to `PyPestUtils`, which is exponentially faster for big models. sweet ez! " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tpl_files = [f for f in os.listdir(template_ws) if f.endswith(\".tpl\")]\n", + "tpl_files" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open(os.path.join(template_ws,tpl_files[0]),'r') as f:\n", + " for _ in range(2):\n", + " print(f.readline().strip())\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "So those might look like pretty redic parameter names, but they contain heaps of metadata to help you post process things later..." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "So those are you standard pilot points for HK in layer 1 - same as it ever was..." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### build the control file, pest interface files, and forward run script\n", + "At this point, we have some parameters and some observations, so we can create a control file:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.mod_sys_cmds.append(\"mf6\")\n", + "pf.pre_py_cmds.insert(0,\"import sys\")\n", + "pf.pre_py_cmds.insert(1,\"sys.path.append(os.path.join('..','..','..','pypestutils'))\")\n", + "pf.build_pst()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "_ = [print(line.rstrip()) for line in open(os.path.join(template_ws,\"forward_run.py\"))]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setting initial parameter bounds and values" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, just for fun, lets push some initial parameter values (the `parval1` quantities) to their upper bounds before drawing the ensemble. This will result in some ugly prior draws with values \"stacked\" at the upper value. This is meant to mimic the situation where the initial parameter values arent \"centered\" WRT the bounds. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "par = pf.pst.parameter_data\n", + "par.pname.unique()\n", + "hk1par = par.loc[par.pname.str.contains(\"-layer1\"),:]\n", + "assert hk1par.shape[0] > 0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "par.loc[hk1par.parnme,\"parval1\"] = hk1par.parubnd.values - (hk1par.parubnd.values*0.3)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "par.loc[hk1par.parnme,:]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.pst.write(os.path.join(pf.new_d,\"pest_org.pst\"),version=2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Generating a prior parameter ensemble, then run and viz a real" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(122341)\n", + "pe = pf.draw(num_reals=100)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pe_org = pe._df.copy()\n", + "pe.enforce()\n", + "fig,ax = plt.subplots(1,1)\n", + "pe.loc[:,hk1par.parnme[0]]._df.plot(kind=\"hist\",ax=ax,fc=\"0.5\",density=True,alpha=0.5)\n", + "pe_org.loc[:,hk1par.parnme[0]].plot(kind=\"hist\",ax=ax,fc=\"b\",density=True,alpha=0.5)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Yikes! This happens a lot...what can we do about? Well, in the optimization world, there is the idea that you can \"relax\" constraints by changing them from strict \"thou shalt not\" quantities to penalties, which express more of a desire or preference. In practice, how super sure are we that the bounds on HK are really the max and min values we are willing accept? Maybe we would tolerate some transgressions across these bounds? If you are in this camp, here is a way to \"relax\" the problem.\n", + "\n", + "First, we need to add the parameter values as observations, so that each time we run the model, we record the parameter values as output quantities we are monitoring:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.pst.add_pars_as_obs(pst_path=pf.new_d)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's see what we added:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.pst.observation_data.tail()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "See how we now have observations that match the parameter names, but with \"greater_than\" and \"less_than\" column values and that they are set to the current lower and upper parameter bounds, respectively? PESTPP-IES treats these observations as \"range observations\" or double-inequality values so that any value between these limits is accepted without penalty. Notice also that the weights have been set as proportional to the distance between the bounds - this results in the penalty for values outside of the acceptable range being proportional to the standard deviation implied by the distance between the bounds - nice! \n", + "\n", + "Now, we can increase the distance between bounds...there is a method for that:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.pst.dialate_par_bounds(dialate_factor=2.0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.pst.parameter_data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(122341)\n", + "pe_dialated = pf.draw(num_reals=100)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pe_dialated.enforce()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig,axes = plt.subplots(3,1,sharex=True)\n", + "pe.loc[:,hk1par.parnme[0]]._df.plot(kind=\"hist\",ax=axes[0],fc=\"0.5\",density=True,alpha=0.5)\n", + "axes[0].set_title(\"original with enforcement - yuck!\")\n", + "pe_org.loc[:,hk1par.parnme[0]].plot(kind=\"hist\",ax=axes[1],fc=\"b\",density=True,alpha=0.5)\n", + "axes[1].set_title(\"original without enforcement\")\n", + "pe_dialated.loc[:,hk1par.parnme[0]]._df.plot(kind=\"hist\",ax=axes[2],fc=\"m\",density=True,alpha=0.5)\n", + "axes[2].set_title(\"dilated bounds with enforcement\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From 21990e2be613956b227db2441fac89e4594079cb Mon Sep 17 00:00:00 2001 From: jwhite Date: Tue, 14 Oct 2025 10:43:37 +0200 Subject: [PATCH 06/10] more work on choosing a pst version to write --- pyemu/pst/pst_handler.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/pyemu/pst/pst_handler.py b/pyemu/pst/pst_handler.py index c00dffe2..897a65d0 100644 --- a/pyemu/pst/pst_handler.py +++ b/pyemu/pst/pst_handler.py @@ -1898,10 +1898,7 @@ def write(self, new_filename, version=None, check_interface=False): print(vstring) if version is None: - if self.npar > 10000: - version = 2 - else: - version = 1 + version = self._decide_version() if version == 1: return self._write_version1(new_filename=new_filename) @@ -1912,6 +1909,22 @@ def write(self, new_filename, version=None, check_interface=False): "Pst.write() error: version must be 1 or 2, not '{0}'".format(version) ) + def _decide_version(self): + if self.npar > 10000: + return 2 + if self.nobs > 10000: + return 2 + key_cols = ["standard_deviation","upper_bound","lower_bound", + "cycle", "state_par_link","drop_violations", + "greater_than","less_than","link_to"] + for col in key_cols: + if col in self.parameter_data.columns: + return 2 + if col in self.observation_data.columns: + return 2 + + return 1 + def _rectify_parchglim(self): """private method to just fix the parchglim vs cross zero issue""" par = self.parameter_data From 330eff5890032693194cd868334c326dd3f782fc Mon Sep 17 00:00:00 2001 From: jwhite Date: Wed, 29 Oct 2025 16:55:19 -0600 Subject: [PATCH 07/10] added pe.draw_new_ensemble() method and test --- autotest/en_tests.py | 65 +++++++++++++++++++++++++++++++++- pyemu/en.py | 84 +++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 143 insertions(+), 6 deletions(-) diff --git a/autotest/en_tests.py b/autotest/en_tests.py index b47c7af5..41217e34 100644 --- a/autotest/en_tests.py +++ b/autotest/en_tests.py @@ -769,7 +769,70 @@ def mixed_par_draw_2_test(): assert pst.npar == npar +def draw_new_test(): + + import os + import numpy as np + import pyemu + pst = pyemu.Pst(os.path.join("en", "pest.pst")) + cov = pyemu.Cov.from_binary(os.path.join("en", "cov.jcb")) + print(pst.npar, cov.shape) + num_reals = 10000 + + pe = pyemu.ParameterEnsemble.from_gaussian_draw(pst, cov=cov, num_reals=num_reals, factor="cholesky") + + sub_pe = pe.iloc[:10000,:] + + new_pe_nonoise = sub_pe.draw_new_ensemble(num_reals=num_reals) + new_pe_stdnoise = sub_pe.draw_new_ensemble(num_reals=num_reals,include_noise=True) + new_pe_usernoise = sub_pe.draw_new_ensemble(num_reals=num_reals,include_noise=1./np.sqrt(sub_pe.shape[0])) + new_pe_ennoise = sub_pe.draw_new_ensemble(num_reals=num_reals,include_noise=True,noise_reals=pe) + + #pes = [pe,sub_pe,new_pe_nonoise,new_pe_stdnoise,new_pe_usernoise,new_pe_ennoise] + colors = ["r","y","b","g","m","c"] + + pes = [pe,new_pe_stdnoise,new_pe_usernoise,new_pe_ennoise] + for ppe in pes: + ppe.transform() + #ppe.enforce() + + for pname in pst.adj_par_names: + std = [ppe.loc[:,pname].std() for ppe in pes] + mean = [ppe.loc[:,pname].mean() for ppe in pes] + sdiff = [np.abs(std[0] - s) for s in std[1:]] + + print(pname,max(sdiff)) + assert max(sdiff) < 0.05 + + # pst.add_transform_columns() + # ubnd = pst.parameter_data.parubnd_trans.to_dict() + # lbnd = pst.parameter_data.parlbnd_trans.to_dict() + + + # import matplotlib.pyplot as plt + # from matplotlib.backends.backend_pdf import PdfPages + # with PdfPages("check.pdf") as pdf: + # for pname in pst.adj_par_names: + # fig,ax = plt.subplots(1,1,figsize=(10,10)) + # for ppe,c in zip(pes,colors): + + # ax.hist(ppe.loc[:,pname],fc=c,alpha=0.5,bins=10,density=True) + # ylim = ax.get_ylim() + # ax.plot([ubnd[pname],ubnd[pname]],ylim,"k--",lw=3) + # ax.plot([lbnd[pname],lbnd[pname]],ylim,"k--",lw=3) + + # ax.grid() + # ax.set_title(pname) + # plt.tight_layout() + # pdf.savefig() + # plt.close(fig) + # print(pname) + + + + if __name__ == "__main__": + draw_new_test() #par_gauss_draw_consistency_test() #obs_gauss_draw_consistency_test() # phi_vector_test() @@ -784,7 +847,7 @@ def mixed_par_draw_2_test(): # uniform_draw_test() #fill_test() #factor_draw_test() - emp_cov_test() + #emp_cov_test() #emp_cov_draw_test() #mixed_par_draw_2_test() #binary_test() diff --git a/pyemu/en.py b/pyemu/en.py index 5ed6cb6d..22e249a5 100644 --- a/pyemu/en.py +++ b/pyemu/en.py @@ -225,11 +225,11 @@ def __getattr__(self, item): pst=self.pst, df=lhs, istransformed=self.istransformed ) elif "DataFrame" in str(lhs): - warnings.warn( - "return type uncaught, losing Ensemble type, returning DataFrame", - PyemuWarning, - ) - print("return type uncaught, losing Ensemble type, returning DataFrame") + #warnings.warn( + # "return type uncaught, losing Ensemble type, returning DataFrame", + # PyemuWarning, + #) + #rint("return type uncaught, losing Ensemble type, returning DataFrame") return lhs else: return lhs @@ -1396,6 +1396,80 @@ def from_parfiles(cls, pst, parfile_names, real_names=None): return ParameterEnsemble(pst=pst, df=df_all) + def draw_new_ensemble(self,num_reals,include_noise=True,noise_reals=None): + """Draw a new (potentially larger) ParameterEnsemble instance using the realizations + in `self`. + + Args: + num_reals (int) : number of realizations to generate + include_noise (varies): a bool or a float the describes the standard deviation of + noise to add to the new realizations. This is to help with the issue of + under-varied new realizations resulting from npar >> nreals in `self`. If True, + The standard devation is set to one over the square root on number of reals in + `self`. + noise_reals (ParameterEnsemble): other existing realizations (likely prior realizations) + that are used as noise realizations in place of IID noise that is used if `include_noise` + is True and `noise_reals` is None. + + Returns + ParameterEnsemble + + Note: + any fixed and/or tied parameters in self are omitted in the returned ParameterEnsemble + + """ + + back_trans = False + if not self.istransformed: + self.transform() + back_trans = True + adj_names = self.pst.adj_par_names + + proj = (self.get_deviations() * (1./(np.sqrt(self.shape[0])-1))).transpose() + proj = proj.loc[adj_names,:] + + mu_vec = self._df.loc[:,adj_names].mean() + + snv_draws = np.random.standard_normal((num_reals,self.shape[0])) + + noise = 0.0 + if include_noise is not False: + if include_noise is True: + noise = 1./np.sqrt(self.shape[0]) + else: + noise = float(include_noise) + + if noise_reals is not None: + missing = set(self.columns.to_list()) - set(noise_reals.columns) + if len(missing) > 0: + raise Exception("the following par names are not in `noise_reals`: "+",".join(missing)) + noise_real_choices = np.random.choice(noise_reals.index,num_reals) + noise_back_trans = False + if not noise_reals.istransformed: + noise_reals.transform() + noise_back_trans = True + noise_deviations = noise_reals.get_deviations() + + reals = [] + for i,snv_draw in enumerate(snv_draws): + real = mu_vec + np.dot(proj.values,snv_draw) + reals.append(real) + if noise != 0.0: + if noise_reals is None: + noise_real = np.random.normal(0.0,noise,real.shape[0]) + else: + noise_real = noise * noise_deviations.loc[noise_real_choices[i],adj_names].values + reals[-1] += noise_real + + reals = pd.DataFrame(reals,columns=adj_names,index=np.arange(num_reals)) + reals = pyemu.ParameterEnsemble(df=reals,pst=self.pst,istransformed=True) + reals.back_transform() + if back_trans: + self.back_transform() + if noise_reals is not None and noise_back_trans: + noise_reals.back_transform() + return reals + def back_transform(self): """back transform parameters with respect to `partrans` value. From 4d614d48284e4d4d95b0344e9f4051ce52cd258f Mon Sep 17 00:00:00 2001 From: jwhite Date: Thu, 30 Oct 2025 07:17:11 -0600 Subject: [PATCH 08/10] added pe.draw_new_ensemble() method and test --- autotest/en_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autotest/en_tests.py b/autotest/en_tests.py index 41217e34..a80d088e 100644 --- a/autotest/en_tests.py +++ b/autotest/en_tests.py @@ -802,7 +802,7 @@ def draw_new_test(): sdiff = [np.abs(std[0] - s) for s in std[1:]] print(pname,max(sdiff)) - assert max(sdiff) < 0.05 + assert max(sdiff) < 0.1 # pst.add_transform_columns() # ubnd = pst.parameter_data.parubnd_trans.to_dict() From b94f20916c92865ba37b7e07a645ff22ffb60f02 Mon Sep 17 00:00:00 2001 From: jwhite Date: Thu, 30 Oct 2025 07:47:20 -0600 Subject: [PATCH 09/10] added pe.draw_new_ensemble() method and test --- pyemu/en.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyemu/en.py b/pyemu/en.py index 22e249a5..37e51ce0 100644 --- a/pyemu/en.py +++ b/pyemu/en.py @@ -1462,7 +1462,7 @@ def draw_new_ensemble(self,num_reals,include_noise=True,noise_reals=None): reals[-1] += noise_real reals = pd.DataFrame(reals,columns=adj_names,index=np.arange(num_reals)) - reals = pyemu.ParameterEnsemble(df=reals,pst=self.pst,istransformed=True) + reals = ParameterEnsemble(df=reals,pst=self.pst,istransformed=True) reals.back_transform() if back_trans: self.back_transform() From 7a4f3fc391ce4b78953ca0689e1e3b4e9748a2db Mon Sep 17 00:00:00 2001 From: jwhite Date: Thu, 30 Oct 2025 07:58:01 -0600 Subject: [PATCH 10/10] dropped 3.9 --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index eacab181..73a03424 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -16,7 +16,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, windows-latest] - python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] + python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] run-type: [std] test-path: ["."] include: