diff --git a/autotest/pst_from_tests.py b/autotest/pst_from_tests.py index 170c48b4e..d7dc0b990 100644 --- a/autotest/pst_from_tests.py +++ b/autotest/pst_from_tests.py @@ -482,6 +482,12 @@ def generic_function(): return df +def another_generic_function(some_arg): + import pandas as pd + import numpy as np + print(some_arg) + + def mf6_freyberg_test(): import numpy as np import pandas as pd @@ -596,6 +602,12 @@ def mf6_freyberg_test(): # add the function call to make generic to the forward run script pf.add_py_function("pst_from_tests.py","generic_function()",is_pre_cmd=False) + # add a function that isnt going to be called directly + pf.add_py_function("pst_from_tests.py","another_generic_function(some_arg)",is_pre_cmd=None) + + + + #pf.post_py_cmds.append("generic_function()") df = pd.read_csv(os.path.join(tmp_model_ws, "sfr.csv"), index_col=0) pf.add_observations("sfr.csv", insfile="sfr.csv.ins", index_cols="time", use_cols=list(df.columns.values)) diff --git a/examples/helpers.py b/examples/helpers.py new file mode 100644 index 000000000..f0023fd79 --- /dev/null +++ b/examples/helpers.py @@ -0,0 +1,7 @@ + +def process_model_outputs(): + import numpy as np + print("processing model outputs") + arr = np.random.random(100) + np.savetxt("test.dat",arr) + \ No newline at end of file diff --git a/examples/pstfrom_mf6.ipynb b/examples/pstfrom_mf6.ipynb index f30b5c243..1b802d259 100644 --- a/examples/pstfrom_mf6.ipynb +++ b/examples/pstfrom_mf6.ipynb @@ -151,6 +151,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ + "## Observations\n", + "\n", "So now that we have a `PstFrom` instance, but its just an empty container at this point, so we need to add some PEST interface \"observations\" and \"parameters\". Let's start with observations using MODFLOW6 head. These are stored in `heads.csv`:" ] }, @@ -355,6 +357,47 @@ "So those might look like pretty redic parameter names, but they contain heaps of metadata to help you post process things later..." 
] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Extra pre- and post-processing functions\n", + "\n", + "You will also certainly need to include some additional processing steps. These are supported through the `PstFrom.pre_py_cmds` and `PstFrom.post_py_cmds`, which are lists for pre and post model run python commands and `PstFrom.pre_sys_cmds` and `PstFrom.post_sys_cmds`, which are lists for pre and post model run system commands (these are wrapped in `pyemu.os_utils.run()`). But what if your additional steps are actually an entire python function? Well, we got that too! `PstFrom.add_py_function()`. For example, let's say you have a post processing function called `process_model_outputs()` in a python source file called `helpers.py`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "_ = [print(line.rstrip()) for line in open(\"helpers.py\",'r').readlines()]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can add the function `process_model_outputs()` to the forward run script like this:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pf.add_py_function(\"helpers.py\",\"process_model_outputs()\",is_pre_cmd=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's see what happened, but first we need to do a few more things..." 
+ ] + }, { "cell_type": "markdown", "metadata": {}, @@ -395,9 +438,7 @@ "metadata": {}, "outputs": [], "source": [ - "with open(os.path.join(template_ws,\"forward_run.py\"),'r') as f:\n", - " for line in f:\n", - " print(line.strip())" + "_ = [print(line.rstrip()) for line in open(os.path.join(template_ws,\"forward_run.py\"))]" ] }, { @@ -426,9 +467,7 @@ "metadata": {}, "outputs": [], "source": [ - "with open(os.path.join(template_ws,\"forward_run.py\"),'r') as f:\n", - " for line in f:\n", - " print(line,end=\"\")" + "_ = [print(line.rstrip()) for line in open(os.path.join(template_ws,\"forward_run.py\"))]" ] }, { @@ -437,6 +476,10 @@ "source": [ "That's better! See the last line in `main()`? \n", "\n", + "We also see that we now have a function called `process_model_outputs()` added to the forward run script and the function is being called after the model run call.\n", + "\n", + "## Generating geostatistical prior covariance matrices and ensembles\n", + "\n", "So that's nice, but how do we include spatial correlation in these parameters? It simple: just pass the `geostruct` arg to `PstFrom.add_parameters()`" ] }, diff --git a/pyemu/pst/pst_handler.py b/pyemu/pst/pst_handler.py index c0068e628..7065ec39d 100644 --- a/pyemu/pst/pst_handler.py +++ b/pyemu/pst/pst_handler.py @@ -2236,21 +2236,15 @@ def add_observations(self,ins_file,out_file=None,pst_path=None,inschek=True): sexist = set(self.obs_names) sint = sobsnme.intersection(sexist) if len(sint) > 0: - raise Exception("the following obs instruction file {0} are already in the control file:{1}". + raise Exception("the following obs in instruction file {0} are already in the control file:{1}". 
format(ins_file,','.join(sint))) - # find "new" parameters that are not already in the control file - new_obsnme = [o for o in obsnme if o not in self.observation_data.obsnme] - - if len(new_obsnme) == 0: - raise Exception("no new observations found in instruction file {0}".format(ins_file)) - # extend observation_data - new_obs_data = pst_utils._populate_dataframe(new_obsnme, pst_utils.pst_config["obs_fieldnames"], + new_obs_data = pst_utils._populate_dataframe(obsnme, pst_utils.pst_config["obs_fieldnames"], pst_utils.pst_config["obs_defaults"], pst_utils.pst_config["obs_dtype"]) - new_obs_data.loc[new_obsnme,"obsnme"] = new_obsnme - new_obs_data.index = new_obsnme + new_obs_data.loc[obsnme,"obsnme"] = obsnme + new_obs_data.index = obsnme self.observation_data = self.observation_data.append(new_obs_data) cwd = '.' if pst_path is not None: diff --git a/pyemu/pst/pst_utils.py b/pyemu/pst/pst_utils.py index f909c67f5..1c90a6b1c 100644 --- a/pyemu/pst/pst_utils.py +++ b/pyemu/pst/pst_utils.py @@ -468,24 +468,26 @@ def parse_ins_file(ins_file): def _parse_ins_string(string): """ split up an instruction file line to get the observation names """ - istart_markers = ["[","(","!"] - iend_markers = ["]",")","!"] + istart_markers = set(["[","(","!"]) + marker_dict = {"[":"]","(":")","!":"!"} + #iend_markers = set(["]",")","!"]) obs_names = [] - + slen = len(string) idx = 0 while True: - if idx >= len(string) - 1: + if idx >= slen - 1: break char = string[idx] if char in istart_markers: - em = iend_markers[istart_markers.index(char)] + #em = iend_markers[istart_markers.index(char)] + em = marker_dict[char] # print("\n",idx) # print(string) # print(string[idx+1:]) # print(string[idx+1:].index(em)) # print(string[idx+1:].index(em)+idx+1) - eidx = min(len(string),string[idx+1:].index(em)+idx+1) + eidx = min(slen,string[idx+1:].index(em)+idx+1) obs_name = string[idx+1:eidx] if obs_name.lower() != "dum": obs_names.append(obs_name) @@ -789,7 +791,7 @@ def 
csv_to_ins_file(csv_filename,ins_filename=None,only_cols=None,only_rows=None # process only_cols if only_cols is None: - only_cols = set(df.columns) + only_cols = set(df.columns.map(lambda x: x.lower().strip()).tolist()) else: if isinstance(only_cols, str): # incase it is a single name only_cols = [only_cols] @@ -797,7 +799,7 @@ def csv_to_ins_file(csv_filename,ins_filename=None,only_cols=None,only_rows=None only_cols = {c.lower() if isinstance(c, str) else c for c in only_cols} if only_rows is None: - only_rows = set(df.index) + only_rows = set(df.index.map(lambda x: x.lower().strip()).tolist()) else: if isinstance(only_rows, str): # incase it is a single name only_rows = [only_rows] @@ -839,6 +841,7 @@ def csv_to_ins_file(csv_filename,ins_filename=None,only_cols=None,only_rows=None clabels.append(clabel) if cname in only_cols: only_clabels.append(clabel) + only_clabels = set(only_clabels) if ins_filename is None: if not isinstance(csv_filename,str): @@ -848,6 +851,10 @@ def csv_to_ins_file(csv_filename,ins_filename=None,only_cols=None,only_rows=None onames = [] ovals = [] ognames = [] + only_clabels_len = len(only_clabels) + clabels_len = len(clabels) + prefix_is_str = isinstance(prefix, str) + vals = df.values.copy() # wasteful but way faster with open(ins_filename,'w') as f: f.write("pif {0}\n".format(marker)) [f.write("l1\n") for _ in range(head_lines_len)] @@ -858,10 +865,10 @@ def csv_to_ins_file(csv_filename,ins_filename=None,only_cols=None,only_rows=None c_count = 0 for j,clabel in enumerate(clabels): # loop over columns oname = '' - if c_count < len(only_clabels): # if we haven't yet set up all obs + if c_count < only_clabels_len: # if we haven't yet set up all obs if rlabel in only_rlabels and clabel in only_clabels: # define obs names - if not isinstance(prefix, str): + if not prefix_is_str: nprefix = prefix[c_count] else: nprefix = prefix @@ -872,7 +879,7 @@ def csv_to_ins_file(csv_filename,ins_filename=None,only_cols=None,only_rows=None nname = 
nprefix+clabel oname = nprefix+rlabel+"_"+clabel onames.append(oname) # append list of obs - ovals.append(df.iloc[i, j]) # store current obs val + ovals.append(vals[i, j]) # store current obs val # defin group name if gpname is False or gpname[c_count] is False: # keeping consistent behaviour @@ -889,7 +896,7 @@ def csv_to_ins_file(csv_filename,ins_filename=None,only_cols=None,only_rows=None oname = " !{0}!".format(oname) c_count += 1 # else: # not a requested observation; add spacer - if j < len(clabels) - 1: + if j < clabels_len - 1: if sep == ',': oname = "{0} {1},{1}".format(oname, marker) else: @@ -1086,6 +1093,7 @@ def _execute_ins_line(self,ins_line,ins_lcount): #for ii,ins in enumerate(ins_line): ii = 0 all_markers = True + line_seps = set([","," ","\t"]) while True: if ii >= len(ins_line): break @@ -1117,7 +1125,7 @@ def _execute_ins_line(self,ins_line,ins_lcount): format(nlines, ins, ins_lcount)) elif ins == 'w': raw = line[cursor_pos:].replace(","," ").split() - if line[cursor_pos] in [","," ","\t"]: + if line[cursor_pos] in line_seps: raw.insert(0,'') if len(raw) == 1: self.throw_out_error("no whitespaces found on output line {0} past {1}".format(line,cursor_pos)) diff --git a/pyemu/utils/pst_from.py b/pyemu/utils/pst_from.py index 9f53b54f0..9cf7a1621 100644 --- a/pyemu/utils/pst_from.py +++ b/pyemu/utils/pst_from.py @@ -725,7 +725,9 @@ def add_py_function(self,file_name,function_name, is_pre_cmd=True): `file_name` is_pre_cmd (`bool`): flag to include `function_name` in PstFrom.pre_py_cmds. If False, `function_name` is - added to PstFrom.post_py_cmds instead. Default is True. + added to PstFrom.post_py_cmds instead. If passed as `None`, + then the function `function_name` is added to the forward run + script but is not called. Default is True. 
Returns: None @@ -764,7 +766,7 @@ def add_py_function(self,file_name,function_name, is_pre_cmd=True): while True: line = f.readline() if line == '': - self.logger.lraise("add_py_function(): EOF while searching for function '[0}'".\ + self.logger.lraise("add_py_function(): EOF while searching for function '{0}'".\ format(search_str)) if line.startswith(search_str): #case sens and no strip since 'def' should be flushed left func_lines.append(line) @@ -778,10 +780,13 @@ def add_py_function(self,file_name,function_name, is_pre_cmd=True): break self._function_lines_list.append(func_lines) - if is_pre_cmd: + if is_pre_cmd is True: self.pre_py_cmds.append(function_name) - else: + elif is_pre_cmd is False: self.post_py_cmds.append(function_name) + else: + self.logger.warn("add_py_function() command: {0} is not being called directly".\ + format(function_name)) def add_observations(self, filename, insfile=None, index_cols=None, use_cols=None, @@ -1550,8 +1555,8 @@ def _load_listtype_file(self, filename, index_cols, use_cols, self.logger.lraise("unrecognized type for index_cols or use_cols " "should be str or int and both should be of the " "same type, not {0} or {1}". - format(str(type(index_cols)), - str(type(use_cols)))) + format(str(type(index_cols[0])), + str(type(use_cols[0])))) itype = type(index_cols) utype = type(use_cols) if itype != utype: