Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions autotest/pst_from_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,12 @@ def generic_function():
return df


def another_generic_function(some_arg):
    """Fixture used by the ``add_py_function(..., is_pre_cmd=None)`` test: its
    source lines are copied into the generated forward-run script but the
    function is not called directly (see the ``mf6_freyberg_test`` hunk below).

    Args:
        some_arg: any printable object; simply echoed to stdout.
    """
    # NOTE(review): pd/np are unused here — presumably kept function-local on
    # purpose so the copied function is self-contained; confirm before removing.
    import pandas as pd
    import numpy as np
    print(some_arg)


def mf6_freyberg_test():
import numpy as np
import pandas as pd
Expand Down Expand Up @@ -596,6 +602,12 @@ def mf6_freyberg_test():
# add the function call to make generic to the forward run script
pf.add_py_function("pst_from_tests.py","generic_function()",is_pre_cmd=False)

# add a function that isn't going to be called directly
pf.add_py_function("pst_from_tests.py","another_generic_function(some_arg)",is_pre_cmd=None)




#pf.post_py_cmds.append("generic_function()")
df = pd.read_csv(os.path.join(tmp_model_ws, "sfr.csv"), index_col=0)
pf.add_observations("sfr.csv", insfile="sfr.csv.ins", index_cols="time", use_cols=list(df.columns.values))
Expand Down
7 changes: 7 additions & 0 deletions examples/helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@

def process_model_outputs():
    """Example post-processing helper for the PstFrom notebook: writes 100
    random values to ``test.dat`` in the current working directory.

    The notebook copies this function's source into ``forward_run.py`` via
    ``PstFrom.add_py_function``, so the import is kept function-local to make
    the copied code self-contained.
    """
    import numpy as np
    print("processing model outputs")
    arr = np.random.random(100)  # 100 uniform samples in [0, 1)
    np.savetxt("test.dat",arr)

55 changes: 49 additions & 6 deletions examples/pstfrom_mf6.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,8 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Observations\n",
"\n",
"So now that we have a `PstFrom` instance, but its just an empty container at this point, so we need to add some PEST interface \"observations\" and \"parameters\". Let's start with observations using MODFLOW6 head. These are stored in `heads.csv`:"
]
},
Expand Down Expand Up @@ -355,6 +357,47 @@
"So those might look like pretty redic parameter names, but they contain heaps of metadata to help you post process things later..."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Extra pre- and post-processing functions\n",
"\n",
"You will also certainly need to include some additional processing steps. These are supported thru the `PstFrom.pre_py cmds` and `PstFrom.post_py_cmds`, which are lists for pre and post model run python commands and `PstFrom.pre_sys_cmds` and `PstFrom.post_sys_cmds`, which are lists for pre and post model run system commands (these are wrapped in `pyemu.os_utils.run()`. But what if your additional steps are actually an entire python function? Well, we got that too! `PstFrom.add_py_function()`. For example, let's say you have a post processing function called `process_model_outputs()` in a python source file called `helpers.py`:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"_ = [print(line.rstrip()) for line in open(\"helpers.py\",'r').readlines()]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can add the function `process_model_outputs()` to the forward run script like this:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pf.add_py_function(\"helpers.py\",\"process_model_outputs()\",is_pre_cmd=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's see what happened, but first we need to do a few more things..."
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down Expand Up @@ -395,9 +438,7 @@
"metadata": {},
"outputs": [],
"source": [
"with open(os.path.join(template_ws,\"forward_run.py\"),'r') as f:\n",
" for line in f:\n",
" print(line.strip())"
"_ = [print(line.rstrip()) for line in open(os.path.join(template_ws,\"forward_run.py\"))]"
]
},
{
Expand Down Expand Up @@ -426,9 +467,7 @@
"metadata": {},
"outputs": [],
"source": [
"with open(os.path.join(template_ws,\"forward_run.py\"),'r') as f:\n",
" for line in f:\n",
" print(line,end=\"\")"
"_ = [print(line.rstrip()) for line in open(os.path.join(template_ws,\"forward_run.py\"))]"
]
},
{
Expand All @@ -437,6 +476,10 @@
"source": [
"That's better! See the last line in `main()`? \n",
"\n",
"We also see that we now have a function called `process_model_outputs()` added to the forward run script and the function is being called after the model run call.\n",
"\n",
"## Generating geostatistical prior covariance matrices and ensembles\n",
"\n",
"So that's nice, but how do we include spatial correlation in these parameters? It simple: just pass the `geostruct` arg to `PstFrom.add_parameters()`"
]
},
Expand Down
14 changes: 4 additions & 10 deletions pyemu/pst/pst_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -2236,21 +2236,15 @@ def add_observations(self,ins_file,out_file=None,pst_path=None,inschek=True):
sexist = set(self.obs_names)
sint = sobsnme.intersection(sexist)
if len(sint) > 0:
raise Exception("the following obs instruction file {0} are already in the control file:{1}".
raise Exception("the following obs in instruction file {0} are already in the control file:{1}".
format(ins_file,','.join(sint)))

# find "new" parameters that are not already in the control file
new_obsnme = [o for o in obsnme if o not in self.observation_data.obsnme]

if len(new_obsnme) == 0:
raise Exception("no new observations found in instruction file {0}".format(ins_file))

# extend observation_data
new_obs_data = pst_utils._populate_dataframe(new_obsnme, pst_utils.pst_config["obs_fieldnames"],
new_obs_data = pst_utils._populate_dataframe(obsnme, pst_utils.pst_config["obs_fieldnames"],
pst_utils.pst_config["obs_defaults"],
pst_utils.pst_config["obs_dtype"])
new_obs_data.loc[new_obsnme,"obsnme"] = new_obsnme
new_obs_data.index = new_obsnme
new_obs_data.loc[obsnme,"obsnme"] = obsnme
new_obs_data.index = obsnme
self.observation_data = self.observation_data.append(new_obs_data)
cwd = '.'
if pst_path is not None:
Expand Down
34 changes: 21 additions & 13 deletions pyemu/pst/pst_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -468,24 +468,26 @@ def parse_ins_file(ins_file):
def _parse_ins_string(string):
""" split up an instruction file line to get the observation names
"""
istart_markers = ["[","(","!"]
iend_markers = ["]",")","!"]
istart_markers = set(["[","(","!"])
marker_dict = {"[":"]","(":")","!":"!"}
#iend_markers = set(["]",")","!"])

obs_names = []

slen = len(string)
idx = 0
while True:
if idx >= len(string) - 1:
if idx >= slen - 1:
break
char = string[idx]
if char in istart_markers:
em = iend_markers[istart_markers.index(char)]
#em = iend_markers[istart_markers.index(char)]
em = marker_dict[char]
# print("\n",idx)
# print(string)
# print(string[idx+1:])
# print(string[idx+1:].index(em))
# print(string[idx+1:].index(em)+idx+1)
eidx = min(len(string),string[idx+1:].index(em)+idx+1)
eidx = min(slen,string[idx+1:].index(em)+idx+1)
obs_name = string[idx+1:eidx]
if obs_name.lower() != "dum":
obs_names.append(obs_name)
Expand Down Expand Up @@ -789,15 +791,15 @@ def csv_to_ins_file(csv_filename,ins_filename=None,only_cols=None,only_rows=None

# process only_cols
if only_cols is None:
only_cols = set(df.columns)
only_cols = set(df.columns.map(lambda x: x.lower().strip()).tolist())
else:
if isinstance(only_cols, str): # incase it is a single name
only_cols = [only_cols]
only_cols = set(only_cols)
only_cols = {c.lower() if isinstance(c, str) else c for c in only_cols}

if only_rows is None:
only_rows = set(df.index)
only_rows = set(df.index.map(lambda x: x.lower().strip()).tolist())
else:
if isinstance(only_rows, str): # in case it is a single name
only_rows = [only_rows]
Expand Down Expand Up @@ -839,6 +841,7 @@ def csv_to_ins_file(csv_filename,ins_filename=None,only_cols=None,only_rows=None
clabels.append(clabel)
if cname in only_cols:
only_clabels.append(clabel)
only_clabels = set(only_clabels)

if ins_filename is None:
if not isinstance(csv_filename,str):
Expand All @@ -848,6 +851,10 @@ def csv_to_ins_file(csv_filename,ins_filename=None,only_cols=None,only_rows=None
onames = []
ovals = []
ognames = []
only_clabels_len = len(only_clabels)
clabels_len = len(clabels)
prefix_is_str = isinstance(prefix, str)
vals = df.values.copy() # wasteful but way faster
with open(ins_filename,'w') as f:
f.write("pif {0}\n".format(marker))
[f.write("l1\n") for _ in range(head_lines_len)]
Expand All @@ -858,10 +865,10 @@ def csv_to_ins_file(csv_filename,ins_filename=None,only_cols=None,only_rows=None
c_count = 0
for j,clabel in enumerate(clabels): # loop over columns
oname = ''
if c_count < len(only_clabels): # if we haven't yet set up all obs
if c_count < only_clabels_len: # if we haven't yet set up all obs
if rlabel in only_rlabels and clabel in only_clabels:
# define obs names
if not isinstance(prefix, str):
if not prefix_is_str:
nprefix = prefix[c_count]
else:
nprefix = prefix
Expand All @@ -872,7 +879,7 @@ def csv_to_ins_file(csv_filename,ins_filename=None,only_cols=None,only_rows=None
nname = nprefix+clabel
oname = nprefix+rlabel+"_"+clabel
onames.append(oname) # append list of obs
ovals.append(df.iloc[i, j]) # store current obs val
ovals.append(vals[i, j]) # store current obs val
# define group name
if gpname is False or gpname[c_count] is False:
# keeping consistent behaviour
Expand All @@ -889,7 +896,7 @@ def csv_to_ins_file(csv_filename,ins_filename=None,only_cols=None,only_rows=None
oname = " !{0}!".format(oname)
c_count += 1
# else: # not a requested observation; add spacer
if j < len(clabels) - 1:
if j < clabels_len - 1:
if sep == ',':
oname = "{0} {1},{1}".format(oname, marker)
else:
Expand Down Expand Up @@ -1086,6 +1093,7 @@ def _execute_ins_line(self,ins_line,ins_lcount):
#for ii,ins in enumerate(ins_line):
ii = 0
all_markers = True
line_seps = set([","," ","\t"])
while True:
if ii >= len(ins_line):
break
Expand Down Expand Up @@ -1117,7 +1125,7 @@ def _execute_ins_line(self,ins_line,ins_lcount):
format(nlines, ins, ins_lcount))
elif ins == 'w':
raw = line[cursor_pos:].replace(","," ").split()
if line[cursor_pos] in [","," ","\t"]:
if line[cursor_pos] in line_seps:
raw.insert(0,'')
if len(raw) == 1:
self.throw_out_error("no whitespaces found on output line {0} past {1}".format(line,cursor_pos))
Expand Down
17 changes: 11 additions & 6 deletions pyemu/utils/pst_from.py
Original file line number Diff line number Diff line change
Expand Up @@ -725,7 +725,9 @@ def add_py_function(self,file_name,function_name, is_pre_cmd=True):
`file_name`
is_pre_cmd (`bool`): flag to include `function_name` in
PstFrom.pre_py_cmds. If False, `function_name` is
added to PstFrom.post_py_cmds instead. Default is True.
added to PstFrom.post_py_cmds instead. If passed as `None`,
then the function `function_name` is added to the forward run
script but is not called. Default is True.
Returns:
None

Expand Down Expand Up @@ -764,7 +766,7 @@ def add_py_function(self,file_name,function_name, is_pre_cmd=True):
while True:
line = f.readline()
if line == '':
self.logger.lraise("add_py_function(): EOF while searching for function '[0}'".\
self.logger.lraise("add_py_function(): EOF while searching for function '{0}'".\
format(search_str))
if line.startswith(search_str): #case sens and no strip since 'def' should be flushed left
func_lines.append(line)
Expand All @@ -778,10 +780,13 @@ def add_py_function(self,file_name,function_name, is_pre_cmd=True):
break

self._function_lines_list.append(func_lines)
if is_pre_cmd:
if is_pre_cmd is True:
self.pre_py_cmds.append(function_name)
else:
elif is_pre_cmd is False:
self.post_py_cmds.append(function_name)
else:
self.logger.warn("add_py_function() command: {0} is not being called directly".\
format(function_name))

def add_observations(self, filename, insfile=None,
index_cols=None, use_cols=None,
Expand Down Expand Up @@ -1550,8 +1555,8 @@ def _load_listtype_file(self, filename, index_cols, use_cols,
self.logger.lraise("unrecognized type for index_cols or use_cols "
"should be str or int and both should be of the "
"same type, not {0} or {1}".
format(str(type(index_cols)),
str(type(use_cols))))
format(str(type(index_cols[0])),
str(type(use_cols[0]))))
itype = type(index_cols)
utype = type(use_cols)
if itype != utype:
Expand Down