From cb5a610db2012a40b139f1681e610a32567de886 Mon Sep 17 00:00:00 2001
From: White <jtwhite1000@gmail.com>
Date: Tue, 11 Aug 2020 09:43:59 -0600
Subject: [PATCH 1/5] added option for add_py_function's is_pre_cmd arg to be
 None

---
 autotest/pst_from_tests.py | 12 ++++++++++++
 pyemu/utils/pst_from.py    | 13 +++++++++----
 2 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/autotest/pst_from_tests.py b/autotest/pst_from_tests.py
index 320b62d90..bf513f7f5 100644
--- a/autotest/pst_from_tests.py
+++ b/autotest/pst_from_tests.py
@@ -482,6 +482,12 @@ def generic_function():
     return df
 
 
+def another_generic_function(some_arg):
+    import pandas as pd
+    import numpy as np
+    print(some_arg)
+
+
 def mf6_freyberg_test():
     import numpy as np
     import pandas as pd
@@ -596,6 +602,12 @@ def mf6_freyberg_test():
     # add the function call to make generic to the forward run script
     pf.add_py_function("pst_from_tests.py","generic_function()",is_pre_cmd=False)
 
+    # add a function that isn't going to be called directly
+    pf.add_py_function("pst_from_tests.py","another_generic_function(some_arg)",is_pre_cmd=None)
+
+
+
+
     #pf.post_py_cmds.append("generic_function()")
     df = pd.read_csv(os.path.join(tmp_model_ws, "sfr.csv"), index_col=0)
     pf.add_observations("sfr.csv", insfile="sfr.csv.ins", index_cols="time", use_cols=list(df.columns.values))
diff --git a/pyemu/utils/pst_from.py b/pyemu/utils/pst_from.py
index 9f53b54f0..31efb11f1 100644
--- a/pyemu/utils/pst_from.py
+++ b/pyemu/utils/pst_from.py
@@ -725,7 +725,9 @@ def add_py_function(self,file_name,function_name, is_pre_cmd=True):
                 `file_name`
             is_pre_cmd (`bool`): flag to include `function_name` in
                 PstFrom.pre_py_cmds.  If False, `function_name` is
-                added to PstFrom.post_py_cmds instead. Default is True.
+                added to PstFrom.post_py_cmds instead. If passed as `None`,
+                then the function `function_name` is added to the forward run
+                script but is not called.  Default is True.
         Returns:
             None
 
@@ -764,7 +766,7 @@ def add_py_function(self,file_name,function_name, is_pre_cmd=True):
             while True:
                 line = f.readline()
                 if line == '':
-                    self.logger.lraise("add_py_function(): EOF while searching for function '[0}'".\
+                    self.logger.lraise("add_py_function(): EOF while searching for function '{0}'".\
                                        format(search_str))
                 if line.startswith(search_str): #case sens and no strip since 'def' should be flushed left
                     func_lines.append(line)
@@ -778,10 +780,13 @@ def add_py_function(self,file_name,function_name, is_pre_cmd=True):
                     break
 
         self._function_lines_list.append(func_lines)
-        if is_pre_cmd:
+        if is_pre_cmd is True:
             self.pre_py_cmds.append(function_name)
-        else:
+        elif is_pre_cmd is False:
             self.post_py_cmds.append(function_name)
+        else:
+            self.logger.warn("add_py_function() command: {0} is not being called directly".\
+                             format(function_name))
 
     def add_observations(self, filename, insfile=None,
                          index_cols=None, use_cols=None,

From 72716b128a9eda1c5c5912d7e0625bca6780ed42 Mon Sep 17 00:00:00 2001
From: White <jtwhite1000@gmail.com>
Date: Thu, 13 Aug 2020 10:46:08 -0600
Subject: [PATCH 2/5] some opt in add obs for redic large ins files

---
 pyemu/pst/pst_handler.py | 14 ++++----------
 pyemu/pst/pst_utils.py   | 20 ++++++++++++--------
 2 files changed, 16 insertions(+), 18 deletions(-)

diff --git a/pyemu/pst/pst_handler.py b/pyemu/pst/pst_handler.py
index c0068e628..7065ec39d 100644
--- a/pyemu/pst/pst_handler.py
+++ b/pyemu/pst/pst_handler.py
@@ -2236,21 +2236,15 @@ def add_observations(self,ins_file,out_file=None,pst_path=None,inschek=True):
         sexist = set(self.obs_names)
         sint = sobsnme.intersection(sexist)
         if len(sint) > 0:
-            raise Exception("the following obs instruction file {0} are already in the control file:{1}".
+            raise Exception("the following obs in instruction file {0} are already in the control file:{1}".
                             format(ins_file,','.join(sint)))
 
-        # find "new" parameters that are not already in the control file
-        new_obsnme = [o for o in obsnme if o not in self.observation_data.obsnme]
-
-        if len(new_obsnme) == 0:
-            raise Exception("no new observations found in instruction file {0}".format(ins_file))
-
         # extend observation_data
-        new_obs_data = pst_utils._populate_dataframe(new_obsnme, pst_utils.pst_config["obs_fieldnames"],
+        new_obs_data = pst_utils._populate_dataframe(obsnme, pst_utils.pst_config["obs_fieldnames"],
                                                      pst_utils.pst_config["obs_defaults"],
                                                      pst_utils.pst_config["obs_dtype"])
-        new_obs_data.loc[new_obsnme,"obsnme"] = new_obsnme
-        new_obs_data.index = new_obsnme
+        new_obs_data.loc[obsnme,"obsnme"] = obsnme
+        new_obs_data.index = obsnme
         self.observation_data = self.observation_data.append(new_obs_data)
         cwd = '.'
         if pst_path is not None:
diff --git a/pyemu/pst/pst_utils.py b/pyemu/pst/pst_utils.py
index f909c67f5..407bf9d24 100644
--- a/pyemu/pst/pst_utils.py
+++ b/pyemu/pst/pst_utils.py
@@ -468,24 +468,26 @@ def parse_ins_file(ins_file):
 def _parse_ins_string(string):
     """ split up an instruction file line to get the observation names
     """
-    istart_markers = ["[","(","!"]
-    iend_markers = ["]",")","!"]
+    istart_markers = set(["[","(","!"])
+    marker_dict = {"[":"]","(":")","!":"!"}
+    #iend_markers = set(["]",")","!"])
 
     obs_names = []
-
+    slen = len(string)
     idx = 0
     while True:
-        if idx >= len(string) - 1:
+        if idx >= slen - 1:
             break
         char = string[idx]
         if char in istart_markers:
-            em = iend_markers[istart_markers.index(char)]
+            #em = iend_markers[istart_markers.index(char)]
+            em = marker_dict[char]
             # print("\n",idx)
             # print(string)
             # print(string[idx+1:])
             # print(string[idx+1:].index(em))
             # print(string[idx+1:].index(em)+idx+1)
-            eidx = min(len(string),string[idx+1:].index(em)+idx+1)
+            eidx = min(slen,string[idx+1:].index(em)+idx+1)
             obs_name = string[idx+1:eidx]
             if obs_name.lower() != "dum":
                 obs_names.append(obs_name)
@@ -848,6 +850,7 @@ def csv_to_ins_file(csv_filename,ins_filename=None,only_cols=None,only_rows=None
     onames = []
     ovals = []
     ognames = []
+    vals = df.values.copy() # wasteful but way faster
     with open(ins_filename,'w') as f:
         f.write("pif {0}\n".format(marker))
         [f.write("l1\n") for _ in range(head_lines_len)]
@@ -872,7 +875,7 @@ def csv_to_ins_file(csv_filename,ins_filename=None,only_cols=None,only_rows=None
                             nname = nprefix+clabel
                             oname = nprefix+rlabel+"_"+clabel
                         onames.append(oname)  # append list of obs
-                        ovals.append(df.iloc[i, j])  # store current obs val
+                        ovals.append(vals[i, j])  # store current obs val
                         # defin group name
                         if gpname is False or gpname[c_count] is False:
                             # keeping consistent behaviour
@@ -1086,6 +1089,7 @@ def _execute_ins_line(self,ins_line,ins_lcount):
         #for ii,ins in enumerate(ins_line):
         ii = 0
         all_markers = True
+        line_seps = set([","," ","\t"])
         while True:
             if ii >= len(ins_line):
                 break
@@ -1117,7 +1121,7 @@ def _execute_ins_line(self,ins_line,ins_lcount):
                                              format(nlines, ins, ins_lcount))
             elif ins == 'w':
                 raw = line[cursor_pos:].replace(","," ").split()
-                if line[cursor_pos] in [","," ","\t"]:
+                if line[cursor_pos] in line_seps:
                     raw.insert(0,'')
                 if len(raw) == 1:
                     self.throw_out_error("no whitespaces found on output line {0} past {1}".format(line,cursor_pos))

From 90b5e837dd44c78bb70f89411bfebcb0b715f296 Mon Sep 17 00:00:00 2001
From: White <jtwhite1000@gmail.com>
Date: Thu, 13 Aug 2020 11:23:18 -0600
Subject: [PATCH 3/5] more opt

---
 pyemu/pst/pst_utils.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/pyemu/pst/pst_utils.py b/pyemu/pst/pst_utils.py
index 407bf9d24..7d8df6832 100644
--- a/pyemu/pst/pst_utils.py
+++ b/pyemu/pst/pst_utils.py
@@ -841,6 +841,7 @@ def csv_to_ins_file(csv_filename,ins_filename=None,only_cols=None,only_rows=None
         clabels.append(clabel)
         if cname in only_cols:
             only_clabels.append(clabel)
+    only_clabels = set(only_clabels)
 
     if ins_filename is None:
         if not isinstance(csv_filename,str):
@@ -850,6 +851,9 @@ def csv_to_ins_file(csv_filename,ins_filename=None,only_cols=None,only_rows=None
     onames = []
     ovals = []
     ognames = []
+    only_clabels_len = len(only_clabels)
+    clabels_len = len(clabels)
+    prefix_is_str = isinstance(prefix, str)
     vals = df.values.copy() # wasteful but way faster
     with open(ins_filename,'w') as f:
         f.write("pif {0}\n".format(marker))
@@ -861,10 +865,10 @@ def csv_to_ins_file(csv_filename,ins_filename=None,only_cols=None,only_rows=None
             c_count = 0
             for j,clabel in enumerate(clabels):  # loop over columns
                 oname = ''
-                if c_count < len(only_clabels):  # if we haven't yet set up all obs
+                if c_count < only_clabels_len:  # if we haven't yet set up all obs
                     if rlabel in only_rlabels and clabel in only_clabels:
                         # define obs names
-                        if not isinstance(prefix, str):
+                        if not prefix_is_str:
                             nprefix = prefix[c_count]
                         else:
                             nprefix = prefix
@@ -892,7 +896,7 @@ def csv_to_ins_file(csv_filename,ins_filename=None,only_cols=None,only_rows=None
                         oname = " !{0}!".format(oname)
                         c_count += 1
                     # else:  # not a requested observation; add spacer
-                    if j < len(clabels) - 1:
+                    if j < clabels_len - 1:
                         if sep == ',':
                             oname = "{0} {1},{1}".format(oname, marker)
                         else:

From d86714dc0711fb4b3a9c5424491d7807dc383084 Mon Sep 17 00:00:00 2001
From: White <jtwhite1000@gmail.com>
Date: Thu, 13 Aug 2020 14:56:51 -0600
Subject: [PATCH 4/5] more opt

---
 pyemu/pst/pst_utils.py  | 4 ++--
 pyemu/utils/pst_from.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/pyemu/pst/pst_utils.py b/pyemu/pst/pst_utils.py
index 7d8df6832..1c90a6b1c 100644
--- a/pyemu/pst/pst_utils.py
+++ b/pyemu/pst/pst_utils.py
@@ -791,7 +791,7 @@ def csv_to_ins_file(csv_filename,ins_filename=None,only_cols=None,only_rows=None
 
     # process only_cols
     if only_cols is None:
-        only_cols = set(df.columns)
+        only_cols = set(df.columns.map(lambda x: x.lower().strip()).tolist())
     else:
         if isinstance(only_cols, str): # incase it is a single name
             only_cols = [only_cols]
@@ -799,7 +799,7 @@ def csv_to_ins_file(csv_filename,ins_filename=None,only_cols=None,only_rows=None
     only_cols = {c.lower() if isinstance(c, str) else c for c in only_cols}
 
     if only_rows is None:
-        only_rows = set(df.index)
+        only_rows = set(df.index.map(lambda x: x.lower().strip()).tolist())
     else:
         if isinstance(only_rows, str): # incase it is a single name
             only_rows = [only_rows]
diff --git a/pyemu/utils/pst_from.py b/pyemu/utils/pst_from.py
index 31efb11f1..9cf7a1621 100644
--- a/pyemu/utils/pst_from.py
+++ b/pyemu/utils/pst_from.py
@@ -1555,8 +1555,8 @@ def _load_listtype_file(self, filename, index_cols, use_cols,
             self.logger.lraise("unrecognized type for index_cols or use_cols "
                                "should be str or int and both should be of the "
                                "same type, not {0} or {1}".
-                               format(str(type(index_cols)),
-                                      str(type(use_cols))))
+                               format(str(type(index_cols[0])),
+                                      str(type(use_cols[0]))))
         itype = type(index_cols)
         utype = type(use_cols)
         if itype != utype:

From 33b7b7b16918ac26840522781630c3db0d501b8e Mon Sep 17 00:00:00 2001
From: White <jtwhite1000@gmail.com>
Date: Thu, 13 Aug 2020 16:26:49 -0600
Subject: [PATCH 5/5] added pre and post command info to pstfrom demo notebook

---
 examples/helpers.py        |  7 +++++
 examples/pstfrom_mf6.ipynb | 55 +++++++++++++++++++++++++++++++++-----
 2 files changed, 56 insertions(+), 6 deletions(-)
 create mode 100644 examples/helpers.py

diff --git a/examples/helpers.py b/examples/helpers.py
new file mode 100644
index 000000000..f0023fd79
--- /dev/null
+++ b/examples/helpers.py
@@ -0,0 +1,7 @@
+
+def process_model_outputs():
+	import numpy as np
+	print("processing model outputs")
+	arr = np.random.random(100)
+	np.savetxt("test.dat",arr)
+	
\ No newline at end of file
diff --git a/examples/pstfrom_mf6.ipynb b/examples/pstfrom_mf6.ipynb
index f30b5c243..1b802d259 100644
--- a/examples/pstfrom_mf6.ipynb
+++ b/examples/pstfrom_mf6.ipynb
@@ -151,6 +151,8 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "## Observations\n",
+    "\n",
     "So now that we have a `PstFrom` instance, but its just an empty container at this point, so we need to add some PEST interface \"observations\" and \"parameters\".  Let's start with observations using MODFLOW6 head.  These are stored in `heads.csv`:"
    ]
   },
@@ -355,6 +357,47 @@
     "So those might look like pretty redic parameter names, but they contain heaps of metadata to help you post process things later..."
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Extra pre- and post-processing functions\n",
+    "\n",
+    "You will almost certainly need to include some additional processing steps.  These are supported through `PstFrom.pre_py_cmds` and `PstFrom.post_py_cmds`, which are lists of python commands run before and after the model run, and `PstFrom.pre_sys_cmds` and `PstFrom.post_sys_cmds`, which are lists of system commands run before and after the model run (these are wrapped in `pyemu.os_utils.run()`).  But what if your additional steps are actually an entire python function? Well, we've got that too: `PstFrom.add_py_function()`. For example, let's say you have a post-processing function called `process_model_outputs()` in a python source file called `helpers.py`:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "_ = [print(line.rstrip()) for line in open(\"helpers.py\",'r').readlines()]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We can add the function `process_model_outputs()` to the forward run script like this:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pf.add_py_function(\"helpers.py\",\"process_model_outputs()\",is_pre_cmd=False)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Let's see what happened, but first we need to do a few more things..."
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -395,9 +438,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "with open(os.path.join(template_ws,\"forward_run.py\"),'r') as f:\n",
-    "    for line in f:\n",
-    "        print(line.strip())"
+    "_ = [print(line.rstrip()) for line in open(os.path.join(template_ws,\"forward_run.py\"))]"
    ]
   },
   {
@@ -426,9 +467,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "with open(os.path.join(template_ws,\"forward_run.py\"),'r') as f:\n",
-    "    for line in f:\n",
-    "        print(line,end=\"\")"
+    "_ = [print(line.rstrip()) for line in open(os.path.join(template_ws,\"forward_run.py\"))]"
    ]
   },
   {
@@ -437,6 +476,10 @@
    "source": [
     "That's better!  See the last line in `main()`?  \n",
     "\n",
+    "We also see that we now have a function called `process_model_outputs()` added to the forward run script and the function is being called after the model run call.\n",
+    "\n",
+    "## Generating geostatistical prior covariance matrices and ensembles\n",
+    "\n",
     "So that's nice, but how do we include spatial correlation in these parameters?  It simple: just pass the `geostruct` arg to `PstFrom.add_parameters()`"
    ]
   },