In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import flopy
import pyemu
import shutil
import geopandas as gpd
import herebedragons as hbd
from pathlib import Path
from mf6rtm import utils, mup3d

# Getting Started

In [None]:
# source folder with the pristine model files
org_d = Path('model')

# scratch folder that receives a fresh copy of those files
tmp_d = Path('tmp')

# copytree refuses an existing destination by default, so clear any earlier copy
if tmp_d.exists():
    shutil.rmtree(tmp_d)
shutil.copytree(src=org_d, dst=tmp_d)

# fetch the executables into the working copy (needed if 00_model was not run)
hbd.get_bins(tmp_d)

In [None]:
# run the model once to make sure it works
# pyemu.os_utils.run("mf6rtm",cwd=tmp_d)

In [None]:
# read the MODFLOW 6 simulation from the working copy
sim = flopy.mf6.MFSimulation.load(
    sim_ws=tmp_d,
)
# no model name is passed here; the result is used as the flow model below
gwf = sim.get_model()

This model was built using a `DISV` grid. `PstFrom` is going to require the `flopy` model grid object to set up pilot points and spatially varying covariance:

In [None]:
# flopy model grid object of the flow model; passed to PstFrom as its spatial reference
sr = gwf.modelgrid
sr

# Initializing PST FROM

In [None]:
# PstFrom working folder (i.e. the PEST template directory)
template_ws = Path("pst_template")

# build the PstFrom instance on top of the working copy of the model
pf = pyemu.utils.PstFrom(
    original_d=tmp_d,       # where the model is stored
    new_d=template_ws,      # the PEST template folder
    remove_existing=True,   # ensures a clean start
    longnames=True,         # set False if using PEST/PEST_HP
    spatial_reference=sr,   # the model grid object retrieved earlier
    zero_based=False,       # does the MODEL use zero based indices? MODFLOW does NOT
    echo=False,             # keeps the notebook output quiet; set True when troubleshooting
)

# Add Parameters

If you are familiar with highly parameterized groundwater models, you will know that even when considering only flow properties—such as hydraulic conductivity and storage or storativity—the number of parameters can already be large. When transport and reactive transport are included, the number of properties that can be parameterized can grow rapidly.

For the sake of this tutorial, we will keep things simple and parameterize just one flow property, one transport property, and one reaction property.

## Hydraulic conductivity

No one specified the `idomain` in the original model setup, so let's just create a "zone array". `PstFrom` expects this when we set up pilot points and so on later. Note that the shape of `ib` is the same as the shape of the model arrays:

In [None]:
# zone array filled with ones: one integer entry per cell of a layer (sr.ncpl)
ib = np.ones(shape=sr.ncpl, dtype=int)
ib

In [None]:
# collect the hydraulic conductivity array files via a helper that matches file names on a tag
tag = "npf_k_"
files = hbd.get_input_filenames(tag, template_ws=template_ws, extension='.txt')

# flopy does not write nice and tidy array files, so rewrite each one in place
for f in files:
    hbd.tidy_array(template_ws / f)


In [None]:
# Load the first K array file and compare its shape with the zone array `ib`
# (no idomain is used here; `ib` is the stand-in zone array).
k = np.loadtxt(template_ws / files[0])

k.shape, ib.shape # all good!

In [None]:
# exponential variogram describing the spatial correlation of the pilot points
v_pp = pyemu.geostats.ExpVario(
    contribution=1.0,  # sill
    a=1,               # range of correlation; length units of the model. In our case 'meters'
    anisotropy=1,      # anisotropy ratio
    bearing=0,         # angle in degrees East of North corresponding to anisotropy ellipse
)

# geostatistical structure built from that variogram, with a log transform
pp_gs = pyemu.geostats.GeoStruct(variograms=v_pp, transform='log')

In [None]:
# bounds of the multiplier parameters applied to hydraulic conductivity
lb = 0.01
ub = 2.0
for f in files:
    # second dot-separated token of the file name, with "_" swapped for "."
    tag = f.split('.')[1].replace("_",".")

    # settings shared by the pilot-point and the zone multipliers of this file
    shared_kwargs = dict(
        zone_array=ib,
        par_style="m",
        geostruct=pp_gs,
        par_name_base=tag,
        pargp=tag.split(".")[0],
        lower_bound=lb,
        upper_bound=ub,
        ult_ubound=10,    # ultimate upper limit of the resulting K values
        ult_lbound=1e-5,  # ultimate lower limit of the resulting K values (tiny)
    )

    # spatially distributed multipliers on pilot points
    df_pp = pf.add_parameters(
        f,
        par_type="pilotpoints",
        pp_options={"prep_hyperpars": False,
                    "pp_space": 25},  # in this case is meters
        **shared_kwargs,
    )

    # one multiplier per zone (a single zone here, since `ib` is all ones)
    df_cn = pf.add_parameters(f, par_type="zone", **shared_kwargs)

In [None]:
# display the name of the first file that was parameterized
files[0]

In [None]:
# model grid in map view with the pilot point locations drawn on top
fig, ax = plt.subplots()
mv = flopy.plot.PlotMapView(model=gwf)
mv.plot_grid()
ax.scatter(x=df_pp.x, y=df_pp.y, s=10)

## Porosity

Let’s try working through one of the transport properties now. As you can see by inspecting the files, properties such as porosity (`mst_porosity`) or longitudinal dispersivity (`dsp_alh`) are defined separately for each transported species. This is by design in MODFLOW 6, as it allows the modeler to vary these properties between species, which is technically correct.

For most practical problems, however, it usually makes sense to use the same values for all species. Therefore, we have a couple of options for handling these properties—let’s focus on porosity for now:

1. Parameterize porosity for all species and define relationships in PEST (e.g., using tied parameters).
2. Parameterize porosity for only one species and then apply those parameters to the remaining species.

Here, we will choose the second option (easier :)). To do this, we will need to write a small function to “copy” the parameters before each forward run. Don’t worry—this is not particularly difficult to implement, and PyEMU is especially well suited for this task.


In [None]:
# let's get a list of species first.

species = sim.model_names[1:] # the flow model is always first so we can skip that one

species

We are going to parameterize H2O and then use those parameters for the rest of the species.

In [None]:
# Porosity is parameterized on the H2O transport model only.
tag = "h2o.mst_porosity_"
files = hbd.get_input_filenames(tag, template_ws=template_ws, extension='.txt')

# flopy does not write nice and tidy array files so we are gonna do that here
for f in files:
    fpath = template_ws / f
    hbd.tidy_array(fpath)

# bounds of the multiplier parameters
lb = 0.05
ub = 1.5

# Ultimate limits of the resulting porosity values. The pilot-point and the
# zone multipliers act on the same model file, so both must carry the SAME
# ultimate bounds: pyemu enforces them per model file at run time and raises
# when the values differ. The zone multiplier previously used 10 / 1e-5
# (carried over from hydraulic conductivity), which also allowed porosity > 1.
ult_ub = 0.6   # max porosity is 1 but we don't wanna go over 0.6
ult_lb = 5e-2  # small floor, just in case

for f in files:
    # second dot-separated token of the file name, with "_" swapped for "."
    tag = f.split('.')[1].replace("_",".")

    # spatially distributed multipliers on pilot points
    df_pp = pf.add_parameters(f,
                    zone_array=ib,
                    par_type="pilotpoints",
                    par_style="m",
                    geostruct=pp_gs,
                    par_name_base=tag,
                    pargp=tag.split(".")[0],
                    lower_bound=lb,
                    upper_bound=ub,
                    ult_ubound=ult_ub,
                    ult_lbound=ult_lb,
                    pp_options={"prep_hyperpars":False,
                                "pp_space":25} #in this case is meters
                    )

    # one multiplier per zone, limited by the same ultimate bounds
    df_cn = pf.add_parameters(f,
                    zone_array=ib,
                    par_type="zone",
                    par_style="m",
                    geostruct=pp_gs,
                    par_name_base=tag,
                    pargp=tag.split(".")[0],
                    lower_bound=lb,
                    upper_bound=ub,
                    ult_ubound=ult_ub,
                    ult_lbound=ult_lb,
                    )

## Equilibrium phases: Organic Matter 

We are going to parameterize the initial mass of Organic matter that is available to reach equilibrium. 

In [None]:
# Initial mass (m0) of the organic matter equilibrium phase.
tag = "equilibrium_phases.orgmatter.m0"
files = hbd.get_input_filenames(tag, template_ws=template_ws, extension='.txt')

# mf6rtm writes these files as tidy array already.. Winning

# bounds of the multiplier parameters
lb = 0.05
ub = 1.5

# Ultimate limits of the resulting m0 values. Both multipliers act on the same
# model file, so they must carry the SAME ultimate bounds: pyemu enforces them
# per model file at run time and raises when the values differ. The zone
# multiplier previously used an upper limit of 10 against 15 for the pilot points.
# NOTE(review): 15 is kept as the intended upper limit -- confirm.
ult_ub = 15
ult_lb = 1e-5  # tiny just in case

for f in files:
    # file name without ".txt", with "_" swapped for "." and lower-cased
    tag = f.split('.txt')[0].replace("_",".").lower()
    group = tag.split(".layer")[0]

    # spatially distributed multipliers on pilot points
    df_pp = pf.add_parameters(f,
                    zone_array=ib,
                    par_type="pilotpoints",
                    geostruct=pp_gs,
                    par_name_base=tag,
                    pargp=group,
                    lower_bound=lb,
                    upper_bound=ub,
                    ult_ubound=ult_ub,
                    ult_lbound=ult_lb,
                    pp_options={"prep_hyperpars":False,
                                "pp_space":25} #in this case is meters
                    )

    # one multiplier per zone, limited by the same ultimate bounds
    df_cn = pf.add_parameters(f,
                    zone_array=ib,
                    par_type="zone",
                    par_style="m",
                    geostruct=pp_gs,
                    par_name_base=tag,
                    pargp=group,
                    lower_bound=lb,
                    upper_bound=ub,
                    ult_ubound=ult_ub,
                    ult_lbound=ult_lb,
                    )

    # register the same array file as observations as well
    _ = pf.add_observations(f, prefix=tag, obsgp=group)


# Pre- and post-processing functions

As we discussed earlier, we want to run some functions before and after the forward model. PyEMU is all about Python, so we are going to build a `forward_run.py` script here using PyEMU.

In [None]:
# extra import lines for the forward run script
pf.extra_py_imports.extend(["flopy", "shutil", "from pathlib import Path"])

# helper functions from herebedragons.py, registered with is_pre_cmd=None
for helper in ("tidy_array()", "get_input_filenames()", "extract_layer_number()"):
    pf.add_py_function("herebedragons.py", helper, is_pre_cmd=None)

# registered as a pre-processing command
pf.add_py_function(
    "herebedragons.py",
    "copy_parameterized_transport_files()",
    is_pre_cmd=True,
)

# the model command itself
pf.mod_sys_cmds.append("mf6rtm")

In [None]:
# write the forward run script from what has been registered on `pf` so far
pf.write_forward_run()

In [None]:
# build the PEST control file object from the PstFrom setup
pst = pf.build_pst()


In [None]:
# write the control file into the template folder (version 2 format)
pst.write(template_ws / 'pest.pst',version=2)

In [None]:
# display the parameter data held by the control file object
pst.parameter_data

# Add Observations

In [None]:
# Register an extra function named process_sim_conc. It handles the concentration
# outputs from the models and matches them (with time interpolation) to the
# equivalent measured observations.
pf.extra_py_imports.append("from flopy.utils.gridintersect import GridIntersect")

# supporting helpers, registered with is_pre_cmd=None
for helper in ("node_to_layer_icell2d()", "time_interpolate()"):
    pf.add_py_function("herebedragons.py", helper, is_pre_cmd=None)

# registered with is_pre_cmd=False
pf.add_py_function("herebedragons.py", "process_sim_conc()", is_pre_cmd=False)

# rebuild the control file so it reflects the additions
pst = pf.build_pst()




# Test runs

It is good practice to do some test runs and sanity checks. This part is especially good for checking parameter ranges and adjusting your conceptual and/or numerical understanding of your site.



In [None]:


# pyemu.os_utils.run('pestpp-ies pest.pst', cwd=template_ws)

In [None]:
# control settings for the test run
# NOTE(review): presumably noptmax = -2 with "ies_run_realname" makes pestpp-ies
# run a single realization -- confirm against the PEST++ documentation
pst.control_data.noptmax = -2
pst.pestpp_options["ies_run_realname"] = 40

# write a separate control file for the test and run pestpp-ies on it
test_pst = "test.pst"
pst.write(template_ws / test_pst, version=2)
pyemu.os_utils.run(f"pestpp-ies {test_pst}", cwd=template_ws)