In [None]:
import pyemu
import flopy as fp
import os, shutil
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## define a location for the background files to start with, and a setup directory to copy to and work in

In [None]:
# source folder holding the starting model / PEST files
org_d = Path('../pest_background_files/')
# working folder; the next cell deletes it (if present) and recreates it from org_d
t_d = Path('../pest_ies_setup')

In [None]:
# Verify the source folder first: otherwise a mistyped org_d would delete the
# working folder and then fail to recreate it.
if not org_d.is_dir():
    raise FileNotFoundError(f"background files folder not found: {org_d}")
# start from a clean copy so files left over from an earlier run cannot leak in
if t_d.exists():
    shutil.rmtree(t_d)
shutil.copytree(org_d,t_d)


### get spatial reference for the model - pilot points are based on geographic coordinates (not on layer/row/column)

In [None]:
# load the MODFLOW 6 simulation from the working folder
sim = fp.mf6.MFSimulation.load(sim_ws = str(t_d))
# no model name is passed -- presumably this returns the simulation's only/first model; TODO confirm
gwf = sim.get_model()

In [None]:
# build the spatial reference from the name file in the working folder plus
# the delr / delc spacing arrays taken from the loaded DIS package
sr = pyemu.helpers.SpatialReference.from_namfile(
        str(t_d / "at.nam"),
        delr=gwf.dis.delr.array, delc=gwf.dis.delc.array)
# display the spatial reference for a quick visual check
sr

### set up variograms for K pilot points

In [None]:
# aquifer pilot-point spacing, in cells (reused below as every_n_cell)
pp_aq = 3

# variogram range: three pilot-point spacings scaled by the first delc value,
# i.e. expressed in the length units of the model (meters in our case)
aq_corr_range = pp_aq * 3 * sr.delc[0]

# exponential variogram for spatially varying parameters
v_aq = pyemu.geostats.ExpVario(
    contribution=1.0,  # sill
    a=aq_corr_range,   # range of correlation
    anisotropy=1.0,    # anisotropy ratio
    bearing=0.0,       # angle in degrees East of North of the anisotropy ellipse
)

# geostatistical structure for spatially varying parameters (log transform)
aq_gs = pyemu.geostats.GeoStruct(variograms=v_aq, transform='log')

In [None]:
# visual check of the aquifer variogram defined above
v_aq.plot()

In [None]:
# clay pilot-point spacing, in cells (reused below as every_n_cell)
pp_clay = 6

# variogram range: five pilot-point spacings scaled by the first delc value,
# i.e. expressed in the length units of the model (meters in our case)
clay_corr_range = pp_clay * 5 * sr.delc[0]

# exponential variogram for spatially varying parameters
v_clay = pyemu.geostats.ExpVario(
    contribution=1.0,   # sill
    a=clay_corr_range,  # range of correlation
    anisotropy=1.0,     # anisotropy ratio
    bearing=0.0,        # angle in degrees East of North of the anisotropy ellipse
)

# geostatistical structure for spatially varying parameters (log transform)
clay_gs = pyemu.geostats.GeoStruct(variograms=v_clay, transform='log')

In [None]:
# visual check of the clay variogram defined above
v_clay.plot()

In [None]:
# idomain array at index 0 (first layer)
# NOTE(review): `ib` is not referenced by any later cell visible here -- confirm it is still needed
ib = gwf.dis.idomain.array[0]


### we only need pilot points for two unique K files - k_aq and k_clay

In [None]:
# starting K arrays for the aquifer and the clay, read from the working folder;
# forward_run.py later overwrites these same two files with kriged values
k_aq = np.loadtxt(t_d/'k_aq.ref')
k_clay = np.loadtxt(t_d/'k_clay.ref')


In [None]:
# side-by-side maps of the two starting K arrays, each with its own colorbar;
# looping over the panels avoids the copy-pasted calls and, because the cell no
# longer ends on a bare expression, keeps the Text(...) repr out of the output
fig, ax = plt.subplots(1, 2, figsize=(8, 4))
for panel, (k_arr, label) in zip(ax, [(k_aq, 'aquifer'), (k_clay, 'clay')]):
    im = panel.imshow(k_arr)
    plt.colorbar(im, ax=panel)
    panel.set_title(label)


### we can define spatial zones based on the initial K values, assuming they are grouped as homogeneous and unique values

In [None]:
# distinct starting K values in the aquifer array (np.unique returns them sorted)
uniq_aq = np.unique(k_aq)
# number them 1..N: zone id -> K value
aq_zone_ids = np.arange(1, uniq_aq.size + 1)
aq_zones_dict = {zone_id: k_val for zone_id, k_val in zip(aq_zone_ids, uniq_aq)}
aq_zones_dict

In [None]:
# distinct starting K values in the clay array (np.unique returns them sorted)
uniq_clay = np.unique(k_clay)
# number them 1..N: zone id -> K value
clay_zone_ids = np.arange(1, uniq_clay.size + 1)
clay_zones_dict = {zone_id: k_val for zone_id, k_val in zip(clay_zone_ids, uniq_clay)}
clay_zones_dict

### make arrays of integer zone IDs for the unique values of the aquifer ... 

In [None]:
# begin with zone 0 everywhere, then stamp each zone id onto the cells whose
# starting K equals that zone's value
aq_zones = np.zeros_like(k_aq)
for zone_id in aq_zones_dict:
    in_zone = k_aq == aq_zones_dict[zone_id]
    aq_zones[in_zone] = zone_id


### ... and the clay

In [None]:
# only put pilot points in the clay where the clay is actually present:
# zone 1 marks cells whose starting K equals the lowest unique clay value
# (clay_zones_dict[1]); every other cell stays 0.
# The array is shaped from k_clay -- the array the mask is computed on --
# rather than from k_aq, so the mask and the target always agree in shape.
clay_zones = np.zeros_like(k_clay)
clay_zones[k_clay==clay_zones_dict[1]] = 1

In [None]:
# 2x2 overview: starting K on the top row, derived zone arrays on the bottom row
fig, ax = plt.subplots(2, 2, figsize=(8, 6))
ax = ax.ravel()
panels = [
    ('K aquifer', k_aq),
    ('K clay', k_clay),
    ('Zones aquifer', aq_zones),
    ('Zones clay', clay_zones),
]
for axis, (title, grid) in zip(ax, panels):
    image = axis.imshow(grid)
    plt.colorbar(image, ax=axis)
    axis.set_title(title)


## let's set up pilot points for the k files

In [None]:
# lay out aquifer pilot points on a regular grid; pilot point and template
# files, plus a shapefile of the points, are written into the working folder
aq_pp_df = pyemu.pp_utils.setup_pilotpoints_grid(
    sr=sr,                             # model spatial reference
    ibound=aq_zones,                   # to which cells to setup ppoints
    use_ibound_zones=True,             # presumably: use the ibound values as zone numbers -- confirm
    every_n_cell=pp_aq,                # pilot point spacing
    prefix_dict={0: ['hk_aq']},        # prefix to add to parameter names
    pp_dir=t_d,
    tpl_dir=t_d,
    shapename=str(t_d / 'pp_aq.shp'),
)

In [None]:
# map of the aquifer pilot points, coloured by zone, drawn on the current axes
aq_pp_ax = plt.gca()
aq_pp_ax.scatter(x=aq_pp_df.x, y=aq_pp_df.y, c=aq_pp_df.zone)
aq_pp_ax.axis('square')
aq_pp_ax.set_xlim([0, 12500])

In [None]:
# lay out clay pilot points on a regular grid; pilot point and template
# files, plus a shapefile of the points, are written into the working folder
clay_pp_df = pyemu.pp_utils.setup_pilotpoints_grid(
    sr=sr,                              # model spatial reference
    ibound=clay_zones,                  # to which cells to setup ppoints
    use_ibound_zones=True,              # presumably: use the ibound values as zone numbers -- confirm
    every_n_cell=pp_clay,               # pilot point spacing
    prefix_dict={0: ['hk_clay']},       # prefix to add to parameter names
    pp_dir=t_d,
    tpl_dir=t_d,
    shapename=str(t_d / 'pp_clay.shp'),
)

In [None]:
# map of the clay pilot points, coloured by zone, drawn on the current axes
clay_pp_ax = plt.gca()
clay_pp_ax.scatter(x=clay_pp_df.x, y=clay_pp_df.y, c=clay_pp_df.zone)
clay_pp_ax.axis('square')
clay_pp_ax.set_ylim([0, 20000])
clay_pp_ax.set_xlim([0, 12500])

In [None]:
# template files now present in the working folder
list(t_d.glob('*.tpl'))

In [None]:
# instruction files present in the working folder
list(t_d.glob('*.ins'))

In [None]:
# parameter group assigned to each aquifer pilot point (the next cell filters on this column)
aq_pp_df.pargp

In [None]:
# keep only the pilot points that belong to each array's own parameter group
aq_points = aq_pp_df.loc[aq_pp_df.pargp == 'hk_aq']
clay_points = clay_pp_df.loc[clay_pp_df.pargp == 'hk_clay']

# one kriging object per array: geostatistical structure + its pilot points
ok_aq = pyemu.geostats.OrdinaryKrige(aq_gs, aq_points)
ok_clay = pyemu.geostats.OrdinaryKrige(clay_gs, clay_points)


In [None]:
# same interpolation limits for both arrays: between 1 and 10 points per cell
krige_kwargs = dict(minpts_interp=1, maxpts_interp=10)

# kriging factors on the model grid for each array
aq_fac_df = ok_aq.calc_factors_grid(sr, **krige_kwargs)
clay_fac_df = ok_clay.calc_factors_grid(sr, **krige_kwargs)


In [None]:
# save the factors of each kriging object; forward_run.py reads these files by name
for kriger, fac_name in ((ok_aq, 'pp_aq.fac'), (ok_clay, 'pp_clay.fac')):
    kriger.to_grid_factors_file(str(t_d / fac_name))


In [None]:
# confirm the factor files were written to the working folder
list(t_d.glob('*.fac'))

In [None]:
# inspect the aquifer kriging factors returned by calc_factors_grid
aq_fac_df

### now make a PST file

In [None]:
# The io files are discovered relative to '.', so the control file is built
# from inside the working folder.
cwd = Path.cwd()
os.chdir(t_d)
try:
    pst = pyemu.Pst.from_io_files(*pyemu.helpers.parse_dir_for_io_files('.'))
finally:
    # always return to the starting directory, even if the setup above fails;
    # otherwise every later relative path in the notebook would be wrong
    os.chdir(cwd)

### set initial parameter values

In [None]:
# shorthand for the parameter table; later cells edit `pars` and then write
# `pst`, so this is relied on to be the same DataFrame rather than a copy
pars = pst.parameter_data

### first set constant parameter values

In [None]:
# look at the parameters whose name contains 'aniso' before editing them
pars.loc[pars.parnme.str.contains('aniso')]

In [None]:
# initial value, bounds and group for each constant (non pilot-point) parameter
constant_par_settings = {
    'rv1': {
        'parval1': 1e+05,
        'parlbnd': 1e+03,
        'parubnd': 1e+06,
        'pargp': 'riv_cond',
    },
    'rch': {
        'parval1': 0.003641,
        'parlbnd': 0.003641*.9,   # 10% below the starting value
        'parubnd': 0.003641*1.1,  # 10% above the starting value
        'pargp': 'rch',
    },
    'kaniso': {
        'parval1': 0.2,
        'parlbnd': 0.001,
        'parubnd': 0.85,
        'pargp': 'anisotropy',
    },
    'kaniso_clay': {
        'parval1': 0.5,
        'parlbnd': 0.001,
        'parubnd': 0.85,
        'pargp': 'anisotropy_clay',
    },
}

# apply one cell at a time, in the order listed above
for par_name, settings in constant_par_settings.items():
    for column, value in settings.items():
        pars.loc[par_name, column] = value

### next group parameters by their arrays and zones

In [None]:
# parameters that carry a zone value
zoned = ~pars.zone.isnull()
zoned_names = pars.loc[zoned].parnme
zoned_zones = pars.loc[zoned].zone

# new group name = text before '_i:' in the parameter name + '_' + integer zone
new_groups = []
for par_name, zone in zip(zoned_names, zoned_zones):
    array_prefix = par_name.split('_i:')[0]
    new_groups.append(f"{array_prefix}_{int(float(zone))}")

pars.loc[zoned, 'pargp'] = new_groups

In [None]:
# convert every entry of the zone column to float, element by element
pars.zone = list(map(float, pars.zone))

### Without enforcing too much structure, we can adjust a few starting values and bounds to inform ies of at least some of the general patterns of K values

In [None]:
# broad settings: applied to every group whose name contains the array prefix
prefix_settings = [
    ('hk_aq', 'parval1', 200),
    ('hk_aq', 'parlbnd', .01),
    ('hk_aq', 'parubnd', 1e3),
    ('hk_clay', 'parval1', 1),
    ('hk_clay', 'parlbnd', .0001),
    ('hk_clay', 'parubnd', 1e2),
]
for prefix, column, value in prefix_settings:
    pars.loc[pars.pargp.str.contains(prefix), column] = value

# per-zone overrides: applied to exact group names, after the broad settings
group_overrides = [
    ('hk_aq_5', 'parval1', 200),
    ('hk_aq_4', 'parval1', 200),
    ('hk_aq_3', 'parval1', 200),
    ('hk_aq_4', 'parubnd', 750),
    ('hk_aq_3', 'parubnd', 500),
    ('hk_clay_1', 'parval1', 1),
    ('hk_clay_2', 'parval1', 150),
    ('hk_clay_2', 'parlbnd', 1),
    ('hk_clay_2', 'parubnd', 1000),
]
for group, column, value in group_overrides:
    pars.loc[pars.pargp == group, column] = value

## read in the observation values and set them

In [None]:
# whitespace-delimited table of observations; the first column becomes the
# index so the rows can be aligned with pst.observation_data further down.
# sep=r'\s+' is the replacement for delim_whitespace=True, which pandas has deprecated.
obsvals = pd.read_csv(t_d / 'obsvalues.dat', sep=r'\s+', index_col=0)
# peek at five random rows
obsvals.sample(5)

In [None]:
# five random rows of the observation table as created by from_io_files, before any edits
pst.observation_data.sample(5)

In [None]:
# copy group name, observed value and weight from the file into the control
# file's observation table, matching rows on observation name
for column in ('obgnme', 'obsval', 'weight'):
    pst.observation_data.loc[obsvals.index, column] = obsvals[column]

pst.observation_data

### now we need to write out the `forward_run` script, which interpolates the pilot points to the grid, runs MF6, and postprocesses the observations so they are ready to be read

In [None]:
# pieces of the generated script, in the order they are written
forward_run_parts = (
    # add imports
    "import os\nimport shutil\nimport numpy as np\nimport pyemu\nimport flopy\n",
    "import pandas as pd\n",
    # preprocess pilot points to grid
    "_ = pyemu.geostats.fac2real('hk_aqpp.dat', factors_file='pp_aq.fac',out_file='k_aq.ref')\n",
    "_ = pyemu.geostats.fac2real('hk_claypp.dat', factors_file='pp_clay.fac',out_file='k_clay.ref')\n",
    # run MF6
    "pyemu.os_utils.run('mf6')\n",
    # process the observations
    "outfiles = ['./riv.csv','./at.wt.csv','./chd.csv','./at.csv']\n",
    "obs = pd.concat([pd.read_csv(i).T.iloc[1:] for i in outfiles])\n",
    # note that we have to add the two river obs
    "obs.loc['DS'] = obs.loc['DS'] + obs.loc['PF']\n",
    # head difference targets
    "obs.loc['UW02'] = obs.loc['U02']-obs.loc['W02']\n",
    "obs.loc['UW08'] = obs.loc['U08']-obs.loc['W08']\n",
    "obs.loc['UW15'] = obs.loc['U15']-obs.loc['W15']\n",
    "obs.columns=['obsname']\n",
    'obs.to_csv("allobs.dat", sep = " ")',
)

# writelines adds no separators, so the file holds exactly the pieces above
with open(t_d / 'forward_run.py', 'w') as f:
    f.writelines(forward_run_parts)
    

### now set a few ies-specific values and write out the file

In [None]:
# the forward model is the script written above
pst.model_command = ['python forward_run.py']
# noptmax = 0 for the initial run
pst.control_data.noptmax = 0

# PEST++ options, set in this order
ies_options = {
    'ies_num_reals': 150,
    'par_sigma_range': 6,
    'ies_no_noise': 'true',
}
for option, value in ies_options.items():
    pst.pestpp_options[option] = value

# write the control file into the working folder
pst.write(str(t_d / 'mv.ies.pst'))

### copy the run folder over to be a master for parallel runs

In [None]:
# master folder for the PEST++ runs
rundir = Path('../tmprun')
# remove any copy left from an earlier run so the folder mirrors t_d exactly
if rundir.exists():
    shutil.rmtree(rundir)
shutil.copytree(t_d, rundir)

## now we need to balance the objective function - we can start based on the initial run

### run the model once through PEST

In [None]:
# run pestpp-ies on the control file, with the master folder passed as the
# second argument; noptmax was set to 0 when the file was written
pyemu.os_utils.run('pestpp-ies mv.ies.pst', str(rundir))

### now read in the residuals and see how things look

In [None]:
# re-load the control file from the run folder
# NOTE(review): the phi / residual calls below presumably rely on result files
# written next to it by the run above -- confirm
pst_new = pyemu.Pst(str(rundir / 'mv.ies.pst'))

In [None]:
# pie chart of the objective function before reweighting
pst_new.plot(kind='phi_pie')

### what are the components of the objective function?

In [None]:
# objective function components before reweighting
pst_new.phi_components

In [None]:
# total objective function of the initial run; used below as the total to redistribute
phi = pst_new.phi
phi

In [None]:
# share of the current total phi assigned to each observation group
phi_fractions = {
    'headdiffgroup': .15,
    'headgroup': .225,
    'lakegroup': .225,
    'rivgroup': .4,
}
# target phi contribution per group
new_components = {group: phi * fraction for group, fraction in phi_fractions.items()}

In [None]:
# adjust the observation weights using the per-group phi targets in new_components
pst_new.adjust_weights(obsgrp_dict=new_components)

In [None]:
# objective function components after reweighting -- compare with new_components
pst_new.phi_components

In [None]:
# pie chart of the objective function after reweighting
pst_new.plot(kind='phi_pie')

In [None]:
# observation table after the weight adjustment
pst_new.observation_data

In [None]:
# raise noptmax from the 0 used for the initial run to 3 for the real run
pst_new.control_data.noptmax=3

In [None]:
# write the reweighted control file to both the master folder and the working folder
for target_dir in (rundir, t_d):
    pst_new.write(str(target_dir / 'mv.ies.pst'))


### now run the parameter estimation in parallel

In [None]:
# set to False to skip the (long) parallel run when re-executing the notebook
run_flag = True
if run_flag:
    pyemu.os_utils.start_workers(
        str(t_d),                     # first positional argument: presumably the folder the workers are based on -- confirm
        exe_rel_path='pestpp-ies',
        pst_rel_path='mv.ies.pst',
        num_workers=15,
        master_dir=str(rundir),
    )