# Setting up and running Freyberg with pilot points...

### This should be super easy...

In [None]:
%matplotlib inline
import os, shutil
import sys
sys.path.append("..")
import numpy as np
from IPython.display import Image
import pandas as pd
import matplotlib.pyplot as plt

import flopy as flopy
import pyemu

In [None]:
# Project-local helper module that drives the Freyberg example setup.
import freyberg_setup as fs
# Run the helper's pilot-point setup routine (presumably this writes the PEST
# files explored in the following cells -- confirm in freyberg_setup).
fs.setup_pest_pp()
# Working directory and control-file name that the helper exposes for the
# pilot-point case; every later cell builds its paths from these two values.
working_dir = fs.WORKING_DIR_PP
pst_name = fs.PST_NAME_PP

## Let's go explore the files that were created for us.
### Some things to note:
### - what are the parameters that start with ``w``?
### - how many pilot points are there?
### - anything else look different?

In [None]:
# Load the control file found in the working directory into a pyemu Pst object.
pst = pyemu.Pst(os.path.join(working_dir,pst_name))
# Last expression of the cell, so the notebook displays the control-data
# ``noptmax`` value.
pst.control_data.noptmax

In [None]:
# Display the ``pestpp_options`` attribute carried by the loaded control file.
pst.pestpp_options

### Ok - let's run this bad boy...

In [None]:
# Launch the parallel "pestpp" run from inside the working directory
# (master and workers are both rooted at '.').
#
# The starting directory is recorded and restored in a ``finally`` block so
# that a failed run does not leave the kernel stranded inside ``working_dir``,
# which would silently break the relative paths used by every later cell.
# Restoring the saved directory (instead of ``os.chdir('..')``) also stays
# correct if ``working_dir`` is nested or absolute.
start_dir = os.getcwd()
os.chdir(working_dir)
try:
    pyemu.helpers.start_slaves('.',"pestpp",pst_name,num_slaves=15,master_dir='.')
finally:
    os.chdir(start_dir)

### Let's see how we did:

In [None]:
# Display the objective function value reported by the Pst object
# (presumably evaluated from the residuals written by the run above --
# confirm against the pyemu documentation).
pst.phi

### Wow - we slayed ``phi`` this time

In [None]:
# Show the residual-table rows for the observations listed in
# ``pst.nnz_obs_names`` (by its name, presumably the non-zero-weighted
# observations -- confirm against the pyemu documentation).
pst.res.loc[pst.nnz_obs_names,:]

In [None]:
# Plot total phi against the cumulative number of model runs, read from the
# ``.iobj`` file that sits next to the control file.
df = pd.read_csv(os.path.join(working_dir,pst_name.replace(".pst",".iobj")))
fig, ax = plt.subplots()
ax.plot(df.model_runs_completed,df.total_phi,marker='.')
ax.set_xlabel("model runs")
# Raw string: "\p" is not a valid escape sequence, so the non-raw form emits a
# DeprecationWarning/SyntaxWarning on recent Python versions. The label text
# passed to matplotlib is unchanged.
ax.set_ylabel(r"$\phi$")

In [None]:
# Scatter of modelled (x) against measured (y) values for the entries of
# ``pst.nnz_obs_names`` whose names start with "c", with a 1:1 reference line.
ax = plt.subplot(111,aspect="equal")
wl_names = [n for n in pst.nnz_obs_names if n.startswith("c")]
ax.scatter(pst.res.loc[wl_names,"modelled"],pst.res.loc[wl_names,"measured"],marker='.')
# Use one common range for both axes so the 1:1 line runs corner to corner.
ylim,xlim = ax.get_ylim(),ax.get_xlim()
mn = min(ylim[0],xlim[0])
mx = max(ylim[1],xlim[1])
ax.plot([mn,mx],[mn,mx])
ax.set_ylim(mn,mx)
ax.set_xlim(mn,mx)
# Label the axes so the figure can be read on its own: without labels it is
# not evident which axis holds the modelled and which the measured values.
ax.set_xlabel("modelled")
ax.set_ylabel("measured")

### Sweet!  We nailed the heads!  This must be the perfect model in every way!  But, before we finalize the report, let's check out the parameter values:

In [None]:
# Read the parameter uncertainty summary written next to the control file and
# plot the distributions of the parameters whose names start with "hk".
par_usum_file = os.path.join(working_dir, pst_name.replace(".pst", ".par.usum.csv"))
df_paru = pd.read_csv(par_usum_file, index_col=0)

# The summary file is indexed with upper-case names here, while the control
# file uses lower-case names, hence the upper()/lower() round trip.
hk_pars = [name.upper() for name in pst.par_names if name.startswith("hk")]
df_hk = df_paru.loc[hk_pars, :]
ax = pyemu.helpers.plot_summary_distributions(df_hk, label_post=True)

# Mark the log10 lower/upper bounds of the first HK parameter with dashed
# vertical lines.
first_hk = hk_pars[0].lower()
mn = np.log10(pst.parameter_data.loc[first_hk, "parlbnd"])
mx = np.log10(pst.parameter_data.loc[first_hk, "parubnd"])
for bound in (mn, mx):
    ax.plot([bound, bound], ax.get_ylim(), "k--")

### Lots of HK pars at their bounds...not a good sign.  We should probably see how the ``HK`` field looks:

In [None]:
# Rebuild the layer-1 HK array from the ``.parb`` parameter file:
#   1. replace the parameter values held in ``pst`` with those in the file,
#   2. write those values out through the template files,
#   3. turn the pilot-point file into ``hk_layer_1.ref`` using the
#      pre-computed factors file.
# All file names are relative, so the work is done from inside
# ``working_dir``. The starting directory is restored in a ``finally`` block so
# that a failure in any step does not leave the kernel in the wrong directory,
# and restoring the saved path (rather than ``os.chdir("..")``) stays correct
# if ``working_dir`` is nested or absolute.
start_dir = os.getcwd()
os.chdir(working_dir)
try:
    pst.parrep(pst_name.replace(".pst",".parb"))
    pyemu.pst_utils.write_parvals_in_tplfiles(pst)
    pyemu.gw_utils.fac2real("hkpp.dat",factors_file="hkpp.dat.fac",out_file="hk_layer_1.ref")
finally:
    os.chdir(start_dir)

In [None]:
# Pilot-point table taken from the HK pilot-point template file.
pp_tpl_file = os.path.join(working_dir, "hkpp.dat.tpl")
df_pp = pyemu.gw_utils.pp_tpl_to_dataframe(pp_tpl_file)

# Load the model from the working directory and plot the first entry of the
# UPW ``hk`` property, with the pilot-point locations drawn on top.
m = flopy.modflow.Modflow.load(fs.MODEL_NAM, model_ws=working_dir)
hk_first_layer = m.upw.hk[0]
ax = hk_first_layer.plot(colorbar=True, alpha=0.5)
ax.scatter(df_pp.x, df_pp.y, marker='x')

In [None]:
# Swap the first HK entry of the loaded model for the "truth" array file, then
# draw it the same way as the calibrated field, pilot points on top.
# Note: this overwrites ``m.upw.hk[0]`` in place on the already-loaded model.
truth_hk_file = os.path.join(fs.BASE_MODEL_DIR, "hk.truth.ref")
m.upw.hk[0] = truth_hk_file
hk_truth = m.upw.hk[0]
ax = hk_truth.plot(colorbar=True, alpha=0.5)
ax.scatter(df_pp.x, df_pp.y, marker='x')

### Something is wrong...how does the calibrated HK field have so much more variability than the "truth"?  We had better check out the forecasts:

In [None]:
# Plot the forecast summary distributions, one subplot per forecast, from the
# ``.pred.usum.csv`` file that sits next to the control file.
pred_usum_file = os.path.join(working_dir, pst_name.replace(".pst", ".pred.usum.csv"))
figs, axes = pyemu.helpers.plot_summary_distributions(pred_usum_file, subplots=True)

# Each subplot title is used (lower-cased) as an observation name; draw that
# observation's ``obsval`` as a dashed vertical line without letting the extra
# line change the y-limits.
obs_data = pst.observation_data
for ax in axes:
    fname = ax.get_title().lower()
    ylim = ax.get_ylim()
    v = obs_data.loc[fname, "obsval"]
    ax.plot([v, v], ylim, "b--")
    ax.set_ylim(ylim)

### Doh! We are way off for lots of forecasts (compared to the "truth").  So what happened?  Answer: overfitting. We specified lots of parameters, so we were able to fit the observations really well - too well.

### Even though we are able to measure water levels very precisely, the model has problems (model error), so we shouldn't expect the model to reproduce the observations so well.  But how do we control this overfitting??? 