# how many parameters is "enough"?

### During these exercises, we have seen a traditional workflow: parameter estimation followed by uncertainty analysis.  In the pilot points notebooks, we also saw that it took about 500 model runs to "calibrate" the model and then another 500 to 1000 to find a decent set (ensemble) of realizations that fit the data acceptably well.  

### But...even using pilot points as a parameterization device is a form of regularization: ideally, we would have an HK parameter in every model cell. But, that is too expensive in terms of model runs for calibration and uncertainty analysis (at least as we have learned it so far).

In [None]:
%matplotlib inline
import os, shutil
import sys
sys.path.append("..")
import numpy as np
from IPython.display import Image
import pandas as pd
import matplotlib.pyplot as plt

import flopy as flopy
import pyemu

In [None]:
# Run the project's `setup_pest_gr` helper, then keep the working directory
# and control-file name it exposes for use by the cells below.
import freyberg_setup as fs

fs.setup_pest_gr()
working_dir, pst_name = fs.WORKING_DIR_GR, fs.PST_NAME_GR

In [None]:
# Plot the model found in `working_dir` / `pst_name` with the project's plotting helper.
fs.plot_model(working_dir, pst_name)

In [None]:
# Load the PEST control file and display how many parameters it contains.
control_file = os.path.join(working_dir, pst_name)
pst = pyemu.Pst(control_file)
"number of parameters: {0} : WTF!".format(pst.npar)

In [None]:
# Set the transformation of every parameter in the "hk" group to "none",
# then display the parameter table.
par = pst.parameter_data
is_hk = par.pargp == "hk"
par.loc[is_hk, "partrans"] = "none"
# optional: also cap the hk upper bound
#par.loc[par.pargp=="hk","parubnd"] = 15.0
par

In [None]:
# Plot the prior parameter distributions.
# NOTE(review): `unique_only=True` presumably plots each distinct prior only once — confirm against the pyemu docs.
pst.plot(kind="prior",unique_only=True)

In [None]:
# pestpp options for the first run (no prior covariance file is supplied yet).
ies_options = {
    "ies_subset_size": 5,
    "ies_num_reals": 30,
    "ies_lambda_mults": [0.01, 1.0, 10.0],
    "lambda_scale_fac": [0.95, 1.1],
}
for opt_name, opt_value in ies_options.items():
    pst.pestpp_options[opt_name] = opt_value
#pst.pestpp_options["parcov_filename"] = "prior.cov"

# Limit the run to four iterations and write the updated control file.
pst.control_data.noptmax = 4
pst.write(os.path.join(working_dir, pst_name))

In [None]:
# Run pestpp-ies from inside the working directory.  The starting directory is
# remembered and restored in `finally`, so a failed run (or a nested
# `working_dir`) cannot leave the notebook in the wrong directory for the
# cells that follow.
start_dir = os.getcwd()
os.chdir(working_dir)
try:
    pyemu.helpers.start_slaves(".","pestpp-ies",pst_name,num_slaves=15,master_dir=".")
finally:
    os.chdir(start_dir)

In [None]:
# Iteration whose parameter ensemble is compared with the initial one.
it = 4
m = flopy.modflow.Modflow.load(
    fs.MODEL_NAM, model_ws=working_dir, load_only=["upw"], check=False
)
par = pst.parameter_data

# Parameter ensembles written next to the control file: iteration 0 and
# iteration `it`; column names are lower-cased after reading.
init_par = pd.read_csv(pst.filename.replace(".pst", ".0.par.csv"), index_col=0)
init_par.columns = init_par.columns.str.lower()
final_par = pd.read_csv(
    pst.filename.replace(".pst", ".{0}.par.csv".format(it)), index_col=0
)
final_par.columns = final_par.columns.str.lower()

# Parameters of the "hk" group; the 2nd and 3rd "_"-separated tokens of each
# name (minus their first character) are parsed as integer array indices.
gr_par = par.loc[par.pargp == "hk", :].copy()
gr_par.loc[:, "i"] = gr_par.parnme.apply(lambda name: int(name.split('_')[1][1:]))
gr_par.loc[:, "j"] = gr_par.parnme.apply(lambda name: int(name.split('_')[2][1:]))

# Colour limits come from realization "1" of the initial ensemble and are
# shared by both panels.
gr_par.loc[:, "parval1"] = init_par.loc["1", gr_par.parnme]
mn, mx = gr_par.parval1.min(), gr_par.parval1.max()

# Left panel: initial ensemble; right panel: iteration `it`.
ax1, ax2 = plt.subplot(121), plt.subplot(122)
for ax, ensemble in ((ax1, init_par), (ax2, final_par)):
    gr_par.loc[:, "parval1"] = ensemble.loc["1", gr_par.parnme]
    arr = np.zeros((m.nrow, m.ncol))
    arr[gr_par.i, gr_par.j] = gr_par.parval1
    c = ax.imshow(arr, vmin=mn, vmax=mx)
plt.colorbar(c)

In [None]:
# Load the observation ensembles for iteration 0 and iteration `it` in this
# cell: no earlier cell defines `init_df` / `final_df`, so running the
# notebook top to bottom would otherwise stop here with a NameError.
it = 4
init_df = pd.read_csv(pst.filename.replace(".pst", ".0.obs.csv"), index_col=0)
init_df.columns = init_df.columns.str.lower()
final_df = pd.read_csv(pst.filename.replace(".pst", ".{0}.obs.csv".format(it)), index_col=0)
final_df.columns = final_df.columns.str.lower()

# Histogram the forecast columns of both ensembles, with the control-file
# "obsval" of each forecast passed as the deterministic value.
pyemu.plot_utils.ensemble_helper({"0.5":init_df.loc[:,pst.forecast_names],"b":final_df.loc[:,pst.forecast_names]},
                                 deter_vals=pst.observation_data.loc[pst.forecast_names,"obsval"].to_dict(),bins=20,
                                deter_range=True,std_window=1)

## It's (always) about the Prior

pestpp-ies, like every tool we use for Bayes, is highly dependent on the Prior parameter distributions.  pestpp-ies makes that more explicit.  So let's build and use a full Prior covariance matrix:

In [None]:
def _name_token_to_int(name, pos):
    """Return token `pos` of the "_"-split name, minus its first character, as an int."""
    return int(name.split('_')[pos][1:])


# For each listed parameter group, collect its parameters together with the
# array indices parsed from the parameter name and the matching x/y values
# from the model's cell-centre grids.
grid_prefixes = ['hk','r0','r1','ss','sy',"pr"]
grid_groups = []
for prefix in grid_prefixes:
    gr_par = par.loc[par.pargp == prefix].copy()
    gr_par.loc[:, "i"] = gr_par.parnme.apply(_name_token_to_int, args=(1,))
    gr_par.loc[:, "j"] = gr_par.parnme.apply(_name_token_to_int, args=(2,))
    gr_par.loc[:, "x"] = m.sr.xcentergrid[gr_par.i, gr_par.j]
    gr_par.loc[:, "y"] = m.sr.ycentergrid[gr_par.i, gr_par.j]
    grid_groups.append(gr_par)



In [None]:
# Exponential variogram -> geostatistical structure -> prior covariance matrix
# for the grid parameter groups collected above.
v = pyemu.geostats.ExpVario(contribution=1.0, a=10000.0)
gs = pyemu.geostats.GeoStruct(variograms=v)
struct_dict = {gs: grid_groups}
cov = pyemu.helpers.geostatistical_prior_builder(
    pst=pst, struct_dict=struct_dict, sigma_range=6
)

# Save the matrix next to the control file, then show it with the exact-zero
# entries masked out.
cov_path = os.path.join(working_dir, "prior.cov")
cov.to_ascii(cov_path)
cov_values = cov.x
plt.imshow(np.ma.masked_where(cov_values == 0, cov_values))

In [None]:
#par.loc[par.pargp=="pr","partrans"] = "fixed"
#pst.npar_adj

## run pestpp-ies

First, let's run without regularization just to see how well we can do...

In [None]:
# Same pestpp options as the first run, now also pointing at the prior
# covariance file written to the working directory.
ies_options = {
    "ies_subset_size": 5,
    "ies_num_reals": 30,
    "ies_lambda_mults": [0.01, 1.0, 10.0],
    "parcov_filename": "prior.cov",
    "lambda_scale_fac": [0.95, 1.1],
}
for opt_name, opt_value in ies_options.items():
    pst.pestpp_options[opt_name] = opt_value

# Limit the run to four iterations and write the updated control file.
pst.control_data.noptmax = 4
pst.write(os.path.join(working_dir, pst_name))

In [None]:
# Run pestpp-ies from inside the working directory.  The starting directory is
# remembered and restored in `finally`, so a failed run (or a nested
# `working_dir`) cannot leave the notebook in the wrong directory for the
# cells that follow.
start_dir = os.getcwd()
os.chdir(working_dir)
try:
    pyemu.helpers.start_slaves(".","pestpp-ies",pst_name,num_slaves=15,master_dir=".")
finally:
    os.chdir(start_dir)

### post processing

In [None]:
# Plot the "mean" column of the phi.actual CSV against the "total_runs" column.
phi_csv = pst.filename.replace(".pst", ".phi.actual.csv")
df = pd.read_csv(phi_csv)
plt.plot(df["total_runs"], df["mean"], marker='.')
plt.show()

### 1 to 1

In [None]:
# pick which iteration results to visualize
it = 4

# Build both file names directly from the control-file name.  Deriving the
# final name with `init_obs.replace("0", ...)` would rewrite every "0" in the
# path (directory or case name included), not just the iteration token.
init_obs = pst.filename.replace(".pst", ".0.obs.csv")
final_obs = pst.filename.replace(".pst", ".{0}.obs.csv".format(it))

# Read the two observation ensembles and lower-case their column names.
init_df = pd.read_csv(init_obs,index_col=0)
init_df.columns = init_df.columns.map(str.lower)
final_df = pd.read_csv(final_obs,index_col=0)
final_df.columns = final_df.columns.map(str.lower)

# 1-to-1 / residual plots for the initial and final ensembles.
fig = pyemu.plot_utils.ensemble_res_1to1({'0.5':init_df,"b":final_df},pst=pst)

### forecast posteriors

In [None]:
# Histogram the forecast columns of the initial and final observation
# ensembles; the control-file "obsval" of each forecast is passed as the
# deterministic value.
forecast_names = pst.forecast_names
forecast_ensembles = {
    "0.5": init_df.loc[:, forecast_names],
    "b": final_df.loc[:, forecast_names],
}
forecast_obsvals = pst.observation_data.loc[forecast_names, "obsval"].to_dict()
pyemu.plot_utils.ensemble_helper(
    forecast_ensembles,
    deter_vals=forecast_obsvals,
    bins=10,
    deter_range=True,
    std_window=1,
)

ruh-roh...

### plot a realization of hk...

In [None]:
# Iteration whose parameter ensemble is compared with the initial one.
it = 4
m = flopy.modflow.Modflow.load(
    fs.MODEL_NAM, model_ws=working_dir, load_only=["upw"], check=False
)
par = pst.parameter_data

# Parameter ensembles written next to the control file: iteration 0 and
# iteration `it`; column names are lower-cased after reading.
init_par = pd.read_csv(pst.filename.replace(".pst", ".0.par.csv"), index_col=0)
init_par.columns = init_par.columns.str.lower()
final_par = pd.read_csv(
    pst.filename.replace(".pst", ".{0}.par.csv".format(it)), index_col=0
)
final_par.columns = final_par.columns.str.lower()

# Parameters of the "hk" group; the 2nd and 3rd "_"-separated tokens of each
# name (minus their first character) are parsed as integer array indices.
gr_par = par.loc[par.pargp == "hk", :].copy()
gr_par.loc[:, "i"] = gr_par.parnme.apply(lambda name: int(name.split('_')[1][1:]))
gr_par.loc[:, "j"] = gr_par.parnme.apply(lambda name: int(name.split('_')[2][1:]))

# Colour limits come from realization "1" of the initial ensemble and are
# shared by both panels.
gr_par.loc[:, "parval1"] = init_par.loc["1", gr_par.parnme]
mn, mx = gr_par.parval1.min(), gr_par.parval1.max()

# Left panel: initial ensemble; right panel: iteration `it`.
ax1, ax2 = plt.subplot(121), plt.subplot(122)
for ax, ensemble in ((ax1, init_par), (ax2, final_par)):
    gr_par.loc[:, "parval1"] = ensemble.loc["1", gr_par.parnme]
    arr = np.zeros((m.nrow, m.ncol))
    arr[gr_par.i, gr_par.j] = gr_par.parval1
    c = ax.imshow(arr, vmin=mn, vmax=mx)
plt.colorbar(c)

## Let's regularize!

In [None]:
# pestpp options for the regularized run: a regularization factor and a large
# initial lambda are added on top of the prior covariance file.
reg_options = {
    "ies_subset_size": 4,
    "ies_lambda_mults": [0.1, 1.0, 10.0],
    "lambda_scale_fac": [0.5, 0.75, 1.0],
    "parcov_filename": "prior.cov",
    "ies_reg_factor": 1.0,
    "ies_initial_lambda": 5000.0,
}
for opt_name, opt_value in reg_options.items():
    pst.pestpp_options[opt_name] = opt_value
#pst.pestpp_options["ies_use_prior_scaling"] = True

# Limit the run to four iterations and write the updated control file.
pst.control_data.noptmax = 4
pst.write(os.path.join(working_dir, pst_name))

In [None]:
# Run pestpp-ies from inside the working directory.  The starting directory is
# remembered and restored in `finally`, so a failed run (or a nested
# `working_dir`) cannot leave the notebook in the wrong directory for the
# cells that follow.
start_dir = os.getcwd()
os.chdir(working_dir)
try:
    pyemu.helpers.start_slaves(".","pestpp-ies",pst_name,num_slaves=15,master_dir=".")
finally:
    os.chdir(start_dir)

### re-post process

In [None]:
# Plot the "mean" column of the phi.actual CSV against the "total_runs" column.
phi_csv = pst.filename.replace(".pst", ".phi.actual.csv")
df = pd.read_csv(phi_csv)
plt.plot(df["total_runs"], df["mean"], marker='.')
plt.show()

In [None]:
# pick which iteration results to visualize
it = 4

# Build both file names directly from the control-file name.  Deriving the
# final name with `init_obs.replace("0", ...)` would rewrite every "0" in the
# path (directory or case name included), not just the iteration token.
init_obs = pst.filename.replace(".pst", ".0.obs.csv")
final_obs = pst.filename.replace(".pst", ".{0}.obs.csv".format(it))

# Read the two observation ensembles and lower-case their column names.
init_df = pd.read_csv(init_obs,index_col=0)
init_df.columns = init_df.columns.map(str.lower)
final_df = pd.read_csv(final_obs,index_col=0)
final_df.columns = final_df.columns.map(str.lower)

# 1-to-1 / residual plots for the initial and final ensembles.
fig = pyemu.plot_utils.ensemble_res_1to1({'0.5':init_df,"b":final_df},pst=pst)

In [None]:
# Histogram the forecast columns of the initial and final observation
# ensembles; the control-file "obsval" of each forecast is passed as the
# deterministic value.
forecast_names = pst.forecast_names
forecast_ensembles = {
    "0.5": init_df.loc[:, forecast_names],
    "b": final_df.loc[:, forecast_names],
}
forecast_obsvals = pst.observation_data.loc[forecast_names, "obsval"].to_dict()
pyemu.plot_utils.ensemble_helper(
    forecast_ensembles,
    deter_vals=forecast_obsvals,
    bins=20,
    deter_range=True,
    std_window=0.5,
)

In [None]:
# Iteration whose parameter ensemble is compared with the initial one.
it = 4
m = flopy.modflow.Modflow.load(
    fs.MODEL_NAM, model_ws=working_dir, load_only=["upw"], check=False
)
par = pst.parameter_data

# Parameter ensembles written next to the control file: iteration 0 and
# iteration `it`; column names are lower-cased after reading.
init_par = pd.read_csv(pst.filename.replace(".pst", ".0.par.csv"), index_col=0)
init_par.columns = init_par.columns.str.lower()
final_par = pd.read_csv(
    pst.filename.replace(".pst", ".{0}.par.csv".format(it)), index_col=0
)
final_par.columns = final_par.columns.str.lower()

# Parameters of the "hk" group; the 2nd and 3rd "_"-separated tokens of each
# name (minus their first character) are parsed as integer array indices.
gr_par = par.loc[par.pargp == "hk", :].copy()
gr_par.loc[:, "i"] = gr_par.parnme.apply(lambda name: int(name.split('_')[1][1:]))
gr_par.loc[:, "j"] = gr_par.parnme.apply(lambda name: int(name.split('_')[2][1:]))

# Colour limits come from realization "1" of the initial ensemble and are
# shared by both panels.
gr_par.loc[:, "parval1"] = init_par.loc["1", gr_par.parnme]
mn, mx = gr_par.parval1.min(), gr_par.parval1.max()

# Left panel: initial ensemble; right panel: iteration `it`.
ax1, ax2 = plt.subplot(121), plt.subplot(122)
for ax, ensemble in ((ax1, init_par), (ax2, final_par)):
    gr_par.loc[:, "parval1"] = ensemble.loc["1", gr_par.parnme]
    arr = np.zeros((m.nrow, m.ncol))
    arr[gr_par.i, gr_par.j] = gr_par.parval1
    c = ax.imshow(arr, vmin=mn, vmax=mx)
plt.colorbar(c)