# A gentle introduction to iterative ensemble smoothers

In [None]:
%matplotlib inline
import os, shutil
import sys
sys.path.append("..")
import numpy as np
from IPython.display import Image
import pandas as pd
import matplotlib.pyplot as plt

import flopy as flopy
import pyemu

## using the K-R model since we can plot parameter space

In [None]:
import freyberg_setup as fs

# Run the K-R setup helper, then pick up the working directory and the
# control-file name that freyberg_setup exposes for that setup.
fs.setup_pest_kr()
working_dir, pst_name = fs.WORKING_DIR_KR, fs.PST_NAME_KR

In [None]:
# Call the freyberg_setup plotting helper for the K-R working directory and
# control file chosen above (see fs.plot_model for what exactly is drawn).
fs.plot_model(working_dir, pst_name)

## unfix ``rch_0`` and reset bounds to be consistent with the response surface plot

In [None]:
# Load the control file for the K-R setup.
pst = pyemu.Pst(os.path.join(working_dir, pst_name))
par = pst.parameter_data  # same DataFrame object; edits below modify pst in place

# "Unfix" rch_0 by switching its transformation to log.
par.loc["rch_0", "partrans"] = "log"

# Reset the hk bounds so they are consistent with the response surface plot.
par.loc["hk", "parlbnd"] = 3
par.loc["hk", "parubnd"] = 12

# Display the non-zero-weighted observations (last expression -> cell output).
pst.observation_data.loc[pst.nnz_obs_names, :]

### run pestpp-ies

The iterative ensemble smoother can be thought of as a blend of deterministic Gauss-Levenberg-Marquardt and Monte Carlo.  In short, we use an ensemble of parameters instead of a single parameter set, and we use an ensemble of observations + realizations of measurement noise as "targets".  Let's see it in action:

In [None]:
# Reset the PEST++ options: keep only the existing forecast list and add the
# pestpp-ies settings used for this demonstration.
ies_options = {
    "forecasts": pst.pestpp_options["forecasts"],
    "ies_subset_size": 3,                 # run only the first 3 realizations to test phis
    "ies_lambda_mults": [0.1, 1.0, 10.0],  # test several lambda values
    "lambda_scale_fac": [0.95, 1.1],       # test some line searches along each lambda
    "ies_num_reals": 20,                  # number of realizations
    "par_sigma_range": 3,                 # parameter bounds represent 3 stds
    # optional: "ies_reg_factor": 2000.0  # regularization portion of composite phi
}
pst.pestpp_options = ies_options

# NOPTMAX = 4 in the control data (presumably the iteration limit -- confirm
# against the PEST++ manual), then write the control file back in place.
pst.control_data.noptmax = 4
pst.write(os.path.join(working_dir, pst_name))

In [None]:
# Run pestpp-ies from inside the working directory with 15 slaves, using the
# working directory itself as the master directory.
# The starting directory is remembered and restored in a `finally` so that a
# failed run does not leave the notebook stranded in working_dir, and so the
# restore does not depend on working_dir being exactly one level deep.
start_dir = os.getcwd()
os.chdir(working_dir)
try:
    pyemu.helpers.start_slaves(".", "pestpp-ies", pst_name, num_slaves=15, master_dir=".")
finally:
    os.chdir(start_dir)

In [None]:
# par = pd.read_csv(pst.filename.replace(".pst",".0.par.csv"),index_col=0)
# par.columns = par.columns.map(str.lower)

## process the results

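First plot the objective function (phi) of each realization against the total number of model runs, then plot the initial and per-iteration parameter values on the response surface.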

In [None]:
# Plot the actual-phi history written by pestpp-ies: the ensemble mean as a
# thick line plus one thin blue line per remaining column.
phi_csv = os.path.join(working_dir, pst_name.replace(".pst", ".phi.actual.csv"))
df = pd.read_csv(phi_csv)

ax = plt.subplot(111)
ax.plot(df.total_runs, df.loc[:, "mean"], lw=1.5)

# Columns from position 7 onward are treated as the individual realizations
# (assumes the leading columns are summary statistics -- TODO confirm the
# offset against the .phi.actual.csv header).
for col in df.columns[7:]:
    ax.plot(df.total_runs, df.loc[:, col], lw=0.25, color="b")

ax.set_xlabel("total runs")
ax.set_ylabel("phi")
plt.show()

In [None]:
# One color per iteration: iteration i is drawn with colors[i]. Only the
# first len(colors) iterations (0-3) are looked for.
colors = ["b", "g", "m", "r"]

import response_surface as rs

# Draw the precomputed hk / rch_0 response surface as the background.
wd = os.path.join("..", "freyberg_k_and_r_response_surface", "freyberg_kr")
fig, ax = rs.plot_response_surface(parnames=["hk", "rch_0"],
                                  pstfile=pst_name.replace(".pst", ".r3.pst"),
                                  WORKING_DIR=wd, alpha=0.35,
                                  label=False, figsize=(10, 10))

dfs = {}        # color -> HK/RCH_0 ensemble of that iteration (used by a later cell)
dflast = None   # ensemble of the previous iteration that was found on disk
for i, c in enumerate(colors):
    f = os.path.join(working_dir, pst_name.replace(".pst", ".{0}.par.csv".format(i)))
    if not os.path.exists(f):
        continue
    print(c, f)
    # Index by the first CSV column (assumed to hold the realization name) so
    # realizations are matched by name rather than by row position.
    df = pd.read_csv(f, index_col=0)
    dfs[c] = df.loc[:, ["HK", "RCH_0"]]
    ax.scatter(df.HK, df.RCH_0, color=c, alpha=1.0, zorder=10)
    # Connect each realization to its previous position. Guarding on dflast
    # (not on i) avoids a NameError / stale value when iteration 0 is missing,
    # and the index intersection skips realizations absent from either file.
    if dflast is not None:
        for real in df.index.intersection(dflast.index):
            x = [df.loc[real, "HK"], dflast.loc[real, "HK"]]
            y = [df.loc[real, "RCH_0"], dflast.loc[real, "RCH_0"]]
            ax.plot(x, y, color=colors[i-1], lw=0.5, alpha=1.0, zorder=10+i)
    dflast = df

# Clip the view to the parameter bounds stored in the control file.
par = pst.parameter_data
ax.set_xlim(par.loc['hk', ['parlbnd', 'parubnd']].values)
ax.set_ylim(par.loc['rch_0', ['parlbnd', 'parubnd']].values)
plt.show()

### sweet as!

So we see each realization starts at a random location in the 2D parameter space (blue).  After the first update, most realizations reproduce the observations much better (green).  The later iterations (magenta and red) just polish...

### just for clarity

In [None]:
# Observed values of the non-zero-weighted observations, keyed by name.
nnz_names = pst.nnz_obs_names
deter_vals = pst.observation_data.loc[nnz_names, "obsval"].to_dict()

# Read the ".base.obs.csv" observation ensemble, lower-case its column names
# so they match the control-file observation names, and keep only the
# non-zero-weighted observations.
base_obs_csv = os.path.join(working_dir, pst_name.replace(".pst", ".base.obs.csv"))
obs_base = (
    pd.read_csv(base_obs_csv)
    .rename(columns=str.lower)
    .loc[:, nnz_names]
)

# Plot the ensemble with the observed values passed as deter_vals.
pyemu.plot_utils.ensemble_helper(obs_base, deter_vals=deter_vals)

In [None]:

# dfs maps each iteration's plot color to that iteration's HK / RCH_0 ensemble
# (filled in the response-surface cell); hand the whole dict to the helper.
pyemu.plot_utils.ensemble_helper(
    dfs,
    std_window=None,
    sync_bins=False,
)