# Run PESTPP-IES

In [None]:
%matplotlib inline
import os
import shutil
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
plt.rcParams['font.size']=12
import flopy
import pyemu
%matplotlib inline

## SUPER IMPORTANT: SET HOW MANY PARALLEL WORKERS TO USE

In [None]:
# Number of parallel workers; passed as num_slaves to pyemu.os_utils.start_slaves
# in every PESTPP-IES run in this notebook.
num_workers = 20

In [None]:
# Template directory: the control file is read from, and all run inputs are written to, this folder.
t_d = "template"
# Passed as master_dir to start_slaves; the PESTPP-IES result csv files are read back from here.
m_d = "master_ies"

In [None]:
# Load the existing control file from the template directory.
base_pst_file = os.path.join(t_d, "freyberg.pst")
pst = pyemu.Pst(base_pst_file)
# Summarize the parameterization as the cell output.
# NOTE(review): filename="none" presumably suppresses writing the table to disk - confirm against pyemu.
pst.write_par_summary_table(filename="none")

### Run PESTPP-IES in original mode and post process

In [None]:
# PESTPP-IES settings for the baseline (non-localized) run.
# Left disabled on purpose: resetting pst.pestpp_options to {} beforehand, and
# setting "ies_bad_phi_sigma" to 1.75.
pst.pestpp_options.update(
    {
        "ies_num_reals": 75,  # enough?
        "ies_par_en": "prior.jcb",
        "overdue_giveup_fac": 10.0,
    }
)
# noptmax is reused later to locate the final-iteration ensemble files.
pst.control_data.noptmax = 3



In [None]:
# Write the modified control file into the template directory under a new name (freyberg_ies.pst).
pst.write(os.path.join(t_d,"freyberg_ies.pst"))

In [None]:
# Run pestpp-ies on freyberg_ies.pst with num_workers parallel workers, using t_d as the
# source directory and m_d as the master directory.
# NOTE(review): newer pyemu releases appear to name this helper start_workers - confirm against the installed version.
pyemu.os_utils.start_slaves(t_d,"pestpp-ies","freyberg_ies.pst",num_slaves=num_workers,master_dir=m_d)

A cheap phi progress plot

In [None]:
# Read the phi record written by PESTPP-IES into the master directory and index it by total model runs.
phi = pd.read_csv(os.path.join(m_d,"freyberg_ies.phi.actual.csv"),index_col=0)
phi.index = phi.total_runs
# Columns from position 6 onward are drawn as one thin line each
# (presumably one per realization - confirm against the csv layout).
phi.iloc[:,6:].apply(np.log10).plot(legend=False,lw=0.5,color='k')
# Raw strings keep the mathtext backslash intact; the label was previously the malformed 'log \$Phi$'.
plt.ylabel(r'log $\Phi$')
plt.figure()
# Histogram of the last row, i.e. the final recorded iteration.
phi.iloc[-1,6:].hist()
plt.title(r'Final $\Phi$ Distribution');

Plot forecast prior and posterior histograms with "truth" (red line)

In [None]:
# Observation ensembles: iteration 0 is used as the prior, iteration noptmax as the posterior.
final_iter = pst.control_data.noptmax
oe_pr = pd.read_csv(os.path.join(m_d, "freyberg_ies.0.obs.csv"), index_col=0)
oe_pt = pd.read_csv(os.path.join(m_d, "freyberg_ies.{0}.obs.csv".format(final_iter)), index_col=0)
obs = pst.observation_data
fnames = pst.pestpp_options["forecasts"].split(",")
for fname in fnames:
    # The control-file obsval of the forecast is drawn as the red "truth" line.
    truth = obs.loc[fname, "obsval"]
    ax = plt.subplot(111)
    for ensemble, shade, tag in ((oe_pr, "0.5", "prior"), (oe_pt, "b", "posterior")):
        ensemble.loc[:, fname].hist(ax=ax, color=shade, alpha=0.5, label=tag)
    ax.plot([truth, truth], ax.get_ylim(), "r", label="truth")
    ax.set_title(fname)
    ax.legend(loc="upper right")
    plt.show()

Plot parameter histograms by group

In [None]:
# Parameter ensembles at iteration 0 and at the final iteration (noptmax).
last_iter = pst.control_data.noptmax
pe_pr = pd.read_csv(os.path.join(m_d, "freyberg_ies.0.par.csv"), index_col=0)
pe_pt = pd.read_csv(os.path.join(m_d, "freyberg_ies.{0}.par.csv".format(last_iter)), index_col=0)
par = pst.parameter_data
# Parameter names keyed by parameter group; passed as plot_cols.
pdict = par.groupby("pargp").groups
# Dictionary keys are the matplotlib colors: grey for iteration 0, blue for the final iteration.
ensembles_by_color = {"0.5": pe_pr, "b": pe_pt}
pyemu.plot_utils.ensemble_helper(ensembles_by_color, plot_cols=pdict)

In [None]:
# pyemu.plot_utils.ensemble_change_summary(pe_pr,pe_pt,pst=pst,bins=20)
# par = pst.parameter_data
# li = par.partrans=="log"
# pe_pr.loc[:,li] = pe_pr.loc[:,li].apply(np.log10)
# pe_pr.shape

### PESTPP-IES with simple temporal localization (and common sense)

Now let's add some localization.  The obvious stuff is temporal - scenario parameters can't influence historic observations (and the inverse is true) so let's tell PESTPP-IES about this.  Also, should porosity be adjusted at all given the observations we have???

In [None]:
par = pst.parameter_data
# Candidate groups to cut off from the historic observations:
# every group whose name contains "pr" (the porosity groups) ...
candidate_groups = [grp for grp in pst.par_groups if "pr" in grp]
# ... plus the parameter groups for future recharge.
candidate_groups += ["gr_rech3", "pp_rech1", "cn_rech5"]
# Keep only candidates that are adjustable groups in this control file.
adjustable_groups = pst.adj_par_groups
dont_groups = [grp for grp in candidate_groups if grp in adjustable_groups]
# Future wel flux is appended after the filter, so it is always included.
dont_groups.append("welflux_001")
dont_groups

In [None]:
# Names of all parameters whose group is listed in dont_groups ...
in_dont_group = par.pargp.isin(dont_groups)
dont_pars = par.loc[in_dont_group, "parnme"].tolist()
# ... plus the future wel flux parameter, added by name.
dont_pars.append("welflux_001")

In [None]:
# Localizer columns: the adjustable parameter groups, with "welflux" replaced by
# two separate entries.
loc_cols = pst.adj_par_groups
loc_cols.remove("welflux")
loc_cols += ["welflux_000", "welflux_001"]  # for two kpers
# Rows are the non-zero-weighted observations; start from all ones ...
loc = pyemu.Matrix.from_names(pst.nnz_obs_names, loc_cols).to_dataframe()
loc.loc[:, :] = 1.0
# ... then zero every column listed in dont_groups.
loc.loc[:, dont_groups] = 0.0
loc_matrix = pyemu.Matrix.from_dataframe(loc)
loc_matrix.to_ascii(os.path.join(t_d, "loc.mat"))

In [None]:
# Point PESTPP-IES at the localizer file (loc.mat) written to the template directory.
pst.pestpp_options["ies_localizer"] = "loc.mat"
# Rewrite the control file so the new option is on disk before the run starts.
pst.write(os.path.join(t_d,"freyberg_ies.pst"))
# Rerun with the same worker count and the same master directory (m_d) as the earlier run.
pyemu.os_utils.start_slaves(t_d,"pestpp-ies","freyberg_ies.pst",num_slaves=num_workers,master_dir=m_d)

In [None]:
# Read the phi record of the localized run and index it by total model runs.
phi = pd.read_csv(os.path.join(m_d,"freyberg_ies.phi.actual.csv"),index_col=0)
phi.index = phi.total_runs
# Columns from position 6 onward are drawn as one thin line each
# (presumably one per realization - confirm against the csv layout).
phi.iloc[:,6:].apply(np.log10).plot(legend=False,lw=0.5,color='k')
# Raw strings: '\P' is not a valid escape sequence in a normal string literal.
plt.ylabel(r'log $\Phi$')
plt.show()
# Histogram of the last row, i.e. the final recorded iteration.
phi.iloc[-1,6:].hist()
plt.title(r'Final $\Phi$ Distribution');

In [None]:
# Observation ensembles of the localized run: iteration 0 and iteration noptmax.
final_iter = pst.control_data.noptmax
oe_pr = pd.read_csv(os.path.join(m_d, "freyberg_ies.0.obs.csv"), index_col=0)
oe_pt = pd.read_csv(os.path.join(m_d, "freyberg_ies.{0}.obs.csv".format(final_iter)), index_col=0)
obs = pst.observation_data
fnames = pst.pestpp_options["forecasts"].split(",")
for fname in fnames:
    # The control-file obsval of the forecast is drawn as the red "truth" line.
    truth = obs.loc[fname, "obsval"]
    ax = plt.subplot(111)
    for ensemble, shade, tag in ((oe_pr, "0.5", "prior"), (oe_pt, "b", "posterior")):
        ensemble.loc[:, fname].hist(ax=ax, color=shade, alpha=0.5, label=tag)
    ax.plot([truth, truth], ax.get_ylim(), "r", label="truth")
    ax.set_title(fname)
    ax.legend(loc="upper right")
    plt.show()

In [None]:
# Parameter ensembles of the localized run at iteration 0 and at noptmax.
last_iter = pst.control_data.noptmax
pe_pr = pd.read_csv(os.path.join(m_d, "freyberg_ies.0.par.csv"), index_col=0)
pe_pt = pd.read_csv(os.path.join(m_d, "freyberg_ies.{0}.par.csv".format(last_iter)), index_col=0)
# Not done here: aligning pe_pr.index to pe_pt.index, or re-reading par from pst
# (the par frame from the earlier cell is reused).
print(pe_pr.shape,pe_pt.shape)
pdict = par.groupby("pargp").groups
ensembles_by_color = {"0.5": pe_pr, "b": pe_pt}
pyemu.plot_utils.ensemble_helper(ensembles_by_color, plot_cols=pdict)
#pyemu.plot_utils.ensemble_change_summary(pe_pr,pe_pt,pst=pst,bins=20)

### PESTPP-IES with par-by-par distance based localization


In [None]:
# Load the MODFLOW model from the template directory. t_d is used instead of a second
# hard-coded "template" string so the model and the control file always come from the same folder.
m = flopy.modflow.Modflow.load("freyberg.nam",model_ws=t_d)

In [None]:
par = pst.parameter_data

# Cell-centre coordinate arrays of the model grid, indexed [i, j].
xcenters, ycenters = m.sr.xcentergrid, m.sr.ycentergrid

# Grid-scale parameters: groups whose name contains "gr" but not "prsity".
is_grid_par = par.pargp.apply(lambda x: "gr" in x and "prsity" not in x)
gr_par = par.loc[is_grid_par,:].copy()
print(gr_par.pargp.unique())
# The last six characters of each parameter name are parsed as two 3-digit indices (i then j).
gr_par.loc[:,"i"] = gr_par.parnme.apply(lambda x: int(x[-6:-3]))
gr_par.loc[:,"j"] = gr_par.parnme.apply(lambda x: int(x[-3:]))
# Look up all cell centres in one indexing operation instead of a row-by-row apply.
gr_i, gr_j = gr_par.i.values.astype(int), gr_par.j.values.astype(int)
gr_par.loc[:,"x"] = xcenters[gr_i, gr_j]
gr_par.loc[:,"y"] = ycenters[gr_i, gr_j]

obs = pst.observation_data

# Observations in the "calhead" group; fields 2 and 3 of the underscore-split name are parsed as i and j.
nobs = obs.loc[obs.obgnme=="calhead",:].copy()
nobs.loc[:,"i"] = nobs.obsnme.apply(lambda x: int(x.split('_')[2]))
nobs.loc[:,"j"] = nobs.obsnme.apply(lambda x: int(x.split('_')[3]))
ob_i, ob_j = nobs.i.values.astype(int), nobs.j.values.astype(int)
nobs.loc[:,"x"] = xcenters[ob_i, ob_j]
nobs.loc[:,"y"] = ycenters[ob_i, ob_j]

In [None]:
# Collect every template file in t_d whose name contains "pp" and ends with ".tpl",
# converting each one to a dataframe as it is found.
pp_tpl = []
pp_tpl_dfs = []
for tpl_name in os.listdir(t_d):
    if "pp" not in tpl_name or not tpl_name.endswith(".tpl"):
        continue
    pp_tpl.append(tpl_name)
    pp_tpl_dfs.append(pyemu.pp_utils.pp_tpl_to_dataframe(os.path.join(t_d, tpl_name)))
# Stack all pilot point tables and index the result by parameter name.
# (Alternative not used: selecting groups containing "pp" straight from the parameter data.)
pp_par = pd.concat(pp_tpl_dfs)
pp_par.index = pp_par.parnme
pp_par.head()

## We will set up localization such that parameters are only informed by observations within a user-specified distance (we can use 5000 meters)

In [None]:
# Par-by-par localizer: rows are non-zero-weighted observations, columns are adjustable
# parameters. Start from all ones, then zero the entries beyond the cutoff distance.
loc = pyemu.Matrix.from_names(pst.nnz_obs_names,pst.adj_par_names).to_dataframe()
loc.loc[:,:] = 1.0
loc_dist = 5000.0
sadj = set(pst.adj_par_names)
# The printed fractions are the parameters beyond loc_dist, i.e. the ones being zeroed.
print('obsname            fraction grid zeroed   fraction pilot points zeroed')
for oname in obs.loc[obs.obgnme=="calhead","obsnme"]:
    # An observation without a row in loc has nothing to localize; assigning to a
    # missing label would add a new, partly empty row to the localizer.
    if oname not in loc.index:
        continue
    xx,yy = nobs.loc[oname,['x','y']]

    # localization for grid-based parameters
    gr_par.loc[:,"dist"] = np.sqrt(((gr_par.x - xx)**2 + (gr_par.y - yy)**2).astype(float))
    gr_too_far = gr_par.loc[gr_par.dist > loc_dist,"parnme"]
    gr_too_far = gr_too_far.loc[gr_too_far.isin(sadj)]
    # Zero only this observation's row: using ":" here would wipe the columns for every
    # observation and accumulate over the loop.
    loc.loc[oname,gr_too_far] = 0.0

    # localization for pilot point parameters
    pp_par.loc[:,"dist"] = np.sqrt(((pp_par.x - xx)**2 + (pp_par.y - yy)**2).astype(float))
    pp_too_far = pp_par.loc[pp_par.dist > loc_dist,"parnme"]
    pp_too_far = pp_too_far.loc[pp_too_far.isin(sadj)]
    loc.loc[oname,pp_too_far] = 0.0
    print(oname,gr_too_far.shape[0]/gr_par.shape[0],pp_too_far.shape[0]/pp_par.shape[0])

# Parameters collected in dont_pars are cut off from every observation.
loc.loc[:,dont_pars] = 0.0
# Not applied: also zeroing all parameters whose name contains "ss" or "sy".
print('\n\nTotal number of parameters still informed by each observation')
loc.sum(axis=1)

In [None]:
# Save the par-by-par localizer with Matrix.to_coo as loc.jcb in the template directory.
pyemu.Matrix.from_dataframe(loc).to_coo(os.path.join(t_d,"loc.jcb"))
# Switch the localizer option from the earlier loc.mat to the new file and rewrite the control file.
pst.pestpp_options["ies_localizer"] = "loc.jcb"
pst.write(os.path.join(t_d,"freyberg_ies.pst"))

### Now let's run it

In [None]:
# Run pestpp-ies again on the rewritten freyberg_ies.pst, with the same worker count and
# master directory (m_d) as the earlier runs.
pyemu.os_utils.start_slaves(t_d,"pestpp-ies","freyberg_ies.pst",num_slaves=num_workers,master_dir=m_d)

In [None]:
# Read the phi record of the distance-localized run and index it by total model runs.
phi = pd.read_csv(os.path.join(m_d,"freyberg_ies.phi.actual.csv"),index_col=0)
phi.index = phi.total_runs
# Columns from position 6 onward are drawn as one thin line each
# (presumably one per realization - confirm against the csv layout).
phi.iloc[:,6:].apply(np.log10).plot(legend=False,lw=0.5,color='k')
# Raw strings: '\P' is not a valid escape sequence in a normal string literal.
plt.ylabel(r'log $\Phi$')
plt.show()
# Histogram of the last row, i.e. the final recorded iteration.
phi.iloc[-1,6:].hist()
plt.title(r'Final $\Phi$ Distribution');

In [None]:
# Observation ensembles of the distance-localized run: iteration 0 and iteration noptmax.
final_iter = pst.control_data.noptmax
oe_pr = pd.read_csv(os.path.join(m_d, "freyberg_ies.0.obs.csv"), index_col=0)
oe_pt = pd.read_csv(os.path.join(m_d, "freyberg_ies.{0}.obs.csv".format(final_iter)), index_col=0)
obs = pst.observation_data
fnames = pst.pestpp_options["forecasts"].split(",")
for fname in fnames:
    # The control-file obsval of the forecast is drawn as the red "truth" line.
    truth = obs.loc[fname, "obsval"]
    ax = plt.subplot(111)
    for ensemble, shade, tag in ((oe_pr, "0.5", "prior"), (oe_pt, "b", "posterior")):
        ensemble.loc[:, fname].hist(ax=ax, color=shade, alpha=0.5, label=tag)
    ax.plot([truth, truth], ax.get_ylim(), "r", label="truth")
    ax.set_title(fname)
    ax.legend(loc="upper right")
    plt.show()

In [None]:
# Parameter ensembles of the distance-localized run at iteration 0 and at noptmax.
last_iter = pst.control_data.noptmax
pe_pr = pd.read_csv(os.path.join(m_d, "freyberg_ies.0.par.csv"), index_col=0)
pe_pt = pd.read_csv(os.path.join(m_d, "freyberg_ies.{0}.par.csv".format(last_iter)), index_col=0)
par = pst.parameter_data
# Parameter names keyed by parameter group; passed as plot_cols.
pdict = par.groupby("pargp").groups
ensembles_by_color = {"0.5": pe_pr, "b": pe_pt}
pyemu.plot_utils.ensemble_helper(ensembles_by_color, plot_cols=pdict)
#pyemu.plot_utils.ensemble_change_summary(pe_pr,pe_pt,pst=pst,bins=20)

## DIY: ensemble size limbo: how low can you go?