# Run and process the prior monte carlo and pick a "truth" realization

A great advantage of exploring a synthetic model is that we can enforce a "truth" and then evaluate how well our various attempts to estimate it perform. One way to do this is to run a Monte Carlo ensemble of multiple parameter realizations and then choose one of them to represent the "truth". That will be accomplished in this notebook.

In [None]:
import os
import shutil
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
plt.rcParams['font.size']=12
import flopy
import pyemu
%matplotlib inline

## SUPER IMPORTANT: SET HOW MANY PARALLEL WORKERS TO USE

In [None]:
# Number of local workers passed to pyemu.os_utils.start_workers for the parallel
# prior sweep further down; adjust it to suit the machine running this notebook.
num_workers = 20

### set the `t_d` or "template directory" variable to point at the template folder and read in the PEST control file

In [None]:
# template directory that holds the PEST interface files
t_d = "template_history"

# read the control file that lives inside the template directory
pst_path = os.path.join(t_d, "freyberg.pst")
pst = pyemu.Pst(pst_path)

Load the previously generated parameter ensemble and inspect (again!)...

In [None]:
# load the binary prior parameter ensemble stored in the template directory
prior_jcb = os.path.join(t_d, "prior.jcb")
pe = pyemu.ParameterEnsemble.from_binary(pst=pst, filename=prior_jcb)

# save it as "sweep_in.csv", the file the parameter sweep reads later on
sweep_in_csv = os.path.join(t_d, "sweep_in.csv")
pe.to_csv(sweep_in_csv)

# (realizations, parameters)
pe.shape

In [None]:
# display the ensemble column for a single parameter, "hk031" (one value per realization)
pe.loc[:,"hk031"]

In [None]:
# histogram of the "hk031" values across all realizations in the ensemble
hk031_vals = pe.loc[:, "hk031"]
hk031_vals.hist(bins=50, facecolor="0.5")

look! hk is log-normal-ish

Let's run the first realization through the PEST interface as a test:

In [None]:
# overwrite the control-file parameter values with the first realization
# (row 0) of the parameter ensemble
first_real = pe.iloc[0, :]
par_data = pst.parameter_data
par_data.loc[pe.columns, "parval1"] = first_real

# settings used for this single test run
pst.control_data.noptmax = 0
pst.pestpp_options["overdue_giveup_fac"] = 1.5

# write the modified control file next to the model files and run it there
test_pst_path = os.path.join(t_d, "test.pst")
pst.write(test_pst_path)
pyemu.os_utils.run("pestpp-ies test.pst", cwd=t_d)

# read the residuals file written by the run and show the non-zero-weighted observations
rei_path = os.path.join(t_d, "test.base.rei")
res = pyemu.pst_utils.read_resfile(rei_path)
res.loc[pst.nnz_obs_names, :]

In [None]:
# Plot observed vs simulated values of the test run for every non-zero-weighted
# observation group, one figure per group.
nz_obs = pst.observation_data.loc[pst.nnz_obs_names,:].copy()
# the trailing "_"-delimited token of each observation name is parsed as a date
nz_obs.loc[:,"datetime"] = pd.to_datetime(nz_obs.obsnme.apply(lambda x: x.split("_")[-1]))
for nz_group in pst.nnz_obs_groups:
    nz_obs_group = nz_obs.loc[nz_obs.obgnme==nz_group,:]
    fig,ax = plt.subplots(1,1,figsize=(10,2))
    ax.plot(nz_obs_group.datetime,nz_obs_group.obsval,"r-",label="observed")
    # Use `res` (read from test.base.rei right after the test run) instead of
    # `pst.res`: the latter is resolved by pyemu from the control file's own
    # name, so it is not guaranteed to be the residuals of the run just made.
    ax.plot(nz_obs_group.datetime,res.loc[nz_obs_group.obsnme,"modelled"],color="0.5",label="simulated")
    ax.set_title(nz_group)
    ax.legend()
plt.show()

### run the prior ensemble in parallel locally
This takes advantage of the program `pestpp-swp` which runs a parameter sweep through a set of parameters. By default, `pestpp-swp` reads in the ensemble from a file called `sweep_in.csv` which in this case we made just above.

In [None]:
# master directory for the sweep
m_d = "master_prior_sweep"

# re-read the control file from disk, so the edits made for the test run are discarded
pst = pyemu.Pst(os.path.join(t_d, "freyberg.pst"))
pst.pestpp_options["overdue_giveup_fac"] = 1.5

sweep_pst_path = os.path.join(t_d, "freyberg_sweep.pst")
pst.write(sweep_pst_path)

# start the local workers running pestpp-swp on the sweep control file
pyemu.os_utils.start_workers(
    t_d,
    "pestpp-swp",
    "freyberg_sweep.pst",
    num_workers=num_workers,
    worker_root=".",
    master_dir=m_d,
)

### Load the output ensemble and plot a few things



In [None]:
# sweep results from the master directory, indexed by the first column
sweep_out_csv = os.path.join(m_d, "sweep_out.csv")
obs_df = pd.read_csv(sweep_out_csv, index_col=0)
print(f"number of realization in the ensemble before dropping: {obs_df.shape[0]}")

### drop any failed runs 

In [None]:
# keep only the rows whose failed_flag is 0
ran_ok = obs_df.failed_flag == 0
obs_df = obs_df.loc[ran_ok, :]
print(f"number of realization in the ensemble **after** dropping: {obs_df.shape[0]}")

In [None]:
# show every column of the first row left after dropping failed runs
obs_df.iloc[0,:]

### confirm which quantities were identified as forecasts

In [None]:
# forecast names as recorded in the control file; reused by the histogram cell below
fnames = pst.forecast_names
fnames

### now we can plot the distributions of each forecast

In [None]:
# one histogram per forecast, each on its own figure
for forecast in fnames:
    fig, ax = plt.subplots()
    forecast_vals = obs_df.loc[:, forecast]
    forecast_vals.plot(kind="hist", ax=ax, color="0.5", alpha=0.5)
    ax.set_title(forecast)

### Many modeling analyses could stop right here to avoid the ill-effects of history matching...

### Let's also plot observed vs (stochastic) simulated:

In [None]:
# Plot every surviving prior realization (thin grey lines) against the observed
# values (red line) for each non-zero-weighted observation group.
nz_obs = pst.observation_data.loc[pst.nnz_obs_names,:].copy()
# the trailing "_"-delimited token of each observation name is parsed as a date
nz_obs.loc[:,"datetime"] = pd.to_datetime(nz_obs.obsnme.apply(lambda x: x.split("_")[-1]))
# not used in this cell; kept in case other cells rely on the name
pst_base = pyemu.Pst(os.path.join(t_d,"freyberg.pst"))
for nz_group in pst.nnz_obs_groups:
    nz_obs_group = nz_obs.loc[nz_obs.obgnme==nz_group,:]
    fig,ax = plt.subplots(1,1,figsize=(10,2))

    # plain loop rather than a list comprehension used only for its side effects
    for r in obs_df.index:
        ax.plot(nz_obs_group.datetime,obs_df.loc[r,nz_obs_group.obsnme],color="0.5",lw=0.1)
    ax.plot(nz_obs_group.datetime,nz_obs_group.obsval,"r-")

    ax.set_title(nz_group)
    # Pad the observed range by 10% of each bound's magnitude. For positive
    # values this equals min*0.9 / max*1.1; for negative values it still
    # widens the window instead of shrinking it and clipping the data.
    obs_min = nz_obs_group.obsval.min()
    obs_max = nz_obs_group.obsval.max()
    vmin = obs_min - 0.1 * abs(obs_min)
    vmax = obs_max + 0.1 * abs(obs_max)
    ax.set_ylim(vmin,vmax)
plt.show()

In [None]:
# pyemu plotting helper, called with the sweep outputs and the control file
# NOTE(review): obs_df still carries non-observation columns such as failed_flag —
# presumably the helper selects only the observation columns it needs; confirm
pyemu.plot.plot_utils.ensemble_res_1to1(obs_df,pst)

### Everything ok with these plots?  Any comments?