In [None]:
%matplotlib inline
from __future__ import print_function
import os
import sys
sys.path.append("..")
import shutil
import numpy as np
import pandas as pd
from matplotlib.patches import Rectangle as rect
import matplotlib.pyplot as plt
import flopy
import pyemu

## Monte Carlo with Freyberg Pilot Points Model

### This is essentially the same work flow as ``freyberg_kr`` and ``freyberg_zone`` monte carlo notebooks - again, the power of scripting!


## Using `pyemu`

In [None]:
# Run the pilot-point setup from the freyberg_setup helper module, then pick up
# the working directory and control file name that module defines.
# NOTE(review): setup_pest_pp() presumably (re)builds the PEST pilot-point
# files inside fs.WORKING_DIR_PP - confirm in freyberg_setup.
import freyberg_setup as fs
fs.setup_pest_pp()
working_dir = fs.WORKING_DIR_PP
pst_name = fs.PST_NAME_PP


In [None]:
# Plot the model with the freyberg_setup helper, given the working directory
# and the control file name.
fs.plot_model(working_dir, pst_name)

In [None]:
# Load the PEST control file that lives in the working directory.
pst = pyemu.Pst(os.path.join(working_dir,pst_name))

In [None]:
# Monte Carlo object built from the control file alone: no parcov argument is
# passed here.
mc = pyemu.MonteCarlo(pst=pst,verbose=False)

### Important: since we didn't pass a ``parcov`` arg to ``MonteCarlo``, the prior parameter covariance matrix was constructed from the parameter bounds (it does NOT include spatial correlation information for the pilot points - remember all the geostatistics?)

In [None]:
# Show the prior parameter covariance matrix held by mc as an image.
plt.imshow(mc.parcov.as_2d)

### ```draw```

In [None]:
# Draw 500 parameter realizations, then report the ensemble's shape and the
# first few column-wise means.
mc.draw(num_reals=500)
print(mc.parensemble.shape)
print(mc.parensemble.mean().head())

## plotting

### Since ```ParameterEnsemble``` is derived from ```pandas.DataFrame```, it has all the cool methods and attributes we all love.  Let's compare the results of drawing from a uniform vs a gaussian distribution.  This may take some time.

In [None]:
# List the ensemble's column labels (one of them is picked for plotting below).
mc.parensemble.columns

In [None]:
# Histogram of the drawn values for a single parameter column.
partoplot = 'hk10'
ax = plt.subplot(111)
mc.parensemble.loc[:, partoplot].plot.hist(bins=50, alpha=0.5, ax=ax)

In [None]:
def plot_first_10_reals(paren):
    """Plot the log10 arrays built from the first 10 realizations in ``paren``.

    For each of the first 10 index entries of ``paren``, the values of the
    parameters named in ``hkpp.dat.tpl`` are copied into the pilot point
    dataframe, turned into an array file by ``fac2real`` using the
    ``hkpp.dat.fac`` factors file, loaded and log10-transformed. The arrays
    are then drawn on a 2 x 5 grid of subplots with the ``x``/``y`` locations
    from the pilot point dataframe overlaid as black dots.

    All panels share a single color scale spanning the minimum and maximum
    over every array, so the realizations can be compared with each other.

    Parameters
    ----------
    paren : DataFrame-like parameter ensemble
        One row per realization; its columns must include every ``parnme``
        found in ``hkpp.dat.tpl``.

    Notes
    -----
    Relies on the notebook-level names ``working_dir`` and ``fs``.
    """
    pp_df = pyemu.gw_utils.pp_tpl_to_dataframe(os.path.join(working_dir,"hkpp.dat.tpl"))
    # remember the original index so it can be restored after each assignment
    pp_df.loc[:,"iidx"] = pp_df.index
    fac_file = os.path.join(working_dir,"hkpp.dat.fac")

    arrs = []
    for real in paren.index.values[:10]:
        # index by parameter name so the assignment below aligns on parnme
        pp_df.index = pp_df.parnme
        pp_df.loc[:,"parval1"] = paren.loc[real,pp_df.parnme].T
        pp_df.index = pp_df.iidx
        # fac2real's return value is handed to np.loadtxt to read the array
        arr_file = pyemu.utils.gw_utils.fac2real(pp_df,fac_file)
        arrs.append(np.log10(np.loadtxt(arr_file)))

    # Common color limits across all realizations; None (autoscale) if there
    # is nothing to plot.
    if arrs:
        vmin = min(arr.min() for arr in arrs)
        vmax = max(arr.max() for arr in arrs)
    else:
        vmin = vmax = None

    plt.figure(figsize=(10,10))
    m = flopy.modflow.Modflow.load(fs.MODEL_NAM,model_ws=working_dir)
    for i,arr in enumerate(arrs):
        ax = plt.subplot(2,5,i+1,aspect="equal")
        # push the array into layer 0 of the model's hk so flopy can plot it
        m.upw.hk[0] = arr
        # NOTE(review): relies on flopy forwarding vmin/vmax to the underlying
        # matplotlib call, as it already does for alpha - confirm against the
        # installed flopy version.
        m.upw.hk[0].plot(axes=[ax],alpha=0.5,vmin=vmin,vmax=vmax)
        ax.scatter(pp_df.x,pp_df.y,marker='.',color='k',s=4)
        ax.set_yticklabels([])
        ax.set_xticklabels([])
    plt.show()
    
    

In [None]:
# Plot the first 10 realizations of the ensemble drawn from mc (built without
# a parcov argument).
plot_first_10_reals(mc.parensemble)

### Do these look "right" (from a geologic standpoint)? Lots of "random" variation (pilot points spatially near each other can have very different values)...not much structure...why?

### Let's use a full covariance matrix and see how that looks

In [None]:
# A single exponential variogram wrapped in a GeoStruct.
v = pyemu.geostats.ExpVario(contribution=1.0,a=2500,anisotropy=1.0,bearing=0.0)
gs = pyemu.utils.geostats.GeoStruct(variograms=[v])
# The prior builder is given the structure keyed to the pilot point template
# file.
# NOTE(review): presumably this applies gs to the parameters listed in that
# template - confirm against the pyemu docs.
pp_tpl = os.path.join(working_dir,"hkpp.dat.tpl")
cov = pyemu.helpers.geostatistical_prior_builder(pst=mc.pst,struct_dict={gs:pp_tpl})
# Show the resulting matrix as an image and peek at its first rows.
plt.imshow(cov.x,interpolation="nearest")
cov.to_dataframe().head()

In [None]:
# Second Monte Carlo object, this time with cov passed explicitly as parcov.
mc_full = pyemu.MonteCarlo(pst=pst,parcov=cov)

In [None]:
# Draw 500 realizations from mc_full.
mc_full.draw(500)

In [None]:
# Plot the first 10 realizations of the ensemble drawn from mc_full.
plot_first_10_reals(mc_full.parensemble)

### Much better! now let's run the parameter ensemble through the model using sweep

In [None]:
# Write the mc_full ensemble to sweep_in.csv in the working directory.
mc_full.parensemble.to_csv(os.path.join(working_dir,"sweep_in.csv"))

### Remember, this is going to give your machine a workout....watch what is going on in the terminal and wait for the "*" to become a number

In [None]:
# Run the sweep from inside working_dir. The starting directory is saved and
# restored in a finally block so that a failed run does not leave the kernel
# stranded in working_dir (which would break every later
# os.path.join(working_dir, ...) and any re-run of this cell). Restoring the
# saved path also avoids assuming working_dir is exactly one level deep.
start_dir = os.getcwd()
os.chdir(working_dir)
try:
    pyemu.helpers.start_slaves('.',"sweep",pst_name,num_slaves=15,master_dir='.')
finally:
    os.chdir(start_dir)

In [None]:
# Load sweep_out.csv from the working directory (first CSV column becomes the
# index) with every column name lower-cased.
df_out = pd.read_csv(
    os.path.join(working_dir, "sweep_out.csv"),
    index_col=0,
).rename(columns=str.lower)

In [None]:

# Drop any failed runs: keep only the rows whose failed_flag is 0.
df_out = df_out[df_out["failed_flag"] == 0]
df_out.head()

### So now we need to specify our "good enough" threshold.  Look back at your ``freyberg_pilot_points_2\freyberg_pilot_points_setup`` notebook and use a value slightly larger than your ``phimlim`` value...makes sense right?  We should expect "good enough" realizations to at least be in the ballpark of the target objective function....

In [None]:
# Index labels of the runs whose phi falls below the acceptance threshold.
acceptable_phi = 550.0
good_enough = df_out[df_out["phi"] < acceptable_phi].index.values
print(good_enough)

# Oh crap! what happened?

In [None]:
# Smallest phi value in df_out.
df_out.phi.min()

In [None]:
# Histogram of the phi values in df_out.
df_out.phi.hist()

### Here is a major problem with "rejection sampling" in high dimensions: you have to run the model many many many many many times to find even a few realizations that fit the data acceptably well.  

### With all these parameters, there are so many possible combinations that very few realizations fit the data very well...we will address this problem later, so for now, let's bump our "good enough" threshold to get some realizations to plot:

In [None]:
# Relaxed threshold: index labels of the runs whose phi is below 1500.
acceptable_phi = 1500.0
good_enough = df_out[df_out["phi"] < acceptable_phi].index.values
print(good_enough)

In [None]:
# For each forecast, overlay the histogram of every run in df_out (grey) with
# the histogram of the "good enough" subset (blue).
for forecast in mc.pst.forecast_names:
    # density=True puts both histograms on a comparable, normalized scale even
    # though they hold different numbers of runs. It replaces the `normed`
    # keyword, which was deprecated and later removed from matplotlib's hist.
    ax = df_out.loc[:,forecast].hist(alpha=0.5,color='0.5',density=True)
    ax.set_yticklabels([])
    df_out.loc[good_enough,forecast].hist(ax=ax,alpha=0.5,color='b',density=True)
    ax.set_title(forecast)
    plt.show()
    

### Interesting results!!! We see that for the river flux and travel time forecasts, the posterior uncertainty is very similar to the prior...that means "calibration" hasn't helped us learn about these forecasts...

### As before, let's now use our knowledge of the "truth" to see how we are doing...

In [None]:
# Same overlay as before (all runs in grey, "good enough" subset in blue), plus
# a dashed vertical line at the forecast's obsval from the control file.
for forecast in mc.pst.forecast_names:
    # density=True replaces the `normed` keyword, which was deprecated and
    # later removed from matplotlib's hist.
    ax = df_out.loc[:,forecast].hist(alpha=0.5,color='0.5',density=True)
    ax.set_yticklabels([])
    df_out.loc[good_enough,forecast].hist(ax=ax,alpha=0.5,color='b',density=True)
    # capture the y-limits first so the line spans exactly the plotted range
    ylim = ax.get_ylim()
    # named truth_val (not `v`) so the variogram object `v` is not overwritten
    truth_val = mc.pst.observation_data.loc[forecast,"obsval"]
    ax.plot([truth_val,truth_val],ylim,"k--",lw=2.0)
    ax.set_title(forecast)
    plt.show()

### It's hard to say how the posterior compares to the prior with so few "good enough" realizations.  To fix this problem, we have two choices:
### - run the model more times for Monte Carlo (!)
### - generate realizations that fit the data better beforehand