In [None]:
%matplotlib inline
from __future__ import print_function
import os
import numpy as np
import pandas as pd
from matplotlib.patches import Rectangle as rect
import matplotlib.pyplot as plt
import platform
# Prefix placed in front of executable names in later cells: empty when the
# platform string contains 'window', './' otherwise.
is_windows = 'window' in platform.platform().lower()
pref = '' if is_windows else './'


## Null-Space Monte Carlo with Freyberg Model

Adapted from the examples provided with `pyemu`


## Using `pyemu`

In [None]:
import pyemu
import shutil, os

# Directory holding the Freyberg pilot-point model files
basedir = os.path.join('..','..','models','Freyberg','Freyberg_pilotpoints')
# Copy every regular file into the working directory. A plain loop is used
# because the copies are pure side effects (no result list is needed), and
# sub-directories are skipped because shutil.copy2 only copies files.
for cf in os.listdir(basedir):
    src = os.path.join(basedir, cf)
    if os.path.isfile(src):
        shutil.copy2(src, cf)
    

First create a linear analysis object.  We will use the `MonteCarlo` derived type, which allows us to use some sampling-based methods.  We pass it the name of the jacobian matrix file.  Since we don't pass an explicit argument for `parcov` or `obscov`, `pyemu` attempts to build them from the parameter bounds and observation weights in a pest control file (.pst) with the same base case name as the jacobian.  Since we are interested in forecast uncertainty as well as parameter uncertainty, we also pass the names of the forecast sensitivity vectors we are interested in, which are stored in the jacobian as well.  Note that the `forecasts` argument can be a mixed list of observation names, other jacobian files or PEST-compatible ASCII matrix files.

In [None]:
# File names of the PEST control file and of the Jacobian matrix file
pst_file = 'freyberg_pp.pst'
jco_file = 'freyberg_pp.jcb'
# Load the control file; its pest++ options (e.g. "forecasts") are read in later cells
pst = pyemu.Pst(pst_file)

In [None]:
# The forecast names are stored as one comma-separated pest++ option in the control file
forecast_names = pst.pestpp_options["forecasts"].split(',')
mc = pyemu.MonteCarlo(
    jco=jco_file,
    forecasts=forecast_names,
    verbose=False
)
print("observations,parameters in jacobian:",mc.jco.shape)

In [None]:
# Display the Jacobian via its df() conversion (presumably a pandas DataFrame)
mc.jco.df()

In [None]:
# Show the forecast names obtained by splitting the comma-separated "forecasts" pest++ option
pst.pestpp_options["forecasts"].split(',')

## Drawing from the prior
Each ``MonteCarlo`` instance has a ``parensemble`` attribute which itself is an instance of ``Ensemble`` class, which is derived from ``pandas.DataFrame``.  What all that means is that the parameter ensembles behave just like ```DataFrame```s

### ```draw```
The ``draw`` method is the main entry point into getting realizations. It accepts several optional arguments.  Without any args, it makes a single draw from the prior, which uses a $\boldsymbol{\mu}$ (mean) vector of the parameter values listed in the pest control file:


In [None]:
# Draw with all-default arguments, then report the shape of the resulting parameter ensemble
mc.draw()
print(mc.parensemble.shape)

``draw`` also accepts a ``num_reals`` argument to specify the number of draws to make:

In [None]:
# Request 200 realizations; this replaces the ensemble produced by the previous draw
mc.draw(num_reals=200)
print(mc.parensemble.shape)
# Column-wise mean of the ensemble, first few entries only
print(mc.parensemble.mean().head())

Notice that each call to ``draw`` overwrites the previous draws.  ```draw``` also accepts a ``par_file`` argument in the case that you want to use a pest .par file as the $\boldsymbol{\mu}$ vector.

In [None]:
# Use the parameter values in the .par file as the mean vector of the draw,
# as described in the surrounding text. Without par_file this cell would just
# repeat the previous draw and the means would not shift.
mc.draw(num_reals=200, par_file="freyberg_pp.par")
print(mc.parensemble.mean().head())

Notice how the mean value for ``rond00`` is different.  ``draw`` also accepts an ``obs`` boolean flag to control whether a realization of observation noise is drawn as well.  If ```obs``` is True, then a complementary ```obsensemble``` attribute is also populated.  The last optional flag for ```draw``` is ``enforce_bounds``, which controls whether parameter bounds are explicitly respected.

```.draw``` also accepts an optional ``how`` argument that controls the type of distribution to draw from.  ``how`` can be either "gaussian" (default) or "uniform".  

## plotting

Since ```ParameterEnsemble``` is derived from ```pandas.DataFrame```, it has all the cool methods and attributes we all love.  Let's compare the results of drawing from a uniform vs a gaussian distribution.  This may take some time.

In [None]:
# List the column labels of the parameter ensemble (presumably the parameter names)
mc.parensemble.columns

In [None]:
# Overlay histograms of one parameter drawn from a uniform and from a gaussian prior.
# density=True replaces the `normed` keyword, which was deprecated in matplotlib 2.1
# and removed in 3.1; both scale the histogram to unit area so the two are comparable.
mc.draw(num_reals=500,how="uniform")
ax = plt.subplot(111)
partoplot = 'hkpp10'
mc.parensemble.loc[:,partoplot].plot(kind="hist",bins=50,ax=ax,alpha=0.5, density=True)
# each draw overwrites mc.parensemble, so the uniform sample is plotted before redrawing
mc.draw(num_reals=500,how="gaussian")
mc.parensemble.loc[:,partoplot].plot(kind="hist",bins=50,ax=ax,alpha=0.5, density=True)
plt.legend(['uniform','gaussian'])

## null-space projection 

This is too easy.  Once you have drawn parameter realizations, use the ```project_parensemble()``` method.  This method accepts 3 optional arguments: ``nsing``: number of singular components to demarcate the solution-null space boundary, ``par_file``: a pest .par file to use as the final parameter values, and ``inplace``, which is a boolean flag to control whether a new ```Ensemble``` instance should be created and returned.  The most important of these is ``nsing``.  If it is not passed, then ``nsing`` is set based on the ratio between the largest and smallest singular values >= 1.0e-6 

In [None]:
# Draw 1000 realizations with bounds enforcement and keep a copy before projecting
mc.draw(num_reals=1000,enforce_bounds=True)
unprojected50 = mc.parensemble.copy()
# The projected values are read back from mc.parensemble, so the projection is
# presumably applied in place; hence the copy taken above
mc.project_parensemble(nsing=50,par_file="freyberg_pp.par") #use nsing=50 for demonstration purposes
projected50 = mc.parensemble.copy()

In [None]:
# Overlay the histograms of one parameter before and after the 50-component projection
partoplot = 'hkpp10'
ax = plt.subplot(111)
for ensemble in (unprojected50, projected50):
    ensemble.loc[:,partoplot].plot(kind="hist",bins=50,ax=ax,alpha=0.5)
plt.legend(['unprojected','projected'])

We see that if we use a large number of singular components, then the null-space projection process greatly reduces the uncertainty in the plotted parameter (``hkpp10``).  Note that using 50 singular components greatly overestimates the dimension of the range space of the normal matrix ($\mathbf{J}^T\mathbf{Q}\mathbf{J}$) and is likely not justifiable, since only 12 observations are being used for inversion. Let's redo the null-space projection operation with 12 singular components:

In [None]:
# Repeat the draw/projection with 12 singular components, keeping a copy before projecting
mc.draw(num_reals=1000,enforce_bounds=True)
unprojected12 = mc.parensemble.copy()
mc.project_parensemble(nsing=12,par_file="freyberg_pp.par") # nsing=12 matches the 12 observations used for inversion
projected12 = mc.parensemble.copy()

In [None]:
# Overlay the histograms of one parameter before and after the 12-component projection.
# density=True replaces the `normed` keyword (deprecated in matplotlib 2.1, removed in 3.1).
partoplot = 'hkpp10'
ax = plt.subplot(111)
unprojected12.loc[:,partoplot].plot(kind="hist",bins=50,ax=ax,alpha=0.5, density=True)
projected12.loc[:,partoplot].plot(kind="hist",bins=50,ax=ax,alpha=0.5, density=True)
plt.legend(['unprojected','projected'])

Now we see that the null-space projection operation only slightly increases the kurtosis of the distribution

## How does this all pan out when we run the models?

Let's look at three options:  
1. unconstrained Monte Carlo (like we did before with just K and R)  
2. posterior sampling Null Space Monte Carlo 
3. posterior sampling Null Space Monte Carlo with a single iteration using existing Jacobian


### first, unconstrained

In [None]:
# Start from a fresh MonteCarlo object and draw 1000 gaussian realizations with
# bounds enforcement; no null-space projection is applied in this case
mc = pyemu.MonteCarlo(jco=jco_file, forecasts=pst.pestpp_options["forecasts"].split(','),verbose=False)
mc.draw(num_reals=1000, enforce_bounds=True,how='gaussian')
# 'sweep_in.csv' is presumably the input file read by sweep; a second copy is kept
# under a case-specific name because later cases overwrite 'sweep_in.csv'
mc.parensemble.to_csv('sweep_in.csv')
shutil.copy('sweep_in.csv','unconstrained_pars.csv')

In [None]:
# now run using sweep
# Stage a copy of every non-directory entry of the working directory in 'unconbase'
workdir = 'unconbase'
if not os.path.exists(workdir):
    os.mkdir(workdir)
for cf in os.listdir(os.getcwd()):
    if os.path.isdir(cf):
        continue
    shutil.copy2(cf, os.path.join(workdir, cf))

In [None]:
# Run sweep on freyberg_pp.pst through pyemu's start_slaves helper; judging by the
# argument names, 'unconbase' is the directory the 20 workers are cloned from and
# '.' hosts the master -- TODO confirm against the pyemu documentation
pyemu.helpers.start_slaves('unconbase',"{0}sweep".format(pref),"freyberg_pp.pst",num_slaves=20,master_dir='.')

In [None]:
# copy over the results
# ('sweep_out.csv' is presumably written by the sweep run; it is copied to a
# case-specific name because the later cases copy from the same file name)
shutil.copy('sweep_out.csv','unconstrained_results.csv')

In [None]:
# Load the unconstrained sweep results and plot a histogram of their 'phi' column
uncon_results = pd.read_csv('unconstrained_results.csv')
uncon_results.phi.hist(bins=50)

### now with the constrained samples (from the posterior) with 50 singular values

In [None]:
# Fresh MonteCarlo object: draw 1000 gaussian realizations, then project them
# using 50 singular components before writing the sweep input
mc = pyemu.MonteCarlo(jco=jco_file, forecasts=pst.pestpp_options["forecasts"].split(','),verbose=False)
mc.draw(num_reals=1000, enforce_bounds=True,how='gaussian')
mc.project_parensemble(nsing=50,par_file="freyberg_pp.par") #use nsing=50 for demonstration purposes
# Overwrites 'sweep_in.csv' from the previous case; a case-specific copy is kept
mc.parensemble.to_csv('sweep_in.csv')
shutil.copy('sweep_in.csv','constrained_50sv_pars.csv')

In [None]:
# now run using sweep
# Stage a copy of every non-directory entry of the working directory in 'conbase50'
workdir = 'conbase50'
if not os.path.exists(workdir):
    os.mkdir(workdir)
for cf in os.listdir(os.getcwd()):
    if os.path.isdir(cf):
        continue
    shutil.copy2(cf, os.path.join(workdir, cf))

In [None]:
# Run sweep on freyberg_pp.pst through pyemu's start_slaves helper; judging by the
# argument names, 'conbase50' is the directory the 20 workers are cloned from and
# '.' hosts the master -- TODO confirm against the pyemu documentation
pyemu.helpers.start_slaves('conbase50',"{0}sweep".format(pref),"freyberg_pp.pst",num_slaves=20,master_dir='.')

In [None]:
# copy over the results
# ('sweep_out.csv' is presumably rewritten by each sweep run, so it is saved
# under a case-specific name)
shutil.copy('sweep_out.csv','constrained_50_results.csv')

In [None]:
# Compare the phi distributions of the 50-singular-value case and the unconstrained case.
# density=True replaces the `normed` keyword (deprecated in matplotlib 2.1, removed in 3.1).
con50_results = pd.read_csv('constrained_50_results.csv')
ax = con50_results.phi.hist(bins=50, alpha=.5, density=True)
uncon_results.phi.hist(bins=50, alpha=.5, ax=ax, density=True)
plt.legend(['con50','uncon'])

### now with the constrained samples (from the posterior) with 12 singular values

In [None]:
# Fresh MonteCarlo object: draw 1000 gaussian realizations, then project them
# using 12 singular components before writing the sweep input
mc = pyemu.MonteCarlo(jco=jco_file, forecasts=pst.pestpp_options["forecasts"].split(','),verbose=False)
mc.draw(num_reals=1000, enforce_bounds=True,how='gaussian')
mc.project_parensemble(nsing=12,par_file="freyberg_pp.par") # nsing=12 matches the 12 observations used for inversion
# Overwrites 'sweep_in.csv' from the previous case; a case-specific copy is kept
mc.parensemble.to_csv('sweep_in.csv')
shutil.copy('sweep_in.csv','constrained_12sv_pars.csv')

In [None]:
# now run using sweep
# Stage a copy of every non-directory entry of the working directory in 'conbase12'
workdir = 'conbase12'
if not os.path.exists(workdir):
    os.mkdir(workdir)
for cf in os.listdir(os.getcwd()):
    if os.path.isdir(cf):
        continue
    shutil.copy2(cf, os.path.join(workdir, cf))

In [None]:
# Run sweep on freyberg_pp.pst through pyemu's start_slaves helper; judging by the
# argument names, 'conbase12' is the directory the 20 workers are cloned from and
# '.' hosts the master -- TODO confirm against the pyemu documentation
pyemu.helpers.start_slaves('conbase12',"{0}sweep".format(pref),"freyberg_pp.pst",num_slaves=20,master_dir='.')

In [None]:
# copy over the results
# ('sweep_out.csv' is presumably rewritten by each sweep run, so it is saved
# under a case-specific name)
shutil.copy('sweep_out.csv','constrained_12_results.csv')

In [None]:
# Compare the phi distributions of the 12-singular-value case and the unconstrained case
con12_results = pd.read_csv('constrained_12_results.csv')
ax = None
for results in (con12_results, uncon_results):
    # the first call creates the axes; the second one draws onto the same axes
    ax = results.phi.hist(bins=50, alpha=.5, ax=ax)
plt.legend(['con12','uncon'])


## Now let's subject this last parameter set to a single linearization using the existing Jacobian

In [None]:
# Stage a copy of every non-directory entry of the working directory in 'conbase12relin'
workdir = 'conbase12relin'
if not os.path.exists(workdir):
    os.mkdir(workdir)
for cf in os.listdir(os.getcwd()):
    if os.path.isdir(cf):
        continue
    shutil.copy2(cf, os.path.join(workdir, cf))

In [None]:
# Write control files into 'conbase12relin' using the given file-name prefix --
# presumably one .pst per realization of the current (12-singular-value) ensemble,
# each set to a single iteration (noptmax=1) that reuses the existing Jacobian.
# NOTE(review): confirm these semantics against the pyemu write_psts documentation
mc.write_psts(os.path.join('conbase12relin','freyberg_pp_12relin_real'),
              existing_jco="freyberg_pp.jcb",noptmax=1)

In [None]:
# make a simple script to run all of these
# The generated script lists its current working directory, so it has to be
# launched from inside 'conbase12relin'. It runs pest++ on every control file
# whose name contains 'real'.
# The original wrote the same command in both branches of a platform check, so
# the check was dead code and has been removed.
# NOTE(review): unlike sweep, the command carries no `pref` ('./') prefix, so
# pest++ must be on the PATH -- confirm this is intended on non-Windows systems.
runall_lines = [
    'import os \n',
    'for cf in os.listdir(os.getcwd()): \n',
    "    if cf.endswith('.pst') and 'real' in cf: \n",
    # '{0}' is left unformatted here on purpose: it is filled in by the generated script
    "        os.system('pest++ {0} '.format(cf))\n",
]
with open(os.path.join('conbase12relin','runall.py'), 'w') as ofp:
    ofp.writelines(runall_lines)
        
    
    

### Now we can read in all the PHI values from the `iobj` files

In [None]:
# Display the path of the .iobj file for realization 0 (the naming pattern used when reading results)
os.path.join('conbase12relin', 'freyberg_pp_12relin_real{0}.iobj'.format(0))

In [None]:
# Gather, for every realization, the objective-function record (.iobj file) and
# the modelled outputs taken from the residuals of its control file.
#
# The per-realization frames are collected in lists and concatenated once:
# DataFrame.append inside a loop copies the whole frame on every pass and was
# removed in pandas 2.0. Realization 0 goes through the same guarded loop as
# the others, and the reason for any failure is reported rather than hidden
# behind a bare `except:`.
num_reals = 1000  # number of realizations drawn for the 12-singular-value ensemble
real_base = os.path.join('conbase12relin', 'freyberg_pp_12relin_real{0}')

iobj_frames = []
forecast_frames = []
for creal in range(num_reals):
    try:
        iobj_frames.append(pd.read_csv(real_base.format(creal) + '.iobj'))
        tmp = pyemu.Pst(real_base.format(creal) + '.pst')
        # one single-row frame per realization: upper-cased residual names -> modelled values
        forecast_frames.append(pd.DataFrame.from_records(
            dict(zip([i.upper() for i in tmp.res.name.values],tmp.res.modelled.values)), index=[0]))
    except Exception as err:
        # realizations that were not run (or failed) are reported and skipped
        print('No dice on realization {0}: {1}'.format(creal, err))

# pd.concat raises ValueError if nothing could be read at all
con12relin_results = pd.concat(iobj_frames)
con12relin_forecasts = pd.concat(forecast_frames)

In [None]:
# Overlay the total_phi histograms of iteration 0 and iteration 1
ax = None
for itr in (0, 1):
    # the first call creates the axes; the second one draws onto the same axes
    ax = con12relin_results.loc[con12relin_results.iteration==itr].total_phi.hist(bins=50, alpha=0.5, ax=ax)
plt.legend(['nonlinearized','linearized'])

In [None]:
# List the columns available in the 12-singular-value sweep results
con12_results.columns

# We can see what this all does to PHI, but what about our forecasts?

In [None]:
# Compare one sweep-output column between the 12- and 50-singular-value cases,
# keeping only rows where it is positive
pltpar='FR30C16'
ax = None
for results in (con12_results, con50_results):
    ax = results.loc[results[pltpar]>0, pltpar].hist(bins=50, alpha=.5, ax=ax)
plt.legend(['con12','con50'])

In [None]:
# Compare one sweep-output column between the 12- and 50-singular-value cases,
# keeping only rows where it is positive
pltpar='FR04C9'
ax = None
for results in (con12_results, con50_results):
    ax = results.loc[results[pltpar]>0, pltpar].hist(bins=50, alpha=.5, ax=ax)
plt.legend(['con12','con50'])

In [None]:
# Compare one sweep-output column between the 12- and 50-singular-value cases,
# keeping only rows where it exceeds -1e8
pltpar='RIVFLUX_FORE'
ax = None
for results in (con12_results, con50_results):
    ax = results.loc[results[pltpar]>-1e8, pltpar].hist(bins=50, alpha=.5, ax=ax)
plt.legend(['con12','con50'])

In [None]:
# Compare the 12-singular-value sweep results with the relinearized results,
# keeping only rows where the value exceeds -1e8
pltpar='RIVFLUX_FORE'
ax=con12_results.loc[con12_results[pltpar]>-1e8][pltpar].hist(bins=50, alpha=.5)
con12relin_forecasts.loc[con12relin_forecasts[pltpar]>-1e8][pltpar].hist(bins=50, alpha=.5, ax=ax)
# the second series is the relinearized set, not the 50-singular-value case
plt.legend(['con12','con12relin'])

In [None]:
# Compare the 12-singular-value sweep results with the relinearized results,
# keeping only rows where the value exceeds -1e8.
# density=True replaces the `normed` keyword (deprecated in matplotlib 2.1, removed in 3.1).
pltpar='TRAVEL_TIME'
ax=con12_results.loc[con12_results[pltpar]>-1e8][pltpar].hist(bins=50, alpha=.5, density=True)
con12relin_forecasts.loc[con12relin_forecasts[pltpar]>-1e8][pltpar].hist(bins=50, alpha=.5, ax=ax,density=True)
# the second series is the relinearized set, not the 50-singular-value case
plt.legend(['con12','con12relin'])

In [None]:
# Display the concatenated .iobj records of all realizations that could be read
con12relin_results