# Set up Pycap LPR to operate with PEST++ for optimization

In [1]:
import yaml
import pandas as pd
import numpy as np
from pathlib import Path
import sys
sys.path.append('../scripts')
from wdnr_pycap import Excel2YML
import pyemu
import matplotlib.pyplot as plt
import os, platform
from pycap.analysis_project import Project
import geopandas as gpd
from shapely.geometry import Point
from  datetime import date

In [2]:
# Excel workbook holding the PyCap inputs (an absolute or a relative path both work).
pycap_inputs_excel = Path(
    "../../PyCap/PyCap_USGS/pycap-dss2/Notebooks/Inputs/LPR_Prepped.xlsx"
)

# Run name: the stem used for this run's YAML file and its output files.
pycap_run_name = "LPR_Redux"

# Parent directory for all runs, with one subdirectory for the base
# depletion-potential run and one for the PEST++ setup.
parent_run_path = Path("../pycap_runs")
base_run_path = parent_run_path.joinpath("pycap_base")
pest_path = parent_run_path.joinpath("pycap_pest")

Reviewer_Name = "MNF"


# Parameterization for PEST++ 

In [3]:
# Create the PEST++ working directory together with any missing parents.
# exist_ok makes the cell safe to re-run without a separate (racy) existence
# check, and still raises if the path exists but is not a directory.
pest_path.mkdir(parents=True, exist_ok=True)

#### let's load up the configuration file

In [4]:
# Parse the base-run YAML configuration into a nested dictionary
base_yml_path = base_run_path / f"{pycap_run_name}.yml"
with open(base_yml_path, 'r') as yml_file:
    indat = yaml.safe_load(yml_file)

#### and now parameterize inputs to vary in optimization, Monte Carlo, and other analyses

First we set up template (`TPL`) files that allow PEST++ to update model input values by name. We do this by reading in the input (`YML`) file and replacing each numeric value we want to vary with a named marker (`~name~`) that PEST++ overwrites with the value chosen by the algorithm.

In [5]:
# Global T and S: remember the values read from the YAML file, then replace
# each one with a centered, 16-character template marker delimited by '~'.
project_props = indat['project_properties']
T_init, S_init = project_props['T'], project_props['S']

project_props.update(
    T=f'~{"global_T":^16s}~',
    S=f'~{"global_S":^16s}~',
)

In [6]:
# Well-by-well parameters: stream apportionment fractions and pumping rates
well_keys = [key for key in indat if key.startswith('well_')]

# map every well entry to the names of its stream-apportionment sub-entries
allkeys = {
    well: [sub for sub in indat[well] if sub.startswith('stream_apportionment')]
    for well in well_keys
}
app_keys = list(allkeys.values())
pending_wells = [well for well in well_keys if 'pending' in indat[well]['status']]

# parameter names and their initial values, kept in matching order
pars = []
parvals = []

# apportionment: record the current value, then put a '~'-delimited marker
# carrying the parameter name in its place
for well, stream_keys in allkeys.items():
    for stream_key in stream_keys:
        par_name = f'{well}__{stream_key}'
        pars.append(par_name)
        parvals.append(indat[well][stream_key]['apportionment'])
        indat[well][stream_key]['apportionment'] = f'~{par_name:^45}~'

# pumping rate Q: same treatment, one parameter per well
for well in well_keys:
    par_name = f'{well}__q'
    pars.append(par_name)
    parvals.append(indat[well]['Q'])
    indat[well]['Q'] = f'~{par_name:^45}~'

In [7]:
# Save the template file: a 'ptf ~' header line (naming '~' as the marker
# delimiter) followed by the YAML dump of the parameterized configuration.
with open(pest_path / f"{pycap_run_name}.yml.tpl", 'w') as ofp:
    ofp.write('ptf ~\n')
    # when given a stream, yaml.dump writes to it and returns None, so there
    # is no result worth binding to a name
    yaml.dump(indat, ofp, default_flow_style=False, sort_keys=False)

In [8]:
# Table of initial parameter values, indexed by parameter name:
# first the well-by-well parameters ...
wellwise_pars = pd.DataFrame({'parval1': parvals}, index=pars)

# ... then the two global parameters with the values saved before templating
global_pars = pd.DataFrame(
    {'parval1': [S_init, T_init]},
    index=['global_s', 'global_t'],
)

pars_df = pd.concat([wellwise_pars, global_pars])


In [9]:
# Spot-check five randomly sampled rows of the initial-value table
pars_df.sample(5)

Unnamed: 0,parval1
well_24285__stream_apportionment190,0.0234
well_23645__q,158.8
well_23631__stream_apportionment33,0.1852
well_23883__stream_apportionment97,0.0369
well_23646__q,319.4


### Next we need to be able to read model outputs into PEST++
Now we write an instruction file (`INS`) that can navigate model output and read it into PEST++

In [10]:
# Observation names used by the instruction file and the external forward run.
# One base-case depletion observation per stream entry in the configuration.
basedeplobs = [f"{indat[k]['name']}:bdpl" for k in indat if 'stream' in k]

# Unique stream names (the text before the first ':'), kept in first-seen
# order so the observation order is identical on every run; a plain set()
# gives an order that can change between Python sessions.
unique_rivers = list(dict.fromkeys(name.split(':')[0] for name in basedeplobs))

# add in the totals/sums of proposed/existing/combined depletions for each stream
basedeplobs.extend(
    f'{river}:{total}:bdpl'
    for river in unique_rivers
    for total in ('total_proposed', 'total_existing', 'total_combined')
)

# one observation name per line
with open(pest_path / 'basedeplobs.dat', 'w') as ofp:
    ofp.writelines(f'{name}\n' for name in basedeplobs)

In [11]:
# Display the full list of base-case depletion observation names
basedeplobs

['LPR:418:bdpl',
 'LPR:466:bdpl',
 'LPR:467:bdpl',
 'LPR:490:bdpl',
 'LPR:509:bdpl',
 'LPR:602:bdpl',
 'LPR:603:bdpl',
 'LPR:798:bdpl',
 'LPR:807:bdpl',
 'LPR:850:bdpl',
 'LPR:862:bdpl',
 'LPR:1013:bdpl',
 'LPR:1302:bdpl',
 'LPR:1323:bdpl',
 'LPR:1486:bdpl',
 'LPR:1584:bdpl',
 'LPR:1589:bdpl',
 'LPR:1643:bdpl',
 'LPR:1683:bdpl',
 'LPR:1860:bdpl',
 'LPR:2544:bdpl',
 'LPR:2750:bdpl',
 'LPR:2886:bdpl',
 'LPR:3473:bdpl',
 'LPR:3949:bdpl',
 'LPR:4171:bdpl',
 'LPR:23610:bdpl',
 'LPR:23611:bdpl',
 'LPR:23618:bdpl',
 'LPR:23620:bdpl',
 'LPR:23627:bdpl',
 'LPR:23629:bdpl',
 'LPR:23630:bdpl',
 'LPR:23631:bdpl',
 'LPR:23635:bdpl',
 'LPR:23637:bdpl',
 'LPR:23638:bdpl',
 'LPR:23639:bdpl',
 'LPR:23645:bdpl',
 'LPR:23646:bdpl',
 'LPR:23648:bdpl',
 'LPR:23653:bdpl',
 'LPR:23695:bdpl',
 'LPR:23697:bdpl',
 'LPR:23698:bdpl',
 'LPR:23699:bdpl',
 'LPR:23700:bdpl',
 'LPR:23709:bdpl',
 'LPR:23710:bdpl',
 'LPR:23715:bdpl',
 'LPR:23716:bdpl',
 'LPR:23720:bdpl',
 'LPR:23721:bdpl',
 'LPR:23722:bdpl',
 'LPR:23723

In [12]:
# Time-series observations.
# Here every stream entry in the configuration gets an individual time series.
allstrmobs = [f"{indat[key]['name']}" for key in indat if "stream" in key]
output_ts = allstrmobs

# Times at which individual outputs are extracted: 365*4 through 365*5
# inclusive, i.e. the depletions in the 5th year.
times = range(365*4, 365*5 + 1)

# one observation named '<stream>__<time>' per stream and time
ts_obs = [f'{stream}__{step}' for stream in output_ts for step in times]
allobs = basedeplobs + ts_obs

# list of time-series locations, one per line
with open(pest_path / 'ts_obs.dat', 'w') as ofp:
    ofp.writelines(f'{stream}\n' for stream in output_ts)

### Now read in the base case observation values for depletion data

In [13]:
# Base-case stream depletion table written by the base run
base_csv = (
    base_run_path / "output"
    / f'{pycap_run_name}.table_report.base_stream_depletion.csv'
)
base_data = pd.read_csv(base_csv, index_col=0)

# Observation names (one per line in basedeplobs.dat) become both a column
# and the index of the results table; values start out as NaN.
bdplobs = pd.read_csv(pest_path / 'basedeplobs.dat', header=None)
bdplobs.columns = ['obsname']
bdplobs = bdplobs.set_index('obsname', drop=False)
bdplobs['obs_values'] = np.nan

# Each name has three ':'-separated parts; the first selects the column and
# the second selects the row of the base depletion table.
for obs_name in bdplobs.obsname:
    river, row_label, _ = obs_name.split(':')
    bdplobs.loc[obs_name, 'obs_values'] = base_data.loc[row_label, river]

In [14]:
# Spot-check five randomly sampled base-case depletion observations
bdplobs.sample(5)

Unnamed: 0_level_0,obsname,obs_values
obsname,Unnamed: 1_level_1,Unnamed: 2_level_1
LPR:23851:bdpl,LPR:23851:bdpl,0.04740724
LPR:58403:bdpl,LPR:58403:bdpl,0.0005532323
LPR:23870:bdpl,LPR:23870:bdpl,2.365863e-07
LPR:24033:bdpl,LPR:24033:bdpl,7.002443e-05
LPR:71110:bdpl,LPR:71110:bdpl,0.01107334


### Next for time series - read in the results for the 5th year only for each well

In [15]:
# Time-series table written by the base run, indexed by its first column
ts_data = pd.read_csv(
    base_run_path / "output" / f'{pycap_run_name}.table_report.all_ts.csv',
    index_col=0,
)
# keep only the text after the last '__' in each column label
ts_data.columns = ts_data.columns.str.split('__').str[-1]

# Re-read the list of time-series locations; the context manager closes the
# file handle instead of leaving that to garbage collection.
ts_path = pest_path / 'ts_obs.dat'
with open(ts_path, 'r') as ifp:
    output_ts = [line.strip() for line in ifp]

# One row per '<location>__<time>' observation name, filled from the table:
# the time (as an integer) selects the row and the location selects the column.
ts_df = pd.DataFrame(index=ts_obs, data={'obsname': ts_obs, 'obs_values': np.nan})
for cob in ts_df.index:
    criv, ctime = cob.split('__')
    ts_df.loc[cob, 'obs_values'] = ts_data.loc[int(ctime)][criv]

In [16]:
# Spot-check five randomly sampled time-series observations
ts_df.sample(5)

Unnamed: 0,obsname,obs_values
LPR:24227__1533,LPR:24227__1533,0.001195
LPR:70881__1494,LPR:70881__1494,0.002631
LPR:1584__1522,LPR:1584__1522,0.03691
LPR:24307__1588,LPR:24307__1588,0.000303
LPR:23889__1678,LPR:23889__1678,0.040347


### We can combine all the outputs into a single dataframe and make the instruction file we'll need to read in the results

In [17]:
# Stack the base-depletion and time-series observations into a single table
allout = pd.concat([bdplobs, ts_df])

# Output file: one space-separated "<name> <value>" line per observation
allout['obs_values'].to_csv(pest_path / 'allobs.out', sep=' ', header=None)

# Matching instruction file: a 'pif ~' header, then one instruction line per
# observation, in the same order as the rows of the output file.
ins_lines = [f'l1 w !{obs_name}!\n' for obs_name in allout.index]
with open(pest_path / 'allobs.out.ins', 'w') as ofp:
    ofp.write('pif ~\n')
    ofp.writelines(ins_lines)

### Now we need to make a PEST control file to orchestrate everything. Luckily, `pyemu` makes this straightforward now that we have made the `tpl` and `ins` files

In [18]:
# Build the control file from the tpl/ins files found in pest_path. The
# directory is scanned as '.', so work from inside it, and always return to
# the starting directory even if building the control file fails.
cwd = os.getcwd()
os.chdir(pest_path)
try:
    pst = pyemu.Pst.from_io_files(*pyemu.utils.parse_dir_for_io_files('.'))
finally:
    os.chdir(cwd)

error parsing metadata from 'obsnme', continuing


In [19]:
# Parameter table of the control file. Note that this rebinds `pars`, which
# earlier in the notebook held the plain list of parameter names.
pars = pst.parameter_data

# let's clean up some of the data and add important values

In [23]:
# Name parameter groups according to the type of parameter. The rules are
# applied in order, so a later match overrides an earlier one.
group_rules = [
    ("global", pars.parnme.str.contains("global")),
    ("pumping", pars.parnme.str.endswith("q")),
    ("apportionment", pars.parnme.str.contains("stream")),
]
for group_name, name_mask in group_rules:
    pars.loc[name_mask, "pargp"] = group_name

# Initial values come from the table assembled while building the template
pars.loc[pars_df.index, 'parval1'] = pars_df.parval1

In [24]:
# Display the parameter table with groups and initial values assigned
pars

Unnamed: 0,parnme,partrans,parchglim,parval1,parlbnd,parubnd,pargp,scale,offset,dercom
global_s,global_s,log,factor,0.1200,1.100000e-10,1.100000e+10,global,1.0,0.0,1
global_t,global_t,log,factor,1700.0000,1.100000e-10,1.100000e+10,global,1.0,0.0,1
well_1013__q,well_1013__q,log,factor,125.1000,1.100000e-10,1.100000e+10,pumping,1.0,0.0,1
well_1013__stream_apportionment11,well_1013__stream_apportionment11,log,factor,0.0013,1.100000e-10,1.100000e+10,apportionment,1.0,0.0,1
well_1302__q,well_1302__q,log,factor,88.2000,1.100000e-10,1.100000e+10,pumping,1.0,0.0,1
...,...,...,...,...,...,...,...,...,...,...
well_94302__stream_apportionment324,well_94302__stream_apportionment324,log,factor,0.8775,1.100000e-10,1.100000e+10,apportionment,1.0,0.0,1
well_94988__q,well_94988__q,log,factor,11.1000,1.100000e-10,1.100000e+10,pumping,1.0,0.0,1
well_94988__stream_apportionment325,well_94988__stream_apportionment325,log,factor,0.0703,1.100000e-10,1.100000e+10,apportionment,1.0,0.0,1
well_95068__q,well_95068__q,log,factor,40.3000,1.100000e-10,1.100000e+10,pumping,1.0,0.0,1


In [27]:
# Apportionment bounds: allow each apportionment parameter to move by
# +/- del_apport around its initial value ...
del_apport = 0.1

is_apport = pars.pargp == "apportionment"
min_frac = indat['project_properties']['Min_FracInt']
max_frac = indat['project_properties']['Max_FracInt']

lower = pars.loc[is_apport, "parval1"] - del_apport
upper = pars.loc[is_apport, "parval1"] + del_apport

# ... but force the overall lower and upper bounds defined in the .xlsx
# inputs: a bound that falls outside that range is replaced by the limit.
pars.loc[is_apport, "parlbnd"] = lower.mask(lower < min_frac, min_frac)
pars.loc[is_apport, "parubnd"] = upper.mask(upper > max_frac, max_frac)

In [28]:
# Set lower and upper bounds for T and S as defined in the .xlsx inputs.
# Rows are selected by parameter name (the table index): both parameters
# belong to the parameter group "global", so a filter such as
# pargp == 'global_t' matches nothing and would leave the default bounds.
project_props = indat['project_properties']

pars.loc['global_t', 'parlbnd'] = project_props['Min_T']
pars.loc['global_t', 'parubnd'] = project_props['Max_T']

pars.loc['global_s', 'parlbnd'] = project_props['Min_S']
pars.loc['global_s', 'parubnd'] = project_props['Max_S']

# set transforms for all parameters to 'none', except for T which can be a log-transform
pars.partrans = 'none'
pars.loc['global_t', 'partrans'] = 'log'

In [None]:
# Set up and save the control file, then stage the files a run needs.
# NOTE(review): `yml_name` and `template_dir` are not defined in any cell
# visible in this notebook -- they must be defined before this cell runs.
import shutil  # used below but not among the notebook's top-level imports

pst.control_data.noptmax = -1
pst.model_command = [f'python run_pycap.py {yml_name} ts_obs.dat']
pst.pestpp_options['par_sigma_range'] = 6
pst.pestpp_options['ies_num_reals'] = 50

pst.write(str(template_dir / 'prior_mc.pst'), version=2)

# Copy the forward-run script, the YAML file, the pycap package and the
# output directory into the template directory

current_dir = os.getcwd()
PyCap_Path = os.path.join(current_dir, '..', 'pycap')

shutil.copy2('run_pycap.py', template_dir / 'run_pycap.py')
shutil.copy2(yml_name, template_dir)

# copytree requires a destination that does not exist yet, so remove a stale
# copy first. Test the exact directory being removed: checking only the
# parent would make rmtree fail when the parent exists without the package.
pycap_dest = template_dir / 'pycap-dss' / 'pycap'
if pycap_dest.exists():
    shutil.rmtree(pycap_dest)

shutil.copytree(PyCap_Path, pycap_dest)

output_dest = template_dir / 'output'
if output_dest.exists():
    shutil.rmtree(output_dest)

shutil.copytree(os.path.join(current_dir, 'output'), output_dest)

In [None]:
# Path to the pycap package: three levels above the current working
# directory, then down into pycap-dss/pycap-dss/pycap. Shown for inspection.
current_dir = os.getcwd()
PyCap_Path = os.path.join(
    current_dir,
    *([os.pardir] * 3),
    'pycap-dss', 'pycap-dss', 'pycap',
)
PyCap_Path

In [None]:
# Show the current working directory; the relative paths used in the
# neighbouring cells are resolved against it
os.getcwd()

In [None]:
# Run the Monte Carlo analysis.
# Switches for the analyses to launch.
run_global_sen = False
run_global_sen_distrib = False
run_ies = True


if MonteCarlo == "Y":
    SetupMC()

    os.chdir(cwd)
    if run_ies:
        # choose the pestpp-ies binary directory for the current platform
        on_windows = 'window' in platform.platform().lower()
        pestpp_ex = (
            '../../dependencies/win_bin/pestpp-ies'
            if on_windows
            else '../../dependencies/mac_bin/pestpp-ies'
        )
        # start 20 local workers from copies of the template directory, with
        # the master running in ./MASTER
        worker_options = dict(
            worker_dir=str(template_dir),
            exe_rel_path=pestpp_ex,
            pst_rel_path='prior_mc.pst',
            num_workers=20,
            worker_root='./',
            master_dir='MASTER',
        )
        pyemu.os_utils.start_workers(**worker_options)

        

# Post-processing Monte Carlo Results

#### Look at raw PHI histograms

In [None]:
# Load the Monte Carlo observation ensemble (one row per realization) and
# split it into time-series and base-depletion observations.
obs = pd.read_csv('./MASTER/prior_mc.0.obs.csv', index_col=0)
obs.index = obs.index.astype(str)

# `reals_to_keep` is only assigned by the phi-filter cell further down in the
# notebook. When that cell has not run yet, keep every realization rather
# than failing with a NameError.
try:
    kept_reals = reals_to_keep
except NameError:
    kept_reals = obs.index
    print('reals_to_keep is not defined yet; keeping all realizations')
obs = obs.loc[kept_reals]

# observation names ending in ':bdpl' are base depletions, the rest are
# time-series values (note: this rebinds `ts_obs`, previously a list of names)
ts_obs = obs[[i for i in obs.columns if not i.endswith(':bdpl')]]
bdpl_obs = obs[[i for i in obs.columns if i.endswith(':bdpl')]]

# unique location prefixes (the text before '__') of the time-series names
ts_lox = np.unique([i.split('__')[0] for i in ts_obs.columns])

# one row per observation, one column per realization, plus the time and
# location parsed from each observation name
tmp2 = ts_obs.T.copy()
tmp2['time'] = [int(i.split('__')[1]) for i in tmp2.index]
tmp2['lox'] = [str(i.split('__')[0]) for i in tmp2.index]

# base-run time series and their row-wise sum
all_time_series = pd.read_csv(f'output/{pycap_run_name}.table_report.all_ts.csv', index_col=0)
cum_depl = all_time_series.sum(axis=1)



In [None]:
# Display the unique location prefixes parsed from the time-series observation names
ts_lox

In [None]:
if MonteCarlo == "Y":
    # check phi: transpose the phi file so each remaining row is one entry,
    # skipping the first five rows of the transposed table
    # NOTE(review): presumably the five skipped rows are summary statistics
    # rather than realizations -- confirm against the phi file layout.
    phi = pd.read_csv('./MASTER/prior_mc.phi.actual.csv', index_col=0).T.iloc[5:]
    phi_org = phi.copy()
    phi.columns = ['phi']
    phi.hist(bins=50)

    # only change if there are a very small number of extreme outliers
    phitoohigh = 20000000
    phi = phi.loc[phi.phi < phitoohigh]
    phi.hist(bins=50)

    reals_to_keep = phi.index
    # Keep rejected realizations in case we want to review them
    reject_reals = list(set(phi_org.index) - set(reals_to_keep))
    print('Number of MC runs kept is ', len(reals_to_keep))

    # read in all MC observations
    # Note: Large file, takes a while to read in.
    obs = pd.read_csv('./MASTER/prior_mc.0.obs.csv', index_col=0)
    obs.index = obs.index.astype(str)

    obs = obs.loc[reals_to_keep]

    # parse into time-series (everything not ending in ':bdpl') and base
    # depletion observations
    ts_obs = obs[[i for i in obs.columns if not i.endswith(':bdpl')]]
    bdpl_obs = obs[[i for i in obs.columns if i.endswith(':bdpl')]]

    ts_lox = np.unique([i.split('__')[0] for i in ts_obs.columns])

    # one row per observation, one column per realization, plus the time and
    # location parsed from each observation name
    tmp2 = ts_obs.T.copy()
    tmp2['time'] = [int(i.split('__')[1]) for i in tmp2.index]
    tmp2['lox'] = [str(i.split('__')[0]) for i in tmp2.index]

    # look at timeseries outputs
    all_time_series = pd.read_csv(f'output/{pycap_run_name}.table_report.all_ts.csv', index_col=0)

    # sum up cumulative depletions (NOTE: assumes only one stream used in MC)
    cum_depl = all_time_series.sum(axis=1)

    # plot up time-series depletions
    fig, ax = plt.subplots(nrows=1, ncols=1)
    ax.plot(all_time_series, alpha=0.6, c='k', lw=0.2)
    ax.plot(cum_depl, c='r')
    fig.savefig(f'output/cum_depl_MC_{pycap_run_name}.png')

    # plot up each location's last year of depletion, one subplot row per
    # location. squeeze=False keeps `axes` two-dimensional even when there is
    # a single location. Each panel is drawn on its own row of this figure;
    # plotting without ax= would open a new figure per location and the saved
    # file would contain only the last one.
    # NOTE(review): the figure height is fixed at 4 in; consider scaling it
    # with the number of locations if there are many.
    fig, axes = plt.subplots(nrows=len(ts_lox), ncols=1, sharex=True,
                             figsize=(10, 4), squeeze=False)

    for cax, cts in zip(axes[:, 0], ts_lox):
        # rows for this location, indexed by time, realizations as columns
        tmp = tmp2.loc[tmp2['lox'] == cts].set_index('time').drop(columns='lox')
        tmp.plot(ax=cax, alpha=0.4, c='k', lw=0.1, legend=False)
        # highlight the 'base' realization in red
        tmp['base'].plot(ax=cax, c='r')
        cax.set_title(cts)
    fig.tight_layout()
    fig.savefig(f'output/individual_depl_MC_{pycap_run_name}.png')
