## This script loads EPT files, calculates the young fraction (YF) and the median age of the young fraction, then makes 1:1 plots to compare the simplified models to the most complex model.  The goal is to evaluate if and how model complexity influences age metrics across scales.

#### The script reads in the model_grid.csv file for each model and uses that to tie each particle's initial location to select categories, such as the HUC, NLCD, coarse fraction, etc.

#### This code has been updated to only work with the FLUX-weighted age simulations.  

In [None]:
__author__ = 'Paul Juckem'
%matplotlib notebook
import os, sys
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib
import matplotlib.cm as cm
from datetime import datetime
import gdal
from gdal import ogr, osr
import gen_mod_functions as gm
import flopy as fp
import pickle
from ipywidgets import interact, Dropdown, Text
from IPython.display import display
from sklearn import linear_model
from sklearn.metrics import mean_squared_error, r2_score
import csv

# rasterio is optional: it is only needed by the cell that extracts zonal
# statistics from GeoTIFFs.  Catch ImportError specifically so that unrelated
# problems (e.g. KeyboardInterrupt) are not silently swallowed.
try:
    import rasterio
except ImportError:
    print('Install rasterio into your python environment. Or, skip plotting of geotiffs at the end of this notebook')

# Try not to use these!!
#from gen_mod_dict import *
#from model_specs import *

# Module-level flag that ReturnEntry tries to update; initialised to the plain bool False.
modifier = False
def ReturnEntry(sender):
    """Copy ``intext.value`` onto ``modifier.value``.

    ``sender`` is accepted but not used.

    NOTE(review): ``intext`` is not defined anywhere in the visible source and
    ``modifier`` is a plain bool (which cannot take a ``value`` attribute), so
    calling this as written would raise.  Presumably a leftover ipywidgets
    callback -- confirm before relying on it.
    """
    modifier.value = intext.value


## Specify user input, including list of models to compare and which axes to plot them, plus attributes to analyze.

In [None]:
# ---------------------------------------------------------------------------
# Models and grid
# ---------------------------------------------------------------------------
# Deepest layer that still belongs to the surficial aquifer.
surf_aq_lays = 3

# Names of the models to analyse.
simulate_list = [
    'FWP1L_zK',
    'FWP5L_zK',
    'FWP5L_hK',
]

# Grid dimensions are hard-coded for convenience.
nrow = 930
ncol = 650

# Axes for the 1:1 graphs: x1 is plotted on the x axis, y1 and y2 on the y axis.
x1 = 'FWP5L_hK'
y1 = 'FWP1L_zK'
y2 = 'FWP5L_zK'

# ---------------------------------------------------------------------------
# Attributes
# ---------------------------------------------------------------------------
# model_grid.csv columns to retain; every other column is discarded.
mg_columns = [
    'node_num',
    'row',
    'col',
    'HUC6',
    'HUC8',
    'HUC10',
    'HUC12',
    'ibound',
    'gage_id',
    'coarse_flag',
    'qu_atlas',
    'catchment',
    'ssurgo',
    'stream_order',
    'surfmat',
]

# Column used to group particles in the single-scale plots.
category = 'HUC8'

# HUC IDs (as strings) keyed by generalized-model area name.
genHUCdict = dict(Oconto='04030104', TWR='0403020218')

# Minimum number of particles required in EACH model within a HUC.  If any one
# model falls short (eg: FWP5L has 2000 but FWP1L has 800 against a threshold
# of 1000) the HUC is left out of the 1:1 plots; it is still included in the
# RTD plots.  1000 works well for 1:1 plots because only HUCs with really
# refined RTDs remain, whereas 50 or 100 is more reasonable for visualizing
# spatial patterns because more HUCs get plotted.  The 50-100 range is based
# on visually inspecting RTD curves.
minptl = 50

# All HUC IDs that represent Lake Michigan.
purge_hucs = [
    40602,
    4060200,
    406020000,
    40602000000,
]

# ---------------------------------------------------------------------------
# Rasters
# ---------------------------------------------------------------------------
# GeoTIFF of HUC IDs for each HUC scale.
HUCtiffdict = {
    'HUC6': 'E:/HUCS/WBD_4n7/HUC6_UTMft_FWP.tiff',
    'HUC8': 'E:/HUCS/WBD_4n7/HUC8_UTMft_FWP.tiff',
    'HUC10': 'E:/HUCS/WBD_4n7/HUC10_UTMft_FWP.tiff',
    'HUC12': 'E:/HUCS/WBD_4n7/HUC12_UTMft_FWP.tiff',
}

#HUCproprast = './vK_lay1_hK-vK.tif'
# Model-property rasters.
HUCproprastlist = [
    './vK_lay1_5h-5z.tif',
    './vK_lay2_5h-5z.tif',
    './vK_lay3_5h-5z.tif',
    './vK_lay1_hK-vK.tif',
    './vani_lay1_5h-5z.tif',
    './vani_lay2_5h-5z.tif',
    './vani_lay3_5h-5z.tif',
    './T_lay1_5h-5z.tif',
    './T_lay2_5h-5z.tif',
    './T_lay3_5h-5z.tif',
    './GlacT_5h-5z.tif',
    './glac_satthick_5h-5z.tif',
    './BrRCH_5h-5z.tif',
    './RCH_5h-5z.tif',
    './hK_lay1_5h-5z.tif',
    './hK_lay2_5h-5z.tif',
    './hK_lay3_5h-5z.tif',
    './BrT_5h-5z.tif',
    './glacT2BrT_5h-5z.tif',
    './RCHUZF_5h-5z.tif',
    './UZF_5h-5z.tif',
    './RCHcbb_5h-5z.tif',
    './rch-over-satKs_5h-5z.tif',
    './sat-weighted_Ks_5h-5z.tif',
]

# Flux rasters.
HUCfluxrastlist = [
    './SWleak_5h-5z.tif',
    './MNW2_5h-5z.tif',
    './SFR_5h-5z.tif',
]

#### Prep the script for the models to be analyzed

In [None]:
# Locate the MODFLOW name files (*.nam) under each "home" directory and keep
# the ones whose base name is listed in simulate_list.
homes = ['../Models']
fig_dir = '../Figures'

# makedirs(exist_ok=True) also creates missing parents and does not fail if
# the directory appears between the existence check and the creation.
os.makedirs(fig_dir, exist_ok=True)

mfpth = '../executables/MODFLOW-NWT_1.0.9/bin/MODFLOW-NWT_64.exe'
mp_exe_name = '../executables/modpath.6_0/bin/mp6x64.exe' 

#mf_start_date_str = '01/01/1900' 
#mp_release_date_str = '01/01/2017' 

age_cutoff = 65
#year_cutoff = '01/01/1952'

surf_aq_lays = 3  # deepest layer of the surficial aquifer.

dir_list = []   # directory of each selected model (parallel to modlist)
modlist = []    # selected model names, in the order they were found
i = 0           # number of name files found
r = 0           # number of name files selected for analysis

path_dict = {}  # model name -> directory containing its name file
dfdict = {}
totp = {}
for home in homes:
    if not os.path.exists(home):
        print('WARNING: model directory not found, skipping: {}'.format(home))
        continue
    for dirpath, dirnames, filenames in os.walk(home):
        for f in filenames:
            mod, ext = os.path.splitext(f)
            if ext != '.nam':
                continue
            i += 1
            if mod not in simulate_list:
                continue
            if mod in path_dict:
                # A second name file with the same base name would otherwise be
                # appended to modlist again and every later loop would process
                # (and plot) that model twice.  Keep the first one found.
                print('WARNING: duplicate name file for {} in {}; keeping {}'.format(
                    mod, dirpath, path_dict[mod]))
                continue
            modlist.append(mod)
            dir_list.append(dirpath)
            r += 1
            path_dict[mod] = dirpath

print('    {} models read'.format(i))
print('These {} models will be analyzed: {}'.format(r, modlist))

In [None]:
# Read the model_grid.csv file and the flux-weighted endpoint pickle for each
# model, attach the grid attributes to every particle, and keep only particles
# that start in the surficial (glacial) layers.
#   dfdict[model] -> particle DataFrame with grid attributes joined on
#   totp[model]   -> number of particles with a non-null 'rt'

dfdict = {}
totp = {}

for model in modlist:
    g = os.path.join(path_dict[model], 'model_grid.csv')
    try:
        df = pd.read_csv(g)
        # Keep active cells only (ibound neither 0 nor missing).  A boolean
        # mask is used instead of `df.ibound.replace(..., inplace=True)`, which
        # is chained in-place modification and is not guaranteed to write back
        # to df under pandas copy-on-write.
        df = df[df.ibound.notnull() & (df.ibound != 0)]
        df = df[mg_columns].copy()  # keep just the desired fields
        # re-calculate 2D cell number
        df['cellnum2d'] = df.row * ncol + df.col
        #df.rename(columns={'node_num':'initial_node_num'}, inplace=True)

        p = os.path.join(path_dict[model], '{}_flux_all_zones_mod.pickle'.format(model))
        # Unpickling executes arbitrary code: only load pickles you created.
        # The context manager closes the file handle, which was previously leaked.
        with open(p, 'rb') as pkl:
            eptu = pd.read_pickle(pkl)
        eptu['cellnum2d'] = (eptu['Initial Row']-1) * ncol + (eptu['Initial Column'] -1)  # -1 to convert to 0-based
        # NOTE(review): DataFrame.join(on=...) matches eptu['cellnum2d'] against
        # the *index* of df (the row labels from read_csv), not against
        # df['cellnum2d'].  This is only correct if the csv row position of a
        # cell equals its 2D cell number -- confirm against model_grid.csv.
        eptu_mg = eptu.join(df, on='cellnum2d', lsuffix='_ept', rsuffix='_mg')
        eptu_mg = eptu_mg[eptu_mg['Initial Layer'] <= surf_aq_lays]  # ensure that we're only analyzing Glacial!
        dfdict[model] = eptu_mg
        totp[model] = eptu_mg.rt.count()

    except (AttributeError, ValueError, IOError, IndexError) as err:
        print('ERROR. THIS CODE BLOCK DID NOT COMPLETE. TROUBLE-SHOOT AND TRY AGAIN')
        print('The error occured while working on this model: {}'.format(model))
        # Show what actually went wrong before stopping the run.
        print('{}: {}'.format(type(err).__name__, err))
        raise SystemExit() from err

In [None]:
# Plot cumulative residence-time distributions (RTDs) for every model in
# modlist, one panel per ID of the selected HUC scale (`category`).

uniques = dfdict[modlist[0]][category].unique()
# Particles that did not pick up a grid attribute carry NaN here; there is
# nothing to plot for them, so do not reserve a panel.
uniques = uniques[~pd.isnull(uniques)]
# remove any HUCs listed in purge_hucs
uniques = uniques[~np.isin(uniques, purge_hucs)]

n_uni = len(uniques)
# running total of plotted particles per model
sum_p = {mn: 0 for mn in modlist}

#if portrait:
#    vplots = int(np.ceil(len(uniques)/ 3.0))
#    figsize = (10, 3*vplots)
#    CS, CSaxes = plt.subplots(vplots, 3, figsize=figsize)

#else:
if n_uni <= 20:
    # few panels: fixed 3-column layout
    vplots = int(np.ceil(n_uni/ 3.0))
    figsize = (12, 3*vplots)
    CS, CSaxes = plt.subplots(vplots, 3, figsize=figsize)
else:
    # many panels: roughly square layout
    hplots = int(np.round(np.sqrt(n_uni)))
    vplots = int(np.ceil(np.sqrt(n_uni)))
    figsize = (hplots*4, hplots*3)
    CS, CSaxes = plt.subplots(vplots, hplots, figsize=figsize)

colors_line = plt.cm.brg(np.linspace(0, 1, len(modlist)))

for ax, cat_val in zip(CSaxes.flat, uniques):
    n = []  # particle count of each model for this HUC
    for i, md in enumerate(modlist):
        # 'rt' is "raw time" in the dataframe.  sort_values() returns a new
        # Series, so nothing is modified in place on a slice of dfdict[md].
        rt = dfdict[md].loc[dfdict[md][category]==cat_val, 'rt'].sort_values()
        n.append(rt.count())
        sum_p[md] = sum_p[md] + rt.count()
        y_rt = np.linspace(0, 1, rt.shape[0])

        ax.plot(rt, y_rt, c=colors_line[i], label=md)

    # Axis decoration is the same whichever model was drawn last, so do it
    # once per panel rather than once per model.
    ax.plot((age_cutoff, age_cutoff), (0.2, 1), 'k--')  # young/old age cutoff

    title = '{}: {}'.format(category, cat_val)
    ax.set_title(title, fontsize=12)

    ax.set_xscale('log')
    ax.set_xlim(1e0, 1e3)
    ax.set_ylim(0)

    ax.legend(loc=0, frameon=False, fontsize=8)#, bbox_to_anchor=(0.20, 0.2), ncol=1)
    ax.set_xlabel('Residence time, in years')
    ax.set_ylabel('Cumulative frequency')
    nmin, nmax = min(n), max(n)
    ax.text(5,0.02, '# particles: {:,} - {:,}'.format(nmin, nmax))

# The grid usually has more cells than there are HUCs; hide the spare axes.
for spare_ax in CSaxes.flat[n_uni:]:
    spare_ax.set_visible(False)

CS.suptitle('Comparison of glacial particle time distributions by {} for the FWP models'.format(category), fontsize=18)  
CS.tight_layout()
#if portrait:
#    CS.subplots_adjust(top= 1-(n_uni/3*0.021), hspace=0.5)  # so suptitle doesn't get over-lapped when using tight_layout
#else:#  6: 0.86 is ideal.  18: 0.89-0.9 is ideal, 102: 0.95-0.96,  443: 0.97
if n_uni < 18:
    #CS.subplots_adjust(top= 0.86, hspace=0.85)
    CS.subplots_adjust(top= 0.86)
elif n_uni < 100:
    CS.subplots_adjust(top= 0.89, hspace=0.55)
elif n_uni < 400:
    CS.subplots_adjust(top= 0.95, hspace=0.55)
else:
    CS.subplots_adjust(top= 0.97, hspace=0.55)

#dst = 'RTD_compare--{}'.format(category)
#loc = os.path.dirname(path[modlist[0]])  # should go up one directory to the dir that houses all of the models.
#dst_pth = os.path.join(loc, dst)
#plt.savefig(dst_pth)
#plt.close()

In [None]:
# 1:1 plot of Young Fraction btwn 5LhK against 1L and 5LzK models for select HUC scale.
# Young fraction = (# particles with rt < age_cutoff) / (# particles) within a HUC.

uniques = dfdict[modlist[0]][category].unique()
# remove any HUCs listed in purge_hucs
uniques = uniques[~np.isin(uniques, purge_hucs)]

yfhucdict = {}  # HUC ID -> {model: young fraction}, only for HUCs kept for plotting
yfvalues = []   # every young fraction that ends up on the plot
skiphuc = []    # HUC IDs dropped because some model had too few particles
for cat_val in uniques:  # each HUC ID
    yfmoddict = {}
    for md in modlist:  # each of the FWP models
        df = dfdict[md][dfdict[md][category]==cat_val]
        ntot = df.rt.count()
        # max(..., 1) guards the division below if minptl is ever set to 0
        if ntot < max(minptl, 1):
            skiphuc.append(cat_val)
            break
        yfmoddict[md] = df.loc[df.rt < age_cutoff].rt.count() / ntot
    else:
        # Reached only when every model had enough particles.  Values are
        # recorded here (not inside the model loop) so that HUCs which are
        # skipped do not stretch the 1:1 line or move the R2 labels.
        yfhucdict[cat_val] = yfmoddict
        yfvalues.extend(yfmoddict.values())

ddd = pd.DataFrame(yfhucdict).T
print(mean_squared_error(np.array(ddd[x1]), np.array(ddd[y1])))
r2_y1 = r2_score(np.array(ddd[x1]), np.array(ddd[y1]))
r2_y2 = r2_score(np.array(ddd[x1]), np.array(ddd[y2]))
print(r2_y1)
print(r2_y2)

ax = ddd.plot(kind='scatter', x=x1, y=y1, marker='o', color='green', label='1-layer zoned K')
ddd.plot(kind='scatter', x=x1, y=y2, marker='^', color='blue', label = '5-layer zoned K', ax=ax)
mini, maxi = min(yfvalues), max(yfvalues)
ax.plot((mini, maxi), (mini, maxi), 'k--')  # 1:1 line
plt.xlabel('Fraction of young water (<{} yr) in the complex model'.format(age_cutoff))
plt.ylabel('Fraction of young water (<{} yr) in the simple models'.format(age_cutoff))
plt.suptitle('Comparison of glacial young fraction\n among models by {}'.format(category), fontsize=14)
ax.text(0.76, maxi, 'R2 = {:3.3f}'.format(r2_y1), fontsize=9, color='green')
ax.text(0.76, maxi-0.02, 'R2 = {:3.3f}'.format(r2_y2), fontsize=9, color='blue')

#dst = 'YFrac_glac_121_{}'.format(category)
#loc = os.path.dirname(path[modlist[0]])  # should go up one directory to the dir that houses all of the models.
#dst_pth = os.path.join(loc, dst)
#plt.savefig(dst_pth)
#plt.close()

In [None]:
# 1:1 plot of Young Fraction btwn 5LhK against 1L and 5LzK models for ALL HUC scales.
# One panel per HUC scale; young fraction = share of particles with rt < age_cutoff.

hucs = ['HUC6', 'HUC8', 'HUC10', 'HUC12']
nhucs = len(hucs)
fig, [[ax1, ax2], [ax3, ax4]] = plt.subplots(nrows=2, ncols=2, figsize=(10, 10))
axisdict = {0:ax1, 1:ax2, 2:ax3, 3:ax4}

for k, each in enumerate(hucs):
    # make sure we get ALL unique HUC IDs, whichever model they appear in
    # (works for any number of models, not just three)
    uniques = np.unique(np.concatenate([dfdict[md][each].unique() for md in modlist]))
    # remove any HUCs listed in purge_hucs
    uniques = uniques[~np.isin(uniques, purge_hucs)]
    axis = axisdict[k]

    yfhucdict = {}  # HUC ID -> {model: young fraction}, only for HUCs kept for plotting
    yfvalues = []   # every young fraction that ends up on this panel
    skiphuc = []    # HUC IDs dropped because some model had too few particles
    for cat_val in uniques:  # each HUC ID
        yfmoddict = {}
        for md in modlist:  # each of the FWP models
            df = dfdict[md].loc[dfdict[md][each]==cat_val]
            ntot = df.rt.count()
            # max(..., 1) guards the division below if minptl is ever set to 0
            if ntot < max(minptl, 1):
                skiphuc.append(cat_val)
                break
            yfmoddict[md] = df.loc[df.rt < age_cutoff].rt.count() / ntot
        else:
            # Only reached when every model had enough particles; recording the
            # values here keeps skipped HUCs out of the plot extents.
            yfhucdict[cat_val] = yfmoddict
            yfvalues.extend(yfmoddict.values())

    ddd = pd.DataFrame(yfhucdict).T
    r2_y1 = r2_score(np.array(ddd[x1]), np.array(ddd[y1]))
    r2_y2 = r2_score(np.array(ddd[x1]), np.array(ddd[y2]))

    #plotting
    mini, maxi = min(yfvalues), max(yfvalues)
    yrange = maxi-mini
    if k == 0:
        # only the first panel carries the legend; its R2 labels sit lower to clear it
        ddd.plot(kind='scatter', x=x1, y=y1, marker='o', color='green', label='1-layer zoned K', ax=axis)
        ddd.plot(kind='scatter', x=x1, y=y2, marker='^', color='blue', label = '5-layer zoned K', ax=axis)
        axis.text(mini, mini+0.88*yrange, 'R2 = {:3.3f}'.format(r2_y1), fontsize=9, color='green')
        axis.text(mini, mini+0.83*yrange, 'R2 = {:3.3f}'.format(r2_y2), fontsize=9, color='blue')
    else:
        ddd.plot(kind='scatter', x=x1, y=y1, marker='o', color='green', ax=axis)
        ddd.plot(kind='scatter', x=x1, y=y2, marker='^', color='blue', ax=axis)
        axis.text(mini, mini+yrange, 'R2 = {:3.3f}'.format(r2_y1), fontsize=9, color='green')
        axis.text(mini, mini+0.95*yrange, 'R2 = {:3.3f}'.format(r2_y2), fontsize=9, color='blue')

    axis.plot((mini, maxi), (mini, maxi), 'k--')  # 1:1 line
    # per-panel axis labels are blanked; shared labels are added to the figure below
    axis.set_xlabel('')
    axis.set_ylabel('')
    axis.set_title(each)

    # Add an attribute to the HUC shapefile
    #shapefile = HUCshpdict[each]
    #shp = gpd.read_file(shapefile)  
    #shp[each] = shp[each].astype('int64')  # convert so can merge
    ddd.index = ddd.index.astype('int64')  # convert so can merge
    ddd['YF_hK-1L'] = ddd[x1] - ddd[y1]
    ddd['YF_hK-zK'] = ddd[x1] - ddd[y2]

    #df = pd.merge(shp, ddd, how='outer', left_on=each, right_index=True)
    #dst = shapefile[:-4] + '_YF.shp'
    #df.to_file(dst)

fig.text(0.5, 0.06, 'Fraction of young water (<{} yr) in the complex model'.format(age_cutoff), ha='center')
fig.text(0.06, 0.48, 'Fraction of young water (<{} yr) in the simple models'.format(age_cutoff), va='center', rotation='vertical')
plt.subplots_adjust(top = .9)
fig.suptitle('Comparison of glacial aquifer\n young fractions by HUCs', fontsize=14)

#dst = 'YFrac_glac_121_allHUCs'
#loc = os.path.dirname(path[modlist[0]])  # go up one directory to the dir that houses all of the models.
#dst_pth = os.path.join(loc, dst)
#plt.savefig(dst_pth)
#plt.close()

In [None]:
# 1:1 plot of the MEDIAN age of young water (rt < age_cutoff) and of old water
# (rt >= age_cutoff) btwn 5LhK against 1L and 5LzK models for select HUC scale.

# make sure we get ALL unique HUC IDs, whichever model they appear in
uniques = np.unique(np.concatenate([dfdict[md][category].unique() for md in modlist]))
# remove any HUCs listed in purge_hucs
uniques = uniques[~np.isin(uniques, purge_hucs)]

fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(10, 4.6))

skiphuc = []    # HUC IDs dropped because some model had too few particles
yfhucdict = {}  # HUC ID -> {model: median young age}
ofhucdict = {}  # HUC ID -> {model: median old age}
for cat_val in uniques:  # each HUC ID
    yfmoddict = {}
    ofmoddict = {}
    for md in modlist:  # each of the FWP models
        df = dfdict[md].loc[dfdict[md][category]==cat_val]
        if df.rt.count() < minptl:
            skiphuc.append(cat_val)
            break
        # median() of an empty selection is NaN; those rows are removed below
        yfmoddict[md] = df.loc[df.rt < age_cutoff, 'rt'].median()
        ofmoddict[md] = df.loc[df.rt >= age_cutoff, 'rt'].median()
    else:
        # only reached when every model had enough particles
        yfhucdict[cat_val] = yfmoddict
        ofhucdict[cat_val] = ofmoddict

# A HUC with no young (or no old) particles in some model has a NaN median.
# r2_score rejects NaN and min()/max() are unreliable with it, so drop those
# HUCs here, as the all-HUC-scales version of this plot does.
young = pd.DataFrame(yfhucdict).T.dropna(subset=modlist)
old = pd.DataFrame(ofhucdict).T.dropna(subset=modlist)
r2_y1y = r2_score(np.array(young[x1]), np.array(young[y1]))
r2_y2y = r2_score(np.array(young[x1]), np.array(young[y2]))
r2_y1o = r2_score(np.array(old[x1]), np.array(old[y1]))
r2_y2o = r2_score(np.array(old[x1]), np.array(old[y2]))

#plotting
# Extents come from the values that are actually plotted.  (The former `xplot`
# line was unused and read mini/maxi left over from a different cell.)
yfvalues = young[modlist].values.ravel().tolist()
ofvalues = old[modlist].values.ravel().tolist()
miny, maxy = min(yfvalues), max(yfvalues)
mino, maxo = min(ofvalues), max(ofvalues)
yrange_y = maxy-miny
yrange_o = maxo-mino

young.plot(kind='scatter', x=x1, y=y1, marker='o', c='green', label='1-layer zoned K', ax=ax1)
young.plot(kind='scatter', x=x1, y=y2, marker='^', c='blue', label='5-layer zoned K', ax=ax1)
old.plot(kind='scatter', x=x1, y=y1, marker='o', c='green', ax=ax2)
old.plot(kind='scatter', x=x1, y=y2, marker='^', c='blue', ax=ax2)
ax1.text(miny, miny+0.80*yrange_y, 'R2 = {:3.3f}'.format(r2_y1y), fontsize=9, color='green')
ax1.text(miny, miny+0.75*yrange_y, 'R2 = {:3.3f}'.format(r2_y2y), fontsize=9, color='blue')
ax2.text(mino, mino+yrange_o, 'R2 = {:3.3f}'.format(r2_y1o), fontsize=9, color='green')
ax2.text(mino, mino+0.95*yrange_o, 'R2 = {:3.3f}'.format(r2_y2o), fontsize=9, color='blue')
ax1.plot((miny, maxy), (miny, maxy), 'k--')  # 1:1 lines
ax2.plot((mino, maxo), (mino, maxo), 'k--')
# per-panel axis labels are blanked; shared labels are added to the figure below
for panel in (ax1, ax2):
    panel.set_xlabel('')
    panel.set_ylabel('')
fig.text(0.25, 0.82, 'Young fraction')
fig.text(0.73, 0.82, 'Old fraction', ha='center')
fig.text(0.07, 0.45, 'Median age (yrs) for the simpler models', va='center', rotation='vertical')
fig.text(0.5, 0.01, 'Median age (yrs) for the complex model', ha='center')
plt.subplots_adjust(top = .8)
fig.suptitle('Comparison of median ages for young and\n old water in the glacial aquifer by {}'.format(category), fontsize=14)
#fig.tight_layout()

#dst = 'YnOages_121_{}'.format(category)
#loc = os.path.dirname(path[modlist[0]])  # should go up one directory to the dir that houses all of the models.
#dst_pth = os.path.join(loc, dst)
#plt.savefig(dst_pth)
#plt.close()

In [None]:
# 1:1 plot of the MEDIAN age of young water (rt < age_cutoff) and of old water
# (rt >= age_cutoff) btwn 5LhK against 1L and 5LzK models for ALL HUC scales.
# Also keeps a copy of the particle data for the generalized-model basins
# listed in genHUCdict (FWP*ocontoDF / FWP*twrDF) for later use.

hucs = ['HUC6', 'HUC8', 'HUC10', 'HUC12']
nhucs = len(hucs)
fig, [[ax1, ax2, ax3, ax4], [ax5, ax6, ax7, ax8]] = plt.subplots(nrows=2, ncols=4, figsize=(20, 10))
youngaxisdict = {0:ax1, 1:ax2, 2:ax5, 3:ax6}
oldaxisdict = {0:ax3, 1:ax4, 2:ax7, 3:ax8}

# HUC IDs of the generalized-model areas, as numbers so they compare with the HUC columns
oconto_id = float(genHUCdict['Oconto'])
twr_id = float(genHUCdict['TWR'])

for k, each in enumerate(hucs):
    # make sure we get ALL unique HUC IDs, whichever model they appear in
    uniques = np.unique(np.concatenate([dfdict[md][each].unique() for md in modlist]))
    # remove any HUCs listed in purge_hucs
    uniques = uniques[~np.isin(uniques, purge_hucs)]
    youngaxis = youngaxisdict[k]
    oldaxis = oldaxisdict[k]
    ofhucdict = {}  # HUC ID -> {model: median old age}
    yfhucdict = {}  # HUC ID -> {model: median young age}
    skiphuc = []    # HUC IDs dropped because some model had too few particles
    for cat_val in uniques:  # each HUC ID
        yfmoddict = {} 
        ofmoddict = {} 
        for md in modlist:  # each of the FWP models
            df = dfdict[md].loc[dfdict[md][each]==cat_val]
            if df.rt.count() < minptl:
                skiphuc.append(cat_val)
                break
            # median() of an empty selection is NaN; those rows are removed below
            yfmoddict[md] = df.loc[df.rt < age_cutoff, 'rt'].median()
            ofmoddict[md] = df.loc[df.rt >= age_cutoff, 'rt'].median()

            # pull out raw time info for generalized model areas for later use...
            # Model names look like 'FWP5L_zK', so compare on a lower-cased,
            # underscore-free key; the plain `'fwp5lzk' in md` test never matched.
            md_key = md.lower().replace('_', '')
            if cat_val == oconto_id and 'fwp5lzk' in md_key:  # 5lzK is the most similar to Generalized models
                FWPzKocontoDF = df.copy()
            elif cat_val == twr_id and 'fwp5lzk' in md_key:
                FWPzKtwrDF = df.copy()
            elif cat_val == oconto_id and 'fwp5lhk' in md_key:
                FWPhKocontoDF = df.copy()
            elif cat_val == twr_id and 'fwp5lhk' in md_key:
                FWPhKtwrDF = df.copy()
            elif cat_val == oconto_id and 'fwp1l' in md_key:
                FWP1locontoDF = df.copy()
            elif cat_val == twr_id and 'fwp1l' in md_key:
                FWP1ltwrDF = df.copy()
        else:
            # only reached when every model had enough particles
            yfhucdict[cat_val] = yfmoddict
            ofhucdict[cat_val] = ofmoddict

    # need to remove any NANs (HUCs lacking young or old particles in some model)
    young = pd.DataFrame(yfhucdict).T.dropna(subset=modlist)
    old = pd.DataFrame(ofhucdict).T.dropna(subset=modlist)
    r2_y1y = r2_score(np.array(young[x1]), np.array(young[y1]))
    r2_y2y = r2_score(np.array(young[x1]), np.array(young[y2]))
    r2_y1o = r2_score(np.array(old[x1]), np.array(old[y1]))
    r2_y2o = r2_score(np.array(old[x1]), np.array(old[y2]))

    #plotting
    # Extents come from the cleaned frames: min()/max() over a list that may
    # contain NaN, or values of HUCs that were skipped, is not reliable.
    yfvalues = young[modlist].values.ravel().tolist()
    ofvalues = old[modlist].values.ravel().tolist()
    miny, maxy = min(yfvalues), max(yfvalues)
    mino, maxo = min(ofvalues), max(ofvalues)
    yrange_y = maxy-miny
    yrange_o = maxo-mino

    if k == 0:
        # only the first young-water panel carries the legend
        young.plot(kind='scatter', x=x1, y=y1, marker='o', color='green', label='1-layer zoned K', ax=youngaxis)
        young.plot(kind='scatter', x=x1, y=y2, marker='^', color='blue', label = '5-layer zoned K', ax=youngaxis)
    else:
        young.plot(kind='scatter', x=x1, y=y1, marker='o', color='green', ax=youngaxis)
        young.plot(kind='scatter', x=x1, y=y2, marker='^', color='blue', ax=youngaxis)
    old.plot(kind='scatter', x=x1, y=y1, marker='o', color='green', ax=oldaxis)
    old.plot(kind='scatter', x=x1, y=y2, marker='^', color='blue', ax=oldaxis)

    youngaxis.text(miny, miny+yrange_y, 'R2 = {:3.3f}'.format(r2_y1y), fontsize=9, color='green')
    youngaxis.text(miny, miny+0.95*yrange_y, 'R2 = {:3.3f}'.format(r2_y2y), fontsize=9, color='blue')
    oldaxis.text(mino, mino+yrange_o, 'R2 = {:3.3f}'.format(r2_y1o), fontsize=9, color='green')
    oldaxis.text(mino, mino+0.95*yrange_o, 'R2 = {:3.3f}'.format(r2_y2o), fontsize=9, color='blue')
    youngaxis.plot((miny, maxy), (miny, maxy), 'k--')  # 1:1 lines
    oldaxis.plot((mino, maxo), (mino, maxo), 'k--')

    # per-panel axis labels are blanked; shared labels are added to the figure below
    for panel in (youngaxis, oldaxis):
        panel.set_xlabel('')
        panel.set_ylabel('')
        panel.set_title(each)

    # Add an attribute to the HUC shapefile
    #shapefile = HUCshpdict[each]
    #shp = gpd.read_file(shapefile)  
    #shp[each] = shp[each].astype('int64')  # convert so can merge

    young.index = young.index.astype('int64')  # convert so can merge
    young['mYage_hK-1L'] = young[x1] - young[y1]
    young['mYage_hK-zK'] = young[x1] - young[y2]
    #df = pd.merge(shp, young, how='outer', left_on=each, right_index=True)
    #dst = shapefile[:-4] + '_mYage.shp'
    #df.to_file(dst)

    old.index = old.index.astype('int64')  # convert so can merge
    old['mOage_hK-1L'] = old[x1] - old[y1]
    old['mOage_hK-zK'] = old[x1] - old[y2]
    #dfo = pd.merge(shp, old, how='outer', left_on=each, right_index=True)
    #dsto = shapefile[:-4] + '_mOage.shp'
    #dfo.to_file(dsto)

fig.text(0.28, 0.92, 'Young fraction', fontsize=12)
fig.text(0.71, 0.92, 'Old fraction', ha='center', fontsize=12)
fig.text(0.5, 0.055, 'Median age (yrs) for the complex model', ha='center', fontsize=12)
fig.text(0.095, 0.48, 'Median age (yrs) for the simpler models', va='center', fontsize=12, rotation='vertical')
plt.subplots_adjust(top = .89)
fig.suptitle('Comparison of median ages for young and old water in the glacial aquifer by HUCs', fontsize=16)

# add background color
# NOTE(review): these rectangles are given in raw numbers with no transform,
# presumably pixel coordinates tuned to this figure size -- confirm if the
# figsize or dpi changes.
youngrect = patches.Rectangle((200,70), 760, 830, zorder=-1, alpha=0.5, facecolor='b')
oldrect = patches.Rectangle((960,70), 780, 830, zorder=-1, alpha=0.5, facecolor='r')
fig.patches.append(youngrect)
fig.patches.append(oldrect)

#dst = 'YnOages_121_allHUCs'
#loc = os.path.dirname(path[modlist[0]])  # go up one directory to the dir that houses all of the models.
#dst_pth = os.path.join(loc, dst)
#plt.savefig(dst_pth)
#plt.close()

In [None]:
# 1:1 plot of the median young-water age btwn 5LhK and 5LzK models for all HUC
# scales; color coded by differences in parameter values among the 5LhK and
# 5LzK models.  What's causing the age differences?
# One figure is produced per raster in HUCproprastlist, one panel per HUC scale.

metric = 'mean'  # valid:  'mean', 'min', 'max', 'range', 'std'

hucs = ['HUC6', 'HUC8', 'HUC10', 'HUC12']
nhucs = len(hucs)

# ---------------------------------------------------------------------------
# Step 1: everything that depends only on the HUC scale.  None of this changes
# from one property raster to the next, so it is computed once here instead of
# once per raster.
# ---------------------------------------------------------------------------
scale_info = {}
for each in hucs:
    # make sure we get ALL unique HUC IDs, whichever model they appear in
    uniques = np.unique(np.concatenate([dfdict[md][each].unique() for md in modlist]))
    uniques = uniques[~pd.isnull(uniques)]  # NaN cannot be cast to an integer HUC ID
    # remove any HUCs listed in purge_hucs
    uniques = uniques[~np.isin(uniques, purge_hucs)]

    yfhucdict = {}  # HUC ID -> {model: median young age}
    skiphuc = []    # HUC IDs dropped because some model had too few particles
    for cat_val in uniques.astype('int64'):  # each HUC ID
        yfmoddict = {}
        for md in modlist:  # each of the FWP models
            df = dfdict[md].loc[dfdict[md][each]==cat_val]
            if df.rt.count() < minptl:
                skiphuc.append(cat_val)
                break
            yfmoddict[md] = df.loc[df.rt < age_cutoff, 'rt'].median()
        else:
            # only reached when every model had enough particles
            yfhucdict[cat_val] = yfmoddict

    # need to remove any NANs (HUCs with no young particles in some model)
    ddd = pd.DataFrame(yfhucdict).T.dropna(subset=modlist)
    r2_y2 = r2_score(np.array(ddd[x1]), np.array(ddd[y2]))  #x1 = 5LheteroK; y2 = 5LzonedK
    yfvalues = ddd[modlist].values.ravel().tolist()  # only values that get plotted
    mini, maxi = min(yfvalues), max(yfvalues)

    # Use the 'HUC' Tiffs to match up with model properties
    raster_file = HUCtiffdict[each]
    with rasterio.open(raster_file) as raster:
        hucrast = raster.read()[0]
    u = np.unique(hucrast)
    # Use a cross-reference approach for HUC10 and 12 cuz large INTs mess everything up!
    use_crossref = max(u) < 10000  # Less than any HUC6 ID number
    crossdict = {}
    if use_crossref:
        with open(os.path.splitext(raster_file)[0] + '_crossref.txt', mode='r') as infile:
            reader = csv.reader(infile)
            next(reader, None)  # skip the headers
            crossdict = {int(rows[0]):np.int64(rows[1]) for rows in reader}

    scale_info[each] = {'ddd': ddd, 'r2_y2': r2_y2, 'mini': mini, 'maxi': maxi,
                        'hucrast': hucrast, 'u': u,
                        'use_crossref': use_crossref, 'crossdict': crossdict}

# ---------------------------------------------------------------------------
# Step 2: one figure per property raster.
# ---------------------------------------------------------------------------
for prop in HUCproprastlist:
    fig, [[ax1, ax2], [ax3, ax4]] = plt.subplots(nrows=2, ncols=2, figsize=(9, 9))
    axisdict = {0:ax1, 1:ax2, 2:ax3, 3:ax4}

    with rasterio.open(prop) as raster:
        proprast = raster.read()[0]

    for k, each in enumerate(hucs):
        axis = axisdict[k]
        info = scale_info[each]
        hucrast, u = info['hucrast'], info['u']
        use_crossref, crossdict = info['use_crossref'], info['crossdict']
        r2_y2, mini, maxi = info['r2_y2'], info['mini'], info['maxi']

        # Work on a copy so the cached frame stays clean for the next raster.
        # The column is float from the start because float statistics are
        # written into it; HUCs without a zonal value keep 0.
        ddd = info['ddd'].copy()
        ddd[metric] = 0.0

        print('Extracting zonal mean values from {} for {}'.format(prop, each))

        # Now pull out summary statistics for overlapping areas
        for ID in u[u>0]:  # HUC IDs in active part of model (not ID zero)
            idarr = np.zeros_like(hucrast)
            idarr[hucrast==ID] = 1
            # multiply by proprast, compute ave, min, max, range, std (median)
            hucprop = idarr * proprast
            # NOTE(review): this also discards cells inside the HUC whose
            # property value is exactly 0 -- confirm that is intended.
            hucprop[hucprop==0] = np.nan
            pmn, pmin, pmax, pstd = np.nanmean(hucprop), np.nanmin(hucprop), np.nanmax(hucprop), np.nanstd(hucprop)
            prange = pmax - pmin
            metricdict = {'mean':pmn, 'min':pmin, 'max':pmax, 'range':prange, 'std':pstd}
            huc_id = crossdict[ID] if use_crossref else ID
            # Single .loc call on the frame itself: the chained form
            # `ddd[metric].loc[...] = ...` may write to a temporary copy.
            ddd.loc[ddd.index == huc_id, metric] = float(metricdict[metric])  # assigned the appropriate zonal data

        #plotting
        yrange = maxi-mini
        ddd.plot(kind='scatter', x=x1, y=y2, marker='^', c=ddd[metric], cmap='viridis', ax=axis)  # How center on Zero?
        axis.text(mini, mini+0.95*yrange, 'R2 = {:3.3f}'.format(r2_y2), fontsize=9)
        axis.plot((mini, maxi), (mini, maxi), 'k--')  # 1:1 line
        # per-panel axis labels are blanked; shared labels are added to the figure below
        axis.set_xlabel('')
        axis.set_ylabel('')
        axis.set_title(each)

    fig.text(0.48, 0.89, 'Young fraction', fontsize=12)
    fig.text(0.5, 0.105, 'Median age (yrs) for the 5-layer variable K model', ha='center', fontsize=12)
    fig.text(0.07, 0.48, 'Median age (yrs) for the 5-layer zoned K model', va='center', fontsize=12, rotation='vertical')
    plt.subplots_adjust(top = .86, bottom = 0.15)
    #mod_factor = os.path.basename(HUCproprast).split('.')[0]
    mod_factor = os.path.basename(prop).split('.')[0]
    fig.suptitle('Comparison of median Glacial Young ages by HUC scale,\n as informed by model '
                 'property:  {} of {}'.format(metric, mod_factor), fontsize=16)

#    dst = '5L_mYageCompare_by_{}_of_{}.png'.format(metric, mod_factor)
#    loc = os.path.dirname(path[modlist[0]])  # go up one directory to the dir that houses all of the models.
#    dst_pth = os.path.join(loc, dst)
    #plt.savefig(dst_pth)
#    f = plt.gcf()
#    f.savefig(dst_pth)
    #[f.get_axes()[4].remove() for x in range(4,8)] # remove the colorbars
#f.close()
#fig.close()

# Preliminary stuff

## Set user-defined variables

MODFLOW and MODPATH use elapsed time and are not aware of calendar time. To place MODFLOW/MODPATH elapsed time on the calendar, two calendar dates were specified at the top of the notebook: the beginning of the first stress period (`mf_start_date`) and when particles are to be released (`mp_release_date`). The latter date could be used in many ways, for example to represent a sampling date, or it could be looped over to create a time-lapse set of ages. 

### Required: Populate the simulate_list and categories lists.  Also be sure to specify which category to evaluate. Note that categories that are small (lots of individual areas within the model domain) will make plotting VERY slow.

## Loop through home directory to get list of name files

##  Create names and path for model workspace. 

The procedures in this notebook can be run from the notebook or from a batch file by downloading the notebook as a Python script and uncommenting the following code and commenting out the following block. The remainder of the script has to be indented to be included in the loop.  This may require familiarity with Python. 

# Load an existing model

## Specification of time in MODFLOW/MODPATH

There are several time-related concepts used in MODPATH.
* `simulation time` is the elapsed time in model time units from the beginning of the first stress period
* `reference time` is an arbitrary value of `simulation time` that is between the beginning and ending of `simulation time`
* `tracking time` is the elapsed time relative to `reference time`. It is always positive regardless of whether particles are tracked forward or backward
* `release time` is when a particle is released and is specified in `tracking time`

In [None]:
# setup dictionaries of the MODFLOW units for proper labeling of figures.
lenunit = {0:'undefined units', 1:'feet', 2:'meters', 3:'centimeters'}
timeunit = {0:'undefined', 1:'second', 2:'minute', 3:'hour', 4:'day', 5:'year'}

# Divisors for converting model time units to days, keyed by the same codes as
# timeunit:  days = model_time / time_dict[code]
time_dict = {
    0: 1.0,            # undefined assumes days
    1: 24 * 60 * 60,   # seconds per day
    2: 24 * 60,        # minutes per day
    3: 24,             # hours per day
    4: 1.0,            # already days
    # Years per day.  This entry used to be 1.0, which left times expressed in
    # years unconverted; 365.25 days per year is assumed here.
    5: 1.0 / 365.25,
}

In [None]:
# Place MODFLOW/MODPATH elapsed time on the calendar.
#
# NOTE(review): this cell needs mf_start_date_str and mp_release_date_str
# (present only as commented-out lines in the setup cell) and `dis`, which is
# not created anywhere in the visible notebook -- presumably the flopy DIS
# package of a loaded model.  Define them before running this cell.

# The notebook-level import is `from datetime import datetime`, which does not
# bind the name `dt` used below.
import datetime as dt

# convert string representation of dates into Python datetime objects
mf_start_date = dt.datetime.strptime(mf_start_date_str , '%m/%d/%Y')
mp_release_date = dt.datetime.strptime(mp_release_date_str , '%m/%d/%Y')

# convert simulation time to days from the units specified in the MODFLOW DIS file
sim_time = np.append(0, dis.get_totim())
sim_time /= time_dict[dis.itmuni]

# make a list of simulation time formatted as calendar dates
date_list = [mf_start_date + dt.timedelta(days = item) for item in sim_time]

# reference time and date are set to the end of the last stress period
ref_time = sim_time[-1]
ref_date = date_list[-1]

# release time is calculated in tracking time (for particle release) and 
# in simulation time (for identifying head and budget components)
release_time_trk = np.abs((ref_date - mp_release_date).days)
release_time_sim = (mp_release_date - mf_start_date).days