In [1]:
import os
from pathlib import Path
import subprocess
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from matplotlib.lines import Line2D
import pyemu
import flopy
import warnings
warnings.filterwarnings(action='ignore')
import pandas as pd
import geopandas as gpd
import numpy as np
from IPython.display import display
import shutil
import math
from sklearn.metrics import mean_squared_error

In [2]:
cwd = os.getcwd()
base_model_dir = Path('../../Base_Model_for_Scenarios').resolve()
scenario_notebook_dir = Path('../').resolve()
obs_locs_dir =  Path('../../Locations/Obs_locs').resolve()
print(cwd)
print(base_model_dir)
print(scenario_notebook_dir)
print(obs_locs_dir)

C:\Users\farnut1\Desktop\How-Many-Realizations-main\Scenario_Notebook\Charts_Figures
C:\Users\farnut1\Desktop\How-Many-Realizations-main\Base_Model_for_Scenarios
C:\Users\farnut1\Desktop\How-Many-Realizations-main\Scenario_Notebook
C:\Users\farnut1\Desktop\How-Many-Realizations-main\Locations\Obs_locs


In [3]:
# Get grid geodataframe from base_model
# Get model_root_name from listing file
for file in os.listdir(base_model_dir):
    if file.endswith(".dis"):
        disfile = file
gwfmodel_root_name = os.path.splitext(disfile)[0]
print(gwfmodel_root_name)

# Load model
sim = flopy.mf6.MFSimulation.load(f'{gwfmodel_root_name}', sim_ws=base_model_dir, exe_name='mf6.exe')
model = sim.get_model(f'{gwfmodel_root_name}')

# Get geodataframe and nodes of model for use later
modelprops = model.modelgrid.geo_dataframe.reset_index()
modelprops = modelprops.rename(columns={'index':'node'})
# Make 1 based
modelprops['node'] +=1

fp_mf6_model
loading simulation...
  loading simulation name file...
  loading tdis package...
  loading model gwf6...
    loading package dis...
    loading package ic...
    loading package riv...
    loading package ghb...
    loading package wel...
    loading package rch...
    loading package npf...
    loading package oc...
  loading solution package fp_mf6_model...


In [4]:
# Get list of all folders in scenario_notebook folder
folders = [f.name for f in os.scandir(scenario_notebook_dir) if f.is_dir()]

# Filter to just our possible scenarios
scenario_folders = []
for file in folders:
    if file.startswith('R25'):
        scenario_folders.append(file)
for file in folders:
    if file.startswith('C25'):
        scenario_folders.append(file)
for file in folders:
    if file.startswith('R100'):
        scenario_folders.append(file)
for file in folders:
    if file.startswith('C100'):
        scenario_folders.append(file)
print(scenario_folders)

['R25_pp10_real10', 'R25_pp10_real100', 'R25_pp10_real1000', 'R25_pp10_real2000', 'R25_pp10_real25', 'R25_pp10_real250', 'R25_pp10_real50', 'R25_pp10_real500', 'R25_pp25_real10', 'R25_pp25_real100', 'R25_pp25_real1000', 'R25_pp25_real2000', 'R25_pp25_real25', 'R25_pp25_real250', 'R25_pp25_real50', 'R25_pp25_real500', 'R25_pp50_real10', 'R25_pp50_real100', 'R25_pp50_real1000', 'R25_pp50_real2000', 'R25_pp50_real25', 'R25_pp50_real250', 'R25_pp50_real50', 'R25_pp50_real500', 'C25_pp10_real10', 'C25_pp10_real100', 'C25_pp10_real1000', 'C25_pp10_real2000', 'C25_pp10_real25', 'C25_pp10_real250', 'C25_pp10_real50', 'C25_pp10_real500', 'C25_pp25_real10', 'C25_pp25_real100', 'C25_pp25_real1000', 'C25_pp25_real2000', 'C25_pp25_real25', 'C25_pp25_real250', 'C25_pp25_real50', 'C25_pp25_real500', 'C25_pp50_real10', 'C25_pp50_real100', 'C25_pp50_real1000', 'C25_pp50_real2000', 'C25_pp50_real25', 'C25_pp50_real250', 'C25_pp50_real50', 'C25_pp50_real500', 'R100_pp10_real10', 'R100_pp10_real100', 'R10

In [5]:
# Get truth observed locations and heads
obs_locsR25 =  pd.read_csv(obs_locs_dir / 'R25.csv')
obs_locsR100 = pd.read_csv(obs_locs_dir / 'R100.csv')
obs_locsC25 = pd.read_csv(obs_locs_dir / 'C25.csv')
obs_locsC100 = pd.read_csv(obs_locs_dir / 'C100.csv')

In [6]:
# Make empty lists to be filled with 1to1 stats below
scenario_list = []
reals_list = []
iteration_list = []
group_list = []
numofobs_list = []
obs_data_range_list = []
avg_list = []
min_res_list = []
max_res_list = []
rms_list = []
nrms_list = []

for scenario in scenario_folders:
    if scenario.startswith('R25_pp10'):
        Group=1
        obs_locs = obs_locsR25
        one2onelabel = 'Obs: Random 25    PP:800'
    if scenario.startswith('R100_pp10'):
        Group=2
        obs_locs = obs_locsR100
        one2onelabel = 'Obs: Random 100    PP:800'
    if scenario.startswith('C25_pp10'):
        Group=3
        obs_locs = obs_locsC25
        one2onelabel = 'Obs: Clustered 25    PP:800'
    if scenario.startswith('C100_pp10'):
        Group=4
        obs_locs = obs_locsC100
        one2onelabel = 'Obs: Clustered 100    PP:800'
    if scenario.startswith('R25_pp25'):
        Group=5
        obs_locs = obs_locsR25
        one2onelabel = 'Obs: Random 25    PP:128'
    if scenario.startswith('R100_pp25'):
        Group=6
        obs_locs = obs_locsR100
        one2onelabel = 'Obs: Random 100    PP:128'
    if scenario.startswith('C25_pp25'):
        Group=7
        obs_locs = obs_locsC25
        one2onelabel = 'Obs: Clustered 25    PP:128'
    if scenario.startswith('C100_pp25'):
        Group=8
        obs_locs = obs_locsC100
        one2onelabel = 'Obs: Clustered 100    PP:128'
    if scenario.startswith('R25_pp50'):
        Group=9
        obs_locs = obs_locsR25
        one2onelabel = 'Obs: Random 25    PP:32'
    if scenario.startswith('R100_pp50'):
        Group=10
        obs_locs = obs_locsR100
        one2onelabel = 'Obs: Random 100    PP:32'
    if scenario.startswith('C25_pp50'):
        Group=11
        obs_locs = obs_locsC25
        one2onelabel = 'Obs: Clustered 25    PP:32'
    if scenario.startswith('C100_pp50'):
        Group=12
        obs_locs = obs_locsC100
        one2onelabel = 'Obs: Clustered 100    PP:32'

    if scenario.endswith('real10'):
        reals=10
    if scenario.endswith('real25'):
        reals=25
    if scenario.endswith('real50'):
        reals=50
    if scenario.endswith('real100'):
        reals=100
    if scenario.endswith('real250'):
        reals=250
    if scenario.endswith('real500'):
        reals=500
    if scenario.endswith('real1000'):
        reals=1000
    if scenario.endswith('real2000'):
        reals=2000
        

    # copy over completed IES run into temp folder
    subprocess.call(['robocopy', str(scenario_notebook_dir) +'\\'+ str(scenario), str(cwd) + '\\temp', '/E'])

    temp_dir = os.path.join(cwd,'temp')

    # Make folder that final shps will be created in
    os.makedirs(os.path.join(cwd,f'Group_{Group}'), exist_ok=True)

    # Get model_root_name from listing file
    for file in os.listdir(temp_dir):
        if file.endswith(".dis"):
            disfile = file
    gwfmodel_root_name = os.path.splitext(disfile)[0]
    print(gwfmodel_root_name)

    # Load model
    sim = flopy.mf6.MFSimulation.load(f'{gwfmodel_root_name}', sim_ws=temp_dir, exe_name='mf6.exe')
    sim.set_sim_path(temp_dir)
    model = sim.get_model(f'{gwfmodel_root_name}')

    # Locate parcsv files in directory
    parcsvfile = []
    for file in os.listdir(temp_dir):
        if file.endswith('par.csv'):
            parcsvfile.append(file)

    # Locate iterations 1, 2, 3 amongst those csv files
    csv123file = []
    for item in parcsvfile:
        if f'{gwfmodel_root_name}.0' in item:
            csv123file.append(item)
    for item in parcsvfile:
        if f'{gwfmodel_root_name}.1' in item:
            csv123file.append(item)
    for item in parcsvfile:
        if f'{gwfmodel_root_name}.2' in item:
            csv123file.append(item)
    for item in parcsvfile:
        if f'{gwfmodel_root_name}.3' in item:
            csv123file.append(item)
    for item in parcsvfile:
        if f'{gwfmodel_root_name}.4' in item:
            csv123file.append(item)
    
    # check to see what all iterations were completed and make list to loop through
    for parcsv in csv123file:
        for item in parcsv:
            if '0' in item:
                iteration = '0'
        for item in parcsv:
            if '1' in item:
                iteration = '1'
        for item in parcsv:
            if '2' in item:
                iteration = '2'
        for item in parcsv:
            if '3' in item:
                iteration = '3'
        for item in parcsv:
            if '4' in item:
                iteration = '4'

        # Locate pst file in directory
        pstfile = []
        for file in os.listdir(temp_dir):
            if file.endswith('.pst'):
                pstfile.append(file)
        print(pstfile)
        
        # Load pst file into pyemu
        pst = pyemu.Pst(os.path.join(temp_dir,pstfile[0]))
        
        # Load parfile csv
        parfilecsv = pd.read_csv(os.path.join(cwd,temp_dir,parcsv))
        parfilecsv = parfilecsv.rename(columns={parfilecsv.columns[0]: 'Realization'})
        realsparfile = parfilecsv['Realization'].to_list() 
        
        # Run parrep on pst files
        pst.parrep(parfile=os.path.join(temp_dir,parcsv), real_name='base', noptmax=0)
        pst.write(os.path.join(temp_dir,f'realizations_base.pst'), version=2)
        
        # Run model once
        pyemu.os_utils.run("pestpp-glm realizations_base.pst", cwd=temp_dir)
        
        # Reload model to get exports from
        sim = flopy.mf6.MFSimulation.load(f'{gwfmodel_root_name}', sim_ws=temp_dir, exe_name='mf6.exe')
        sim.set_sim_path(temp_dir)
        model = sim.get_model(f'{gwfmodel_root_name}')

        # Get lay, row, col in a list for every cell
        lrc_list = []
        for i in range (int(model.modelgrid.nnodes/model.modelgrid.nlay)):
            lrc_list+=model.modelgrid.get_lrc(i)
        
        # convert to a dataframe with headings of lay, row, col
        modelprops_df = pd.DataFrame(lrc_list, columns =['lay', 'row','column'])
        
        # drop lay column
        modelprops_df = modelprops_df.drop('lay', axis=1)
        
        # Make node column
        modelprops_df = modelprops_df.reset_index().rename(columns={"index":"node"})
        
        # Add 1 to each to get to 1 based
        modelprops_df+=1
        
        # Add top to dataframe
        dataset = list(model.modelgrid.top.flat)
        modelprops_df['top']=dataset
        
        # Add botm's to dataframe
        dataset = list(model.modelgrid.botm[0].flat)
        modelprops_df['botm_1']=dataset
        
        # Add cell thickness's to dataframe
        dataset = list(model.modelgrid.cell_thickness[0].flat)
        modelprops_df['thickL1']=dataset
        
        # Add idomain's to dataframe
        dataset = list(model.dis.idomain.data[0].flat)
        modelprops_df['idomain_1']=dataset  
        
        # Add recharge to dataframe
        dataset = list(model.rcha_0.recharge.data.flat)
        modelprops_df['rch']=dataset
        
        # Add k's to dataframe
        dataset = list(model.npf.k.data[0].flat)
        modelprops_df['k_1']=dataset
        
        # Add riv to dataframe
        riv_df = pd.DataFrame(np.hstack(model.riv_0.stress_period_data.data[0]).tolist(), columns=['cellid','riv','rivcond','rivrbot','boundname'])
        rivnodes = model.modelgrid.get_node(riv_df.cellid.tolist())
        rivnodes = [i + 1 for i in rivnodes]
        riv_df['node']=rivnodes
        riv_df = riv_df.drop('cellid', axis=1)
        riv_df = riv_df.drop('boundname', axis=1)
        
        modelprops_df = modelprops_df.merge(riv_df, on='node', how='left')
        
        # Add ghb to dataframe
        ghb_df = pd.DataFrame(np.hstack(model.ghb_0.stress_period_data.data[0]).tolist(), columns=['cellid','ghb','ghbcond'])
        ghbnodes = model.modelgrid.get_node(ghb_df.cellid.tolist())
        ghbnodes = [i + 1 for i in ghbnodes]
        ghb_df['node'] = ghbnodes
        ghb_df = ghb_df.drop('cellid', axis=1)
        
        modelprops_df = modelprops_df.merge(ghb_df, on='node', how='left')
        
        # Add flux to dataframe all in layer 1
        wel_df = pd.DataFrame(np.hstack(model.wel_0.stress_period_data.data[0]).tolist(), columns=['cellid','flux'])
        # split tuple
        wel_df[['lay','row','col']] = wel_df['cellid'].tolist()
        # Make all layer 0
        wel_df['lay'] = 0
        # Re-tuple lay row col
        wel_df['cellid'] = list(zip(wel_df['lay'], wel_df['row'],wel_df['col']))
        wel_df= wel_df[['cellid','flux']]
        
        welnodes = model.modelgrid.get_node(wel_df.cellid.tolist())
        welnodes = [i + 1 for i in welnodes]
        wel_df['node']= welnodes
        wel_df = wel_df.drop('cellid',axis=1)
        
        modelprops_df = modelprops_df.merge(wel_df, on='node', how='left')
        
        # Merge with modelprops to get geometry column
        modelprops_df = modelprops_df.merge(modelprops, on='node')
        # Convert to geodataframe
        modelprops_df = gpd.GeoDataFrame(modelprops_df, crs='EPSG:26915', geometry='geometry')
        
        modelprops_df.to_file(os.path.join(cwd,f'Group_{Group}',f'{scenario}_{iteration}.shp'))

        # 1 to 1 plot
        # Load head_obs.csv
        sim_heads = pd.read_csv(os.path.join(temp_dir,'head_obs.csv'))
        sim_heads = sim_heads.drop(columns='time')
        sim_heads = sim_heads.T.reset_index()
        sim_heads = sim_heads.rename(columns={sim_heads.columns[0]: 'site_name', sim_heads.columns[1]: 'IES_SIM'})
        sim_heads['site_name'] = sim_heads['site_name'].str.lower()
        
        # Join with obs_locs
        sim_heads = pd.merge(sim_heads, obs_locs, on='site_name', how='left')
        sim_heads = gpd.GeoDataFrame(sim_heads, geometry=gpd.points_from_xy(sim_heads['x'], sim_heads['y']), crs="EPSG:26915")
        
        # Add residual column
        sim_heads['RESIDUAL'] = sim_heads['head'] - sim_heads['IES_SIM']

        # 1:1 plot
        x = sim_heads['head']
        y = sim_heads['IES_SIM']
        res = y - x # > 0 means overestimation
        
        cm = plt.cm.get_cmap('RdBu')
        # Matplotlib's color maps: https://matplotlib.org/stable/tutorials/colors/colormaps.html
        
        def format_plot1(ax,axislim,legend=False):
            ax.set_xlim(axislim)
            ax.set_ylim(axislim)
            ax.grid(which='minor', alpha = 0.6,linewidth=0.5,linestyle='dashed', color='grey')
            ax.grid(which='major', alpha = 0.5,linewidth=0.5,linestyle='dashed',color='black')
            ax.set_xlabel('Measured [m.asl]', fontsize=16)
            ax.set_ylabel('Simulated [m.asl]', fontsize=16)
            ax.tick_params(which='major', length=7,width=2, color='black', labelsize=18)
            ax.tick_params(which='minor', length=4,width=1, color='grey')
            ax.tick_params(axis="x", labelsize=16)
            ax.tick_params(axis="y", labelsize=16)
            ax.set_facecolor("white")
            if legend==True:
                ax.legend(loc="best", ncol=1, fontsize=14)
            return ax
        
        fig, ax = plt.subplots(figsize=(9,9),facecolor='snow')
        fig.suptitle(f"1 to 1 Head Calibration \n {one2onelabel} \n Realization: {reals}  Iteration: {iteration}",fontweight='bold', fontsize='24')
        sc = ax.scatter(x,y,c=res,vmin=-1,vmax=1,edgecolor='k',cmap=cm,s=60)
        
        # Format Customization
        axislim=[math.floor(min(sim_heads[['head','IES_SIM']].min())/10)*10,math.ceil(max(sim_heads[['head','IES_SIM']].max())/10)*10]     
        format_plot1(ax,axislim)
        
        # 1to1 line
        ax.plot(axislim,axislim,linewidth=1, c='black')
        
        # Color Bar for Residuals
        #cbar = plt.colorbar(sc, ticks=np.arange(-cbar_ticks,cbar_ticks,1))
        cbar = plt.colorbar(sc, ticks=[-1,-0.5,0,0.5,1])
        cbar.set_label('Resiudal [m]', rotation=270, fontsize=14, labelpad=15)
        cbar.ax.tick_params(labelsize=12) 
        
        # Calibration Statistics
        abs_res = abs(res)
        res_list = res.to_list()
        
        obs_data_range = x.max()-x.min() 
        
        avg = round(np.mean(res_list),2)# Mean Error
        min_res = round(min(res_list),2) #Min Residual
        max_res = round(max(res_list),2) #Max Residual
        rms = round(np.sqrt(mean_squared_error(x, y)),2) # RMS
        nrms = round(100*rms/obs_data_range,2) # NRMS
        numofobs = len(res)
        
        textstr = '\n'.join((
        r'Model Stats:',
        r' ',
        r'# of Obs: '+str(numofobs),    
        r'Range [m]: '+str(round(obs_data_range,2)),
        r'Res Mean [m]: '+str(avg),
        r'Min Res [m]: '+str(min_res),
        r'Max Res [m]: '+str(max_res),
        r'RMS [m]: '+str(rms),
        r'RMS [%]: '+str(nrms),
        ))
        props = dict(boxstyle='round', facecolor='white', alpha=0.95)
        ax.text(0.7, 0.3, textstr, transform=ax.transAxes, fontsize=12,
                verticalalignment='top', bbox=props)
        
        plt.tight_layout()
        
        plt.savefig(os.path.join(cwd,f'Group_{Group}',f'Iter_{iteration}_1to1_{scenario}.png'), dpi=200, bbox_inches='tight')
        plt.close()

        # Fill lists with stats
        scenario_list.append(f'{scenario}')
        iteration_list.append(f'{iteration}')
        reals_list.append(f'{reals}')
        group_list.append(f'{Group}')
        numofobs_list.append(numofobs)
        obs_data_range_list.append(obs_data_range)
        avg_list.append(avg)
        min_res_list.append(min_res)
        max_res_list.append(max_res)
        rms_list.append(rms)
        nrms_list.append(nrms)
        
        # Load hds file
        fname = (os.path.join(temp_dir,f"{model.name}.hds"))
        hdobj = flopy.utils.HeadFile(fname)
        head = hdobj.get_data()
        maskedhead = np.ma.masked_greater_equal(head, 1e9)
        maskedhead = np.ma.masked_less_equal(maskedhead, -1e9)
        levels = np.arange(10,50,1)
        
        # Create residual/potsurface map
        fig = plt.figure(figsize=(5,7))
        ax = plt.subplot2grid((10,9), (0,0), rowspan=9, colspan=7) # (subplot grid, num rows, num cols), (start row, start col)
        ax1 = plt.subplot2grid((10,9), (9,0), rowspan=1, colspan=7)
        
        ax.set_title(f'{one2onelabel} \n Realizations: {reals}  Iteration: {iteration}')
        ax.set_xticks([])
        ax.set_yticks([])
        mapview = flopy.plot.PlotMapView(model, ax=ax)
        quadmesh = mapview.plot_ibound()
        mapview.plot_bc("WEL")
        contour_set = mapview.contour_array(head[0], levels=levels, colors='gray', linestyles='dotted', linewidths=1)
        rivs = mapview.plot_bc("RIV", plotAll=True, color="green")
        ghbs = mapview.plot_bc("GHB", plotAll=True, color="purple")
        obs1 = ax.scatter(sim_heads.geometry.x, sim_heads.geometry.y, s=sim_heads['RESIDUAL'].abs() * 100, cmap='RdBu', c=sim_heads['RESIDUAL'], edgecolor='k', linewidth=0.5, vmin=-1,vmax=1)
        
        # Legend
        res_red_dot = Line2D([],[], color='red', marker='o', markeredgecolor='k', linestyle='none', markersize=10, label='Simulated Over-Prediction')
        res_blue_dot = Line2D([],[], color='blue', marker='o', markeredgecolor='k', linestyle='none', markersize=10, label='Simulated Under-Prediction')
        sim_contours_line =  Line2D([0],[0], color='gray', marker='none', linestyle='dotted', linewidth=5, label='Simulated Potentiometric Surface')
        LegendElement = [res_red_dot, res_blue_dot, sim_contours_line]
        
        # ax1
        ax1.axis('off')
        ax1.legend(handles=LegendElement, loc='upper center', frameon=False, ncol=1, prop= {'size':10, 'family':'calibri'}) 
        
        fig.savefig(os.path.join(cwd,f'Group_{Group}',f'Iter_{iteration}_mapview_{scenario}.png'), dpi=200, bbox_inches='tight')
        plt.close()

        # export potentiometric surface to shp
        flopy.export.utils.export_array_contours(model.modelgrid, os.path.join(cwd,f'Group_{Group}',f'Iter_{iteration}_contours_{scenario}.shp'),
                                                 head, levels=levels, epsg=26915)

        # close the head file
        hdobj.close()
            
    # Delete temp folder
    if os.path.exists(os.path.join(cwd,'temp')):
        shutil.rmtree(os.path.join(cwd,'temp'))
        print('removed temp')

one2one_stats=list(zip(scenario_list, iteration_list, reals_list, group_list, numofobs_list, obs_data_range_list, avg_list, min_res_list, max_res_list, rms_list, nrms_list))
one2one_stats_df = pd.DataFrame(one2one_stats, columns=['Scenario','Iteration','Realizations','Group','# of Obs','Range(m)','Res Mean(m)','Min Res(m)','Max Res(m)','RMS(m)','RMS(%)'])
one2one_stats_df.to_csv('1to1_stats.csv')

fp_mf6_model
loading simulation...
  loading simulation name file...
  loading tdis package...
  loading model gwf6...
    loading package dis...
    loading package riv...
    loading package ic...
    loading package wel...
    loading package rch...
    loading package npf...
    loading package oc...
    loading package obs...
    loading package ghb...
  loading solution package fp_mf6_model...
['fp_mf6_model.pst']
updating parval1 using realization:'base' from ensemble file C:\Users\farnut1\Desktop\How-Many-Realizations-main\Scenario_Notebook\Charts_Figures\temp\fp_mf6_model.0.par.csv
parrep: updating noptmax to 0
noptmax:0, npar_adj:809, nnz_obs:26
pestpp-glm.exe realizations_base.pst
loading simulation...
  loading simulation name file...
  loading tdis package...
  loading model gwf6...
    loading package dis...
    loading package riv...
    loading package ic...
    loading package wel...
    loading package rch...
    loading package npf...
    loading package oc...
    lo