In [1]:
import os
from pathlib import Path
from shapely.geometry import Polygon
import matplotlib
import matplotlib.pyplot as plt
import flopy
import warnings
warnings.filterwarnings(action='ignore')
import pandas as pd
import geopandas as gpd
import numpy as np
from IPython.display import display
import shutil
import math
from sklearn.metrics import mean_squared_error
from matplotlib.lines import Line2D
from matplotlib.font_manager import FontProperties

In [2]:
# Notebook working directory plus the two model folders, resolved to absolute
# paths from their locations relative to this notebook.
cwd = os.getcwd()
truth_model_dir, base_model_dir = (
    Path(relative_dir).resolve()
    for relative_dir in (
        '../../Refined_Truth_Model/Modflow_Input_Files',
        '../../Base_Model_for_Scenarios',
    )
)

# Echo the three locations so a wrong starting directory is obvious immediately.
for location in (cwd, truth_model_dir, base_model_dir):
    print(location)

C:\Users\farnut1\Desktop\How-Many-Realizations-main\Scenario_Notebook\Charts_Figures
C:\Users\farnut1\Desktop\How-Many-Realizations-main\Refined_Truth_Model\Modflow_Input_Files
C:\Users\farnut1\Desktop\How-Many-Realizations-main\Base_Model_for_Scenarios


In [3]:
# Load the refined "truth" MODFLOW 6 simulation from disk and pull out the
# model named 'fp_mf6_model'; its grid and property arrays are used below.
truthsim = flopy.mf6.MFSimulation.load(
    sim_name='fp_mf6_model',
    sim_ws=truth_model_dir,
    exe_name='mf6.exe',
)
truthmodel = truthsim.get_model(model_name='fp_mf6_model')

loading simulation...
  loading simulation name file...
  loading tdis package...
  loading model gwf6...
    loading package dis...
    loading package ic...
    loading package riv...
    loading package ghb...
    loading package wel...
    loading package rch...
    loading package npf...
    loading package oc...
  loading model prt6...
    loading package dis...
    loading package mip...
    loading package prp...
    loading package oc...
  loading exchange package gwf-prt_exg_0...
  loading solution package fp_mf6_model...
  loading solution package mf6_prt...


In [4]:
# Build a GeoDataFrame of the truth model's recharge and hydraulic conductivity,
# one row per cell of the first layer, with the cell polygon as geometry.
truth_grid = truthmodel.modelgrid

# Cell polygons keyed by a 1-based node number.
modelprops = truth_grid.geo_dataframe.reset_index().rename(columns={'index': 'node'})
modelprops['node'] += 1

# (lay, row, col) for every cell of the first layer. get_lrc accepts a list of
# node numbers, so a single call replaces one Python-level call per cell.
cells_per_layer = int(truth_grid.nnodes // truth_grid.nlay)
lrc_list = truth_grid.get_lrc(list(range(cells_per_layer)))

# node / row / column table; the positional index becomes the node number and
# the layer column is not needed.
modelprops_df = (
    pd.DataFrame(lrc_list, columns=['lay', 'row', 'column'])
    .drop(columns='lay')
    .reset_index()
    .rename(columns={'index': 'node'})
)
# Shift node, row and column from 0-based to 1-based.
modelprops_df += 1

# Attach the flattened property arrays (same cell order as the node numbering).
modelprops_df['idomain_1'] = list(truthmodel.dis.idomain.data[0].flat)
modelprops_df['rch'] = list(truthmodel.rcha_0.recharge.data.flat)
modelprops_df['k_1'] = list(truthmodel.npf.k.data[0].flat)

# Bring in the geometry column and promote to a GeoDataFrame.
modelprops_df = modelprops_df.merge(modelprops, on='node')
truth_modelprops_df = gpd.GeoDataFrame(modelprops_df, crs='EPSG:26915', geometry='geometry')

# Only recharge, K and geometry are needed for the spatial join onto the base grid.
truth_modelprops_df = truth_modelprops_df[['rch', 'k_1', 'geometry']].copy()

In [5]:
# Load the coarser base simulation used for the scenarios and pull out the
# model named 'fp_mf6_model'; its grid defines the cells results are compared on.
basesim = flopy.mf6.MFSimulation.load(
    sim_name='fp_mf6_model',
    sim_ws=base_model_dir,
    exe_name='mf6.exe',
)
basemodel = basesim.get_model(model_name='fp_mf6_model')

loading simulation...
  loading simulation name file...
  loading tdis package...
  loading model gwf6...
    loading package dis...
    loading package ic...
    loading package riv...
    loading package ghb...
    loading package wel...
    loading package rch...
    loading package npf...
    loading package oc...
  loading solution package fp_mf6_model...


In [6]:
# Build a GeoDataFrame of the base model grid: node, row, column, idomain of the
# first layer, and the cell polygon as geometry.
base_grid = basemodel.modelgrid

# Cell polygons keyed by a 1-based node number.
modelprops = base_grid.geo_dataframe.reset_index().rename(columns={'index': 'node'})
modelprops['node'] += 1

# (lay, row, col) for every cell of the first layer. get_lrc accepts a list of
# node numbers, so a single call replaces one Python-level call per cell.
cells_per_layer = int(base_grid.nnodes // base_grid.nlay)
lrc_list = base_grid.get_lrc(list(range(cells_per_layer)))

# node / row / column table; the positional index becomes the node number and
# the layer column is not needed.
modelprops_df = (
    pd.DataFrame(lrc_list, columns=['lay', 'row', 'column'])
    .drop(columns='lay')
    .reset_index()
    .rename(columns={'index': 'node'})
)
# Shift node, row and column from 0-based to 1-based.
modelprops_df += 1

# Attach the flattened idomain array of the first layer.
modelprops_df['idomain_1'] = list(basemodel.dis.idomain.data[0].flat)

# Bring in the geometry column and promote to a GeoDataFrame.
modelprops_df = modelprops_df.merge(modelprops, on='node')
base_modelprops_df = gpd.GeoDataFrame(modelprops_df, crs='EPSG:26915', geometry='geometry')

In [7]:
# Transfer the truth model's recharge and K onto the coarser base-model grid:
# spatially join every base cell to the truth cells it intersects, then keep a
# single truth cell (the first match) per base node and renumber the rows.
# NOTE(review): 'intersects' also matches truth cells that only touch a base
# cell's boundary, and keep='first' takes whichever match is listed first rather
# than an average or the centroid cell -- confirm this sampling is intended.
truth_model_k_rch_basegrid = (
    gpd.sjoin(
        base_modelprops_df,
        truth_modelprops_df,
        how='left',
        predicate='intersects',
    )
    .drop_duplicates(subset=['node'], keep='first')
    .reset_index(drop=True)
)
truth_model_k_rch_basegrid

Unnamed: 0,node,row,column,idomain_1,geometry,index_right,rch,k_1
0,1,1,1,1,"POLYGON ((0 10000, 0 9975, 25 9975, 25 10000, ...",3202,0.000131,10.37000
1,2,1,2,1,"POLYGON ((25 10000, 25 9975, 50 9975, 50 10000...",3203,0.000131,10.37000
2,3,1,3,1,"POLYGON ((50 10000, 50 9975, 75 9975, 75 10000...",3211,0.000131,10.37000
3,4,1,4,1,"POLYGON ((75 10000, 75 9975, 100 9975, 100 100...",3216,0.000131,10.37550
4,5,1,5,1,"POLYGON ((100 10000, 100 9975, 125 9975, 125 1...",3216,0.000131,10.37550
...,...,...,...,...,...,...,...,...
79995,79996,400,196,0,"POLYGON ((4875 25, 4875 0, 4900 0, 4900 25, 48...",1279983,0.000145,4.31850
79996,79997,400,197,0,"POLYGON ((4900 25, 4900 0, 4925 0, 4925 25, 49...",1279987,0.000145,4.31926
79997,79998,400,198,0,"POLYGON ((4925 25, 4925 0, 4950 0, 4950 25, 49...",1279991,0.000145,4.32008
79998,79999,400,199,0,"POLYGON ((4950 25, 4950 0, 4975 0, 4975 25, 49...",1279991,0.000145,4.32008


In [8]:
# Label the resampled K column as the truth value so it cannot be confused with
# the 'k_1' column carried by the realization shapefiles it is merged with later.
truth_model_k_rch_basegrid = truth_model_k_rch_basegrid.rename(
    {'k_1': 'k_1_truth'}, axis='columns'
)

In [9]:
# Names of every sub-directory of the notebook folder.
with os.scandir(cwd) as entries:
    folders = [entry.name for entry in entries if entry.is_dir()]

# Scenario results live in the sub-directories whose name starts with 'Group'.
group_folders = [name for name in folders if name.startswith('Group')]
print(group_folders)

['Group_1', 'Group_10', 'Group_11', 'Group_12', 'Group_2', 'Group_3', 'Group_4', 'Group_5', 'Group_6', 'Group_7', 'Group_8', 'Group_9']


In [12]:
# 1:1 (truth vs. simulated) hydraulic-conductivity plots and fit statistics for
# every scenario group, iteration and ensemble size found under the notebook
# folder. One PNG is written per shapefile and one row of statistics is collected
# per PNG; the statistics table is written to 'K_1to1_stats.csv' at the end.

# Shapefile-name prefix -> (observation label, pilot-point label, group number).
SCENARIOS = {
    'R25_pp10': ('Obs: Random 25', 'PP: 800', 1),
    'R100_pp10': ('Obs: Random 100', 'PP: 800', 2),
    'C25_pp10': ('Obs: Clustered 25', 'PP: 800', 3),
    'C100_pp10': ('Obs: Clustered 100', 'PP: 800', 4),
    'R25_pp25': ('Obs: Random 25', 'PP: 128', 5),
    'R100_pp25': ('Obs: Random 100', 'PP: 128', 6),
    'C25_pp25': ('Obs: Clustered 25', 'PP: 128', 7),
    'C100_pp25': ('Obs: Clustered 100', 'PP: 128', 8),
    'R25_pp50': ('Obs: Random 25', 'PP: 32', 9),
    'R100_pp50': ('Obs: Random 100', 'PP: 32', 10),
    'C25_pp50': ('Obs: Clustered 25', 'PP: 32', 11),
    'C100_pp50': ('Obs: Clustered 100', 'PP: 32', 12),
}

# Ensemble sizes, encoded in the shapefile names as '_real<N>_'.
REALIZATION_COUNTS = (10, 25, 50, 100, 250, 500, 1000, 2000)

# Iterations to process; files are matched on the '_<iteration>.shp' suffix.
ITERATIONS = range(0, 5)


def find_scenario(shp_name):
    """Return (prefix, obs label, pp label, group number) for a shapefile name.

    Returns None when the name starts with none of the prefixes in SCENARIOS.
    """
    for prefix, (obs_label, pp_label, group_number) in SCENARIOS.items():
        if shp_name.startswith(prefix):
            return prefix, obs_label, pp_label, group_number
    return None


def find_realization_count(shp_name):
    """Return the ensemble size encoded as '_real<N>_' in a shapefile name.

    Returns None when the name contains none of the sizes in REALIZATION_COUNTS.
    """
    for count in REALIZATION_COUNTS:
        if f'_real{count}_' in shp_name:
            return count
    return None


def load_realization(shp_path, truth_k):
    """Read one realization shapefile and pair it with the truth K per node.

    Parameters
    ----------
    shp_path : path to the shapefile; it is read with the 'pyogrio' engine.
    truth_k : frame with 'node' and 'k_1_truth' columns, left-merged on 'node'.

    Returns
    -------
    The merged frame with an added 'k_residual' column (truth minus simulated
    'k_1'), restricted to rows where 'idomain_1' equals 1, with a fresh index.
    """
    real = gpd.read_file(shp_path, engine='pyogrio')
    real = pd.merge(real, truth_k, on='node', how='left')
    real['k_residual'] = real['k_1_truth'] - real['k_1']
    return real[real['idomain_1'] == 1].reset_index(drop=True)


def format_plot1(ax, axislim, legend=False):
    """Apply the shared 1:1-plot styling to `ax` and return it.

    Both axes get the same limits (`axislim`) so the 1:1 line is the diagonal;
    a legend is drawn only when `legend` is true.
    """
    ax.set_xlim(axislim)
    ax.set_ylim(axislim)
    ax.grid(which='minor', alpha=0.6, linewidth=0.5, linestyle='dashed', color='grey')
    ax.grid(which='major', alpha=0.5, linewidth=0.5, linestyle='dashed', color='black')
    ax.set_xlabel('Truth [m/d]', fontsize=16)
    ax.set_ylabel('Simulated [m/d]', fontsize=16)
    ax.tick_params(which='major', length=7, width=2, color='black', labelsize=18)
    ax.tick_params(which='minor', length=4, width=1, color='grey')
    ax.tick_params(axis="x", labelsize=16)
    ax.tick_params(axis="y", labelsize=16)
    ax.set_facecolor("white")
    if legend:
        ax.legend(loc="best", ncol=1, fontsize=14)
    return ax


# Diverging colormap for the residual colouring. plt.get_cmap is used because
# plt.cm.get_cmap is deprecated and absent from newer Matplotlib releases.
# Matplotlib's color maps: https://matplotlib.org/stable/tutorials/colors/colormaps.html
cm = plt.get_cmap('RdBu')

# Parallel lists, one entry per saved plot, zipped into the statistics table below.
scenario_list = []
reals_list = []
iteration_list = []
group_list = []
numofobs_list = []
obs_data_range_list = []
avg_list = []
min_res_list = []
max_res_list = []
rms_list = []
nrms_list = []

for groups in group_folders:
    # List the folder through `cwd`, the same base the shapefiles are read from.
    listdir = os.listdir(os.path.join(cwd, groups))

    for iteration in ITERATIONS:

        # All shapefiles of this iteration in the group folder.
        iter_shps = [name for name in listdir if name.endswith(f'_{iteration}.shp')]
        if not iter_shps:
            print(f'skipped {groups}: no shapefiles for iteration {iteration}')
            continue

        # The scenario is identified from the first file of the iteration.
        scenario = find_scenario(iter_shps[0])
        if scenario is None:
            # Never fall back on labels left over from a previous iteration.
            print(f'skipped {groups} iteration {iteration}: unknown scenario prefix in {iter_shps[0]}')
            continue
        filesave, obs, pp, Group = scenario

        for i in iter_shps:
            realization_count = find_realization_count(i)
            if realization_count is None:
                # Without this guard the previous file's data would be plotted
                # again under the previous file's realization count.
                print(f'skipped {i}: no known realization count in file name')
                continue
            reals = realization_count

            real = load_realization(
                os.path.join(cwd, groups, i),
                truth_model_k_rch_basegrid[['node', 'k_1_truth']],
            )

            # 1:1 plot
            x = real['k_1_truth']
            y = real['k_1']
            res = y - x # > 0 means overestimation

            fig, ax = plt.subplots(figsize=(9,9), facecolor='snow')
            fig.suptitle(f"1 to 1 Hydraulic Conductivity \n {obs}   {pp} \n Realization: {reals}  Iteration: {iteration}", fontweight='bold', fontsize=24)
            sc = ax.scatter(x, y, c=res, vmin=-10, vmax=10, edgecolor='k', cmap=cm, s=60, linewidth=0.1)

            # Common axis limits: the data range padded outwards to multiples of 10.
            axislim = [
                math.floor(min(real[['k_1_truth','k_1']].min())/10)*10,
                math.ceil(max(real[['k_1_truth','k_1']].max())/10)*10,
            ]
            format_plot1(ax, axislim)

            # 1to1 line
            ax.plot(axislim, axislim, linewidth=1, c='black')

            # Color Bar for Residuals
            cbar = fig.colorbar(sc, ax=ax, ticks=[-10,-5,0,5,10])
            cbar.set_label('Residual [m/d]', rotation=270, fontsize=14, labelpad=15)
            cbar.ax.tick_params(labelsize=12)

            # Calibration Statistics
            res_list = res.to_list()

            obs_data_range = x.max()-x.min()

            avg = round(np.mean(res_list),2) # Mean Error
            min_res = round(min(res_list),2) # Min Residual
            max_res = round(max(res_list),2) # Max Residual
            rms = round(np.sqrt(mean_squared_error(x, y)),2) # RMS
            nrms = round(100*rms/obs_data_range,2) # NRMS: RMS as a percentage of the truth range
            numofobs = len(res)

            textstr = '\n'.join([
                'Model Stats:',
                ' ',
                '# of Obs: ' + str(numofobs),
                'Range [m/d]: ' + str(round(obs_data_range,2)),
                'Res Mean [m/d]: ' + str(avg),
                'Min Res [m/d]: ' + str(min_res),
                'Max Res [m/d]: ' + str(max_res),
                'RMS [m/d]: ' + str(rms),
                'RMS [%]: ' + str(nrms),
            ])
            props = dict(boxstyle='round', facecolor='white', alpha=0.95)
            ax.text(0.7, 0.3, textstr, transform=ax.transAxes, fontsize=12,
                    verticalalignment='top', bbox=props)

            png_name = f'K_Iter_{iteration}_1to1_{filesave}_r{reals}.png'
            fig.tight_layout()
            fig.savefig(os.path.join(cwd, f'Group_{Group}', png_name), dpi=300, bbox_inches='tight')
            # Close explicitly: one figure is created per shapefile.
            plt.close(fig)
            print('saved', png_name)

            # Fill lists with stats
            scenario_list.append(f'{filesave}')
            iteration_list.append(f'{iteration}')
            reals_list.append(f'{reals}')
            group_list.append(f'{Group}')
            numofobs_list.append(numofobs)
            obs_data_range_list.append(obs_data_range)
            avg_list.append(avg)
            min_res_list.append(min_res)
            max_res_list.append(max_res)
            rms_list.append(rms)
            nrms_list.append(nrms)

one2one_stats=list(zip(scenario_list, iteration_list, reals_list, group_list, numofobs_list, obs_data_range_list, avg_list, min_res_list, max_res_list, rms_list, nrms_list))
one2one_stats_df = pd.DataFrame(one2one_stats, columns=['Scenario','Iteration','Realizations','Group','# of Obs','Range(m/d)','Res Mean(m/d)','Min Res(m/d)','Max Res(m/d)','RMS(m/d)','RMS(%)'])
one2one_stats_df.to_csv('K_1to1_stats.csv')

saved K_Iter_0_1to1_R25_pp10_r1000.png
saved K_Iter_0_1to1_R25_pp10_r100.png
saved K_Iter_0_1to1_R25_pp10_r10.png
saved K_Iter_0_1to1_R25_pp10_r2000.png
saved K_Iter_0_1to1_R25_pp10_r250.png
saved K_Iter_0_1to1_R25_pp10_r25.png
saved K_Iter_0_1to1_R25_pp10_r500.png
saved K_Iter_0_1to1_R25_pp10_r50.png
saved K_Iter_1_1to1_R25_pp10_r1000.png
saved K_Iter_1_1to1_R25_pp10_r100.png
saved K_Iter_1_1to1_R25_pp10_r10.png
saved K_Iter_1_1to1_R25_pp10_r2000.png
saved K_Iter_1_1to1_R25_pp10_r250.png
saved K_Iter_1_1to1_R25_pp10_r25.png
saved K_Iter_1_1to1_R25_pp10_r500.png
saved K_Iter_1_1to1_R25_pp10_r50.png
saved K_Iter_2_1to1_R25_pp10_r1000.png
saved K_Iter_2_1to1_R25_pp10_r100.png
saved K_Iter_2_1to1_R25_pp10_r10.png
saved K_Iter_2_1to1_R25_pp10_r2000.png
saved K_Iter_2_1to1_R25_pp10_r250.png
saved K_Iter_2_1to1_R25_pp10_r25.png
saved K_Iter_2_1to1_R25_pp10_r500.png
saved K_Iter_2_1to1_R25_pp10_r50.png
saved K_Iter_3_1to1_R25_pp10_r1000.png
saved K_Iter_3_1to1_R25_pp10_r100.png
saved K_Iter_3