In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from numpy.linalg import norm
import flopy
import flopy.utils.binaryfile as bf
%matplotlib inline
%config InlineBackend.figure_format='retina'
import rasterio
from tqdm import tqdm
from scipy.ndimage import maximum_filter
from scipy.interpolate import griddata
from sbi import utils as utils
#from sbi import analysis as analysis
from sbi.inference.base import infer
#from sbi.inference import SNPE
import torch
import warnings

warnings.filterwarnings("ignore")
# ! pip uninstall torch sbi scipy -y
# ! pip install scipy==1.12.0
# ! pip install sbi==0.21.0
# ! pip install torch==2.0

In [None]:
import sys
sys.path.append('../scripts/')
from Slate_Floodplain_MODFLOW import *

In [None]:
# Vertical discretization of the model grid.
nlay = 16  # total number of model layers
soil_nlay = 10  # number of upper layers treated as soil
gravel_nlay = nlay - soil_nlay  # remaining layers (6) treated as gravel
dam_nlay = 5  # NOTE(review): not referenced anywhere else in this notebook -- confirm it is still needed

# Observed data and simulated data

In [None]:
# Monitoring locations; columns used below are 'Easting.UTM.13N', 'Northing.UTM.13N',
# 'Depth.m' and the text label 'ParameterColumnName'.
locations = pd.read_csv('../data/response_data/preprocessing/locations_GB_VHG.csv')

# Observed baseflow heads; columns used below are 'ID' and 'masl'
# (file name suggests a 2021-08-15 survey -- confirm).
baseflow_obs = pd.read_csv('../data/response_data/preprocessing/baseflow_obs_20210815.csv')

In [None]:
# Horizontal grid size.
ncol = 480
nrow = 460

# Layer counts (repeated here so this cell is self-contained).
nlay = 16
soil_nlay = 10
gravel_nlay = nlay - soil_nlay
dam_nlay = 5

# Bottom elevation of every layer, filled in below.
zbot = np.zeros((nlay, nrow, ncol))

# Soil layers: split the depth to the gravel interface evenly, but never let a
# layer bottom sit shallower than 0.1 per layer below the surface.
for lay in range(soil_nlay):
    soil_depth = np.maximum(gravel_interface * ((lay + 1) / soil_nlay), 0.1 * (lay + 1))
    zbot[lay] = DEM - soil_depth

# Gravel layers: cumulative fractions of the bedrock interface depth, measured
# from the base of the deepest soil layer, with the same 0.1-per-layer floor.
gravel_discretized_ratio = [0.02, 0.04, 0.1, 0.3, 0.6, 1]
soil_base = zbot[soil_nlay - 1]
for i, lay in enumerate(range(soil_nlay, nlay)):
    gravel_depth = np.maximum(bedrock_interface * gravel_discretized_ratio[i], 0.1 * (i + 1))
    zbot[lay] = soil_base - gravel_depth

# Layer thickness: the top layer is measured from the DEM, the rest from the layer above.
thickness = np.zeros(zbot.shape)
thickness[0] = DEM - zbot[0]
for i in range(1, nlay):
    thickness[i] = zbot[i - 1] - zbot[i]

# Depth of every layer bottom below the land surface.
depth_to_surface = DEM - zbot

In [None]:
# Convert the surveyed coordinates into grid indices.
# X is the offset east of bounds[0] and Y the offset south of bounds[3]
# (assumes 1-unit grid cells and bounds = (left, bottom, right, top) -- confirm).
locations['X'] = np.int64(np.round(locations['Easting.UTM.13N']-bounds[0]))
locations['Y'] = np.int64(bounds[3]-np.round(locations['Northing.UTM.13N']))

# Z is the layer whose bottom depth is closest to the sensor depth at that (row, column).
locations['Z'] = np.argmin(np.abs(depth_to_surface[:,locations['Y'],locations['X']]-locations['Depth.m'].values.reshape(1,-1)),axis = 0)

# Manual corrections for two locations. They are written through a single
# positional indexer on the frame itself: the previous `locations[col].iloc[k] = ...`
# form is chained assignment, which pandas does not guarantee to write back
# (and which is a no-op under Copy-on-Write).
# NOTE: Z above was computed with the uncorrected Y of row 11; that order is kept as-is.
locations.iloc[7, locations.columns.get_loc('Z')] += 1
locations.iloc[11, locations.columns.get_loc('Y')] = 221

In [None]:
def f_data(head, locs=None):
    """Extract simulated heads at the monitoring locations.

    Parameters
    ----------
    head : array indexed as ``head[layer, row, column]``.
    locs : optional table with integer columns ``'Z'`` (layer), ``'Y'`` (row)
        and ``'X'`` (column). Defaults to the notebook-level ``locations``
        frame, so existing ``f_data(head)`` calls behave as before.

    Returns
    -------
    1-D array with one head value per row of ``locs``.
    """
    if locs is None:
        locs = locations
    layer_idx = np.asarray(locs['Z'])
    row_idx = np.asarray(locs['Y'])
    col_idx = np.asarray(locs['X'])
    return head[layer_idx, row_idx, col_idx]

In [None]:
# Load the reference run stored under ./baseflow_test (model name 'test').
# read_sim is not defined in this notebook (presumably provided by the
# Slate_Floodplain_MODFLOW star-import -- confirm); it returns the head array plus two flow arrays.
head, flf, frf  = read_sim('./baseflow_test', 'test')

In [None]:
# Simulated heads of the reference run at the monitoring locations.
sim = f_data(head)

In [None]:
# Simulated vs. observed head for the reference run, one labelled point per location.
plt.figure(figsize=[8, 8])
plt.scatter(sim, baseflow_obs['masl'])
for i, label in enumerate(locations['ParameterColumnName']):
    plt.text(sim[i], baseflow_obs['masl'][i], label)

# Dashed 1:1 line for reference.
one_to_one = [2724.6, 2725.8]
plt.plot(one_to_one, one_to_one, color='grey', linestyle='--')
plt.gca().set_aspect('equal')
plt.xlabel('Simulation')
plt.ylabel('Observed')
# Author's note: the default run matches the gravel bed reasonably well, but the
# simulated vertical hydraulic gradient is too small.

# Load all simulated data

In [None]:
# Number of prior Monte Carlo realizations stored on disk.
num_MC = 300

# Pre-allocate ensemble arrays shaped like the reference run.
# NOTE(review): if head is (16, 460, 480), each of the three arrays is roughly 8.5 GB in float64.
ensemble_shape = (num_MC,) + tuple(head.shape[:3])
head_MC = np.zeros(ensemble_shape)
flf_MC = np.zeros(ensemble_shape)
frf_MC = np.zeros(ensemble_shape)
sim_MC = np.zeros([num_MC, sim.shape[0]])

# Read every realization and keep both the full fields and the heads at the monitoring locations.
for i in tqdm(range(num_MC)):
    run_id = str(i).zfill(3)
    head, flf, frf = read_sim('./Prior_Simulation/sim' + run_id, 'sim' + run_id)
    head_MC[i] = head
    flf_MC[i] = flf
    frf_MC[i] = frf
    sim_MC[i] = f_data(head)

In [None]:
# Label the ensemble columns with the observation IDs (one column per monitoring location).
sim_MC = pd.DataFrame(sim_MC,columns = baseflow_obs['ID'])

In [None]:
# Box colour per location ID: the GB* locations share 'C2', the OBJ*-D/M/S locations share 'C1'.
gb_ids = ['GB2', 'GB4', 'GB5', 'GB6', 'GB7', 'GB8', 'GB9', 'GB10']
obj_ids = [f'OBJ{site}-{level}' for site in (1, 2, 4) for level in ('D', 'M', 'S')]
column_colors = {
    **dict.fromkeys(gb_ids, 'C2'),
    **dict.fromkeys(obj_ids, 'C1'),
    # Extend the two ID lists above to colour additional columns.
}

In [None]:
# Long format: one row per (realization, location) pair.
melted_df = sim_MC.melt(var_name='Columns', value_name='Head (masl)')

# Spread of the prior ensemble at each location, coloured by location group.
plt.figure(figsize=(10, 3))
sns.boxplot(data=melted_df, x='Columns', y='Head (masl)', palette=column_colors)

# Observed heads drawn on top of the boxes.
plt.scatter(baseflow_obs['ID'], baseflow_obs['masl'],
            color='red', zorder=100, label='observed')

plt.title('Prior simulation')
plt.xlabel('Columns')
plt.ylabel('Values')
plt.legend()

# Tilt the tick labels so the location IDs stay legible.
plt.xticks(rotation=45)
plt.show()


In [None]:
# Exclude location GB10 from the ensemble used for calibration.
# NOTE: sim_MC is rebound, so re-running this cell alone raises KeyError (the column is already gone).
sim_MC = sim_MC.drop(columns=['GB10'])

In [None]:
# Observation IDs excluded from calibration (kept in step with the column dropped from sim_MC).
IDs_to_remove = ['GB10']
is_excluded = baseflow_obs['ID'].isin(IDs_to_remove)
# Keep every observation whose ID is not excluded; the original row index is preserved.
baseflow_obs_filtered = baseflow_obs[~is_excluded]

In [None]:
# Fix the NumPy seed so the synthetic measurement noise is reproducible.
np.random.seed(1)
# Zero-mean Gaussian noise (standard deviation 0.03, in the units of the simulated heads),
# one independent draw per realization and location.
noise = np.random.normal(0, 0.03, sim_MC.values.shape)
# Noisy prior ensemble; this is what the density estimator is trained on further below.
sim_MC_w_noise = sim_MC + noise

In [None]:
# Long format of the noisy prior ensemble: one row per (realization, location) pair.
melted_df = sim_MC_w_noise.melt(var_name='Columns', value_name='Head (masl)')

# Box plot per location with thin outlines and small outlier markers.
plt.figure(figsize=(12, 3))
sns.boxplot(data=melted_df, x='Columns', y='Head (masl)',
            palette=column_colors, linewidth=1, fliersize=2)

# Observations (GB10 excluded) drawn on top of the boxes.
plt.scatter(baseflow_obs_filtered['ID'], baseflow_obs_filtered['masl'],
            color='red', zorder=100, alpha=0.8, label='baseflow observations')

plt.title('Prior simulation')
plt.xlabel('Columns')
plt.ylabel('Values')
plt.legend()
# Tick labels are left unrotated here; plt.xticks() without arguments changes nothing.
plt.xticks()
plt.ylim(2723.2, 2725.9)
plt.show()


In [None]:
# Parameter sets that generated the prior ensemble (assumed one row per realization -- confirm against the CSV).
paras = pd.read_csv('Prior_Simulation/Prior_Parameters.csv')
# Work in log10 space from here on; the prior bounds below are expressed in log10 as well.
paras = np.log10(paras)

# Calibration

In [None]:
# Hyperparameters of the density estimator explored in the grid search below:
# hidden_features: Number of hidden features.
# num_transforms: Number of transforms when a flow is used. Only relevant if
#             density estimator is a normalizing flow (i.e. currently either a `maf` or a
#             `nsf`). Ignored if density estimator is a `mdn` or `made`.

# Prior: independent uniform box over the five log10 parameters.
# Presumably in the column order of `paras` (hk_gravel, hk_soil, vka_ratio_gravel,
# vka_ratio_soil, k_dam) -- confirm against Prior_Parameters.csv.
prior = utils.BoxUniform(low=[-4,-6,-2,-2,-8], 
                         high=[-2,-4,0,0,-6])

In [None]:
from sbi.utils import posterior_nn

## Explore the hyperparameters 

In [None]:
from itertools import product

# SNPE is used below, but its import at the top of the notebook is commented out.
# Importing it here keeps this cell runnable after a kernel restart.
from sbi.inference import SNPE

# Grid of MAF hyperparameters: hidden_features in {1, 5, 10, ..., 30}, num_transforms in {1, ..., 6}.
hidden_features_values = np.arange(1, 32, step=5)-1
hidden_features_values[0] = 1
num_transforms_values = np.arange(1, 7, step=1)

# Every (hidden_features, num_transforms) pair, with hidden_features varying slowest.
hyperparameters_combinations = list(product(hidden_features_values, num_transforms_values))

# The training set is the same for every network, so convert it to tensors once.
theta_train = torch.tensor(np.array(paras.values, 'float32'))
x_train = torch.tensor(np.array(sim_MC_w_noise.values, 'float32'))

# Per-combination records, plus the best network seen so far.
hyperparameters = []
log_probabilities = []
best_val_log_probs = -np.inf  # -inf (not -100) so the first trained network is always accepted
best_posterior = None

# sbi holds out validation_fraction=0.1 (10 %) of the simulations by default.
# NOTE(review): training is not seeded, so the selected network can change between runs.
for hidden_features, num_transforms in hyperparameters_combinations:
    # Plain Python ints avoid handing NumPy scalar types to the network builder.
    hidden_features = int(hidden_features)
    num_transforms = int(num_transforms)

    # Build and train a masked autoregressive flow with the current hyperparameters.
    density_estimator_build_fun = posterior_nn(model="maf", hidden_features=hidden_features, num_transforms=num_transforms)
    inference = SNPE(prior, density_estimator=density_estimator_build_fun)
    density_estimator = inference.append_simulations(theta_train, x_train).train()
    posterior = inference.build_posterior(density_estimator)

    # Log-probabilities recorded at the final training epoch.
    training_log_prob = inference.summary['training_log_probs'][-1]
    validation_log_prob = inference.summary['validation_log_probs'][-1]

    hyperparameters.append((hidden_features, num_transforms))
    log_probabilities.append((training_log_prob, validation_log_prob))

    # Keep the posterior with the highest validation log-probability.
    if validation_log_prob > best_val_log_probs:
        best_posterior = posterior
        best_val_log_probs = validation_log_prob
        print('updated')

    print((hidden_features, num_transforms))
    print((training_log_prob, validation_log_prob))

# Convert lists to NumPy arrays
hyperparameters_array = np.array(hyperparameters)
log_probabilities_array = np.array(log_probabilities)

# One row per hyperparameter combination
hyper_df = pd.DataFrame(data=np.hstack((hyperparameters_array, log_probabilities_array)),
                  columns=['hidden_features', 'num_transforms', 'training_log_prob', 'validation_log_prob'])

In [None]:
# Arrange the per-combination log-probabilities on the (hidden_features, num_transforms) grid.
grid_shape = (len(hidden_features_values), len(num_transforms_values))
log_prob_matrix_training = log_probabilities_array[:, 0].reshape(*grid_shape)
log_prob_matrix_validation = log_probabilities_array[:, 1].reshape(*grid_shape)

# Side-by-side heat maps with a common colour range.
fig, axes = plt.subplots(1, 2, figsize=(12, 5))
heatmap_extent = [min(num_transforms_values), max(num_transforms_values),
                  min(hidden_features_values), max(hidden_features_values)]
panels = [
    (log_prob_matrix_training, 'Training Log Probabilities'),
    (log_prob_matrix_validation, 'Validation Log Probabilities'),
]
images = []
for ax, (matrix, panel_title) in zip(axes, panels):
    image = ax.imshow(matrix, origin='lower', extent=heatmap_extent, aspect='auto', vmin=-3, vmax=1.5)
    ax.set_title(panel_title)
    ax.set_xlabel('Num Transforms')
    ax.set_ylabel('Hidden Features')
    plt.colorbar(image, ax=ax)
    images.append(image)
im1, im2 = images

# Tidy up the spacing between the two panels.
plt.tight_layout()

In [None]:
# Row label of the combination with the highest final-epoch validation log-probability.
max_index = hyper_df['validation_log_prob'].idxmax()

# Display that row (hyperparameters and both log-probabilities).
hyper_df.loc[max_index]

## Getting posterior samples

### Synthetic case: use one prior simulation as the observed dataset, so the ground-truth parameters are known

In [None]:
# Index of the prior realization used as the synthetic "observation"; its parameters are the known truth.
synthetic_idx = 118
data_obs_synthetic = sim_MC_w_noise.iloc[synthetic_idx,:]
# Draw 100,000 parameter samples (log10 space) from the selected network, conditioned on the synthetic data.
posterior_samples = best_posterior.sample((100000,),x=data_obs_synthetic)

In [None]:
# Convert the sampled tensor to a float64 NumPy array for plotting.
posterior_samples = np.array(posterior_samples,dtype = 'float64')
# Panel titles, one per parameter, in the same order as the columns of `paras`.
xlabel = ['log $K_{h}^{gravel}$ [m/s]','log $K_{h}^{soil}$ [m/s]','log($K_{v}^{gravel}/K_{h}^{gravel}$)','log($K_{v}^{soil}/K_{h}^{soil}$)','log $K^{dam}$ [m/s]']

In [None]:
# Enlarge the fonts of every text element in the figure.
plt.rcParams.update({'font.size': 14, 'axes.labelsize': 14, 'axes.titlesize': 16, 'legend.fontsize': 12, 'xtick.labelsize': 12, 'ytick.labelsize': 12})

# posterior_samples is an array of log10 parameters here; label its columns like the prior table.
posterior_df = pd.DataFrame(posterior_samples, columns=paras.columns)
n_variables = len(paras.columns)

# One narrow panel per parameter with a shared density axis.
fig, axes = plt.subplots(nrows=1, ncols=n_variables, figsize=(3.*n_variables, 2), sharey=True)
axes = np.atleast_1d(axes)  # stays indexable even with a single panel

for i, var_name in enumerate(paras.columns):
    panel = axes[i]

    # Prior: box over the range covered by the sampled prior parameters, at the matching uniform density.
    uniform_min, uniform_max = np.min(paras[var_name]), np.max(paras[var_name])
    uniform_height = 1 / (uniform_max - uniform_min)
    panel.fill_betweenx([0, uniform_height], uniform_min, uniform_max, alpha=0.3, color='C0', label='Prior')

    # Posterior: report its standard deviation, then draw a smoothed KDE.
    print(posterior_df[var_name].std())
    sns.kdeplot(posterior_df[var_name], ax=panel, fill=True, alpha=0.5, bw_adjust=1.5, label='Posterior', color='C1')

    # Known parameter value of the synthetic realization.
    panel.vlines(x=paras.iloc[synthetic_idx].values[i], ymin=0, ymax=5, color='grey', linestyle='--', label='Truth')

    # Only the left-most panel keeps its y label; titles replace the x labels.
    if i > 0:
        panel.set_ylabel('')
    panel.set_xlabel('')
    panel.set_xlim(uniform_min, uniform_max)
    panel.set_ylim(0, 3.4)
    panel.set_title(xlabel[i])

plt.tight_layout()
plt.show()


In [None]:
# Same figure as the previous cell, but with an automatic density-axis range and no printed spreads.
plt.rcParams.update({'font.size': 14, 'axes.labelsize': 14, 'axes.titlesize': 16, 'legend.fontsize': 12, 'xtick.labelsize': 12, 'ytick.labelsize': 12})

# Log10 posterior samples, with columns labelled like the prior table.
posterior_df = pd.DataFrame(posterior_samples, columns=paras.columns)
n_variables = len(paras.columns)

fig, axes = plt.subplots(nrows=1, ncols=n_variables, figsize=(3.*n_variables, 2), sharey=True)
axes = np.atleast_1d(axes)  # stays indexable even with a single panel

for i, var_name in enumerate(paras.columns):
    panel = axes[i]

    # Prior box: range of the sampled prior parameters at the matching uniform density.
    uniform_min, uniform_max = np.min(paras[var_name]), np.max(paras[var_name])
    uniform_height = 1 / (uniform_max - uniform_min)
    panel.fill_betweenx([0, uniform_height], uniform_min, uniform_max, alpha=0.3, color='C0', label='Prior')

    # Smoothed posterior density and the known synthetic truth.
    sns.kdeplot(posterior_df[var_name], ax=panel, fill=True, alpha=0.5, bw_adjust=1.5, label='Posterior', color='C1')
    panel.vlines(x=paras.iloc[synthetic_idx].values[i], ymin=0, ymax=5, color='grey', linestyle='--', label='Truth')

    # Only the left-most panel keeps its y label; titles replace the x labels.
    if i > 0:
        panel.set_ylabel('')
    panel.set_xlabel('')
    panel.set_xlim(uniform_min, uniform_max)
    panel.set_title(xlabel[i])

plt.tight_layout()
plt.show()


In [None]:
# Back-transform the log10 samples to linear parameter values.
# NOTE: posterior_samples is rebound, so re-running this cell alone applies 10** a second time.
posterior_samples = pd.DataFrame(np.power(10,np.array(posterior_samples)),columns = paras.columns)

In [None]:
from sbi.analysis import pairplot
# Pairwise posterior marginals, converted back to log10 for plotting; the return value is discarded.
_ = pairplot(np.log10(posterior_samples.values))

In [None]:
# Posterior log-density of every prior parameter set, given the synthetic observation.
# Two corrections relative to the earlier version of this line:
#   * best_posterior (the network chosen by the grid search) replaces `posterior`, which is
#     merely whichever network happened to be trained last;
#   * data_obs_synthetic replaces data_obs, which is only defined later, in the
#     "Actual observation" section, and so raised NameError on a fresh kernel.
x_synthetic = torch.from_numpy(np.asarray(data_obs_synthetic, dtype='float32'))
prior_log_prob = np.array(best_posterior.log_prob(torch.from_numpy(paras.values).float(), x=x_synthetic))

In [None]:
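# NOTE: 'Posterior_Simulation_synthetic/Posterior_Parameters.csv' is read back by the synthetic
# inversion cells further down; re-enable the line below to (re)generate that file.
#posterior_samples.to_csv('Posterior_Simulation_synthetic/Posterior_Parameters.csv')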

### Actual observation

In [None]:
# Observed heads (GB10 excluded) as a NumPy array, in the row order of baseflow_obs_filtered.
data_obs = baseflow_obs_filtered['masl'].values
# Draw 100,000 parameter samples (log10 space) from the selected network, conditioned on the field data.
posterior_samples = best_posterior.sample((100000,),x=data_obs)

In [None]:
# Convert the sampled tensor to a float64 NumPy array for plotting.
posterior_samples = np.array(posterior_samples,dtype = 'float64')
# Panel titles, one per parameter (same list as in the synthetic case).
xlabel = ['log $K_{h}^{gravel}$ [m/s]','log $K_{h}^{soil}$ [m/s]','log($K_{v}^{gravel}/K_{h}^{gravel}$)','log($K_{v}^{soil}/K_{h}^{soil}$)','log $K^{dam}$ [m/s]']

In [None]:
# Enlarge the fonts of every text element in the figure.
plt.rcParams.update({'font.size': 14, 'axes.labelsize': 14, 'axes.titlesize': 16, 'legend.fontsize': 12, 'xtick.labelsize': 12, 'ytick.labelsize': 12})

# Log10 posterior samples for the field data, with columns labelled like the prior table.
posterior_df = pd.DataFrame(posterior_samples, columns=paras.columns)
n_variables = len(paras.columns)

# One narrow panel per parameter with a shared density axis.
fig, axes = plt.subplots(nrows=1, ncols=n_variables, figsize=(3.*n_variables, 2), sharey=True)
axes = np.atleast_1d(axes)  # stays indexable even with a single panel

for i, var_name in enumerate(paras.columns):
    panel = axes[i]

    # Prior box: range of the sampled prior parameters at the matching uniform density.
    uniform_min, uniform_max = np.min(paras[var_name]), np.max(paras[var_name])
    uniform_height = 1 / (uniform_max - uniform_min)
    panel.fill_betweenx([0, uniform_height], uniform_min, uniform_max, alpha=0.3, color='C0', label='Prior')

    # Smoothed posterior density (no truth line: the real parameters are unknown).
    sns.kdeplot(posterior_df[var_name], ax=panel, fill=True, alpha=0.5, bw_adjust=1.5, label='Posterior', color='C1')

    # Only the left-most panel keeps its y label; titles replace the x labels.
    if i > 0:
        panel.set_ylabel('')
    panel.set_xlabel('')
    panel.set_xlim(uniform_min, uniform_max)
    panel.set_title(xlabel[i])

# Legend on the last panel only.
axes[i].legend()

plt.tight_layout()
plt.show()


In [None]:
# Back-transform the log10 samples to linear parameter values (this frame is written to CSV below).
# NOTE: posterior_samples is rebound, so re-running this cell alone applies 10** a second time.
posterior_samples = pd.DataFrame(np.power(10,np.array(posterior_samples)),columns = paras.columns)

In [None]:
from sbi.analysis import pairplot
# Pairwise posterior marginals for the field data, converted back to log10 for plotting.
_ = pairplot(np.log10(posterior_samples.values))

In [None]:
# Posterior log-density of every prior parameter set, given the field observations.
# best_posterior is the network the samples were drawn from; the bare `posterior`
# name only holds whichever network the grid search trained last.
prior_log_prob = np.array(best_posterior.log_prob(torch.from_numpy(paras.values).float(), x=data_obs))

In [None]:
# Box plot restricted to the prior realizations whose parameters have posterior log-density above -10.
plt.figure(figsize=(10, 3))
is_plausible = prior_log_prob > -10
melted_df = sim_MC_w_noise.iloc[is_plausible].melt(var_name='Columns', value_name='Head (masl)')
sns.boxplot(data=melted_df, x='Columns', y='Head (masl)')

# Observed heads (GB10 excluded) for comparison.
plt.scatter(baseflow_obs_filtered['ID'], baseflow_obs_filtered['masl'], color='red')

plt.title('Prior simulation')
plt.xlabel('Columns')
plt.ylabel('Values')

# Tilt the tick labels and fix the head range shown.
plt.xticks(rotation=45)
plt.ylim(2723.5, 2726)
plt.show()


In [None]:
# Persist the linear-space posterior samples; the row index is written as the first CSV column
# (it is stripped again when the file is read back below).
posterior_samples.to_csv('Posterior_Simulation/Posterior_Parameters.csv')

# Posterior simulation, Set 2

In [None]:
# Reload the saved posterior samples and discard the first column (the index written by to_csv).
posterior_samples = pd.read_csv('Posterior_Simulation/Posterior_Parameters.csv').iloc[:, 1:]

## Set 2A

In [None]:
# Forcing for Set 2A.
precip = 2e-3 #m/d
ET = 2e-3 #m/d
num_MC = 300

# Run the model once for each of the first num_MC posterior parameter sets.
for i in range(num_MC):
    run_id = str(i).zfill(3)
    hk_gravel, hk_soil, vka_ratio_gravel, vka_ratio_soil, k_dam = posterior_samples.iloc[i]
    mf, head, hk, vka, strt, zbot, flf, frf = modflow_BC(
        hk_gravel, hk_soil, vka_ratio_gravel, vka_ratio_soil, k_dam,
        ET, precip, 'sim' + run_id, 'Posterior_Simulation/sim' + run_id)
    print(i)

## Set 2B

In [None]:
# Forcing for Set 2B, run with the 'dry' period setting.
precip = 1e-3 #m/d
ET = 2e-3 #m/d
num_MC = 300
period = 'dry'

# Run the model once for each of the first num_MC posterior parameter sets.
for i in range(num_MC):
    run_id = str(i).zfill(3)
    hk_gravel, hk_soil, vka_ratio_gravel, vka_ratio_soil, k_dam = posterior_samples.iloc[i]
    mf, head, hk, vka, strt, zbot, flf, frf = modflow_BC(
        hk_gravel, hk_soil, vka_ratio_gravel, vka_ratio_soil, k_dam,
        ET, precip, 'sim' + run_id, 'Posterior_Simulation_Drypond/sim' + run_id, period)
    print(i)

## Set 2C

In [None]:
# Forcing for Set 2C, run with the 'snowmelt' period setting.
precip = 1.5e-3 #m/d
ET = 3e-3 #m/d
num_MC = 300
period = 'snowmelt'

# Run the model once for each of the first num_MC posterior parameter sets.
for i in range(num_MC):
    run_id = str(i).zfill(3)
    hk_gravel, hk_soil, vka_ratio_gravel, vka_ratio_soil, k_dam = posterior_samples.iloc[i]
    mf, head, hk, vka, strt, zbot, flf, frf = modflow_BC(
        hk_gravel, hk_soil, vka_ratio_gravel, vka_ratio_soil, k_dam,
        ET, precip, 'sim' + run_id, 'Posterior_Simulation_Snowmelt/sim' + run_id, period)
    print(i)

## Synthetic inversion test

In [None]:
# Load the posterior samples of the synthetic case and discard the first column (the saved index).
posterior_samples = pd.read_csv('Posterior_Simulation_synthetic/Posterior_Parameters.csv').iloc[:, 1:]

In [None]:
# Forcing for the synthetic inversion test (same values as Set 2A).
precip = 2e-3 #m/d
ET = 2e-3 #m/d
num_MC = 300

# Run the model once for each of the first num_MC synthetic-case posterior parameter sets.
for i in range(num_MC):
    run_id = str(i).zfill(3)
    hk_gravel, hk_soil, vka_ratio_gravel, vka_ratio_soil, k_dam = posterior_samples.iloc[i]
    mf, head, hk, vka, strt, zbot, flf, frf = modflow_BC(
        hk_gravel, hk_soil, vka_ratio_gravel, vka_ratio_soil, k_dam,
        ET, precip, 'sim' + run_id, 'Posterior_Simulation_synthetic/sim' + run_id)
    print(i)

# Posterior visualization

In [None]:
# Number of posterior realizations stored on disk.
num_MC = 300

# Pre-allocate ensemble arrays shaped like the current `head` array.
# NOTE(review): if head is (16, 460, 480), each of the three arrays is roughly 8.5 GB in float64.
ensemble_shape = (num_MC,) + tuple(head.shape[:3])
head_MC_pos = np.zeros(ensemble_shape)
flf_MC_pos = np.zeros(ensemble_shape)
frf_MC_pos = np.zeros(ensemble_shape)
sim_MC_pos = np.zeros([num_MC, sim.shape[0]])

# Read every posterior run and keep both the full fields and the heads at the monitoring locations.
for i in tqdm(range(num_MC)):
    run_id = str(i).zfill(3)
    head, flf, frf = read_sim('./Posterior_Simulation/sim' + run_id, 'sim' + run_id)
    head_MC_pos[i] = head
    flf_MC_pos[i] = flf
    frf_MC_pos[i] = frf
    sim_MC_pos[i] = f_data(head)

In [None]:
# Label the posterior ensemble columns with the observation IDs (GB10 still included at this point).
sim_MC_pos = pd.DataFrame(sim_MC_pos,columns = baseflow_obs['ID'])

In [None]:
# Simulated minus observed head for every posterior run (rows) and location (columns).
mismatch = sim_MC_pos.values -baseflow_obs['masl'].values.reshape(1,-1)

In [None]:
# Posterior run with the smallest total absolute mismatch (NaNs ignored); computed once
# instead of on every loop iteration.
best_run = np.argmin(np.nansum(np.abs(mismatch), axis=1))
best_sim = sim_MC_pos.iloc[best_run]

# Simulated vs. observed head for that run, one labelled point per location.
plt.figure(figsize=[8,8])
plt.scatter(best_sim, baseflow_obs['masl'])
for i in range(len(locations)):
    # .iloc makes the positional lookup explicit: best_sim is labelled by location ID, so a bare
    # best_sim[i] depends on pandas' deprecated integer-position fallback.
    plt.text(best_sim.iloc[i], baseflow_obs['masl'].iloc[i], locations['ParameterColumnName'].iloc[i])

# Dashed 1:1 line for reference.
plt.plot([2724.6,2725.8],[2724.6,2725.8],color = 'grey',linestyle = '--')
plt.gca().set_aspect('equal')
plt.xlabel('Simulation')
plt.ylabel('Observed')
# Shown here: the single best-matching posterior run, not the default-parameter run.

In [None]:
# Re-wrap with the observation IDs as columns (sim_MC_pos is already such a frame after the cell above).
sim_MC_pos = pd.DataFrame(sim_MC_pos,columns = baseflow_obs['ID'])
# Exclude GB10, matching the prior ensemble and the filtered observations.
sim_MC_pos = sim_MC_pos.drop(columns=['GB10'])

In [None]:
# Add the same noise realization that was generated for the prior ensemble (no new random draw).
sim_MC_pos_w_noise = sim_MC_pos + noise

In [None]:
# Long format of the noisy prior ensemble: one row per (realization, location) pair.
melted_df = sim_MC_w_noise.melt(var_name='Columns', value_name='Head (masl)')

# Prior spread per location, drawn wide so it lines up with the posterior figure below.
plt.figure(figsize=(14, 3))
sns.boxplot(data=melted_df, x='Columns', y='Head (masl)',
            palette=column_colors, linewidth=1, fliersize=2)

# Observations (GB10 excluded) drawn on top of the boxes.
plt.scatter(baseflow_obs_filtered['ID'], baseflow_obs_filtered['masl'],
            color='red', zorder=100, alpha=0.8, label='baseflow observations')

plt.title('Prior simulation')
plt.ylabel('Water Level Elevation (m)')
plt.legend()
# Tick labels are left unrotated; plt.xticks() without arguments changes nothing.
plt.xticks()
plt.xlabel('')
plt.ylim(2723.2, 2725.9)
plt.show()


In [None]:
# Long format of the noisy posterior ensemble: one row per (realization, location) pair.
melted_df = sim_MC_pos_w_noise.melt(var_name='Columns', value_name='Head (masl)')

# Posterior spread per location, same layout as the prior figure.
plt.figure(figsize=(14, 3))
sns.boxplot(data=melted_df, x='Columns', y='Head (masl)',
            palette=column_colors, linewidth=1, fliersize=2)

# Observations (GB10 excluded) drawn on top of the boxes.
plt.scatter(baseflow_obs_filtered['ID'], baseflow_obs_filtered['masl'],
            color='red', zorder=100, alpha=0.8, label='baseflow observations')

plt.title('Posterior simulation: Baseflow observations')
plt.ylabel('Water Level Elevation (m)')
plt.legend()

# No x-axis label; same head range as the prior figure.
plt.xlabel('')
plt.ylim(2723.2, 2725.9)
plt.show()


In [None]:
# Reload the saved posterior samples, discard the first (index) column and return to log10 space.
posterior_samples = np.log10(pd.read_csv('Posterior_Simulation/Posterior_Parameters.csv').iloc[:, 1:])

In [None]:
# Panel titles, one per parameter, in the column order of the parameter tables.
xlabel = ['log $K_{h}^{gravel}$ [m/s]','log $K_{h}^{soil}$ [m/s]','log($K_{v}^{gravel}/K_{h}^{gravel}$)','log($K_{v}^{soil}/K_{h}^{soil}$)','log $K^{dam}$ [m/s]']

In [None]:
# Default values 
# Linear-space reference values in the same order as `xlabel`; np.nan where no value is given.
# They are drawn (after log10) as the 'One observed' line in the figure below.
observed_parameters = [2e-3,1.4e-5, np.nan, 1.6e-6/1.4e-5, np.nan]


In [None]:
# Enlarge the fonts of every text element in the figure.
plt.rcParams.update({'font.size': 14, 'axes.labelsize': 14, 'axes.titlesize': 16, 'legend.fontsize': 12, 'xtick.labelsize': 12, 'ytick.labelsize': 12})

# posterior_samples is already a log10 DataFrame here; select the columns named like the prior table.
posterior_df = pd.DataFrame(posterior_samples, columns=paras.columns)
n_variables = len(paras.columns)

# One narrow panel per parameter with a shared density axis.
fig, axes = plt.subplots(nrows=1, ncols=n_variables, figsize=(3.*n_variables, 2), sharey=True)
axes = np.atleast_1d(axes)  # stays indexable even with a single panel

for i, var_name in enumerate(paras.columns):
    panel = axes[i]

    # Prior box: range of the sampled prior parameters at the matching uniform density.
    uniform_min, uniform_max = np.min(paras[var_name]), np.max(paras[var_name])
    uniform_height = 1 / (uniform_max - uniform_min)
    panel.fill_betweenx([0, uniform_height], uniform_min, uniform_max, alpha=0.3, color='C0', label='Prior')

    # Smoothed posterior density.
    sns.kdeplot(posterior_df[var_name], ax=panel, fill=True, alpha=0.5, bw_adjust=1.5, label='Posterior', color='C1')

    # Only the left-most panel keeps its y label; titles replace the x labels.
    if i > 0:
        panel.set_ylabel('')
    panel.set_xlabel('')
    panel.set_xlim(uniform_min, uniform_max)
    panel.set_ylim(0, 5)

    # Reference value for this parameter; a NaN entry simply draws nothing.
    panel.vlines(x=np.log10(observed_parameters[i]), ymin=0, ymax=5, color='lightgrey', linestyle='--', label='One observed')
    panel.set_title(xlabel[i])

plt.tight_layout()
plt.show()


In [None]:
# Prior vs. posterior density of hk_soil + vka_ratio_soil; both tables are in log10 space, so
# the sum is the log10 of the product of the two quantities.
plt.figure(figsize=[3, 2])
kde_style = dict(fill=True, alpha=0.5, bw_adjust=1.5)
sns.kdeplot(paras['hk_soil'] + paras['vka_ratio_soil'], label='Prior', color='C0', **kde_style)
sns.kdeplot(posterior_samples['hk_soil'] + posterior_samples['vka_ratio_soil'], label='Posterior', color='C1', **kde_style)

# Reference value, drawn as a dashed line.
plt.vlines(x=np.log10(1.6e-6), ymin=0, ymax=5, color='lightgrey', linestyle='--', label='True')

plt.title('log($K_{v}^{soil}$) [m/s]')
plt.yticks([0, 2.5, 5])
plt.ylim(0, 5)
plt.ylabel('')

plt.tight_layout()

In [None]:
# Number of synthetic-case posterior realizations stored on disk.
num_MC = 300

# Pre-allocate ensemble arrays shaped like the current `head` array.
# NOTE(review): if head is (16, 460, 480), each of the three arrays is roughly 8.5 GB in float64.
ensemble_shape = (num_MC,) + tuple(head.shape[:3])
head_MC_pos = np.zeros(ensemble_shape)
flf_MC_pos = np.zeros(ensemble_shape)
frf_MC_pos = np.zeros(ensemble_shape)
sim_MC_pos = np.zeros([num_MC, sim.shape[0]])

# Read every synthetic-case run and keep both the full fields and the heads at the monitoring locations.
for i in tqdm(range(num_MC)):
    run_id = str(i).zfill(3)
    head, flf, frf = read_sim('./Posterior_Simulation_synthetic/sim' + run_id, 'sim' + run_id)
    head_MC_pos[i] = head
    flf_MC_pos[i] = flf
    frf_MC_pos[i] = frf
    sim_MC_pos[i] = f_data(head)

In [None]:
# Label the synthetic-case ensemble columns with the observation IDs ...
sim_MC_pos = pd.DataFrame(sim_MC_pos,columns = baseflow_obs['ID'])
# ... and exclude GB10, matching the prior ensemble and the filtered observations.
sim_MC_pos = sim_MC_pos.drop(columns=['GB10'])

In [None]:
# Add the same noise realization that was generated for the prior ensemble (no new random draw).
sim_MC_pos_w_noise = sim_MC_pos + noise

In [None]:
# Long format of the noisy synthetic-case posterior ensemble: one row per (realization, location) pair.
melted_df = sim_MC_pos_w_noise.melt(var_name='Columns', value_name='Head (masl)')

# Posterior spread per location.
plt.figure(figsize=(14, 3))
sns.boxplot(data=melted_df, x='Columns', y='Head (masl)',
            palette=column_colors, linewidth=1, fliersize=2)

# The synthetic "observations" the posterior was conditioned on, drawn on top of the boxes.
plt.scatter(baseflow_obs_filtered['ID'], data_obs_synthetic,
            color='yellow', zorder=100, alpha=0.8, label='synthetic observations')

plt.title('Posterior simulation: Synthetic observations')
plt.ylabel('Water Level Elevation (m)')
plt.legend()

# No x-axis label; same head range as the other ensemble figures.
plt.xlabel('')
plt.ylim(2723.2, 2725.9)
plt.show()


In [None]:
# Reload the synthetic-case posterior samples, discard the first (index) column and return to log10 space.
posterior_samples = np.log10(pd.read_csv('Posterior_Simulation_synthetic/Posterior_Parameters.csv').iloc[:, 1:])

In [None]:
# Enlarge the fonts of every text element in the figure.
plt.rcParams.update({'font.size': 14, 'axes.labelsize': 14, 'axes.titlesize': 16, 'legend.fontsize': 12, 'xtick.labelsize': 12, 'ytick.labelsize': 12})

# posterior_samples is already a log10 DataFrame here; select the columns named like the prior table.
posterior_df = pd.DataFrame(posterior_samples, columns=paras.columns)
n_variables = len(paras.columns)

# One narrow panel per parameter with a shared density axis.
fig, axes = plt.subplots(nrows=1, ncols=n_variables, figsize=(3.*n_variables, 2), sharey=True)
axes = np.atleast_1d(axes)  # stays indexable even with a single panel

for i, var_name in enumerate(paras.columns):
    panel = axes[i]

    # Prior box: range of the sampled prior parameters at the matching uniform density.
    uniform_min, uniform_max = np.min(paras[var_name]), np.max(paras[var_name])
    uniform_height = 1 / (uniform_max - uniform_min)
    panel.fill_betweenx([0, uniform_height], uniform_min, uniform_max, alpha=0.3, color='C0', label='Prior')

    # Smoothed posterior density.
    sns.kdeplot(posterior_df[var_name], ax=panel, fill=True, alpha=0.5, bw_adjust=1.5, label='Posterior', color='C1')

    # Only the left-most panel keeps its y label; titles replace the x labels.
    if i > 0:
        panel.set_ylabel('')
    panel.set_xlabel('')
    panel.set_xlim(uniform_min, uniform_max)

    # Known parameter value of the synthetic realization.
    panel.vlines(x=paras.iloc[synthetic_idx, i], ymin=0, ymax=3, color='grey', linestyle='--', label='True')
    panel.set_title(xlabel[i])

# Legend on the last panel only.
axes[i].legend()

plt.tight_layout()

plt.show()
