This notebook provides an example of how a user might interact with the current model evaluation functions. It shows a start-to-finish workflow: requesting observations, retrieving the matching ParFlow outputs, calculating comparison metrics, and creating plots. These functions assume that a ParFlow run has already been completed and that its outputs are available.

In [1]:
import pandas as pd
import subsettools

from model_evaluation import get_observations, get_parflow_output, calculate_metrics, explore_available_observations
from plots import plot_obs_locations, plot_time_series, plot_compare_scatter, plot_metric_map

In [2]:
# ---------------------------------------------------------------------------
# Evaluation window
# ---------------------------------------------------------------------------
# Date range and time step handed to both the observation request and the
# ParFlow output request in the evaluation cell.
temporal_resolution = "hourly"
start_date = "2003-04-01 01:00:00"
end_date = "2003-04-30 00:00:00"

# Variables to evaluate; the evaluation cell loops over this list.
variable_list = ["streamflow"]

# ---------------------------------------------------------------------------
# Domain
# ---------------------------------------------------------------------------
# The workflow needs a mask together with its ij_bounds on either the CONUS1
# or the CONUS2 grid. As an example, both are derived here from a single HUC
# using subsettools.
grid = "conus2"
huc_list = ["02040106"]
ij_bounds, mask = subsettools.define_huc_domain(huc_list, grid)

# ---------------------------------------------------------------------------
# ParFlow run to evaluate
# ---------------------------------------------------------------------------
# Run name and the directory holding the outputs of the already-completed run.
parflow_runname = "02040106"
parflow_output_dir = f"/scratch/network/hydrogen_collaboration/example_parflow_runs/02040106/outputs/{parflow_runname}_conus2_2003WY"

In [3]:
# Example end-to-end evaluation, run once per entry in variable_list.
# Possible refactor: wrap this entire cell in an evaluate_parflow() function that
# accepts the settings from the previous cell and performs the steps below internally.

for variable in variable_list:

    # Step 1: observations (metadata + data) for the sites inside the domain.
    obs_metadata_df, obs_data_df = get_observations(
        mask,
        ij_bounds,
        grid,
        start_date,
        end_date,
        variable,
        temporal_resolution,
    )

    # Step 2: ParFlow outputs for the sites described by the observation metadata.
    parflow_data_df = get_parflow_output(
        obs_metadata_df,
        parflow_output_dir,
        parflow_runname,
        start_date,
        end_date,
        variable,
        temporal_resolution,
    )

    # Step 3: ParFlow-vs-observation metrics; a per-variable CSV path is supplied
    # together with write_csv=True.
    metrics_df = calculate_metrics(
        obs_data_df,
        parflow_data_df,
        obs_metadata_df,
        write_csv=True,
        csv_path=f"{variable}_metrics.csv",
    )

    # Step 4: plots, each given an output directory under plots/<variable>.

    # 4a. Time series of ParFlow against observations.
    plot_time_series(
        obs_data_df,
        parflow_data_df,
        obs_metadata_df,
        variable,
        output_dir=f"plots/{variable}/time_series",
    )

    # 4b. Scatter comparison of values across all sites (linear axes).
    plot_compare_scatter(
        obs_data_df,
        parflow_data_df,
        variable,
        log_scale=False,
        output_dir=f"plots/{variable}",
    )

    # 4c. Maps with sites color-coded by the value of each listed metric.
    plot_metric_map(
        mask,
        metrics_df,
        variable,
        metrics_list=["rmse", "spearman_rho"],
        output_dir=f"plots/{variable}",
    )

In [None]:
# Auxiliary step: look up which observations are available across multiple site
# types for this domain and date range, then plot where those sites are located.
obs_available_metadata_df = explore_available_observations(
    mask,
    ij_bounds,
    grid,
    date_start=start_date,
    date_end=end_date,
)

plot_obs_locations(
    obs_available_metadata_df,
    mask,
    file_path="plots/observation_site_locations.png",
)