In [None]:
%run "0a_Workspace_setup.ipynb"

from NHM_helpers.NHM_helpers import (
    hrus_by_poi,
    hrus_by_seg,
    subset_stream_network,
    create_poi_group,
)
from NHM_helpers.map_template import *
from NHM_helpers.NHM_Assist_utilities import make_plots_par_vals, make_HW_cal_level_files

from NHM_helpers.NHM_output_visualization import (
    retrieve_hru_output_info,
    create_sum_var_dataarrays,
    create_mean_var_dataarrays,
    create_sum_var_annual_gdf,
    create_sum_var_annual_df,
    create_sum_var_monthly_df,
    create_var_daily_df,
    create_var_ts_for_poi_basin_df,
)
from NHM_helpers.output_plots import *

from ast import literal_eval

# Gage id (poi_id) currently selected for plotting; None means no gage has been chosen yet.
poi_id_sel = None

# Coordinate reference system code for mapping.
# NOTE(review): presumably an EPSG code (4326 = WGS 84 lat/lon) — confirm against the helpers that consume it.
crs = 4326

In [None]:
import matplotlib as mplib
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

import folium
from folium import Choropleth, Circle, Marker

from folium import plugins
from shapely.geometry import Polygon
from folium.features import DivIcon
from folium.plugins import MarkerCluster
from shapely.geometry import Polygon

from folium.plugins import MarkerCluster


from tobler.dasymetric import masked_area_interpolate
from tobler.model import glm
from tobler.area_weighted import area_interpolate

from libpysal.examples import load_example

import hydroeval as he

# import hyswap
# from hyswap.percentiles import calculate_variable_percentile_thresholds_by_day
# from hyswap.cumulative import calculate_daily_cumulative_values
import calendar
import statistics
from sklearn.metrics import r2_score

In [None]:
def stats_table(stats_df):
    """Build a one-row table of goodness-of-fit statistics (simulated vs. observed flow).

    Parameters
    ----------
    stats_df : pandas.DataFrame
        Must expose a ``discharge`` column (observations) and a ``seg_outflow``
        column (simulations) of equal length.

    Returns
    -------
    pandas.DataFrame
        A single row (index 0) with the columns ``KGE``, ``NSE``, ``Pbias``,
        ``RMSE``, ``R^2``, ``R``, ``Alpha``, ``Beta`` and ``RSR``; every value
        is rounded to two decimals.

    Raises
    ------
    statistics.StatisticsError
        If ``stats_df`` holds fewer than two observations (requirement of
        ``statistics.stdev``).
    """
    evaluations = stats_df.discharge  # observed flow (reference series)
    simulations = stats_df.seg_outflow  # modelled flow

    std_evaluations = statistics.stdev(evaluations)

    # hydroeval's evaluator takes (objective function, simulations, evaluations).
    rmse_raw = he.evaluator(he.rmse, simulations, evaluations)
    rmse = np.round(rmse_raw, 2)
    nse = np.round(he.evaluator(he.nse, simulations, evaluations), 2)
    pbias = np.round(he.evaluator(he.pbias, simulations, evaluations), 2)
    kge, r, alpha, beta = np.round(he.evaluator(he.kge, simulations, evaluations), 2)

    # RSR = RMSE / standard deviation of the observations. Divide the unrounded
    # RMSE so the two-decimal rounding is applied only once, to the final ratio.
    rsr = np.round(rmse_raw / std_evaluations, 2)

    # sklearn's signature is r2_score(y_true, y_pred): the observations are the
    # reference series and the simulations are the predictions.
    r_sq = np.round(np.array([r2_score(evaluations, simulations)]), 2)

    stat_dict = {
        "KGE": kge[0],
        "NSE": nse[0],
        "Pbias": pbias[0],
        "RMSE": rmse[0],
        "R^2": r_sq[0],
        "R": r[0],
        "Alpha": alpha[0],
        "Beta": beta[0],
        "RSR": rsr[0],
    }

    return pd.DataFrame(stat_dict, index=[0])

In [None]:
# Create dataframes for hydrofabric elements.
# Every helper and every argument used in this cell is expected to come from the
# workspace-setup notebook / star-imports at the top of this notebook; none of
# them is defined in this cell.

# HRU GeoDataFrame (later passed to the gage, poi and map helpers).
hru_gdf = create_hru_gdf(
    NHM_dir,
    model_dir,
    GIS_format,
    param_filename,
    nhru_params,
    nhru_nmonths_params,
)

# Stream-segment GeoDataFrame (drawn as the segment layer on the map).
seg_gdf = create_segment_gdf(
    model_dir,
    GIS_format,
    param_filename,
)

# NWIS gage information for the area of interest.
# NOTE(review): presumably filtered by nwis_gage_nobs_min (minimum number of observations) — confirm in the helper.
nwis_gages_aoi = fetch_nwis_gage_info(
    model_dir,
    control_file_name,
    nwis_gage_nobs_min,
    hru_gdf,
)

# Points of interest (gages) table; gains "nhm_calib" and "kge" columns in later cells.
poi_df = create_poi_df(
    model_dir,
    param_filename,
    control_file_name,
    hru_gdf,
    nwis_gages_aoi,
    gages_file,
)

# NOTE(review): the returned value is not used again in the visible part of this notebook;
# the call is presumably kept for its side effect of writing a default gages file — confirm.
default_gages_file = create_default_gages_file(
    model_dir,
    nwis_gages_aoi,
    poi_df,
)

# Gages table read back from file.
# NOTE(review): nwis_gages_file is not assigned in this cell — presumably defined by the setup notebook; confirm.
gages_df = read_gages_file(
    model_dir,
    poi_df,
    nwis_gages_file,
    gages_file,
)

#### NHM Calibration Levels for HRUs: (those hrus calibrated in byHW and byHWobs parts)
HW basins were discretized using a maximum and minimum drainage area; HW HRUs, segments, outlet segment, and drainage area are available. Gages used in byHWobs calibration, Part 3, for selected headwaters are also provided here.  FILES AND TABLES IN THIS SECTION HAVE CONUS COVERAGE and will be subset later.

In [None]:
# Build the headwater (HW) calibration-level layers from the HRU GeoDataFrame.
# Both results are drawn on the folium map further down: HW_basins_gdf as the
# "NHM HRUs" layer and HW_basins as the "HW basin boundary" layer.
HW_basins_gdf, HW_basins = make_HW_cal_level_files(hru_gdf)

#### Identify the NHM poi gages that were used in calibration byHWobs

In [None]:
# Read the tab-delimited file listing the gages used to calibrate the byHWobs part
# for CONUS. Only the columns named below are given an explicit dtype; tailor the
# two lists to the particular file (e.g. add "poi_name", "drainage_area" or
# "drainage_area_contrib" together with a matching type).
# poi_id is read as a string so that leading zeros in the gage id are kept.
col_names = ["poi_id", "latitude", "longitude"]
col_types = [np.str_, float, float]

# Column header -> datatype mapping handed to pandas.read_csv below.
cols = {header: datatype for header, datatype in zip(col_names, col_types)}

byHWobs_poi_df = pd.read_csv(
    r"data_dependencies/NHM_v1_1/nhm_v1_1_byhwobs_cal_gages.csv",
    sep="\t",
    dtype=cols,
).fillna(0)

# Flag the byHWobs calibration gages in the current poi table (the table may hold
# more gages than the calibration list because the model parameter file was
# amended to include additional gages): "Y" = used in calibration, "N" = not used.
used_in_byHWobs = poi_df["poi_id"].isin(byHWobs_poi_df["poi_id"])
poi_df["nhm_calib"] = "N"
poi_df.loc[used_in_byHWobs, "nhm_calib"] = "Y"

### Retrieve pywatershed output file information.
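> The cell below calls `retrieve_hru_output_info` on the model output directory and the selected water years; it returns the plotting start and end dates, a list of years, and a list of output variables.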

In [None]:
# Collect the plotting window and output inventory from the model output directory.
# plot_start_date / plot_end_date are used below to slice both the simulated and
# the observed time series; the helper's internals are not shown in this notebook.
plot_start_date, plot_end_date, year_list, output_var_list = retrieve_hru_output_info(
    out_dir,
    water_years,
)

#### Compute KGE for all gages to color the icon on the map

In [None]:
# Read the simulated flows and derive the daily series plus monthly and
# water-year resamples.
output_var_sel = "seg_outflow"
with xr.load_dataarray(out_dir / f"{output_var_sel}.nc") as da:
    # these machinations are to keep downstream things as they were before some refactoring
    # da = da.to_dataset().rename_dims({"nhm_seg": "nhru"})[da.name]
    sf_units = da.units  # units string, used later in the plot axis title
    # Re-index the segment dimension by gage id so that later cells can select a
    # gage with .sel(npoi_gages=<poi_id>).
    # NOTE(review): relies on the file carrying an "npoi_gages" coordinate along nhm_seg — confirm.
    da = da.swap_dims(nhm_seg="npoi_gages")
    output_var = da  # full simulated record, sliced per gage in later cells
    output_var_daily = da.sel(time=slice(plot_start_date, plot_end_date))
    # NOTE(review): the simulated aggregates use .sum() while the observed aggregates
    # in the next cell use .mean() — confirm this difference is intended.
    output_var_monthly = output_var_daily.resample(time="m").sum()
    # Water year annual (years ending in September)
    output_var_annual = output_var_daily.resample(time="A-SEP").sum()

In [None]:
# Read in observed flows
# Note that the model start and stop times in the control file should be the same as the observation start and stop times.
sf_filename = model_dir / "notebook_output_files" / "nc_files" / "sf_efc.nc"

with xr.open_dataset(sf_filename) as obs_data:
    # Make a station name dataframe and station id list from the streamflow file .nc (created in previous notebook)
    # station_name_df = getattr(
    #     obs_data, "poi_name"
    # ).to_dataframe()  # supporting df for plot labeling
    station_name_df = obs_data["poi_name"].to_dataframe()
    station_id_list = station_name_df.index.to_list()  # supporting list for processing

    # Slice the observations to the plotting window, then resample the daily
    # discharge to monthly and water-year (ending in September) means.
    obs_0 = obs_data.sel(
        time=slice(plot_start_date, plot_end_date)
    ).transpose()  # NOTE(review): the file is opened with open_dataset here (not load_dataset), while obs/obs_efc are read again in later cells after this with-block has closed it — confirm the data stay readable
    obs_efc = obs_0["efc"]  # "efc" variable; merged with discharge in the plotting cell
    obs = obs_0["discharge"]  # observed discharge
    obs_monthly = obs.resample(time="m").mean()
    obs_annual = obs.resample(time="A-SEP").mean()

    # Removes only the name obs_data; the with-statement still closes the dataset on exit.
    del obs_data

In [None]:
# OLD WAY
# poi_df["kge"] = np.nan
# for idx, row in poi_df.iterrows():
#     poi_tag = row["poi_id"]
#     df_sf_data_sel = (obs.sel(poi_id=poi_tag)).to_dataframe()

#     # Determine por
#     por_start = df_sf_data_sel["discharge"].notna().idxmax()  # First Day
#     por_end = df_sf_data_sel["discharge"].notna()[::-1].idxmax()  # Last Day

#     # Slice to por
#     df_sf_data_sel = (
#         obs.sel(poi_id=poi_tag, time=slice(por_start, por_end))
#     ).to_dataframe()
#     df_sf_data_sel.drop(columns=["poi_id"], inplace=True)  # drop unwanted columns

#     sim_flow = (
#         output_var.sel(npoi_gages=poi_tag, time=slice(por_start, por_end))
#     ).to_dataframe()
#     sim_flow.drop(columns=["npoi_gages"], inplace=True)  # drop unwanted columns

#     # drop the Nan's from the obs for memory/stats (may want to check back on this later)
#     daily_stat_df = (
#         df_sf_data_sel.merge(sim_flow, right_index=True, left_index=True, how="inner")
#     ).dropna()
#     month_stat_df = daily_stat_df.resample("m").mean().dropna()

In [None]:
# Compute the monthly KGE of simulated vs. observed flow for every gage and store
# it in poi_df["kge"] (used further down to color the map markers).
for poi_tag in poi_df.poi_id:
    # Observed discharge for this gage over the whole plotting window.
    obs_full_df = obs.sel(poi_id=poi_tag).to_dataframe()

    # Period of record: first and last day that carry a non-missing observation.
    has_obs = obs_full_df["discharge"].notna()
    por_start = has_obs.idxmax()  # First Day
    por_end = has_obs[::-1].idxmax()  # Last Day
    por_window = slice(por_start, por_end)

    # Observed and simulated flow restricted to the period of record, with the
    # scalar gage-id column removed from each frame.
    df_sf_data_sel = (
        obs.sel(poi_id=poi_tag, time=por_window)
        .to_dataframe()
        .drop(columns=["poi_id"])
    )
    sim_flow = (
        output_var.sel(npoi_gages=poi_tag, time=por_window)
        .to_dataframe()
        .drop(columns=["npoi_gages"])
    )

    # Pair the two series on the date index; days with missing values are removed
    # before the monthly means are taken.
    daily_stat_df = df_sf_data_sel.merge(
        sim_flow, right_index=True, left_index=True, how="inner"
    ).dropna()
    month_stat_df = daily_stat_df.resample("m").mean().dropna()

    # he.kge yields (kge, r, alpha, beta); only the first entry, the KGE itself,
    # is kept here.
    kge_components = np.round(
        he.evaluator(
            he.kge,
            month_stat_df["seg_outflow"],  # simulation data set
            month_stat_df["discharge"],  # observation data set
        ),
        2,
    )
    kge_func = kge_components[0]

    this_gage = poi_df.poi_id == poi_tag
    poi_df.loc[this_gage, "kge"] = np.array(kge_func[0])

In [None]:
# # Calculate kge and add value to poi_df

# poi_df["kge"] = np.nan
# for idx, row in poi_df.iterrows():
#     poi_tag = row["poi_id"]

#     # print(obs.sel(poi_id=poi_tag))
#     df_sf_data_sel_temp = obs.sel(poi_id=poi_tag)
#     df_sf_data_sel = df_sf_data_sel_temp.to_dataframe()


#     # Determine por
#     por_start = df_sf_data_sel["discharge"].notna().idxmax()  # First Day
#     por_end = df_sf_data_sel["discharge"].notna()[::-1].idxmax()  # Last Day

#     # Slice to por
#     df_sf_data_sel = (
#         obs.sel(poi_id=poi_tag, time=slice(por_start, por_end))
#     ).to_dataframe()
#     df_sf_data_sel.drop(columns=["poi_id"], inplace=True)  # drop unwanted columns

#     sim_flow = (
#         output_var.sel(npoi_gages=poi_tag, time=slice(por_start, por_end))
#     ).to_dataframe()
#     sim_flow.drop(columns=["npoi_gages"], inplace=True)  # drop unwanted columns

#     # drop the Nan's from the obs for memory/stats (may want to check back on this later)
#     daily_stat_df = (
#         df_sf_data_sel.merge(sim_flow, right_index=True, left_index=True, how="inner")
#     ).dropna()
#     month_stat_df = daily_stat_df.resample("m").mean().dropna()

#     # kge_func  = np.round(he.evaluator(he.kge,
#     #                                   daily_stat_df['seg_outflow'],# simulation data set
#     #                                   daily_stat_df['discharge'],# observation data set
#     #                                  ), 2# decimal places for the round() function
#     #                     )[0]#this grabs only the kge var, in position"0" from the list of ke.kge() output vars

#     # poi_df.loc[idx, 'kge'] = np.array(kge_func[0])# pandas wrangling of the array output from he.evaluator() as an array

#     kge_func = np.round(
#         he.evaluator(
#             he.kge,
#             month_stat_df["seg_outflow"],  # simulation data set
#             month_stat_df["discharge"],  # observation data set
#         ),
#         2,  # decimal places for the round() function
#     )[
#         0
#     ]  # this grabs only the kge var, in position"0" from the list of ke.kge() output vars

#     poi_df.loc[idx, "kge"] = np.array(
#         kge_func[0]
#     )  # pandas wrangling of the array output from he.evaluator() as an array

# Create an interactive map to evaluate streamflow at poi_gages

### Read mapping elements

In [None]:
from IPython.display import display
from folium.plugins import MeasureControl
from folium.utilities import Element
from folium.plugins import FloatImage
import base64

# KGE class breaks used to color the gage markers.
KGE_GOOD = 0.7  # KGE >= 0.7 -> green
KGE_FAIR = 0.5  # 0.5 <= KGE < 0.7 -> yellow; KGE < 0.5 -> red


def _kge_fill_color(kge):
    """Return the marker fill color for a KGE value, or None when KGE is NaN."""
    if kge >= KGE_GOOD:
        return "Green"
    if kge >= KGE_FAIR:
        return "Yellow"
    if kge < KGE_FAIR:
        return "Red"
    return None  # every comparison is False for NaN


def _add_poi_marker(
    row, markers, labels, fill_color, outline_color, radius=5, popup_note=""
):
    """Add one gage circle marker to ``markers`` and its gage-id label to ``labels``.

    ``row`` must provide "poi_id", "poi_name", "latitude" and "longitude".
    ``popup_note`` is appended to the standard popup text.
    """
    folium.CircleMarker(
        location=[row["latitude"], row["longitude"]],
        name=row["poi_id"],
        popup=folium.Popup(
            f'Gage <b>{row["poi_id"]}</b>, {row["poi_name"]}<br>{popup_note}',
            max_width=150,
            max_height=70,
        ),
        radius=radius,
        weight=2,
        color=outline_color,
        fill=True,
        fill_color=fill_color,
        fill_opacity=1.0,
        draggable=True,
        lazy=True,
        z_index_offset=4006,
    ).add_to(markers)

    text = f'{row["poi_id"]}'
    folium.map.Marker(
        [row["latitude"], row["longitude"]],
        z_index_offset=4007,
        icon=DivIcon(
            icon_size=(150, 36),
            icon_anchor=(0, 0),
            html='<div style="font-size: 12pt; font-weight: bold">%s</div>' % text,
        ),
    ).add_to(labels)


# Load standard map settings
pfile_lat, pfile_lon, zoom, cluster_zoom = folium_map_elements(
    hru_gdf, poi_df, poi_id_sel
)

USGSHydroCached_layer, USGStopo_layer, Esri_WorldImagery, OpenTopoMap = (
    folium_map_tiles()
)

minimap = create_minimap()

# Create map (rebinding ``m`` also discards any map built by a previous run)
m = folium.Map(
    location=[pfile_lat, pfile_lon],
    # width=1000, height=600,
    tiles=USGSHydroCached_layer,
    zoom_start=zoom,
    control_scale=True,
)

# Add base map layers
USGStopo_layer.add_to(m)
OpenTopoMap.add_to(m)
Esri_WorldImagery.add_to(m)

# Add widgets
m.add_child(minimap)
m.add_child(MeasureControl(position="bottomright"))

################################################
# Create and add hru map (headwater HRUs styled by calibration level)
hru_map = folium.GeoJson(
    HW_basins_gdf,
    style_function=cal_style_function,
    name="NHM HRUs",
    z_index_offset=40002,
).add_to(m)

tooltip_hru = folium.GeoJsonPopup(
    fields=["hw_id"], aliases=["Headwater id"], labels=True
)

# Add tool tip to map
hru_map.add_child(tooltip_hru)

# Create/Add segment map
seg_map = create_segment_map_hide(seg_gdf)
seg_map.add_to(m)

# Feature groups holding the POI markers and the POI labels
marker_cluster = folium.FeatureGroup(
    name="All the POIs",
    overlay=True,
    control=True,
    icon_create_function=None,
    z_index_offset=5000,
)
marker_cluster_label_poi = folium.FeatureGroup(
    name="All the POI labels",
    overlay=True,
    control=True,
    show=False,  # False will not draw the child upon opening the map, but have it to draw in the Layer control.
    icon_create_function=None,
    z_index_offset=4004,
)

################################################
# One marker + label per gage. Fill color encodes the KGE class; gages used in
# the byHWobs calibration (nhm_calib == "Y") get a black outline, all other
# gages (nhm_calib == "N") get none.
for _, row in poi_df.iterrows():
    calib_flag = row["nhm_calib"]
    if calib_flag not in ("Y", "N"):
        continue

    fill_color = _kge_fill_color(row["kge"])
    if fill_color is not None:
        _add_poi_marker(
            row,
            marker_cluster,
            marker_cluster_label_poi,
            fill_color=fill_color,
            outline_color="Black" if calib_flag == "Y" else None,
        )
    elif calib_flag == "N" and np.isnan(row["kge"]):
        # No KGE available: small black dot with an explanatory popup.
        # NOTE(review): calibration gages ("Y") without a KGE get no marker at
        # all, exactly as before this refactor — confirm that is intended.
        _add_poi_marker(
            row,
            marker_cluster,
            marker_cluster_label_poi,
            fill_color="Black",
            outline_color="Black",
            radius=2,
            popup_note=" Gage has less than 2yrs of observations.",
        )

################################################
# Add hw boundary ref map
hw_basins_map = folium.GeoJson(
    HW_basins, style_function=hw_basin_style, name="HW basin boundary"
).add_to(m)

################################################
marker_cluster.add_to(m)
marker_cluster_label_poi.add_to(m)

plugins.Fullscreen(position="topleft").add_to(m)
folium.LayerControl(collapsed=True, position="bottomright", autoZIndex=True).add_to(m)

################################################
# Print map header (three blank lines, title, then the usage hint)
for _ in range(3):
    con.print("")
con.print("NHM poi_gages map", style="u bold black")
con.print(
    "Click on a poi and copy the gage id into the field below to view hydrographs and flow statistics.",
    style="bold yellow",
)

m

In [None]:
# Sanity check: show the distinct calibration flags present in poi_df
# ("Y" / "N", as assigned when the byHWobs calibration gages were flagged).
set(poi_df.nhm_calib.values)

### Paste the poi_id in the field below

In [None]:
# Replace None with a gage id string (e.g. one copied from the map) to plot a specific gage.
poi_id_sel = None
# When no id was entered, fall back to the first gage listed in poi_df.
if poi_id_sel is None:
    poi_id_sel = poi_df.poi_id.tolist()[0]

In [None]:
# Combobox for picking the gage to plot; only ids present in poi_df are accepted.
v = widgets.Combobox(
    placeholder="Enter Gage ID here",
    options=poi_df.poi_id.tolist(),
    description="Plot Gage:",
    ensure_option=True,
    disabled=False,
)


def on_change(change):
    """Copy the combobox value into the global ``poi_id_sel`` when the value changes.

    ``change`` is the notification mapping delivered by the widget observer;
    notifications other than a change of the "value" trait are ignored.
    """
    global poi_id_sel, fig
    is_value_change = change["type"] == "change" and change["name"] == "value"
    if not is_value_change:
        return
    poi_id_sel = v.value


# Register the callback, then render the widget.
v.observe(on_change)

display(v)

In [None]:
# This is for testing only; can comment out in user version
if poi_id_sel is None:
    poi_id_sel = poi_df.poi_id.tolist()[1]

In [None]:
if poi_id_sel == None:
    con.print(
        "Select a gage ID from the dropdown above or copy/paste from the map into the field."
    )
else:

    # Single request
    if len((obs_annual.sel(poi_id=poi_id_sel)).to_dataframe().dropna()) < 2:
        con.print(
            f"The gage {poi_id_sel} has no observation data in the streamflow obs file."
        )
        pass
    else:
        df_sf_data_sel = (obs.sel(poi_id=poi_id_sel)).to_dataframe()

        # Determine por
        por_start = df_sf_data_sel["discharge"].notna().idxmax()  # First Day
        por_end = df_sf_data_sel["discharge"].notna()[::-1].idxmax()  # Last Day

        # Slice to por
        df_sf_data_sel = (
            obs.sel(poi_id=poi_id_sel, time=slice(por_start, por_end))
        ).to_dataframe()
        df_sf_data_sel.drop(columns=["poi_id"], inplace=True)  # drop unwanted columns

        obs_efc_sel = (
            obs_efc.sel(poi_id=poi_id_sel, time=slice(por_start, por_end))
        ).to_dataframe()
        obs_efc_sel.drop(columns=["poi_id"], inplace=True)  # drop unwanted columns
        obs_with_efc_sel = df_sf_data_sel.merge(
            obs_efc_sel, right_index=True, left_index=True, how="inner"
        )  # .dropna() #how='left' will slice ts with obs range

        sim_flow = (
            output_var.sel(npoi_gages=poi_id_sel, time=slice(por_start, por_end))
        ).to_dataframe()
        sim_flow.drop(columns=["npoi_gages"], inplace=True)  # drop unwanted columns

        # Create a dataframe for the NaN's that occur between the beginning and end of por
        daily_efc_df = (
            obs_with_efc_sel.merge(
                sim_flow, right_index=True, left_index=True, how="inner"
            )
        ).dropna()
        daily_efc_plot_df = obs_with_efc_sel.merge(
            sim_flow, right_index=True, left_index=True, how="inner"
        )
        daily = df_sf_data_sel.merge(
            sim_flow, right_index=True, left_index=True, how="inner"
        )
        daily_na = daily[daily["discharge"].isnull()]
        daily_na["discharge"] = 5.0

        # drop the Nan's from the obs for memory/stats (may want to check back on this later)
        daily_stat_df = (
            df_sf_data_sel.merge(
                sim_flow, right_index=True, left_index=True, how="inner"
            )
        ).dropna()
        daily_plot_df = df_sf_data_sel.merge(
            sim_flow, right_index=True, left_index=True, how="inner"
        )  # .dropna()

        # daily_stat_df_na = daily_stat_df[daily_stat_df['discharge'].isnull()]
        # daily_stat_df = daily_stat_df.dropna()

        # .dropna() #how='left' will slice ts with obs range
        # daily_stat_df =streamflows_df.copy()#.dropna()
        month_stat_df = daily_stat_df.resample("m").mean().dropna()
        month_plot_df = daily_plot_df.resample("m").mean()  # .dropna()

        water_year_stat_df = daily_stat_df.resample("A-SEP").mean().dropna()
        water_year_plot_df = daily_plot_df.resample("A-SEP").mean()  # .dropna()

        if len(daily_efc_df) <= 10000:
            n = len(daily_efc_df)
        else:
            n = 10000  # Number of sampled days in records

        ######################################################
        # Make timeseries subplot figure
        fig = plotly.subplots.make_subplots(
            rows=3,
            cols=2,
            column_widths=[0.5, 0.5],  # row_heights=[0., 0.3, 0.3, 0.4],
            shared_xaxes="columns",
            # shared_yaxes = 'columns',
            start_cell="top-left",
            vertical_spacing=0.1,
            horizontal_spacing=0.06,
            # y_title=f"Average daily streamflow, {getattr(model_output, output_var_sel).units}",
            y_title=f"Average daily streamflow, {sf_units}",
            subplot_titles=[
                "Annual mean",
                f"Flow Exceedence Curve, n = {n}",
                "Monthly mean",
                "Daily",
                "Statistics",
            ],
            specs=[
                [{"type": "scatter"}, {"type": "scatter", "rowspan": 2}],
                [{"type": "scatter"}, None],
                [{"type": "scatter"}, {"type": "table"}],
            ],
        )

        station_name = station_name_df.loc[
            station_name_df.index == poi_id_sel, "poi_name"
        ].values[0]
        date_range = f"{daily_stat_df.index.month[0]}-{daily_stat_df.index.day[0]}-{daily_stat_df.index.year[0]} to {daily_plot_df.index.month[-1]}-{daily_plot_df.index.day[-1]}-{daily_plot_df.index.year[-1]} "

        fig.update_layout(
            title_text=f"NHM simulated streamflow at {poi_id_sel},<br>{station_name}, {date_range}",  #
            width=900,
            height=700,
            legend=dict(
                orientation="h", yanchor="bottom", y=-0.15, xanchor="right", x=0.7
            ),
            font=dict(family="Arial", size=14, color="#7f7f7f"),  # font color
            paper_bgcolor="linen",
            plot_bgcolor="white",
        )

        fig.update_layout(
            title_automargin=True,
            title_font_color="black",
            title_font_size=20,
            title_x=0.5,
            title_y=0.945,
            title_xref="container",
            title_xanchor="center",
        )

        fig.update_xaxes(range=[daily_plot_df.index[0], daily_plot_df.index[-1]])
        # fig.update_xaxes(range = [(obs["time"][0].dt.datetime.strftime("%Y-%m-%d").values.tolist()), (obs["time"][-1].dt.datetime.strftime("%Y-%m-%d").values.tolist())])

        # fig.update_layout(legend_grouptitlefont_color='black')
        fig.update_layout(font_color="black")

        # fig.update_yaxes(title_text=f'{output_var_sel}, {getattr(model_output, output_var_sel).units}', title_font_color = 'black')
        # fig.update_xaxes(title_text="Water years, from October 1 to September 31", title_font_color = 'black')

        fig.update_xaxes(ticks="inside", tickwidth=2, tickcolor="black", ticklen=10)
        fig.update_yaxes(ticks="inside", tickwidth=2, tickcolor="black", ticklen=10)

        fig.update_xaxes(
            showline=True, linewidth=2, linecolor="black", gridcolor="lightgrey"
        )
        fig.update_yaxes(
            showline=True, linewidth=2, linecolor="black", gridcolor="lightgrey"
        )

        fig.update_traces(hovertemplate=None)
        fig.update_layout(hovermode="x unified")  # "x unified"
        fig.update_layout(
            hoverlabel=dict(
                bgcolor="linen",
                font_size=13,
                font_family="Rockwell",
            )
        )
        # Useful xarray calls
        # f'{(obs["time"][0].dt.datetime.strftime("%Y-%m-%d").values.tolist())} to {(obs["time"][-1].dt.datetime.strftime("%Y-%m-%d").values.tolist())} '
        # x_values_annual = (output_var_annual["time"].dt.datetime.strftime("%Y-%m-%d").values.tolist())
        # sim_values_annual = (output_var_annual.sel(npoi_gages = poi_id_sel).values.tolist())
        # obs_values = (obs_annual.sel(poi_id = poi_id_sel).values.tolist())

        ######################################################
        # Create annual subplot
        annual_plots = [
            go.Scatter(
                x=water_year_plot_df.index,
                y=water_year_plot_df.discharge,
                mode="lines",
                name="Observed flow, annual",
                showlegend=False,
                # marker=dict(color='brown'),
                # xaxis =
                line=dict(
                    color="deepskyblue",
                    width=4,
                    # dash='dot'
                ),
            ),
            go.Scatter(
                x=water_year_plot_df.index,
                y=water_year_plot_df.seg_outflow,
                mode="lines",
                name="Simulated flow, annual",
                showlegend=False,
                # marker=dict(color='brown'),
                line=dict(
                    color="black",
                    width=1,
                    # dash='dot'
                ),
            ),
        ]
        annual_fig = go.Figure(data=annual_plots)

        ######################################################
        # Create monthly subplot
        monthly_plots = [
            go.Scatter(
                x=month_plot_df.index,
                y=month_plot_df.discharge,
                mode="lines",
                name="Observed flow, monthly",
                showlegend=False,
                # marker=dict(color='brown'),
                # xaxis =
                line=dict(
                    color="deepskyblue",
                    width=4,
                    # dash='dot'
                ),
            ),
            go.Scatter(
                x=month_plot_df.index,
                y=month_plot_df.seg_outflow,
                mode="lines",
                name="Simulated flow, monthly",
                showlegend=False,
                # marker=dict(color='brown'),
                line=dict(
                    color="black",
                    width=1,
                    # dash='dot'
                ),
            ),
        ]
        monthly_fig = go.Figure(data=monthly_plots)

        ######################################################
        # Create daily subplot
        # Make a line set for na values to show no data in the plot.

        # daily_efc_exlow_df = daily_efc_df.loc[daily_efc_df['efc'].isin([5])]
        daily_efc_low_plot_df = daily_efc_plot_df.copy()
        daily_efc_low_plot_df.loc[daily_efc_low_plot_df["efc"] <= 3, "discharge"] = (
            np.nan
        )

        daily_efc_high_plot_df = daily_efc_plot_df.copy()
        daily_efc_high_plot_df.loc[daily_efc_high_plot_df["efc"] >= 4, "discharge"] = (
            np.nan
        )

        daily_plots = [
            go.Scatter(
                x=daily_efc_high_plot_df.index,  # (output_var["time"].dt.datetime.strftime("%Y-%m-%d").values.tolist()),
                y=daily_efc_high_plot_df.discharge,  # (obs.sel(poi_id = poi_id_sel).values.tolist()),
                mode="lines",
                name="Observed flow",
                showlegend=True,
                connectgaps=False,
                # marker=dict(color='deepskyblue', size = 5),
                # xaxis =
                line=dict(
                    color="deepskyblue",
                    width=4,
                    # dash='dot'
                ),
            ),
            go.Scatter(
                x=daily_efc_low_plot_df.index,  # (output_var["time"].dt.datetime.strftime("%Y-%m-%d").values.tolist()),
                y=daily_efc_low_plot_df.discharge,  # (obs.sel(poi_id = poi_id_sel).values.tolist()),
                mode="lines",
                name="Observed flow, (Low)",
                showlegend=True,
                connectgaps=False,
                # marker=dict(color='deepskyblue', size = 5),
                # xaxis =
                line=dict(
                    color="red",
                    width=4,
                    # dash='dot'
                ),
            ),
            go.Scatter(
                x=daily_plot_df.index,  # (output_var["time"].dt.datetime.strftime("%Y-%m-%d").values.tolist()),
                y=daily_plot_df.seg_outflow,  # (output_var.sel(npoi_gages = poi_id_sel).values.tolist()),
                mode="lines",
                name="Simulated flow, daily",
                showlegend=False,
                # marker=dict(color='black', size = 3),
                line=dict(
                    color="black",
                    width=1,
                    # dash='dot'
                ),
            ),
        ]
        #######################################################
        # EFC classifications
        # 1 = Large floods
        # 2 = Small floods
        # 3 = High flow pulses
        # 4 = Low flows
        # 5 = Extreme low flows

        daily_df = stats_table(daily_stat_df)
        daily_df["time"] = "daily"
        monthly_df = stats_table(month_stat_df)
        monthly_df["time"] = "monthly"
        annual_df = stats_table(water_year_stat_df)
        annual_df["time"] = "annual"

        # daily_efc_exlow_df = daily_efc_df.loc[daily_efc_df['efc'].isin([5])]
        daily_efc_low_df = daily_efc_df.loc[daily_efc_df["efc"].isin([4, 5])]
        daily_efc_high_df = daily_efc_df.loc[daily_efc_df["efc"].isin([1, 2, 3])]

        # daily_exlow_tab_df = stats_table(daily_efc_exlow_df)
        # daily_exlow_tab_df['time'] = 'exlow'
        # daily_exlow_tab_df[['NSE','KGE']] = np.nan

        daily_low_tab_df = stats_table(daily_efc_low_df)
        daily_low_tab_df["time"] = "low"
        daily_low_tab_df[["NSE", "KGE"]] = np.nan

        daily_high_tab_df = stats_table(daily_efc_high_df)
        daily_high_tab_df["time"] = "high"
        daily_high_tab_df[["NSE", "KGE"]] = np.nan

        all_df = pd.concat(
            [
                daily_df,
                daily_low_tab_df,
                daily_high_tab_df,
                monthly_df,
                annual_df,
            ]
        )
        all_df.set_index("time", inplace=True)
        stats_table_df = all_df.T
        # stats_table_df

        stats_table_obj = go.Figure(
            data=[
                go.Table(
                    header=dict(
                        values=[
                            "Statistic",
                            "Daily",
                            "Low",
                            "High",
                            "Monthly",
                            "Annual",
                        ]
                    ),
                    cells=dict(
                        values=[
                            stats_table_df.index,
                            stats_table_df.daily,
                            stats_table_df.low,
                            stats_table_df.high,
                            stats_table_df.monthly,
                            stats_table_df.annual,
                        ]
                    ),
                )
            ]
        )

        #######################################################

        obs_data = daily_efc_df.discharge.sample(
            n=n, replace=False, random_state=3  # frac=0.25,
        )
        sim_data = daily_efc_df.seg_outflow.sample(
            n=n, replace=False, random_state=3  # frac=0.25,
        )

        obs_sort = np.sort(obs_data)[::-1]
        sim_sort = np.sort(sim_data)[::-1]
        obs_color_sort = daily_efc_df.sort_values("discharge")[
            ::-1
        ]  # Makes the color value sort in same order for use in plot.

        obs_exceedence = np.arange(1.0, len(obs_sort) + 1) / len(obs_sort)
        sim_exceedence = np.arange(1.0, len(sim_sort) + 1) / len(sim_sort)

        efc_colors = {
            1: "rgba(0, 191, 255, 0.5)",  # Large Floods
            0: "white",
            2: "rgba(0, 191, 255, 0.5)",  # Small Floods
            3: "rgba(0, 191, 255, 0.5)",  # High Flow Pulse
            4: "rgba(255, 0, 0, 0.5)",  # Low
            5: "rgba(255, 0, 0, 0.5)",  # Extreemly Low
            np.nan: "yellow",
        }  # missing
        # or ...color_discrete_sequence = plotly.colors.sequential.Viridis

        custom_marker_color = obs_color_sort["efc"].map(efc_colors)

        exceed_plot = [
            go.Scatter(
                x=obs_exceedence,
                y=obs_sort,
                mode="markers",
                name="Observed flow",
                marker=dict(color=custom_marker_color, size=3),
                showlegend=False,
                # line = dict(color='deepskyblue',
                #    width=3,
                # dash='dot'
                # )
            ),
            go.Scatter(
                x=sim_exceedence,
                y=sim_sort,
                mode="lines",
                name="NHM simulated flow",
                showlegend=False,
                # marker=dict(#color='brown',
                #            size=1),
                line=dict(
                    color="black",
                    width=1,
                    # dash='dot'
                ),
            ),
        ]

        exceed_fig = go.Figure(data=exceed_plot)

        # fig.update_yaxes(title_text=f'Streamflow, {getattr(model_output, "seg_outflow").units}', title_font_color = 'black', row=1, col=3)
        # fig.update_xaxes(title_text="Exceedence, probability", title_font_color = 'black', row=1, col=3)

        fig.update_yaxes(type="log", col=2)

        tickvals = [
            0,
            1,
            2,
            5,
            10,
            20,
            50,
            100,
            200,
            500,
            1000,
            2000,
            5000,
            10000,
            20000,
            50000,
            100000,
            200000,
            500000,
            1000000,
        ]

        tickvals_exceed = [0, 0.25, 0.5, 0.75, 1]

        fig.update_xaxes(
            tickvals=tickvals_exceed,
            ticks="inside",
            tickwidth=2,
            tickcolor="black",
            showticklabels=True,
            ticklen=10,
            col=2,
        )
        fig.update_yaxes(
            tickvals=tickvals,
            ticks="inside",
            tickwidth=2,
            tickcolor="black",
            ticklen=10,
            col=2,
        )

        fig.update_xaxes(
            showline=True,
            linewidth=2,
            linecolor="black",
            gridcolor="lightgrey",
            range=[-0.1, 1.1],
            col=2,
        )
        fig.update_yaxes(
            showline=True, linewidth=2, linecolor="black", gridcolor="lightgrey", col=2
        )

        #######################################################
        # Add plots and stats tables to figure
        daily_fig = go.Figure(data=daily_plots)

        for t in annual_fig.data:
            fig.append_trace(t, row=1, col=1)
        for t in monthly_fig.data:
            fig.append_trace(t, row=2, col=1)
        for t in daily_fig.data:
            fig.append_trace(t, row=3, col=1)
        for t in exceed_fig.data:
            fig.append_trace(t, row=1, col=2)
        for t in stats_table_obj.data:
            fig.append_trace(t, row=3, col=2)

        # # Creating the html code for the plotly plot
        # text_div = plotly.offline.plot(fig, include_plotlyjs=False, output_type="div")

        # # Saving the plot as txt file with the html code
        # # idx = 1
        # with open(Folium_maps_dir / f"streamflow_{poi_id_sel}.txt", "w") as f:
        #     f.write(text_div)
        fig.write_html(Folium_maps_dir / f"streamflow_{poi_id_sel}.html")
        fig.show()

In [None]:
# TODO: Add D scores here (Sydney and Tim); maybe use the different components of the EFC rating.