In [None]:
%run "0a_Workspace_setup.ipynb"

## 1. Read NHM Subbasin HRU and Segment geodatabases.
This reads two layers (<b>nhru and nsegment</b>) into GeoPandas as DataFrames (_df) and, if geometry is included, as (_gdb).
<b>Note:</b> Layer npoigages contains only the limited set of poi gages that were included in the model. Since pois will be added to the model parameter file, we provide another method for retrieving poi metadata, such as latitude (lat) and longitude (lon), for pois listed in the parameter file. This method uses NWIS and a supplemental gage reference table for gages that do not occur in NWIS. Gage locations may NOT fall exactly on the NHM segment. The POI's assigned segment is displayed in the popup window when the gage icon is clicked.

In [None]:
def create_hru_gdf(
    GIS_format,
    model_dir,
    nhru_params,
    nhru_nmonths_params,
):
    """
    Create a GeoDataFrame of HRUs joined to selected HRU parameter values.

    The HRU layer is read from ``{model_dir}/GIS``, reprojected to EPSG:4326
    and merged on ``nhm_id`` with parameter values retrieved from the
    notebook-level parameter database ``pdb`` (via ``pdb.get_dataframe``).

    Parameters
    ----------
    GIS_format : str
        ".gpkg" (reads layer "nhru" of ``model_layers.gpkg``) or ".shp"
        (reads ``model_nhru.shp``).
    model_dir : str or path-like
        Model directory that contains the ``GIS`` folder.
    nhru_params : list of str
        Extra parameter names to retrieve (selected in notebook 0a).
    nhru_nmonths_params : list of str
        Extra parameter names to retrieve (selected in notebook 0a).

    Returns
    -------
    geopandas.GeoDataFrame
        One row per ``nhm_id`` present in both the parameter table and the
        HRU layer, with a ``model_idx`` column and the HRU geometry.

    Raises
    ------
    ValueError
        If ``GIS_format`` is neither ".gpkg" nor ".shp".
    """
    # Validate first: otherwise an unsupported format leaves ``hru_gdb``
    # unassigned and fails later with an unhelpful UnboundLocalError.
    supported_formats = (".gpkg", ".shp")
    if GIS_format not in supported_formats:
        raise ValueError(
            f"Unsupported GIS_format {GIS_format!r}; expected one of {supported_formats}."
        )

    # Parameters always retrieved for the notebook interactive maps.
    hru_params = [
        "hru_lat",  # the latitude of the hru centroid
        "hru_lon",  # the longitude of the hru centroid
        "hru_area",
        "hru_segment_nhm",  # the nhm_id of the segment receiving flow from the HRU
    ]
    gdb_hru_params = hru_params + nhru_params + nhru_nmonths_params

    # Projection notes:
    # The NHM uses the NAD 1983 USGS Contiguous USA Albers projection
    # (EPSG# 102039). The geometry units of that projection are not useful for
    # many notebook packages, so the layer is reprojected to WGS 84.
    # Options:
    #     crs = 3857, WGS 84 / Pseudo-Mercator (Google Maps, OpenStreetMap, Bing, ArcGIS, ESRI)
    #    *crs = 4326, WGS 84 - World Geodetic System 1984, used in GPS
    crs = 4326

    if GIS_format == ".gpkg":
        hru_gdb = gpd.read_file(f"{model_dir}/GIS/model_layers.gpkg", layer="nhru")
    else:  # ".shp"
        hru_gdb = gpd.read_file(f"{model_dir}/GIS/model_nhru.shp")
        hru_gdb = hru_gdb.set_index("nhm_id", drop=False).fillna(0)
        # The index must not share the name of the "nhm_id" column, otherwise
        # merging on "nhm_id" below is ambiguous.
        hru_gdb.index.name = "index"

    hru_gdb = hru_gdb.to_crs(crs)  # reproject to the crs defined above

    # Collect every parameter table and join them column-wise in one pass.
    param_df = pd.concat(
        [pdb.get_dataframe(name) for name in gdb_hru_params], axis=1
    ).reset_index()
    # 'model_idx' is the 1-based order of the rows in the parameter file.
    param_df["model_idx"] = param_df.index + 1

    # Join the HRU parameter values to the HRU layer.
    merged = pd.merge(param_df, hru_gdb, on="nhm_id")

    return gpd.GeoDataFrame(merged, geometry="geometry")

In [None]:
def create_segment_gdf(
    GIS_format,
    model_dir,
):
    """
    Create a GeoDataFrame of segments joined to segment parameter values.

    The segment layer is read from ``{model_dir}/GIS`` (nulls filled with 0),
    reprojected to EPSG:4326 and merged on ``nhm_seg`` with parameter values
    retrieved from the notebook-level parameter database ``pdb`` (via
    ``pdb.get_dataframe``).

    Parameters
    ----------
    GIS_format : str
        ".gpkg" (reads layer "nsegment" of ``model_layers.gpkg``) or ".shp"
        (reads ``model_nsegment.shp``).
    model_dir : str or path-like
        Model directory that contains the ``GIS`` folder.

    Returns
    -------
    geopandas.GeoDataFrame
        One row per ``nhm_seg`` present in both the parameter table and the
        segment layer, with a ``model_idx`` column and the segment geometry.

    Raises
    ------
    ValueError
        If ``GIS_format`` is neither ".gpkg" nor ".shp".
    """
    # Validate first: otherwise an unsupported format leaves ``seg_gdb``
    # unassigned and fails later with an unhelpful UnboundLocalError.
    supported_formats = (".gpkg", ".shp")
    if GIS_format not in supported_formats:
        raise ValueError(
            f"Unsupported GIS_format {GIS_format!r}; expected one of {supported_formats}."
        )

    # Parameter values to retrieve for the segments.
    seg_params = ["tosegment_nhm", "tosegment", "seg_length", "obsin_segment"]

    # Projection notes:
    # The NHM uses the NAD 1983 USGS Contiguous USA Albers projection
    # (EPSG# 102039). The geometry units of that projection are not useful for
    # many notebook packages, so the layer is reprojected to WGS 84.
    # Options:
    #     crs = 3857, WGS 84 / Pseudo-Mercator (Google Maps, OpenStreetMap, Bing, ArcGIS, ESRI)
    #    *crs = 4326, WGS 84 - World Geodetic System 1984, used in GPS
    crs = 4326

    if GIS_format == ".gpkg":
        seg_gdb = gpd.read_file(
            f"{model_dir}/GIS/model_layers.gpkg", layer="nsegment"
        ).fillna(0)
    else:  # ".shp"
        seg_gdb = gpd.read_file(f"{model_dir}/GIS/model_nsegment.shp").fillna(0)
        seg_gdb = seg_gdb.set_index("nhm_seg", drop=False)
        # The index must not share the name of the "nhm_seg" column, otherwise
        # merging on "nhm_seg" below is ambiguous.
        seg_gdb.index.name = "index"

    seg_gdb = seg_gdb.to_crs(crs)  # reproject to the crs defined above

    # Collect every parameter table and join them column-wise in one pass.
    param_df = pd.concat(
        [pdb.get_dataframe(name) for name in seg_params], axis=1
    ).reset_index()
    # 'model_idx' is the 1-based order of the rows in the parameter file.
    param_df["model_idx"] = param_df.index + 1

    # Join the segment parameter values to the segment layer.
    merged = pd.merge(param_df, seg_gdb, on="nhm_seg")

    return gpd.GeoDataFrame(merged, geometry="geometry")


## 4. Create POI DataFrame (poi_df) for gages (poi_gages) included in the parameter file.

#### Create a dataframe of all POI-related parameters from the parameter file.

In [None]:
from NHM_helpers.NHM_Assist_utilities import fetch_nwis_gage_info

def create_poi_gdf(
    pdb,
    control,
):
    """
    Build a DataFrame of the POI parameters in the parameter file, left-joined
    to NWIS gage metadata.

    Besides its arguments, this function reads the notebook-level names
    ``crs``, ``hru_gdf``, ``nwis_gage_nobs_min`` and ``nwis_gages_file``; they
    must exist before it is called.

    Parameters
    ----------
    pdb
        Parameter database; indexed by parameter name, each entry exposing
        ``.as_dataframe``.
    control
        Control object providing ``start_time`` and ``end_time``.

    Returns
    -------
    pandas.DataFrame
        POI rows (``poi_gage_id``, ``poi_gage_segment``, ``poi_type``,
        ``nhm_seg``) plus whatever NWIS columns matched on
        ``poi_gage_id`` == ``poi_id``. Note this is a plain DataFrame, not a
        GeoDataFrame, despite the function name.
    """
    # Start from the gage ids and attach the other POI parameters by index.
    poi = pdb["poi_gage_id"].as_dataframe
    for param_name in ("poi_gage_segment", "poi_type"):
        poi = poi.merge(
            pdb[param_name].as_dataframe, left_index=True, right_index=True
        )
    # Look up nhm_seg for the segment each gage is assigned to.
    poi = poi.merge(
        pdb["nhm_seg"].as_dataframe, left_on="poi_gage_segment", right_index=True
    )

    period_start = control.start_time
    period_end = control.end_time

    # List the HUC2 region(s) the subbasin intersects, for the NWIS queries.
    huc2_gdf = gpd.read_file("./data_dependencies/HUC2/HUC2.shp").to_crs(crs)
    huc2_in_domain = huc2_gdf.clip(hru_gdf)
    model_domain_regions = list(huc2_in_domain["huc2"].values)

    # All available NWIS gages and their metadata in the model domain (spatial
    # and temporal); the minimum number of streamflow observations required is
    # given by nwis_gage_nobs_min.
    nwis_gages_aoi = fetch_nwis_gage_info(
        nwis_gage_nobs_min,
        model_domain_regions,
        period_start,
        period_end,
        hru_gdf,
        nwis_gages_file,
        crs,
    )

    # Attach the NWIS gage information to the poi gages from the parameter
    # file; gages without an NWIS match keep nulls in the NWIS columns.
    poi = poi.merge(
        nwis_gages_aoi, left_on="poi_gage_id", right_on="poi_id", how="left"
    )
    poi_df = pd.DataFrame(poi)

    return poi_df

## Create default_gages.csv for your model extraction.
### NHM-Assist notebooks will display gages using the default gages file (default_gages.csv), if a modified gages file (gages.csv) is lacking.
##### By default, this file will be composed of 
1) the gages listed in the parameter file (poi_gages), and
2) all streamflow gages from NWIS in the model domain that have at least 90 days of streamflow observations.

###### Note: Time-series data for streamflow observations will be collected using this gage list and the time range in the control file.
###### Note: Initially, all gages listed in the parameter file exist in NWIS.

### Make a dataframe of the non-NWIS gages (if present) in the parameter file (poi_gages)

In [None]:
# Create a dataframe of the gages in the parameter file that are not USGS gages
# in NWIS. Rows of poi_df with a null "poi_agency" are treated as non-NWIS gages.
_non_nwis_mask = pd.isnull(poi_df["poi_agency"])

if _non_nwis_mask.values.any():
    # Build the table with non-inplace operations: mutating a .loc slice in
    # place raises SettingWithCopyWarning and hides whether poi_df is affected.
    non_NWIS_gages_from_poi_df = (
        poi_df.loc[_non_nwis_mask]
        .drop(columns=["poi_id", "nhm_seg", "poi_gage_segment", "poi_type"])
        # poi_gage_id takes over as poi_id (the original poi_id was dropped above).
        .rename(columns={"poi_gage_id": "poi_id"})
    )

    non_NWIS_gages_from_poi_list = non_NWIS_gages_from_poi_df.poi_id.to_list()
    non_NWIS_gages_from_poi_df_txt = f"[bold red]WARNING:[/bold red] {len(non_NWIS_gages_from_poi_list)} of the {len(poi_df)} gages in your parameter file were not found in NWIS. Please check for missing gage information in the default_gages file."
else:
    # Defined in both branches so the name always exists after this cell runs.
    non_NWIS_gages_from_poi_list = []
    non_NWIS_gages_from_poi_df_txt = (
        f"All {len(poi_df)} gages in the parameter file were found in NWIS."
    )

### Make a dataframe for all gages in the parameter file and additional NWIS gages found in the model domain
### Note: the NWIS gages in the poi_df (gages in the parameter file) should be in NWIS_sites_aoi df.

In [None]:
# Columns (and their order) written to the default gages file.
sta_file_col_order = [
    "poi_id",
    "poi_agency",
    "poi_name",
    "latitude",
    "longitude",
    "drainage_area",
    "drainage_area_contrib",
]

# Start from the NWIS gages in the model domain; when the parameter file holds
# gages without an agency (not found in NWIS), append those rows as well.
if poi_df["poi_agency"].isna().any():
    temp = pd.concat(
        [nwis_gages_aoi, non_NWIS_gages_from_poi_df],
        ignore_index=True,
    )
else:
    temp = nwis_gages_aoi.copy()

# Keep only the station-file columns, in the order listed above.
temp2 = temp[sta_file_col_order]

### Save the default station file (gage file) as a .csv file

In [None]:
# Write the default gages table to default_gages_file as CSV, without the
# DataFrame index column (index=False).
temp2.to_csv(default_gages_file, index=False)
# temp2.info()

## Read modified gages file (gages.csv).
If there are gages in the parameter file that are not in NWIS (USGS gages), then latitude, longitude, and poi_name must be provided from another source and appended to the "default_gages.csv" file. Once editing is complete, that file can be renamed "gages.csv" and will be used as the gages file. If NO gages.csv is made, the default_gages.csv will be used.

#### If an improved/edited gages.csv file exists, then the poi_df metadata will be updated below.

In [None]:
# Update the non-USGS gages in the poi dataframe (rows whose "poi_id" is null)
# with metadata from the gages file that was added or edited.
# NOTE(review): gages_df is assigned in a later cell of this notebook; on a
# fresh kernel it must already exist when this cell runs - confirm cell order.
if gages_file.exists():
    _meta_cols = ["latitude", "longitude", "poi_agency", "poi_name"]

    for idx, row in poi_df.iterrows():
        if not pd.isnull(row["poi_id"]):
            continue  # gage already has metadata

        gage_id = row["poi_gage_id"]
        # Look the gage up once instead of once per column.
        matches = gages_df.loc[gages_df.index == gage_id, _meta_cols]
        if matches.empty:
            # Gage is absent from the gages file: leave its metadata null
            # rather than failing with an IndexError. The missing-metadata
            # check further down reports and drops such gages.
            continue

        gage_meta = matches.iloc[0]
        poi_df.loc[idx, "latitude"] = gage_meta["latitude"]
        poi_df.loc[idx, "longitude"] = gage_meta["longitude"]
        poi_df.loc[idx, "poi_id"] = gage_id
        poi_df.loc[idx, "poi_agency"] = gage_meta["poi_agency"]
        poi_df.loc[idx, "poi_name"] = gage_meta["poi_name"]

#### If an improved/edited gages.csv file exists, NHM-Assist notebooks will display gages using the modified gage file (gages.csv).

In [None]:
# Station file columns and the dtype used to read each one (you may need to
# tailor this to the particular file). Id, agency and name are read as text so
# gage ids keep any leading zeros.
col_names = [
    "poi_id",
    "poi_agency",
    "poi_name",
    "latitude",
    "longitude",
    "drainage_area",
    "drainage_area_contrib",
]
col_types = [np.str_, np.str_, np.str_, float, float, float, float]
cols = dict(zip(col_names, col_types))  # column header -> dtype, used below

# Prefer the user-edited gages file when it exists; otherwise fall back to the
# default gages file.
if gages_file.exists():
    nwis_gages_aoi = pd.read_csv(nwis_gages_file, dtype=cols)
    # dtype=cols matches how the default file is read; without it poi_id may
    # be parsed as an integer and no longer match the ids in poi_df.
    gages_df = pd.read_csv(gages_file, dtype=cols)
    _gages_file_label = "modified"
    _gages_display_txt = "\nNHM-Assist notebooks will display gages using the modified gage file (gages.csv)."
else:
    gages_df = pd.read_csv(default_gages_file, dtype=cols)
    _gages_file_label = "default"
    _gages_display_txt = "\nNHM-Assist notebooks will display gages using the default gages file (default_gages.csv)."

# Make poi_id the index.
gages_df = gages_df.set_index("poi_id")

# Agencies present in poi_df, without nulls and in a stable order.
agencies_txt = ", ".join(sorted(str(item) for item in poi_df.poi_agency.dropna().unique()))

con.print(
    "[bold]Create hydrofabric files:\n",
    f"\n{non_NWIS_gages_from_poi_df_txt}",
    _gages_display_txt,
    f"\nThe {_gages_file_label} gage file includes {len(gages_df)} gages from {agencies_txt} listed in the parameter file, and found in NWIS for the model domain.",
)

#### CHECK: All pois in the poi_df with missing metadata for lat, lon, and poi_name will be dropped from the poi_df

In [None]:
# Report and drop pois with missing latitude, longitude or poi_name.
_required_meta_cols = ["latitude", "longitude", "poi_name"]
missing_meta_df = poi_df[poi_df[_required_meta_cols].isna().any(axis=1)]
missing_meta_list = missing_meta_df.poi_gage_id.to_list()

if missing_meta_list:
    # The closing tag must be "[/bold red]"; "[\bold red]" embeds a backspace
    # character and never closes the style. Adjacent string literals are used
    # instead of a backslash continuation so no indentation leaks into the text.
    missing_meta_txt = (
        f"[bold red]WARNING:[/bold red] Gage {missing_meta_list} missing metadata and will be dropped from the poi_gdf and will not be plotted on maps in the notebook. "
        "To avoid this, open up the default_gages file and add the missing metadata: latitude, longitude, and gage name."
    )
else:
    missing_meta_txt = "[bold green]All gages in the default gage file have metadata and will be plotted on maps in the notebook."

# Build point geometry from longitude/latitude, then drop the rows that lack
# the metadata needed for mapping.
poi_gdf = gpd.GeoDataFrame(
    poi_df,
    geometry=gpd.points_from_xy(poi_df.longitude, poi_df.latitude),
    crs=crs,
).dropna(subset=_required_meta_cols)

con.print(missing_meta_txt)

### Useful variables from poi_df, gages_df, and nwis_gages_aoi

In [None]:
# Gage ids in the gages file in use (the index of gages_df).
gages_list = gages_df.index.to_list()
# Gage ids of the NWIS gages found in the model domain.
nwis_gages_aoi_list = nwis_gages_aoi.poi_id.to_list()

_gages_set = set(gages_list)
_nwis_set = set(nwis_gages_aoi_list)

# NWIS gages that also appear in the gages file.
nwis_gages_in_gages_list = list(_nwis_set & _gages_set)

# Gages in the gages file that are not among the poi_id values of poi_df.
additional_gages = list(_gages_set - set(poi_df.poi_id))

# NWIS gages among those additional gages.
nwis_gages_in_additional_gages_list = list(_nwis_set & set(additional_gages))