In [None]:
#### Get subset of ids for JRB basins ####

import geopandas as gpd


# Both layers are read from the same GeoPackage, so the path is named once.
jrb_gpkg_path = r"C:\Users\LeoLo\Desktop\jrb\jrb_2.gpkg"
gdf = gpd.read_file(jrb_gpkg_path, layer="flowpaths")
nexus = gpd.read_file(jrb_gpkg_path, layer="nexus")
# Many more layers 'flowpaths', 'divides', 'lakes', 'nexus', 'pois', 'hydrolocations', 'flowpath-attributes',
# 'flowpath-attributes-ml', 'network', 'divide-attributes'

# Report how many rows were read and how many distinct divide_ids they hold.
# (Interpolating `gdf.divide_id` itself would paste the whole Series repr into the message.)
print(f"Basins in Juniata RB: {len(gdf.divide_id)} (unique: {gdf.divide_id.nunique()})")

# Select subset of divide_ids; the [0:1] slice keeps only the first one, as a list.
jrb_divide_ids = list(gdf.divide_id)[0:1]
print(f"selecting divide_id: {jrb_divide_ids}")

In [None]:
# Read the "network" layer under its own name. Binding it to `gdf` would replace the
# "flowpaths" layer that the later CRS and catchment-GeoJSON cells read from `gdf`,
# so the notebook would only work when cells are run out of order.
network = gpd.read_file(r"C:\Users\LeoLo\Desktop\jrb\jrb_2.gpkg", layer="network")
# network[network['divide_id'].isin(jrb_divide_ids)]
network.keys()

In [None]:
# Display the table currently bound to `gdf`.
gdf

In [None]:
# Get the CRS projection; need to use the 'flowpaths' layer
# NOTE(review): this relies on `gdf` holding the 'flowpaths' layer at this point — confirm
# no earlier cell has re-bound `gdf` to a different layer.
gdf.crs

In [None]:
# Print the rows of `gdf` whose divide_id is one of the selected `jrb_divide_ids`.
print(gdf[gdf["divide_id"].isin(jrb_divide_ids)])

In [None]:
#### Convert catchment data gdf to geojson ####

import json
from shapely.geometry import Polygon


# Keep only the rows for the selected divide_ids; the first match becomes the catchment feature.
filtered_gdf = gdf[gdf["divide_id"].isin(jrb_divide_ids)]
if filtered_gdf.empty:
    # Fail with a readable message instead of an IndexError from `.iloc[0]` below.
    raise ValueError(f"No rows in gdf match divide_id(s): {jrb_divide_ids}")

# Reproject to WGS84 (4326)
filtered_gdf = filtered_gdf.to_crs("EPSG:4326")
print(f"converted CRS -> {filtered_gdf.crs}\n")

# Turn the LINESTRING into a Polygon, closing the ring first if needed
# (first and last points must be the same).
# The Polygon is built in both cases: a line that is already closed would otherwise stay
# a LineString, and the `.exterior` access below exists only on Polygon.
line = filtered_gdf.iloc[0].geometry
ring_coords = list(line.coords)
if ring_coords[0] != ring_coords[-1]:
    ring_coords.append(ring_coords[0])
line = Polygon(ring_coords)

# Update the geometry in the GeoDataFrame
filtered_gdf.at[filtered_gdf.index[0], "geometry"] = line

first_row = filtered_gdf.iloc[0]

# Create GeoJSON object with structure for ngen.
geojson = {
    "type": "FeatureCollection",
    "name": "catchment_data",
    "crs": {
        "type": "name",
        "properties": {"name": "urn:ogc:def:crs:OGC:1.3:CRS84"},
    },
    "features": [
        {
            "type": "Feature",
            "id": first_row["divide_id"],  # Use divide_id as the feature ID
            "properties": {
                "area_sqkm": first_row["areasqkm"],  # Area in square kilometers
                "toid": first_row["toid"],  # Related identifier
            },
            "geometry": {
                "type": "Polygon",
                # A single ring: the polygon's exterior coordinates.
                "coordinates": [
                    list(line.exterior.coords),
                ],
            },
        },
    ],
}

# Save as GeoJSON
with open("catchment_data_cat-88306.geojson", "w") as f:
    json.dump(geojson, f, indent=2)

# Or print it for inspection
print(json.dumps(geojson, indent=2))

In [None]:
#### Convert nexus data gdf to geojson ####

def _nexus_feature(row):
    """Build one GeoJSON Point feature from a row of the nexus table."""
    point = row.geometry
    return {
        "type": "Feature",
        "id": row["id"],  # feature ID comes from the 'id' column
        "properties": {
            "nexus_type": row["type"],
            "toid": row["toid"],
        },
        "geometry": {
            "type": "Point",
            "coordinates": [point.x, point.y],  # x first, then y
        },
    }


# Pick the nexus whose id equals the selected catchment's `toid`, then reproject to WGS84 (4326).
target_toid = filtered_gdf.iloc[0]["toid"]
filtered_nexus = nexus[nexus["id"] == target_toid].to_crs("EPSG:4326")
print(f"converted CRS -> {filtered_nexus.crs}\n")

# Assemble the FeatureCollection in the structure used for ngen.
geojson = {
    "type": "FeatureCollection",
    "name": "nexus_data",
    "crs": {
        "type": "name",
        "properties": {"name": "urn:ogc:def:crs:OGC:1.3:CRS84"},
    },
    "features": [_nexus_feature(row) for _, row in filtered_nexus.iterrows()],
}

# Write the GeoJSON file.
with open("nexus_data_nex-87405.geojson", "w") as f:
    json.dump(geojson, f, indent=2)

# Echo the same document for inspection.
print(json.dumps(geojson, indent=2))

In [None]:
# Display the geometry of the first row of `filtered_gdf`.
filtered_gdf.iloc[0].geometry

In [None]:
# Load netcdf forcing and attribute files + trim to JRB.
import xarray as xr
import numpy as np


attrs_path = r"C:\Users\LeoLo\Desktop\attributes.nc"
forc_path = r"X:\forcings.nc"  # "C:\Users\LeoLo\Desktop\forcings.nc"

# Open both NetCDF files as xarray Datasets (nothing is converted to a DataFrame in this cell).
d_a = xr.open_dataset(attrs_path)
d_f = xr.open_dataset(forc_path)

# Display the attribute dataset
print(d_a)

# Count the divide_id values that occur more than once, for each dataset in turn.
# After the loop `divide_ids`, `unique`, `counts` and `duplicates` hold the forcing results.
for label, dataset in (("Attribute", d_a), ("Forcing", d_f)):
    divide_ids = dataset["divide_id"].values
    unique, counts = np.unique(divide_ids, return_counts=True)
    duplicates = unique[counts > 1]
    print(f"\n --------\n{label} data has {len(duplicates)} duplicate divide_id values.")

In [None]:
# Display the dataset bound to `d_a`.
d_a

In [None]:
# Only select the divide_ids that are in the JRB, and select the first occurrence of any duplicate divide_ids.
import pandas as pd


def _first_occurrence_subset(ds, ids):
    """Drop repeated divide_id entries (keeping the first of each) and select `ids`.

    Parameters
    ----------
    ds : dataset indexed along ``divide_id``.
    ids : list of divide_id labels to keep.

    Returns
    -------
    (unique_ds, subset_ds) : `ds` without repeated divide_ids, and that result
        restricted to `ids`.
    """
    divide_ids = ds["divide_id"].values
    # np.unique(..., return_index=True)[1] gives the position of each value's first occurrence.
    first_positions = np.unique(divide_ids, return_index=True)[1]
    keep_mask = np.zeros_like(divide_ids, dtype=bool)
    keep_mask[first_positions] = True
    unique_ds = ds.isel(divide_id=keep_mask)
    return unique_ds, unique_ds.sel(divide_id=ids)


## Deduplicate and subset forcing and attributes the same way
unique_d_f, subset_d_f = _first_occurrence_subset(d_f, jrb_divide_ids)
unique_d_a, subset_d_a = _first_occurrence_subset(d_a, jrb_divide_ids)


## Convert to dataframe
forc = subset_d_f.to_dataframe()
attrs = subset_d_a.to_dataframe()


## Trim time to 2000-2005 (divide_id is subindexed by time)
# Ensure the second level (time) is a DatetimeIndex
forc.index = forc.index.set_levels(pd.to_datetime(forc.index.levels[1]), level=1)
start_date = "2000-01-01"
end_date = "2005-12-31"
forc = forc.loc[(slice(None), slice(start_date, end_date)), :]

# Unstack divide_id so that time is the only row index. `unstack` appends the pivoted
# level as the innermost column level, so the columns are (variable, divide_id).
forc_unstacked = forc.unstack(level=0)
n_time = len(forc_unstacked)
n_variables = len(forc_unstacked.columns.levels[0])
forc_array = forc_unstacked.to_numpy().reshape(n_time, n_variables, -1)

# (time, variable, divide_id) -> (time, divide_id, variable)
forc_array = np.swapaxes(forc_array, 2, 1)

# Same data taken straight from the Dataset; not used by the saves below.
# NOTE(review): the hard-coded 2192 looks like the number of time steps kept above — confirm.
f_xr = subset_d_f.to_array()
f_xr = np.swapaxes((np.swapaxes(np.swapaxes(f_xr, 1, 0), 2, 1)), 0, 1)

f_xr = f_xr[:2192,]


## Save to file
# Separate names for the outputs so the input `forc_path` / `attrs_path` are not overwritten.
forc_out_path = r"C:\Users\LeoLo\Desktop\forcings_jrb"
attrs_out_path = r"C:\Users\LeoLo\Desktop\attributes_jrb"

np.save(forc_out_path, forc_array)  # original shape note: (2192, 794, 3)
np.save(attrs_out_path, attrs.to_numpy())  # original shape note: (794, 28)

# save the netcdf files
subset_d_a.to_netcdf(r"C:\Users\LeoLo\Desktop\attributes_jrb.nc")
subset_d_f.to_netcdf(r"C:\Users\LeoLo\Desktop\forcings_jrb.nc")

In [None]:
# Display the entry at positional index 2191 of the forcing subset's "time" variable.
subset_d_f["time"][2191]

In [None]:
# Display the attribute subset.
subset_d_a

In [None]:
import torch

# Load the saved object at this path, mapping its tensors onto the CPU.
# NOTE(review): torch.load unpickles the file, so only use it on a trusted checkpoint.
checkpoint_path = r"C:\Users\LeoLo\Desktop\noaa_owp\dHBV_2_0\ngen_files\data\dhbv_2_0\dhbv_merit_conus_100ep.pt"
mod = torch.load(checkpoint_path, map_location="cpu")


---

Other Debug...


In [None]:
#### Get subset of ids for JRB basins ####

import geopandas as gpd


# Both layers are read from the same GeoPackage, so the path is named once.
jrb_gpkg_path = r"C:\Users\LeoLo\Desktop\jrb\jrb_2.gpkg"
gdf = gpd.read_file(jrb_gpkg_path, layer="flowpaths")
nexus = gpd.read_file(jrb_gpkg_path, layer="nexus")
# Many more layers 'flowpaths', 'divides', 'lakes', 'nexus', 'pois', 'hydrolocations', 'flowpath-attributes',
# 'flowpath-attributes-ml', 'network', 'divide-attributes'

# Report how many rows were read and how many distinct divide_ids they hold.
# (Interpolating `gdf.divide_id` itself would paste the whole Series repr into the message.)
print(f"Basins in Juniata RB: {len(gdf.divide_id)} (unique: {gdf.divide_id.nunique()})")

# Select subset of divide_ids; the [0:1] slice keeps only the first one, as a list.
jrb_divide_ids = list(gdf.divide_id)[0:1]
print(f"selecting divide_id: {jrb_divide_ids}")

In [None]:
import geopandas as gpd

# Read the "flowpath-attributes-ml" layer of the single-catchment GeoPackage and list its columns.
cat_gpkg_path = r"C:\Users\LeoLo\Desktop\noaa_owp\dHBV_2_0\ngen_resources\data\dhbv_2_0\spatial\cat-88306.gpkg"
gdf = gpd.read_file(cat_gpkg_path, layer="flowpath-attributes-ml")
# gdf[gdf['divide_id'].isin(jrb_divide_ids)]
gdf.keys()

In [None]:
import xarray as xr

# Open the forcing file and look at its 'PET' variable.
path = '/gpfs/yxs275/data/hourly/CAMELS_HF/forcing/forcing_1990_2018_gauges_00000_00499.nc'
root = xr.open_dataset(path)

# Show the variable's full shape and the first 100 values at leading index 0.
pet = root['PET']
pet[:].shape, pet[0, :100].values

In [None]:
import pandas as pd
import xarray as xr

path = '/gpfs/yxs275/data/hourly/CAMELS_HF/forcing/forcing_1990_2018_gauges_hourly_00000_00499.nc'

# Hourly timestamps from 2004-10-01 00:00; the trailing [:-1] drops the final stamp of the range.
zTest_full_time = pd.date_range(
    start='2004-10-01 00:00:00',
    end='2018-10-01 00:00:00',
    freq='h',
)[:-1]

# Open the hourly file and keep only those timestamps.
hourly_x = xr.open_dataset(path).sel(time=zTest_full_time)

In [None]:
# First 25 'PET' values along the second axis, at index 0 of the first axis.
hourly_x['PET'][0, :25].values

In [None]:
#### Verify hourly dmg matches wencong
import xarray as xr

# Four copies of the same simulation output file, written by different runs.
dmg_path = '/projects/mhpi/leoglonz/ciroh-ua/dmg/hf_outputs/hydrodl2/h-dhbv2_3_Qprimeprime_fixed/hourly_simulation_0_00000_00499.nc'
hybrid_path = '/projects/mhpi/leoglonz/ciroh-ua/dmg/hf_outputs/h-dhbv2_3_Qprimeprime_fixed/hourly_simulation_0_00000_00499.nc'
wencong_path = '/projects/mhpi/leoglonz/ciroh-ua/dmg/hf_outputs/wencong_original/h-dhbv2_3_Qprimeprime_fixed/hourly_simulation_0_00000_00499.nc'
yalan_path = '/gpfs/yxs275/model_outputs/hourly/distributedHourly/HF_outputs/h-dhbv2_3_Qprimeprime_fixed_aggregated_norm2/hourly_simulation_0_00000_00499.nc'

# Open each file in the same order as the paths above.
dmg_xr, hybrid_xr, wencong_xr, yalan_xr = (
    xr.open_dataset(nc_path)
    for nc_path in (dmg_path, hybrid_path, wencong_path, yalan_path)
)

In [None]:
# First 10 'Simulation' values at leading index 0 of the dmg output.
dmg_xr['Simulation'][0, :10].values

In [None]:
# First 10 'Simulation' values at leading index 0 of the hybrid output.
hybrid_xr['Simulation'][0, :10].values

In [None]:
# First 10 'Simulation' values at leading index 0 of the wencong output.
wencong_xr['Simulation'][0, :10].values

In [None]:
# First 10 'Simulation' values at leading index 0 of the yalan output.
yalan_xr['Simulation'][0, :10].values

In [None]:
# Building CAMELS forcing dataset
# 100 catchments, 2010-2015

import pandas as pd
import numpy as np
import xarray as xr

# Path to the CAMELS forcing NetCDF file opened below.
forcing_path = '/projects/mhpi/leoglonz/ciroh-ua/dhbv2_mts/ngen_resources/data/forcing/camels_2010-01-01_00_00_00_2011-12-30_23_00_00.nc'

# Open the file as an xarray Dataset.
forcing_xr = xr.open_dataset(forcing_path)

In [None]:
# Single "TMP_2maboveground" value at positional index [0, 1].
forcing_xr["TMP_2maboveground"][0, 1].values

In [None]:
# Number of entries in the "time" variable.
len(forcing_xr["time"])

In [None]:
# Display the forcing dataset.
forcing_xr

In [None]:
# Path to an example forcing NetCDF file.
example_path = '/projects/mhpi/leoglonz/ciroh-ua/ciroh-ua-ngen/data/forcing/cats-27_52_67-2015_12_01-2015_12_30.nc'
import xarray as xr

# Open the example file as an xarray Dataset.
ds = xr.open_dataset(example_path)

In [None]:
# All values of the 'ids' variable.
ds['ids'][:].values

In [None]:
# Select the entry labelled 'cat-27' along 'ids'.
ds.sel(ids='cat-27')