In [1]:
import pandas as pd
import geopandas as gpd
from summaflow import (
    GeoLayer,
    SUMMAWorkflow,
    Stats,
)

import os
import glob

In [2]:
# --- input locations ---------------------------------------------------------
# every path below is resolved relative to the model's root directory
root_path = '../../bb-model-semidistributed/'

# geospatial attribute inputs (land cover, soil classes, elevation)
attributes_dir = os.path.join(root_path, 'attributes')
landcover_path = os.path.join(attributes_dir, 'landcover', 'MCD12Q1.061')
soilclass_path = os.path.join(attributes_dir, 'soil')
merithdyr_path = os.path.join(attributes_dir, 'elevation')

# vector layers: river network and sub-basins
shapefiles_dir = os.path.join(root_path, 'shapefiles')
riv_path = os.path.join(shapefiles_dir, 'bb_rivers_semidistributed.shp')
cat_path = os.path.join(shapefiles_dir, 'bb_subbasins_semidistributed.shp')
hru_path = cat_path  # HRUs are read from the same sub-basin shapefile as the catchments

# remapped forcing files
root_path_forcings = os.path.join(root_path, 'forcing', 'remapped')

In [3]:
# Read the river, catchment and HRU vector layers (in that order) with geopandas
riv_obj, cat_obj, hru_obj = (
    gpd.read_file(layer_path)
    for layer_path in (riv_path, cat_path, hru_path)
)

In [4]:
# layers needed by the setup workflow
def _maf_layer(directory, stats_csv, raster_tif, unit):
    """Build a `GeoLayer` via `GeoLayer.from_maf` from two files inside `directory`.

    Parameters
    ----------
    directory : str
        Folder holding both the statistics CSV and the raster.
    stats_csv : str
        File name of the statistics table (passed as `maf_stats`).
    raster_tif : str
        File name of the raster (passed as `maf_layer`).
    unit : str
        Unit string forwarded unchanged to `GeoLayer.from_maf`.

    The sub-basin shapefile (`cat_path`) is always used as `maf_geolayer`.
    """
    return GeoLayer.from_maf(
        maf_stats=os.path.join(directory, stats_csv),
        maf_layer=os.path.join(directory, raster_tif),
        maf_geolayer=cat_path,
        unit=unit,
    )

# elevation
elv = _maf_layer(
    merithdyr_path,
    'bb_model_semidistributed_stats_elv.csv',
    'bb_model_semidistributed_elv.tif',
    'meters',
)
# landcover
landcover = _maf_layer(
    landcover_path,
    'bb_model_semidistributed_stats_MCD12Q1.061_2022.csv',
    'bb_model_semidistributed_2022.tif',
    'dimensionless',
)
# USDA soil classes
soil = _maf_layer(
    soilclass_path,
    'bb_model_semidistributed_stats_soil_classes.csv',
    'bb_model_semidistributed_soil_classes.tif',
    'dimensionless',
)

# custom layers for `tan_slope`, `contourLength` and `downHRUindex`
# until relevant workflows are implemented inside `gistool`--sorry
# For now, look at various constructors for "GeoLayer"
#
# Each layer wraps a one-column table indexed by the sub-basin `COMID`.
slope = GeoLayer( # workflow needs `mean` stat
    # constant placeholder value (0.1) for every sub-basin
    stats=Stats(pd.DataFrame([0.1] * len(cat_obj), index=cat_obj['COMID'], columns=['mean'])),
    unit='dimensionless',
)
contour = GeoLayer( # workflow needs `length` stat
    stats=Stats(
        pd.DataFrame(
            # Geometry lengths after projecting to ESRI:54009 (Mollweide).
            # `.to_numpy()` is required: the Series returned by `.length` is indexed by
            # the GeoDataFrame's row index, and passing it together with `index=` makes
            # pandas re-align it on the COMID labels, producing NaN (or mismatched)
            # values. Plain values are assigned positionally instead.
            # NOTE(review): `set_crs` raises if the shapefile already declares a CRS
            # other than EPSG:4326 -- confirm the input is geographic.
            cat_obj.set_crs(epsg=4326).to_crs('ESRI:54009').length.to_numpy(),
            index=cat_obj['COMID'],
            columns=['length'])),
    unit='meter',
)
hru_index = GeoLayer( # workflow needs `index` "stat"
    # constant placeholder value (0) for every sub-basin
    stats=Stats(pd.DataFrame([0] * len(cat_obj), index=cat_obj['COMID'], columns=['index'])),
    unit='dimensionless',
)

In [5]:
# exp = SUMMAWorkflow(
#     forcing_data = glob.glob(os.path.join(root_path_forcings, '**', '*.nc'), recursive=True),
#     forcing_name_mapping = {
#         'CaSR_v3.1_A_PR0_SFC': 'pptrate',
#         'CaSR_v3.1_P_TT_09975': 'airtemp',
#         'CaSR_v3.1_P_P0_SFC': 'airpres',
#         'CaSR_v3.1_P_FI_SFC': 'LWRadAtm',
#         'CaSR_v3.1_P_FB_SFC': 'SWRadAtm',
#         'CaSR_v3.1_P_HU_09975': 'spechum',
#         'CaSR_v3.1_P_UVC_09975': 'windspd',
#     },
#     forcing_unit_mapping = {
#         'pptrate': 'meter / hour',
#         'airtemp': 'degC',
#         'airpres': 'millibar',
#         'LWRadAtm': 'watt / meter ** 2',
#         'SWRadAtm': 'watt / meter ** 2',
#         'spechum': 'dimensionless',
#         'windspd': 'knot',
#     },
#     forcing_to_unit_mapping = {
#         'pptrate': 'millimeter / second',
#         'airtemp': 'kelvin',
#         'airpres': 'pascal',
#         'LWRadAtm': 'watt / meter ** 2',
#         'SWRadAtm': 'watt / meter ** 2',
#         'spechum': 'dimensionless',
#         'windspd': 'meter / second',
#     },
#     forcing_attrs = {
#         'measurement_height': 20,
#         'measurement_height_unit': 'meter',
#         'forcing_time_zone': 'utc', # original timezone of the forcing dataset
#         'target_time_zone': 'utc', # if UTC, SUMMA converts to local time zone internally
#         'local': {},
#         'global': {},
#     },
#     topology_data = {
#         'riv': riv_obj,
#         'hru': hru_obj,
#         'cat': cat_obj,
#     },
#     topology_unit_mapping = {}, # not sure if mizuRoute should be included here
#     topology_to_unit_mapping = {}, # not sure if mizuRoute should be included here
#     topology_attrs = {
#         'gru_fid': 'COMID',
#         'hru_fid': 'COMID',
#         'local': {},
#         'global': {},
#     },
#     geospatial_data = {
#         'elevation': elv,
#         'soilTypeIndex': soil,
#         'vegTypeIndex': landcover,
#         'tan_slope': slope,
#         'contourLength': contour,
#         'downHRUindex': hru_index,
#     },
#     cold_state = {
#         'layers': {
#             'nSoil': 8,
#             'nSnow': 0,
#         },
#         'states': { # dimension manipulation is automated inside the workflow
#             'scalarCanopyIce': 0,
#             'scalarCanopyLiq': 0,
#             'scalarSnowDepth': 0,
#             'scalarSWE': 0,
#             'scalarSfcMeltPond': 0,
#             'scalarAquiferStorage': 0.4,
#             'scalarSnowAlbedo': 0,
#             'scalarCanairTemp': 283.16,
#             'scalarCanopyTemp': 283.16,
#             'mLayerTemp': 283.16,
#             'mLayerVolFracIce': 0,
#             'mLayerVolFracLiq': 0.4,
#             'mLayerMatricHead': -1.0,
#             'mLayerDepth': [0.025, 0.075, 0.15, 0.25, 0.5, 0.5, 1, 1.5],
#         },
#     },
#     decisions = { # Can change all decisions, otherwise default values
#         'soilCatTbl': 'ROSETTA',
#     },
#     auxillary = {
#         # 'dt_init': 450 # if not provided, defaults to forcing data timestep
#     },
#     settings = {
#         'model_path': os.path.join(root_path, 'settings', 'SUMMA'),
#         'start_date': '1980-01-01 00:00',
#         'end_date': '1980-01-10 23:00',
#         'verbose': True,
#     },
#     fillna = {
#         'geospatial_data': {
#             'elevation': 1, # a rough assumption--can be modified to anything
#             'soilTypeIndex': 6, # based on Darri's assumption--can be modified to anything
#             'vegTypeIndex': 1, # a rough assumption--can be modified to anything
#         },
#     },
# )

In [6]:
# Instantiate the SUMMA workflow with forcing, topology and run settings only.
# The remaining arguments are kept below as commented-out references.
# NOTE(review): presumably omitted because only forcing initialization is
# exercised afterwards -- re-enable them before building a complete setup.
exp = SUMMAWorkflow(
    # `glob.glob` returns matches in arbitrary order; sorting gives a reproducible
    # file order (chronological when the files share a timestamped name pattern)
    forcing_data = sorted(glob.glob(os.path.join(root_path_forcings, '**', '*.nc'), recursive=True)),
    # source variable name -> SUMMA forcing variable name
    forcing_name_mapping = {
        'CaSR_v3.1_A_PR0_SFC': 'pptrate',
        'CaSR_v3.1_P_TT_09975': 'airtemp',
        'CaSR_v3.1_P_P0_SFC': 'airpres',
        'CaSR_v3.1_P_FI_SFC': 'LWRadAtm',
        'CaSR_v3.1_P_FB_SFC': 'SWRadAtm',
        'CaSR_v3.1_P_HU_09975': 'spechum',
        'CaSR_v3.1_P_UVC_09975': 'windspd',
    },
    # units of the variables as found in the forcing files
    forcing_unit_mapping = {
        'pptrate': 'meter / hour',
        'airtemp': 'degC',
        'airpres': 'millibar',
        'LWRadAtm': 'watt / meter ** 2',
        'SWRadAtm': 'watt / meter ** 2',
        'spechum': 'dimensionless',
        'windspd': 'knot',
    },
    # target units the variables are converted to
    forcing_to_unit_mapping = {
        'pptrate': 'millimeter / second',
        'airtemp': 'kelvin',
        'airpres': 'pascal',
        'LWRadAtm': 'watt / meter ** 2',
        'SWRadAtm': 'watt / meter ** 2',
        'spechum': 'dimensionless',
        'windspd': 'meter / second',
    },
    forcing_attrs = {
        'measurement_height': 20,
        'measurement_height_unit': 'meter',
        'forcing_time_zone': 'utc', # original timezone of the forcing dataset
        'target_time_zone': 'utc', # if UTC, SUMMA converts to local time zone internally
        'local': {},
        'global': {},
    },
    topology_data = {
        'riv': riv_obj,
        'hru': hru_obj,
        'cat': cat_obj,
    },
    # topology_unit_mapping = {}, # not sure if mizuRoute should be included here
    # topology_to_unit_mapping = {}, # not sure if mizuRoute should be included here
    topology_attrs = {
        'gru_fid': 'COMID',
        'hru_fid': 'COMID',
        'local': {},
        'global': {},
    },
    # geospatial_data = {
    #     'elevation': elv,
    #     'soilTypeIndex': soil,
    #     'vegTypeIndex': landcover,
    #     'tan_slope': slope,
    #     'contourLength': contour,
    #     'downHRUindex': hru_index,
    # },
    # cold_state = {
    #     'layers': {
    #         'nSoil': 8,
    #         'nSnow': 0,
    #     },
    #     'states': { # dimension manipulation is automated inside the workflow
    #         'scalarCanopyIce': 0,
    #         'scalarCanopyLiq': 0,
    #         'scalarSnowDepth': 0,
    #         'scalarSWE': 0,
    #         'scalarSfcMeltPond': 0,
    #         'scalarAquiferStorage': 0.4,
    #         'scalarSnowAlbedo': 0,
    #         'scalarCanairTemp': 283.16,
    #         'scalarCanopyTemp': 283.16,
    #         'mLayerTemp': 283.16,
    #         'mLayerVolFracIce': 0,
    #         'mLayerVolFracLiq': 0.4,
    #         'mLayerMatricHead': -1.0,
    #         'mLayerDepth': [0.025, 0.075, 0.15, 0.25, 0.5, 0.5, 1, 1.5],
    #     },
    # },
    # decisions = { # Can change all decisions, otherwise default values
    #     'soilCatTbl': 'ROSETTA',
    # },
    # auxillary = {
    #     # 'dt_init': 450 # if not provided, defaults to forcing data timestep
    # },
    settings = {
        'model_path': os.path.join(root_path, 'settings', 'SUMMA'),
        'start_date': '1980-01-01 00:00',
        'end_date': '1980-01-10 23:00',
        'verbose': True,
    },
    # fillna = {
    #     'geospatial_data': {
    #         'elevation': 1, # a rough assumption--can be modified to anything
    #         'soilTypeIndex': 6, # based on Darri's assumption--can be modified to anything
    #         'vegTypeIndex': 1, # a rough assumption--can be modified to anything
    #     },
    # },
)

2025-06-11 16:14:40,630 - summaflow.core - INFO - SUMMA workflow initialized




## Workflow tests

In [8]:
# Process the forcing files and save both the NetCDF outputs and the file list.
# The output folder is built from the current user's home directory instead of a
# hard-coded absolute path, so the notebook also runs on other accounts/machines.
forcing_out_dir = os.path.join(os.path.expanduser('~'), 'test-forcing', '')  # trailing separator kept on purpose
exp.init_forcing(
    save=True,
    save_nc_path=forcing_out_dir,
    save_list_path=os.path.join(forcing_out_dir, 'fileList.txt'),
)

2025-06-11 16:14:40,644 - summaflow.core - INFO - Initializing attributes for SUMMA workflow...
2025-06-11 16:14:40,644 - summaflow.core - INFO - Assigning timezone
2025-06-11 16:14:40,886 - summaflow.core - INFO - Processing forcing file: remapped_remapped_bb_model_semidistributed_1979123112.nc
2025-06-11 16:14:41,004 - summaflow.core - INFO - Saving dataset to /home/kasra.keshavarz1/test-dave/remapped_remapped_bb_model_semidistributed_1979123112.nc
2025-06-11 16:14:41,105 - summaflow.core - INFO - Processing forcing file: remapped_remapped_bb_model_semidistributed_1980010112.nc
2025-06-11 16:14:41,134 - summaflow.core - INFO - Saving dataset to /home/kasra.keshavarz1/test-dave/remapped_remapped_bb_model_semidistributed_1980010112.nc
2025-06-11 16:14:41,174 - summaflow.core - INFO - Processing forcing file: remapped_remapped_bb_model_semidistributed_1980010212.nc




2025-06-11 16:14:41,206 - summaflow.core - INFO - Saving dataset to /home/kasra.keshavarz1/test-dave/remapped_remapped_bb_model_semidistributed_1980010212.nc
2025-06-11 16:14:41,240 - summaflow.core - INFO - Processing forcing file: remapped_remapped_bb_model_semidistributed_1980010312.nc
2025-06-11 16:14:41,269 - summaflow.core - INFO - Saving dataset to /home/kasra.keshavarz1/test-dave/remapped_remapped_bb_model_semidistributed_1980010312.nc
2025-06-11 16:14:41,305 - summaflow.core - INFO - Processing forcing file: remapped_remapped_bb_model_semidistributed_1980010412.nc
2025-06-11 16:14:41,333 - summaflow.core - INFO - Saving dataset to /home/kasra.keshavarz1/test-dave/remapped_remapped_bb_model_semidistributed_1980010412.nc
2025-06-11 16:14:41,367 - summaflow.core - INFO - Processing forcing file: remapped_remapped_bb_model_semidistributed_1980010512.nc
2025-06-11 16:14:41,392 - summaflow.core - INFO - Saving dataset to /home/kasra.keshavarz1/test-dave/remapped_remapped_bb_model_se



2025-06-11 16:14:41,492 - summaflow.core - INFO - Processing forcing file: remapped_remapped_bb_model_semidistributed_1980010712.nc
2025-06-11 16:14:41,528 - summaflow.core - INFO - Saving dataset to /home/kasra.keshavarz1/test-dave/remapped_remapped_bb_model_semidistributed_1980010712.nc
2025-06-11 16:14:41,570 - summaflow.core - INFO - Processing forcing file: remapped_remapped_bb_model_semidistributed_1980010812.nc
2025-06-11 16:14:41,601 - summaflow.core - INFO - Saving dataset to /home/kasra.keshavarz1/test-dave/remapped_remapped_bb_model_semidistributed_1980010812.nc
2025-06-11 16:14:41,635 - summaflow.core - INFO - Processing forcing file: remapped_remapped_bb_model_semidistributed_1980010912.nc
2025-06-11 16:14:41,662 - summaflow.core - INFO - Saving dataset to /home/kasra.keshavarz1/test-dave/remapped_remapped_bb_model_semidistributed_1980010912.nc
2025-06-11 16:14:41,698 - summaflow.core - INFO - Processing forcing file: remapped_remapped_bb_model_semidistributed_1980011012.n



2025-06-11 16:14:41,730 - summaflow.core - INFO - Saving dataset to /home/kasra.keshavarz1/test-dave/remapped_remapped_bb_model_semidistributed_1980011012.nc
2025-06-11 16:14:41,764 - summaflow.core - INFO - Forcing dataset processed/initialized successfully.
