In [8]:
import geoglows
import pandas as pd

# Names of the reach-identifier columns in each table
REANALYSIS_ID_COL = 'comid'
LIDAR_ID_COL = 'linkno'

# 1. Read the LiDAR site sheet and the GEOGLoWS reanalysis table
lidar_df = pd.read_excel('/Users/kennyquintana/Downloads/merged_sites.xlsx')
reanalysis_file = '/Users/kennyquintana/Developer/lhd-processor/lhd_processor/data/geoglows_reanalysis.csv'
df_reanalysis = pd.read_csv(reanalysis_file)

# The target column must exist before the per-reach assignments below
if 'known_baseflow' not in df_reanalysis.columns:
    df_reanalysis['known_baseflow'] = None

# A site with no reach ID or no scan date cannot be looked up, so drop it
lidar_df = lidar_df.dropna(subset=[LIDAR_ID_COL, 'lidar_date'])

# 2. For every LiDAR site, pull the flow on the scan date and store it
for index, row in lidar_df.iterrows():
    # Any failure for a single site is reported and the loop moves on
    try:
        reach_id = int(row[LIDAR_ID_COL])
        date_obj = pd.to_datetime(row['lidar_date'])
        scan_date = date_obj.strftime('%Y-%m-%d')

        # Daily-resolution retrospective series for this reach
        # NOTE(review): presumably date-indexed with the flow in the first
        # column -- confirm against the geoglows documentation
        history = geoglows.data.retrospective(reach_id, resolution='daily')

        # Re-key the series by 'YYYY-MM-DD' strings so it can be compared
        # directly with scan_date
        history.index = pd.to_datetime(history.index).strftime('%Y-%m-%d')

        if scan_date not in history.index:
            print(f"❌ Date {scan_date} not found for {reach_id}")
            continue

        # First value of the row matching the scan date
        lidar_flow = history.loc[scan_date].iloc[0]

        # Rows of the reanalysis table that belong to this reach
        mask = df_reanalysis[REANALYSIS_ID_COL] == reach_id
        if not mask.any():
            print(f"⚠️ Comid {reach_id} not in reanalysis file.")
            continue

        df_reanalysis.loc[mask, 'known_baseflow'] = lidar_flow
        print(f"✅ Updated {reach_id}: flow {lidar_flow:.2f} for {scan_date}")

    except Exception as e:
        print(f"‼️ Failed on linkno {row[LIDAR_ID_COL]}: {e}")

# 3. Write the updated table to the current working directory
df_reanalysis.to_csv('geoglows_reanalysis_updated.csv', index=False)
print("\nDone! File saved.")

✅ Updated 710795843: flow 148.64 for 2018-12-19
✅ Updated 720227605: flow 31.16 for 2019-11-19
✅ Updated 720199958: flow 0.67 for 2011-09-17
✅ Updated 720235790: flow 14.95 for 2019-11-07
✅ Updated 710482666: flow 6.40 for 2020-11-20
✅ Updated 710904968: flow 53.90 for 2016-03-16
✅ Updated 760624835: flow 0.47 for 2016-09-11
✅ Updated 760672712: flow 3.81 for 2017-11-14
✅ Updated 770513014: flow 1.06 for 2019-01-28
✅ Updated 770479639: flow 1.01 for 2019-03-20
✅ Updated 760645077: flow 10.36 for 2017-02-10
✅ Updated 760746356: flow 21.40 for 2011-09-16
✅ Updated 760679798: flow 3.80 for 2012-01-15
✅ Updated 760592909: flow 7.40 for 2020-12-20
✅ Updated 760539639: flow 6.82 for 2020-12-19
✅ Updated 760580122: flow 1.25 for 2021-12-09
✅ Updated 760601460: flow 2.67 for 2011-09-20
✅ Updated 760591413: flow 217.88 for 2020-04-19
✅ Updated 760687921: flow 159.08 for 2020-04-19
✅ Updated 760641293: flow 145.94 for 2020-04-22
✅ Updated 760556394: flow 103.75 for 2020-04-28
✅ Updated 760536426

In [9]:
import pandas as pd

# Inputs: the crosswalk (must provide 'linkno' and 'nhdplusid' columns, both
# used below) and the existing GEOGLoWS reanalysis table
crosswalk = pd.read_csv('/Users/kennyquintana/Developer/lhd-processor/lhd_processor/data/geoglows_to_nhd_crosswalk.csv')
geoglows_data = pd.read_csv('/Users/kennyquintana/Developer/lhd-processor/lhd_processor/data/geoglows_reanalysis.csv')

# 1. Re-key the reanalysis rows by NHD ID:
#    - inner-join reanalysis 'comid' against crosswalk 'linkno'
#    - discard both GEOGLoWS ID columns
#    - let 'nhdplusid' take over the 'comid' name
nhd_view = (
    geoglows_data
    .merge(crosswalk, how='inner', left_on='comid', right_on='linkno')
    .drop(columns=['comid', 'linkno'])
    .rename(columns={'nhdplusid': 'comid'})
)

# 2. Put 'comid' in the first position; all other columns keep their order
cols = ['comid', *(c for c in nhd_view.columns if c != 'comid')]
nhd_view = nhd_view[cols]

# 3. Save the re-keyed table next to the other data files
nhd_view.to_csv('/Users/kennyquintana/Developer/lhd-processor/lhd_processor/data/geoglows_to_nhd_reanalysis.csv', index=False)
print("Created GEOGLoWS to NHD mapping.")

Created GEOGLoWS to NHD mapping.


In [10]:
import pandas as pd

# Inputs: the NWM reanalysis table and the crosswalk (must provide
# 'nhdplusid' and 'linkno' columns, both used below)
# NOTE(review): assumes 'comid' in nwm_reanalysis.csv holds NHD IDs -- confirm
nhd_data = pd.read_csv('/Users/kennyquintana/Developer/lhd-processor/lhd_processor/data/nwm_reanalysis.csv')
crosswalk = pd.read_csv('/Users/kennyquintana/Developer/lhd-processor/lhd_processor/data/nhd_to_geoglows_crosswalk.csv')

# 1. Re-key the reanalysis rows by GEOGLoWS ID:
#    - inner-join reanalysis 'comid' against crosswalk 'nhdplusid'
#    - discard both NHD ID columns
#    - let 'linkno' take over the 'comid' name
geoglows_view = (
    nhd_data
    .merge(crosswalk, how='inner', left_on='comid', right_on='nhdplusid')
    .drop(columns=['comid', 'nhdplusid'])
    .rename(columns={'linkno': 'comid'})
)

# 2. Put 'comid' in the first position; all other columns keep their order
cols = ['comid', *(c for c in geoglows_view.columns if c != 'comid')]
geoglows_view = geoglows_view[cols]

# 3. Save the re-keyed table next to the other data files
geoglows_view.to_csv('/Users/kennyquintana/Developer/lhd-processor/lhd_processor/data/nwm_to_geoglows_reanalysis.csv', index=False)
print("Created nhd to GEOGLoWS mapping.")

Created nhd to GEOGLoWS mapping.
