In [2]:
#Import Libraries
import os
import numpy as np
import pandas as pd
from pathlib import Path
import s3fs
import xarray

In [3]:
#Hydrotable of the HUC8 with Spatial Joined GeoGLOWS Flowlines reaches
hydrotable = Path('./hydrotable/fim45geoglows_12060202.csv')
output_dir = Path('./streamflow')
huc = '12060202'

#start and end date
start_date = '2016-01-01'
end_date = '2016-12-30'
value_time = '2016-10-15'

**Get all the Streamflow for all feature ID based on LINKNO within specified date**

In [4]:
def get_geoglowsdatafromS3():
    bucket_uri = 's3://geoglows-v2-retrospective/retrospective.zarr'
    region_name = 'us-west-2'
    s3 = s3fs.S3FileSystem(anon=True, client_kwargs=dict(region_name=region_name))
    s3store = s3fs.S3Map(root=bucket_uri, s3=s3, check=False)
    
    #All data
    ds = xarray.open_zarr(s3store)
    return ds

def get_rivID(hydrotable):
    df = pd.read_csv(hydrotable)
    return df

def GetGLOWSStreamflow(start_date, end_date, value_time, hydrotable, output_dir, huc, time_column='time'):
    # Get the retrospective dataset
    ds = get_geoglowsdatafromS3()
    hydro_df = pd.read_csv(hydrotable)
    
    # Map LINKNO to feature_id
    linkno_to_featureid = hydro_df.set_index('LINKNO')['feature_id'].to_dict()
    riv_ids = hydro_df['LINKNO'].tolist()
    filtered_ds = ds['Qout'].sel(rivid=riv_ids).to_dataframe()
    filtered_ds.reset_index(inplace=True)
    filtered_ds['time'] = pd.to_datetime(filtered_ds['time'])
    filtered_df = filtered_ds[(filtered_ds['time'] >= start_date) & (filtered_ds['time'] <= end_date)]
    
    # Map rivid (LINKNO) to feature_id
    filtered_df['feature_id'] = filtered_df['rivid'].map(linkno_to_featureid)
    output_df = filtered_df[['feature_id', 'Qout', 'time']]
    
    output_df.rename(columns={'Qout': 'discharge'}, inplace=True)
    
    # Export the filtered data to a CSV file
    out_dir = Path(output_dir) / 'combinedStreamflow'
    out_dir.mkdir(parents=True, exist_ok=True)
    output_file = out_dir / f'{huc}_{start_date}_{end_date}.csv'
    output_df.to_csv(output_file, index=False)
    
    #Filter based on value_time
    value_time_df = output_df[output_df['time'] == value_time]
    value_time_df = value_time_df[['feature_id', 'discharge']]
    
    # Export the value_time data to a separate CSV file
    value_timeSTR = pd.to_datetime(value_time).strftime('%Y%m%d')
    value_time_file = Path(output_dir) / f'{value_timeSTR}_{huc}.csv'
    value_time_df.to_csv(value_time_file, index=False)
    

In [5]:
GetGLOWSStreamflow(start_date, end_date, value_time, hydrotable, output_dir, huc)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['feature_id'] = filtered_df['rivid'].map(linkno_to_featureid)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  output_df.rename(columns={'Qout': 'discharge'}, inplace=True)
