# Process USGS streamflow

### Prepare Workspace

In [2]:
# Import system libraries
import os
import sys

# Import data manipulation librariaes
import datetime
import pandas as pd
import numpy as np

# Import API libraries
import requests
import csv

# Set working directory
path = os.getcwd()

### Load Data from Folders

In [32]:
metadata = pd.read_csv(os.path.join(path, '..', 'assets/data/additional_sites/metadata.csv'))

# Create empty list
usbr_all = []
metadata.nrcs_name = metadata.nrcs_name.str.lower() \
    .replace('[ ]', '_', regex=True)
# Loop through years
for year in range(1985, 2025):
    print(f"#######Retreiving data for {year}#######")

    # Loop through forecasting sites
    for site in metadata.nrcs_name:
        file_path = f"assets/data/usgs_streamflow/usgs_streamflow_other_sites/FY{year}/{site}.csv"

        #  # Check if the file exists for the station
        if not os.path.exists(os.path.join(path, '..', file_path)):
            print(f"File not found for site {site}. Skipping...")
            continue

        # Read in daily flow data for the selected year
        flow_data = pd.read_csv(os.path.join(path, '..', file_path))
        flow_data['site_id'] = site

        # Select data of interest
        flow_data = flow_data[['site_id', 'datetime', '00060_Mean']]

        # Append to list
        usbr_all.append(flow_data)

# Combine data
result = pd.concat(usbr_all, axis=0, ignore_index=True)

# Perform additional cleaning
result['datetime'] = pd.to_datetime(result['datetime']).dt.strftime('%Y-%m-%d')
result['00060_Mean'] = np.where(result['00060_Mean'] == -999999, np.nan, result['00060_Mean'])

# Filter to years of interest
result_week = result.copy()
result_week['date'] = pd.to_datetime(result['datetime']).dt.date
result_week = result_week[pd.to_datetime(
    result_week['date']) >= pd.Timestamp("1984-01-01")]

# Define start and end dates
start_date = datetime.date(1984, 1, 1)
end_date = datetime.date(2024, 1, 1)

# Initialize an empty list to store weeks
week_list = []

# Generate weeks between start_date and end_date
current_date = start_date
while current_date < end_date:
    for day in [1, 8, 15, 22]:
        week = current_date + datetime.timedelta(days=(day - current_date.weekday() - 1))
        if week < end_date:
            week_list.append(week.strftime('%Y-%m-%d'))
    current_date += datetime.timedelta(days=7)
    
# Function to round down the day to the nearest value less than or equal to the day
def round_day_down(date):
    day = date.day
    nearest_values = [1, 8, 15, 22]

    # Find the nearest value less than or equal to the day
    rounded_day = max(filter(lambda x: x <= day, nearest_values))
    return date.replace(day=rounded_day)

 # Create a new column 'Rounded_Day_Column' based on 'Date_Column'
result_week['week_start_date'] = result_week['date'].apply(round_day_down)
result_week = result_week.drop(['date', 'datetime'], axis=1)

# Aggregate by week
result_week = result_week.groupby(
    ['site_id','week_start_date']).mean().reset_index()

# Export data
result_week.to_csv(os.path.join(path, '..', 'assets/data/additional_sites/usgs_streamflow.csv'), index=False)

#######Retreiving data for 1985#######
File not found for site ruedi_reservoir_inflow. Skipping...
File not found for site fontenelle_reservoir_inflow. Skipping...
File not found for site nan. Skipping...
File not found for site nan. Skipping...
File not found for site nan. Skipping...
File not found for site nan. Skipping...
File not found for site ahtanum_ck_at_union_gap. Skipping...
File not found for site alamosa_ck_ab_terrace_reservoir. Skipping...
File not found for site american_fk_ab_upper_powerplant. Skipping...
File not found for site american_r_a_fair_oaks_ca. Skipping...
File not found for site american_r_nr_nile. Skipping...
File not found for site applegate_lake_inflow. Skipping...
File not found for site arkansas_r_at_salida. Skipping...
File not found for site ash_canyon_ck_nr_carson_city. Skipping...
File not found for site ashley_ck_nr_vernal. Skipping...
File not found for site asotin_ck_at_asotin. Skipping...
File not found for site badger_ck_nr_browning. Skipping..

In [33]:
# Export data
result_week.to_csv(os.path.join(path, '..', 'assets/data/additional_sites/usgs_streamflow.csv'), index=False)

In [34]:
result_week

Unnamed: 0,site_id,week_start_date,00060_Mean
0,ahtanum_ck_at_union_gap,2019-10-01,28.0
1,ahtanum_ck_at_union_gap,2019-10-08,27.7
2,ahtanum_ck_at_union_gap,2019-10-15,28.4
3,ahtanum_ck_at_union_gap,2019-10-22,27.7
4,ahtanum_ck_at_union_gap,2019-11-01,28.6
...,...,...,...
419225,zuni_r_ab_black_rock_reservoir,2023-11-15,0.0
419226,zuni_r_ab_black_rock_reservoir,2023-11-22,0.0
419227,zuni_r_ab_black_rock_reservoir,2023-12-01,0.0
419228,zuni_r_ab_black_rock_reservoir,2023-12-08,0.0


In [35]:
os.getcwd()

'/Users/emilryd/programming/water-supply-forecast/assets'