# Applied Climate Information System (ACIS)

### Prepare Workspace

In [5]:
# Import system libraries
import os
import sys

# Import data manipulation libraries
import pandas as pd
import numpy as np
import datetime

# Import geospatial libraries
import geopandas as gpd

# Import API libraries
import requests
import json

# Set working directory so the relative 'assets/...' paths used in this
# notebook resolve against the repository root.
# NOTE(review): this absolute path looks specific to one machine - adjust it
# (or launch the notebook from the repository root) when running elsewhere.
os.chdir('/Users/jessicarapson/Documents/GitHub/water-supply-forecast')

### Load Data from API

In [6]:
# Read the site catchment polygons from the GeoPackage
gdf_sites = gpd.read_file('assets/data/geospatial.gpkg')

# One bounding box per catchment, in row order, as (minx, miny, maxx, maxy)
site_bboxes = [
    catchment.geometry.bounds for _, catchment in gdf_sites.iterrows()
]

# Initialise list of per-site weekly summaries
df_all = []

# Request window. The start date is also used to rebuild the calendar date of
# every returned row: each station's 'data' list is treated as one entry per
# day, in order, starting at start_date.
start_date = datetime.date(1985, 1, 1)
end_date = datetime.date(2024, 1, 1)

# Names for the four values returned per day, in the order of 'elems' below
value_columns = ['pcpn', 'pcpn_d', 'avgt', 'avgt_d']


def round_day_down(date):
    """Return `date` moved back to the nearest day-of-month in {1, 8, 15, 22}.

    These four days are the "week" starts used for aggregation, so every day
    from the 22nd to the end of the month falls into the last bucket.
    """
    rounded_day = max(day for day in (1, 8, 15, 22) if day <= date.day)
    return date.replace(day=rounded_day)


# Loop through catchments
for i in range(len(gdf_sites)):
    site_id = gdf_sites.iloc[i]['site_id']
    print("Processing ACIS for:", site_id, f"({i + 1}/{len(gdf_sites)})")

    # Call data using web services: daily precipitation and average
    # temperature, each as a raw value and as a departure from normal, for
    # every station inside the catchment's bounding box
    input_dict = {
      'bbox': site_bboxes[i],
      'sdate' : start_date.isoformat(),
      'edate': end_date.isoformat(),
      'meta' : 'name, sids',
      'elems' : [{
        'name' : 'pcpn',
        'interval' : [0,0,1],
        'duration' : "dly",
        'reduce' : {'reduce':'sum'},
      },{
        'name' : 'pcpn',
        'interval' : [0,0,1],
        'duration' : "dly",
        'reduce' : {'reduce':'sum'},
        'normal' : 'departure'
      },{
        'name' : 'avgt',
        'interval' : [0,0,1],
        'duration' : "dly",
        'reduce' : {'reduce':'mean'},
      },{
        'name' : 'avgt',
        'interval' : [0,0,1],
        'duration' : "dly",
        'reduce' : {'reduce':'mean'},
        'normal' : 'departure'
      }]
    }
    params = {'params': json.dumps(input_dict)}
    headers = {'Accept': 'application/json'}
    # Timeout is (connect, read) seconds so a stalled connection cannot hang
    # the whole run; the read limit is generous because the window is long
    req = requests.post('http://data.rcc-acis.org/MultiStnData', data=params,
                        headers=headers, timeout=(10, 600))
    # Surface HTTP errors here instead of as a confusing JSON decode failure
    req.raise_for_status()
    response = req.json()
    acis_data = response['data']

    # A bounding box with no stations has nothing to aggregate
    if not acis_data:
        print("No ACIS stations returned for:", site_id, "- skipping")
        continue

    # Week start for each day offset from start_date, computed once per site
    # and shared by all of its stations
    n_days = max(len(station['data']) for station in acis_data)
    week_starts = [
        round_day_down(start_date + datetime.timedelta(days=offset))
        for offset in range(n_days)
    ]

    # Flatten to one row per station-day. The date comes from the row's
    # position within its own station, and stations are keyed by position
    # rather than name, so stations that share a name stay separate.
    values = []
    for station_idx, station in enumerate(acis_data):
        meta_values = list(station['meta'].values())
        for offset, daily in enumerate(station['data']):
            values.append(
                [station_idx] + meta_values + list(daily) + [week_starts[offset]])

    # Create DataFrame (expects exactly one metadata value per station)
    df = pd.DataFrame(
        values,
        columns=['station_idx', 'station_name'] + value_columns + ['week_start_date'])

    # Clean data: anything non-numeric (e.g. the 'M' and 'T' markers) becomes
    # NaN; assigning by column label makes the columns properly numeric
    df[value_columns] = df[value_columns].apply(pd.to_numeric, errors='coerce')

    # Weekly mean of each variable over all stations and days
    averages = df.groupby('week_start_date')[value_columns].mean()

    # Weekly precipitation total per station (NaN when the station has no
    # valid reading that week), then averaged across stations
    sums = df.groupby(['station_idx', 'week_start_date'])['pcpn'].sum(
        min_count=1).groupby(level='week_start_date').mean()

    # Combine data, aligning the two summaries on week_start_date
    df_site = averages.assign(pcpn_sum=sums).reset_index()
    df_site['site_id'] = site_id
    df_site = df_site[['site_id', 'week_start_date', 'pcpn_sum', 'pcpn_d', 'avgt', 'avgt_d']]

    # Append site summary to data
    df_all.append(df_site)

# Export dataframe
result = pd.concat(df_all)
result.to_csv('assets/data/acis/acis.csv', index=False)

Processing ACIS for: hungry_horse_reservoir_inflow (1/26)
Processing ACIS for: snake_r_nr_heise (2/26)
Processing ACIS for: pueblo_reservoir_inflow (3/26)
Processing ACIS for: sweetwater_r_nr_alcova (4/26)
Processing ACIS for: missouri_r_at_toston (5/26)
Processing ACIS for: animas_r_at_durango (6/26)
Processing ACIS for: yampa_r_nr_maybell (7/26)
Processing ACIS for: libby_reservoir_inflow (8/26)
Processing ACIS for: boise_r_nr_boise (9/26)
Processing ACIS for: green_r_bl_howard_a_hanson_dam (10/26)
Processing ACIS for: taylor_park_reservoir_inflow (11/26)
Processing ACIS for: dillon_reservoir_inflow (12/26)
Processing ACIS for: ruedi_reservoir_inflow (13/26)
Processing ACIS for: fontenelle_reservoir_inflow (14/26)
Processing ACIS for: weber_r_nr_oakley (15/26)
Processing ACIS for: san_joaquin_river_millerton_reservoir (16/26)
Processing ACIS for: merced_river_yosemite_at_pohono_bridge (17/26)
Processing ACIS for: american_river_folsom_lake (18/26)
Processing ACIS for: colville_r_at_k