# Process SWANN

### Prepare Workspace

In [1]:
# Import system libraries
import os
import sys

# Import data manipulation libraries
import pandas as pd
import numpy as np
import datetime

# Set working directory so the relative paths used below (e.g. 'assets/data/...') resolve.
# NOTE(review): this absolute path is specific to one machine (presumably the
# repository root) — adjust it before running the notebook elsewhere.
os.chdir('/Users/jessicarapson/Documents/GitHub/water-supply-forecast')

### Load Data from Files

In [9]:
# Read the SWANN SWE table from the local CSV file
swe_volumes = pd.read_csv('assets/data/swann/swann_swe.csv')

# Work on a copy of the raw table that carries a calendar-date column
# derived from 'time' (any time-of-day component is discarded)
swe_volumes_week = swe_volumes.assign(
    date=pd.to_datetime(swe_volumes['time']).dt.date)

# Keep only the rows dated on or after 1985-01-01
from_1985_onward = pd.to_datetime(
    swe_volumes_week['date']) >= pd.Timestamp("1985-01-01")
swe_volumes_week = swe_volumes_week[from_1985_onward]

# Define start (inclusive) and end (exclusive) dates of the period of interest
start_date = datetime.date(1985, 1, 1)
end_date = datetime.date(2024, 1, 1)

# Build the list of week start dates as 'YYYY-MM-DD' strings.
# A "week" starts on the 1st, 8th, 15th and 22nd of every month, which is the
# same convention round_day_down uses, so these strings line up with the
# 'week_start_date' values computed from the data.
# The previous version stepped through the calendar 7 days at a time and
# offset each step by the weekday, which produced dates before start_date,
# many duplicates, and dates that were not the 1st/8th/15th/22nd.
week_list = []
for year in range(start_date.year, end_date.year + 1):
    for month in range(1, 13):
        for day in (1, 8, 15, 22):
            week = datetime.date(year, month, day)
            # Keep only week starts inside [start_date, end_date)
            if start_date <= week < end_date:
                week_list.append(week.strftime('%Y-%m-%d'))
    
# Function to round a date down to the nearest week-start day (1, 8, 15 or 22) of its month
def round_day_down(date):
    """Snap ``date`` back to the latest week-start day of its month.

    Week-start days are the 1st, 8th, 15th and 22nd, so days 29-31 map to
    the 22nd. Only the day is changed; the result comes from
    ``date.replace(day=...)``.

    Raises ``ValueError`` if no week-start day is <= ``date.day``.
    """
    week_start = max(
        candidate for candidate in (1, 8, 15, 22) if candidate <= date.day
    )
    return date.replace(day=week_start)

# Create a new column 'week_start_date' based on 'date'
swe_volumes_week = (
    swe_volumes_week
    .assign(week_start_date=swe_volumes_week['date'].apply(round_day_down))
    # 'date' and 'time' are superseded by 'week_start_date'
    .drop(columns=['date', 'time'])
)

# Average every remaining column for each site within each week
swe_volumes_week = (
    swe_volumes_week
    .groupby(['site_id', 'week_start_date'])
    .mean()
    .reset_index()
)

In [11]:
# Save the weekly table to the current working directory; the row index is
# written as the first (unnamed) column
swe_volumes_week.to_csv('swann_swe_week.csv', index=True)