## Threshold Level Method Equations (Different ways to calculate)

### Monthly Threshold vs Monthly Average Discharge

In [None]:
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import urllib.parse
import urllib.request
import os
import numpy as np
import plotly.graph_objs as go
import math

In [None]:
# DATA
# Query inputs. The commented-out input() calls are the interactive prompts
# these hard-coded values stand in for.
station_number = '01636500'  # input("(Type 'csv' if you wish to import a csv instead)\nWhat is the USGS Station ID?\t")
start_date = '2010-01-01'  # input("Start Date (YYYY-MM-DD):\t")
end_date = '2020-12-31'  # input("End Date (YYYY-MM-DD):\t\t")

# Build the USGS web link matching the inputs above
section1 = 'https://nwis.waterdata.usgs.gov/nwis/dv?referred_module=sw&search_site_no='
section2 = '&search_site_no_match_type=exact&site_tp_cd=OC&site_tp_cd=OC-CO&site_tp_cd=ES&site_tp_cd='\
'LK&site_tp_cd=ST&site_tp_cd=ST-CA&site_tp_cd=ST-DCH&site_tp_cd=ST-TS&index_pmcode_00060=1&group_key='\
'NONE&sitefile_output_format=html_table&column_name=agency_cd&column_name=site_no&column_name=station_nm&range_selection=date_range&begin_date='
section3 = '&end_date='
section4 = '&format=rdb&date_format=YYYY-MM-DD&rdb_compression=value&list_of_search_criteria=search_site_no%2Csite_tp_cd%2Crealtime_parameter_selection'

link = (section1 + station_number + section2 + start_date + section3 + end_date + section4)
print("Click here to see the generated USGS link: \n", link)

# Read the webpage and put the data into a variable. The context manager
# closes the HTTP response even if read() raises.
with urllib.request.urlopen(link) as USGS_page:
    downloaded_data = USGS_page.read()

# Format data as a string instead of bytes
str_data = downloaded_data.decode()

# Separate the data by each new line. Every line of data gets its own index
f_str_data = str_data.split('\n')

# Find the station name: the header line that starts with "#    USGS".
# If several lines match, the last one wins.
station_name = ''
for line in f_str_data:
    if line.startswith("#    USGS"):
        # Drop the leading "#    " from the station name line
        station_name = line[5:]

# Keep only the date and streamflow columns of every data row
date_flow_rows = []
for line in f_str_data:
    if line.startswith("USGS"):
        # Rows are tab-separated. Splitting on tabs (instead of cutting a fixed
        # 14-character prefix) keeps this correct for station numbers that are
        # not exactly 8 digits long.
        # NOTE(review): fields 0 and 1 are presumably agency and site number,
        # followed by date, streamflow and a qualifier - confirm against the feed.
        fields = line.split('\t')
        date_flow_rows.append(fields[2] + ',' + fields[3])
date_flow = ''.join(row + '\n' for row in date_flow_rows).encode()

# Create a file from the data in the current working directory
folder = os.getcwd()
filename = os.path.join(folder, 'USGS_Data_for_' + station_number + '.txt')
with open(filename, 'wb') as text:
    text.write(date_flow)

# Name the columns in the dataframe that's about to be created
columns = ['Date', 'Streamflow (cfs)']

# Turn the data into a dataframe using the file and the new column names.
# parse_dates = [0] turns the first column into datetime objects
df = pd.read_csv(filename, header=None, names=columns, parse_dates=[0])


Click here to see the generated USGS link: 
 https://nwis.waterdata.usgs.gov/nwis/dv?referred_module=sw&search_site_no=01636500&search_site_no_match_type=exact&site_tp_cd=OC&site_tp_cd=OC-CO&site_tp_cd=ES&site_tp_cd=LK&site_tp_cd=ST&site_tp_cd=ST-CA&site_tp_cd=ST-DCH&site_tp_cd=ST-TS&index_pmcode_00060=1&group_key=NONE&sitefile_output_format=html_table&column_name=agency_cd&column_name=site_no&column_name=station_nm&range_selection=date_range&begin_date=2010-01-01&end_date=2020-12-31&format=rdb&date_format=YYYY-MM-DD&rdb_compression=value&list_of_search_criteria=search_site_no%2Csite_tp_cd%2Crealtime_parameter_selection


In [None]:
def average_month_threshold_level_method(dates, streamflow_list, percentile, year_to_compare):
    """
        Plot one year's monthly mean discharge against per-month thresholds.

        The x axis has 366 daily slots laid out on a leap-year calendar. Each
        month's value is repeated across that month's slots. For a non-leap
        year the Feb 29 slot of the streamflow curve is left empty (NaN).

        Parameters:
            dates: datetime-like pandas Series (the `.dt` accessor is used).
            streamflow_list: streamflow values, one per date.
            percentile (float): quantile in [0, 1] passed to `Series.quantile`.
            year_to_compare (int): calendar year whose monthly means are drawn.

        Returns:
            None. Displays the plot.
    """
    import calendar

    df = pd.DataFrame({'Date': dates})
    df['Year'] = df['Date'].dt.year
    df['Month'] = df['Date'].dt.month
    df['Streamflow (cfs)'] = streamflow_list

    # Slots per month on the leap-year layout; the total is 366.
    slots_per_month = [31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]

    # Threshold: the requested quantile of every record that falls in the month,
    # across all years.
    month_quantiles = []
    for month, slots in enumerate(slots_per_month, start=1):
        threshold = df.loc[df['Month'] == month, 'Streamflow (cfs)'].quantile(percentile)
        month_quantiles.extend([threshold] * slots)

    # February's length comes from the calendar, not from how many February rows
    # the data happens to contain, so a missing or partial month can no longer
    # break the 366-slot layout.
    is_leap = calendar.isleap(year_to_compare)
    in_year = df['Year'] == year_to_compare

    streamflow_averages = []
    for month, slots in enumerate(slots_per_month, start=1):
        monthly_average = df.loc[in_year & (df['Month'] == month), 'Streamflow (cfs)'].mean()
        if month == 2 and not is_leap:
            # 28 real days, then an empty Feb 29 slot.
            streamflow_averages.extend([monthly_average] * 28 + [np.nan])
        else:
            streamflow_averages.extend([monthly_average] * slots)

    plt.figure(figsize = (30, 6))
    plt.plot(range(1, 367), streamflow_averages, label = 'Streamflow (cfs)')
    plt.plot(range(1, 367), month_quantiles, label = 'Threshold at the chosen percentile')

    # One tick roughly in the middle of each month.
    xtick_positions = np.arange(15, 367, 30.5)
    xtick_labels = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    plt.xticks(xtick_positions, xtick_labels)

    plt.xlabel('Day of the Year')
    plt.ylabel('Hydrological Flow (cfs)')
    plt.title('Threshold Level Method')
    plt.legend()
    plt.show()


### Monthly Threshold vs Daily Discharge

In [None]:
def monthly_threshold_level_method(dates, streamflow_list, percentile, year_to_compare):
    """
        Plot one year's daily discharge against per-month thresholds.

        The x axis has 366 daily slots laid out on a leap-year calendar. Each
        daily value is placed in the slot of its own calendar date, so days
        with no record (including Feb 29 of a non-leap year) stay NaN.

        Parameters:
            dates: datetime-like pandas Series (the `.dt` accessor is used).
            streamflow_list: streamflow values, one per date.
            percentile (float): quantile in [0, 1] passed to `Series.quantile`.
            year_to_compare (int): calendar year whose daily values are drawn.

        Returns:
            None. Displays the plot.
    """
    df = pd.DataFrame({'Date': dates})
    df['Year'] = df['Date'].dt.year
    df['Month'] = df['Date'].dt.month
    df['Streamflow (cfs)'] = streamflow_list

    # Threshold: the requested quantile of every record in the month, repeated
    # across the month's slots (February gets 29 slots).
    month_quantiles = []
    for month, slots in enumerate([31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31], start=1):
        threshold = df.loc[df['Month'] == month, 'Streamflow (cfs)'].quantile(percentile)
        month_quantiles.extend([threshold] * slots)

    # Zero-based slot of each date on the leap-year layout: in non-leap years
    # everything from Mar 1 (day-of-year 60) onward moves one slot right so the
    # Feb 29 slot stays empty.
    day_of_year = df['Date'].dt.dayofyear
    skip_feb_29 = (~df['Date'].dt.is_leap_year) & (day_of_year >= 60)
    df['Slot'] = day_of_year - 1 + skip_feb_29.astype(int)

    # Place values by date instead of by row position. This replaces the
    # Series/DataFrame concat used to insert the NaN and also copes with
    # unsorted or incomplete years.
    in_year = df['Year'] == year_to_compare
    slots_in_year = df.loc[in_year, 'Slot'].to_numpy(dtype=int)
    streamflow_year_to_compare = np.full(366, np.nan)
    streamflow_year_to_compare[slots_in_year] = df.loc[in_year, 'Streamflow (cfs)'].to_numpy(dtype=float)

    plt.figure(figsize = (30, 6))
    plt.plot(range(1, 367), streamflow_year_to_compare, label = 'Streamflow (cfs)')
    plt.plot(range(1, 367), month_quantiles, label = 'Threshold at the chosen percentile')

    plt.xlabel('Day of the Year')
    plt.ylabel('Hydrological Flow (cfs)')
    plt.title('Threshold Level Method')
    plt.legend()
    plt.show()


### Weekly Threshold vs Weekly Average Discharge

In [None]:
def average_week_threshold_level_method(dates, streamflow_list, percentile, year_to_compare):
    """
        Plot one year's weekly mean discharge against per-week thresholds.

        Weeks are ISO 8601 weeks. `year_to_compare` is matched against the ISO
        year, not the calendar year: ISO week numbers only identify a single
        week within an ISO year. Filtering on the calendar year would average
        early-January days that belong to week 52/53 together with
        late-December days of the same calendar year.

        The x axis has 53 * 7 daily slots; each week's value is repeated over
        its seven slots. Week 53 is NaN for ISO years that have only 52 weeks.

        Parameters:
            dates: datetime-like pandas Series (the `.dt` accessor is used).
            streamflow_list: streamflow values, one per date.
            percentile (float): quantile in [0, 1] passed to `Series.quantile`.
            year_to_compare (int): ISO year whose weekly means are drawn.

        Returns:
            None. Displays the plot.
    """
    df = pd.DataFrame({'Date': dates})
    iso_calendar = df['Date'].dt.isocalendar()
    df['Week'] = iso_calendar['week']
    df['Year'] = iso_calendar['year']
    df['Streamflow (cfs)'] = streamflow_list

    total_slots = 53 * 7

    # Threshold: the requested quantile of every record in the ISO week, across
    # all years.
    week_quantiles = []
    for week in range(1, 54):
        threshold = df.loc[df['Week'] == week, 'Streamflow (cfs)'].quantile(percentile)
        week_quantiles.extend([threshold] * 7)

    # Plain boolean mask; rows whose date is missing never match.
    in_year = (df['Year'] == year_to_compare).to_numpy(dtype=bool, na_value=False)

    streamflow_averages = []
    for week in range(1, 54):
        in_week = (df['Week'] == week).to_numpy(dtype=bool, na_value=False)
        weekly_average = df.loc[in_year & in_week, 'Streamflow (cfs)'].mean()
        streamflow_averages.extend([weekly_average] * 7)

    plt.figure(figsize = (30, 6))
    plt.plot(range(1, total_slots + 1), streamflow_averages, label = 'Streamflow (cfs)')
    plt.plot(range(1, total_slots + 1), week_quantiles, label = 'Threshold at the chosen percentile')

    plt.xlabel('Week of the Year')

    # One tick at the first slot of each of the 53 weeks.
    xtick_positions = np.arange(1, total_slots + 1, 7)
    xtick_labels = list(range(1, 54))

    plt.xticks(xtick_positions, xtick_labels)
    plt.ylabel('Hydrological Flow (cfs)')
    plt.title('Threshold Level Method')
    plt.legend()
    plt.show()


### Weekly Threshold vs Daily Discharge

In [None]:
def weekly_threshold_level_method(dates, streamflow_list, percentile, year_to_compare):
    """
        Plot one year's daily discharge against per-week thresholds.

        Weeks are ISO 8601 weeks and `year_to_compare` is matched against the
        ISO year. Each daily value is placed in the slot given by its ISO week
        and ISO weekday, so every value sits directly under the threshold of
        its own week. Slots with no record stay NaN.

        This replaces the earlier approach of rotating the calendar-year series.
        That approach left values up to six days away from their week slot, and
        its two rotations cancelled out in years where both January and December
        straddle ISO weeks (e.g. 2012).

        The x axis has 53 * 7 daily slots.

        Parameters:
            dates: datetime-like pandas Series (the `.dt` accessor is used).
            streamflow_list: streamflow values, one per date.
            percentile (float): quantile in [0, 1] passed to `Series.quantile`.
            year_to_compare (int): ISO year whose daily values are drawn.

        Returns:
            None. Displays the plot.
    """
    df = pd.DataFrame({'Date': dates})
    iso_calendar = df['Date'].dt.isocalendar()
    df['Week'] = iso_calendar['week']
    df['Streamflow (cfs)'] = streamflow_list

    total_slots = 53 * 7

    # Threshold: the requested quantile of every record in the ISO week, across
    # all years, repeated over the week's seven slots.
    week_quantiles = []
    for week in range(1, 54):
        threshold = df.loc[df['Week'] == week, 'Streamflow (cfs)'].quantile(percentile)
        week_quantiles.extend([threshold] * 7)

    # Plain boolean mask; rows whose date is missing never match.
    in_iso_year = (iso_calendar['year'] == year_to_compare).to_numpy(dtype=bool, na_value=False)
    selected = iso_calendar.loc[in_iso_year]

    # Zero-based slot: seven slots per week, Monday first (ISO weekday 1).
    week_index = selected['week'].to_numpy(dtype=int) - 1
    weekday_index = selected['day'].to_numpy(dtype=int) - 1
    slots = week_index * 7 + weekday_index

    streamflow_year_to_compare = np.full(total_slots, np.nan)
    streamflow_year_to_compare[slots] = df.loc[in_iso_year, 'Streamflow (cfs)'].to_numpy(dtype=float)

    plt.figure(figsize = (30, 6))
    plt.plot(range(1, total_slots + 1), streamflow_year_to_compare, label = 'Streamflow (cfs)')
    plt.plot(range(1, total_slots + 1), week_quantiles, label = 'Threshold at the chosen percentile')

    plt.xlabel('Week of the Year')

    # One tick at the first slot of each of the 53 weeks.
    xtick_positions = np.arange(1, total_slots + 1, 7)
    xtick_labels = list(range(1, 54))

    plt.xticks(xtick_positions, xtick_labels)
    plt.ylabel('Hydrological Flow (cfs)')
    plt.title('Threshold Level Method')
    plt.legend()
    plt.show()


### Daily Threshold vs Daily Discharge

In [None]:
def _percentile_label(percentile):
    """Return an ordinal such as '5th', '21st' or '12.5th' for a quantile fraction."""
    # Rounding removes float noise such as 0.07 * 100 == 7.000000000000001.
    value = round(float(percentile) * 100, 6)
    if value != int(value):
        return format(value, 'g') + 'th'

    whole = int(value)
    if 10 <= whole % 100 <= 20:
        # 11th, 12th, 13th: the teens always take 'th'.
        suffix = 'th'
    else:
        suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(whole % 10, 'th')
    return str(whole) + suffix


def daily_threshold_level_method(dates, streamflow_list, percentile, year_to_compare):
    """
        Plot one year's daily discharge against per-day thresholds.

        Both curves use 366 daily slots laid out on a leap-year calendar. Dates
        are mapped to slots by calendar date, so Mar 1 lands in the same slot
        whether or not the year is a leap year. The Feb 29 threshold therefore
        comes from leap years only, and the Feb 29 slot of a non-leap
        `year_to_compare` stays NaN.

        Parameters:
            dates: datetime-like pandas Series (the `.dt` accessor is used).
            streamflow_list: streamflow values, one per date.
            percentile (float): quantile in [0, 1] passed to `Series.quantile`.
            year_to_compare (int): calendar year whose daily values are drawn.

        Returns:
            None. Displays the plot.
    """
    # Creation of new date related columns for use later on
    df = pd.DataFrame({'Date': dates})
    df['Year'] = df['Date'].dt.year
    df['Streamflow (cfs)'] = streamflow_list

    # 1-based day on the leap-year layout. Raw day-of-year is one lower from
    # Mar 1 onward in non-leap years, which would group different calendar dates
    # under the same number and put the thresholds one day out of step with the
    # plotted streamflow.
    day_of_year = df['Date'].dt.dayofyear
    skip_feb_29 = (~df['Date'].dt.is_leap_year) & (day_of_year >= 60)
    df['Day'] = day_of_year + skip_feb_29.astype(int)

    # Calculation of the threshold for each day, in plotting order
    day_quantiles = [
        df.loc[df['Day'] == day, 'Streamflow (cfs)'].quantile(percentile)
        for day in range(1, 367)
    ]

    # Place the chosen year's values by calendar date; gaps stay NaN
    in_year = df['Year'] == year_to_compare
    slots = df.loc[in_year, 'Day'].to_numpy(dtype=int) - 1
    streamflow_year_to_compare = np.full(366, np.nan)
    streamflow_year_to_compare[slots] = df.loc[in_year, 'Streamflow (cfs)'].to_numpy(dtype=float)

    # Legend text that matches the percentile input
    percentile_string = _percentile_label(percentile)

    # Plotting
    plt.figure(figsize = (30, 6))
    plt.plot(range(1, 367), streamflow_year_to_compare, label = 'Streamflow (cfs)')
    plt.plot(range(1, 367), day_quantiles, label = 'Threshold at the ' + percentile_string + ' percentile')

    plt.xlabel('Day of the Year')
    plt.ylabel('Hydrological Flow (cfs)')
    plt.title('Threshold Level Method')
    plt.legend()
    plt.show()


## Other Plots

### Interactive

In [None]:
def create_interactive_plot(plotting_function, *function_parameters):
    """
        Show an interactive Plotly line chart of streamflow.

        Parameters:
            plotting_function: accepted but not used by this function.
            *function_parameters: the first value supplies the x data and the
                second the y data; any further values are ignored.

        Returns:
            None. Displays the figure.
    """
    figure = go.Figure()

    # The first two positional extras are the x and y series of the trace.
    x_values = function_parameters[0]
    y_values = function_parameters[1]

    streamflow_trace = go.Scatter(x=x_values, y=y_values, mode="lines", name="Streamflow")
    figure.add_trace(streamflow_trace)

    # Titles and pixel size of the figure.
    layout_options = {
        "title": "Interactive Hydrograph Plot",
        "xaxis_title": "Date",
        "yaxis_title": "Streamflow (cfs)",
        "width": 1400,
        "height": 400,
    }
    figure.update_layout(**layout_options)

    # Render the figure.
    figure.show()


In [None]:
# Draw the downloaded record interactively. The first argument ('plot') is only a
# placeholder: create_interactive_plot never reads its plotting_function parameter.
create_interactive_plot('plot', df['Date'], df['Streamflow (cfs)'])

### Baseflow vs Streamflow Plotting

In [None]:
def plot_baseflow(date, streamflow_list, baseflow_list, x_fig_size=30, y_fig_size=6):
    """
        Plot streamflow and baseflow over the same dates.

        Parameters:
            date: x values shared by both curves.
            streamflow_list: y values of the streamflow curve.
            baseflow_list: y values of the baseflow curve.
            x_fig_size: figure width passed to `plt.figure`.
            y_fig_size: figure height passed to `plt.figure`.

        Returns:
            None. Displays the plot.
    """
    figure_size = (x_fig_size, y_fig_size)
    plt.figure(figsize=figure_size)

    # Streamflow first, then baseflow, so the legend keeps that order.
    curves = (
        (streamflow_list, 'Streamflow (cfs)'),
        (baseflow_list, 'Baseflow (cfs)'),
    )
    for values, legend_name in curves:
        plt.plot(date, values, label=legend_name)

    plt.title('Streamflow vs Baseflow')
    plt.xlabel('Date')
    plt.ylabel('Hydrological Flow (cfs)')
    plt.legend()
    plt.show()

### Baseflow vs Streamflow Plotting Using Function Call

In [None]:
def plot_baseflow_with_function(date, streamflow_list, function, *inputs, x_fig_size=30, y_fig_size=6):
    """
        Plot streamflow together with the baseflow computed by `function`.

        Parameters:
            date: x values shared by both curves.
            streamflow_list: streamflow values; also the first argument given
                to `function`.
            function: callable invoked as `function(streamflow_list, *inputs)`;
                its result is plotted as the baseflow curve.
            *inputs: extra positional arguments forwarded to `function` one by
                one. They were previously handed over as a single tuple, which
                made a call without extras pass an empty tuple as a second
                argument.
            x_fig_size: figure width passed to `plt.figure`.
            y_fig_size: figure height passed to `plt.figure`.

        Returns:
            None. Displays the plot.
    """
    # Compute the baseflow before any figure is opened, so a failing function
    # does not leave an empty figure behind.
    baseflow_list = function(streamflow_list, *inputs)

    # Use the callable's own name. Slicing str(function) at fixed offsets only
    # works for plain functions whose memory address has one specific length.
    function_name = getattr(function, '__name__', None) or repr(function)

    plt.figure(figsize = (x_fig_size, y_fig_size))
    plt.plot(date, streamflow_list, label = 'Streamflow (cfs)')
    plt.plot(date, baseflow_list, label = 'Baseflow (cfs)')

    plt.xlabel('Date')
    plt.ylabel('Hydrological Flow (cfs)')
    plt.title('Baseflow function used: ' + function_name)
    plt.legend()
    plt.show()

In [None]:
def plot_hydrograph_recession(dates, streamflow_values):
    """
        Generate a hydrograph recession chart based on streamflow data.

        The chart is a scatter of discharge against its base-10 logarithm.
        Values that are missing or not strictly positive have no logarithm and
        are left off the chart.

        Parameters:
            dates: dates matching `streamflow_values`. Accepted to keep the
                signature; the chart itself does not use them.
            streamflow_values: array-like of numeric streamflow values.

        Returns:
            None. Displays the hydrograph recession chart.
    """
    # Build the values from the argument. The body previously read an undefined
    # local `data` and raised UnboundLocalError on every call.
    discharge = np.asarray(streamflow_values, dtype=float)
    discharge = discharge[discharge > 0]

    # Calculate recession index (logarithm of discharge)
    recession_index = np.log10(discharge)

    # Sort data based on recession index
    order = np.argsort(recession_index)
    recession_index = recession_index[order]
    discharge = discharge[order]

    # Create the plot
    plt.figure(figsize=(10, 6))
    plt.scatter(recession_index, discharge, color='blue', marker='o')
    plt.xlabel("Recession Index (log10(Q))")
    plt.ylabel("Streamflow Discharge")
    plt.title("Hydrograph Recession Chart")
    plt.grid(True)

    # Show the plot
    plt.show()

## Plots in progress

In [None]:
'''
    what we need:
        1- Hydrograph recession curve
        2- each model plot
        3- threshold method plot (quantiles based on year and month periods)
        4- BFO interval highlight
'''

'\n    what we need:\n        1- Hydrograph recession curve\n        2- each model plot\n        3- threshold method plot (quantiles based on year and month periods)\n        4- BFO interval highlight\n'

In [None]:
def plot_hydrograph_recession(data):
    """
        Generate a hydrograph recession chart based on streamflow data.

        The chart is a scatter of discharge against its base-10 logarithm.
        Rows whose discharge is missing or not strictly positive have no
        logarithm and are left off the chart. The caller's DataFrame is not
        modified.

        Parameters:
            data (DataFrame): must contain a numeric "discharge" column.

        Returns:
            None. Displays the hydrograph recession chart.
    """
    # Keep only rows that have a logarithm.
    positive = data.loc[data["discharge"] > 0]

    # Calculate recession index (logarithm of discharge) on a new frame.
    # `pd.np` is no longer available in current pandas, so numpy is called
    # directly.
    chart_data = positive.assign(recession_index=np.log10(positive["discharge"]))

    # Sort data based on recession index
    chart_data = chart_data.sort_values("recession_index")

    # Create the plot
    plt.figure(figsize=(10, 6))
    plt.scatter(chart_data["recession_index"], chart_data["discharge"], color='blue', marker='o')
    plt.xlabel("Recession Index (log10(Q))")
    plt.ylabel("Streamflow Discharge")
    plt.title("Hydrograph Recession Chart")
    plt.grid(True)

    # Show the plot
    plt.show()


In [None]:
def plot_streamflow_timeseries(data_path):
    """
    Draw streamflow against time from a CSV file.

    Parameters:
        data_path (str): Path to a CSV file with a "timestamp" column (parsed
            as dates) and a "streamflow" column.

    Returns:
        None. Displays the streamflow timeseries plot.
    """
    # Read the file, converting the timestamp column to datetimes.
    frame = pd.read_csv(data_path, parse_dates=['timestamp'])

    figure_size = (10, 6)
    plt.figure(figsize=figure_size)

    # Single blue line: streamflow over time.
    plt.plot(frame["timestamp"], frame["streamflow"], color='blue')

    plt.xlabel("Timestamp")
    plt.ylabel("Streamflow")
    plt.title("Streamflow Timeseries")
    plt.grid(True)

    plt.show()


In [None]:
def plot_models_comparison(data):
    """
        Plots a comparison chart for multiple models over time.

        Parameters:
            data (dict or DataFrame): A DataFrame whose columns are models and
                whose index is the time axis, or a dictionary-like object
                mapping model names to equal-length lists of values. A
                dictionary is converted with `pd.DataFrame(data)`.

        Returns:
            None: Displays the comparison chart using Matplotlib.
    """
    # The loop below needs `.columns` and `.index`, which a plain dict does not
    # have, so convert anything that is not already a DataFrame.
    if not isinstance(data, pd.DataFrame):
        data = pd.DataFrame(data)

    # Create a figure and axis
    fig, ax = plt.subplots(figsize=(10, 6))

    # Plot each column as a line on the same chart
    for column in data.columns:
        ax.plot(data.index, data[column], label=column)

    # Add labels and title
    ax.set_xlabel('Time')
    ax.set_ylabel('Value')
    ax.set_title('Comparison of Models')

    # Add a legend
    ax.legend()

    # Show the plot
    plt.show()
