In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import geopandas as gpd
import networkx as nx
import xarray as xr
from typing import Union, Set, Dict
from datetime import datetime

In [None]:
def import_HYPE_output(file_path: str) -> pd.DataFrame:
    ''' Load a tab-separated HYPE output .txt file into a DataFrame.

    The first line of the file is skipped, so the second line supplies the
    column names. The last column of the parsed table is then removed.
    # NOTE(review): the reason the last column is discarded is not visible
    # here (possibly an empty column from a trailing tab) - confirm.

    Parameters
    ----------
    file_path: str
        Path to the .txt file to be converted

    Outputs
    ----------
    df: DataFrame
        Contents of the file without its last column. When the file is
        missing or any other error occurs, a message is printed and the
        function falls through, implicitly returning None.
    '''
    try:
        # skiprows=1 discards the leading line; the next line becomes the header
        parsed = pd.read_csv(file_path, sep='\t', skiprows=1)

        # Remove whichever column ended up last in the parsed table
        trailing_column = parsed.columns[-1]
        return parsed.drop(columns=trailing_column)
    except FileNotFoundError:
        print("File not found!")
    except Exception as e:
        print("An error occurred:", e)

In [None]:
def import_Temp_Prec(file_path: str) -> pd.DataFrame:
    ''' Load a tab-separated Temperature or Precipitation .txt file.

    The header line of the file is duplicated into the first data row of the
    returned table. In that row every entry except the first is cast to int,
    so the IDs can later be compared against a set of int. The column labels
    themselves are left exactly as read from the file.

    Parameters
    ----------
    file_path: str
        Path to the .txt file to be converted

    Outputs
    ----------
    df: DataFrame
        Row 0 holds the header (IDs as int); the remaining rows hold the file
        contents. When the file is missing or any other error occurs, a
        message is printed and the function implicitly returns None.
    '''
    try:
        table = pd.read_csv(file_path, sep='\t')

        # Expose the row labels as an 'index' column, then store the header
        # as a row labelled -1 so that sorting places it first
        table = table.reset_index()
        table.loc[-1] = table.columns
        table = table.sort_index().drop(columns='index')

        # Cast the IDs in the header row to int, leaving the first column alone
        header_ids = table.iloc[0, 1:].astype(int)
        table.iloc[0, 1:] = header_ids

        # Renumber the rows from 0
        return table.reset_index(drop=True)

    except FileNotFoundError:
        print("File not found!")
    except Exception as e:
        print("An error occurred:", e)

In [None]:
def read_excel_to_dataframe(file_path, sheet_name=None, start_date=None, end_date=None, column_index=None):
    """
    Read an Excel sheet (skipping its first three rows), optionally trim it by date
    and by column, and return the result as a pandas DataFrame.

    Parameters:
    - file_path (str): The path to the Excel file.
    - sheet_name (str, optional): The name of the sheet to read. If not provided, the first sheet is read.
    - start_date (str, optional): Earliest date to keep (inclusive). Format: 'YYYY-MM-DD'.
    - end_date (str, optional): Latest date to keep (inclusive). Format: 'YYYY-MM-DD'.
      Either bound may be given on its own; dates are taken from the first column.
    - column_index (int, optional): Positional index of the column to keep next to the first column.

    Returns:
    - df (pandas.DataFrame): The trimmed DataFrame. When a date bound is given, the first
      column is converted to datetime.

    Raises:
    - FileNotFoundError: If file_path does not exist.
    - ValueError: If start_date or end_date does not match 'YYYY-MM-DD'.
    - Exception: If the sheet cannot be parsed (the original error is chained as the cause).
    """

    # Opening the workbook doubles as the existence check
    try:
        workbook = pd.ExcelFile(file_path)
    except FileNotFoundError as exc:
        raise FileNotFoundError(f"File '{file_path}' not found.") from exc

    # Reuse the already-open workbook and make sure its handle is closed afterwards
    with workbook:
        try:
            df = pd.read_excel(
                workbook,
                # read_excel treats sheet_name=None as "all sheets"; 0 means the first sheet
                sheet_name=0 if sheet_name is None else sheet_name,
                skiprows=3,
            )
        except Exception as e:
            raise Exception(f"Error reading Excel file: {str(e)}") from e

    # Trim the DataFrame based on whichever date bounds were supplied
    if start_date or end_date:
        dates = pd.to_datetime(df.iloc[:, 0])
        # Replace the column outright so it really carries a datetime dtype
        df[df.columns[0]] = dates

        keep = pd.Series(True, index=df.index)
        if start_date:
            keep &= dates >= datetime.strptime(start_date, "%Y-%m-%d")
        if end_date:
            keep &= dates <= datetime.strptime(end_date, "%Y-%m-%d")
        df = df[keep]

    # Trim the DataFrame based on column index
    if column_index is not None:
        df = df.iloc[:, [0, column_index]]

    return df


In [None]:
def shapefile_ID(
    modifiedcat: str,
    riv: str,
    min_extra_id: int = 58662,
) -> gpd.GeoDataFrame:

    '''
    Reads the modified catchment shapefile and the river shapefile, appends
    the catchment rows whose seg_nhm is at least `min_extra_id` to the river
    rows, and returns the merged table with integer seg_nhm / ds_seg_nhm
    columns. That information can then be used to find the segments upstream
    of a given ID.

    Parameters
    ----------
    modifiedcat: str
        Path to the modified catchment shapefile where the river ID of 0 have
        been replaced with IDs
    riv: str
        Path to the river shapefile
    min_extra_id: int, optional
        Catchment rows with seg_nhm greater than or equal to this value are
        appended to the river rows. Defaults to 58662.
        # NOTE(review): presumably the first of the newly assigned IDs for
        # this particular dataset - confirm.

    Returns
    --------
    gpd: GeoDataFrame
        All columns of the river shapefile plus the appended catchment rows,
        in the river shapefile's CRS. Missing values in every column are
        replaced by 0, and seg_nhm / ds_seg_nhm are cast to int.

    '''

    # Load both layers under new names instead of rebinding the path arguments
    catchments = gpd.read_file(modifiedcat)
    rivers = gpd.read_file(riv)

    # Catchment rows carrying the extra segment IDs
    extra_segments = catchments[catchments['seg_nhm'] >= min_extra_id]

    # Append them to the river rows; pd.concat may return a plain DataFrame,
    # so rebuild a GeoDataFrame with the river CRS
    merged_data = pd.concat([rivers, extra_segments], ignore_index=True)
    merged_riv = gpd.GeoDataFrame(merged_data, crs=rivers.crs)

    # Rows that lack a value would block the int cast below, so fill them with 0
    merged_riv = merged_riv.fillna(0)

    merged_riv['seg_nhm'] = merged_riv['seg_nhm'].astype(int)
    merged_riv['ds_seg_nhm'] = merged_riv['ds_seg_nhm'].astype(int)

    return merged_riv

The following helper, `find_upstream`, is from Kasra:

In [None]:
def find_upstream(
    gdf: gpd.GeoDataFrame,
    target_id: str,
    main_id: str,
    ds_main_id: str,
) -> Set:
    '''Collect every segment that drains, directly or indirectly, into
    `target_id` in the river network described by `gdf`

    Parameters
    ----------
    gdf: geopandas.GeoDataFrame
        River segments; must provide the segment ID column and the
        downstream segment ID column named below
    target_id: str, int, or any other data type as included in `gdf`
        ID of the segment whose ancestors (upstream segments) are wanted
    main_id: str
        Name of the column holding the segment IDs
    ds_main_id: str
        Name of the column holding the downstream segment IDs

    Returns
    -------
    nodes: set
        IDs of all nodes upstream of `target_id`; `target_id` itself is
        not part of the result

    '''
    # One directed edge per row, pointing from a segment to its downstream segment
    network = nx.from_pandas_edgelist(
        gdf,
        source=main_id,
        target=ds_main_id,
        create_using=nx.DiGraph,
    )

    # Ancestors in this graph are exactly the nodes with a path to the target
    return nx.ancestors(network, target_id)

In [None]:
def upstream_precipitation(
    df: pd.DataFrame,
    columns: set,
    target_id: int
) -> pd.DataFrame:
    '''
    Adds up, row by row, the precipitation of the target station and of every
    station listed as upstream of it.

    Parameters:
    -----------
    df: pd.DataFrame
        Precipitation table (pobs.txt in hype). The first column holds the
        dates, the other column labels must be convertible to int, and the
        first row is skipped.
    columns: set
        Station IDs upstream of target_id whose columns are summed
    target_id: int
        Target ID for analysis; its own column is included in the sum

    Returns:
    --------
    pd.DataFrame
        Column 'Dates' with the first column of `df` (first row excluded) and
        a column named str(target_id) with the summed precipitation
    '''
    # Pick the data columns whose integer ID is upstream of, or equal to, the target
    selected = []
    for label in df.columns[1:]:
        station = int(label)
        if station in columns or station == target_id:
            selected.append(label)

    # Leave out the first row and the date column before summing across stations
    body = df.iloc[1:, 1:]
    totals = body.loc[:, selected].sum(axis=1)

    dates = df.iloc[1:, 0]
    return pd.DataFrame({'Dates': dates, str(target_id): totals})

In [None]:
def update_weekly_flowrates(calculated_df: pd.DataFrame, natural_df: pd.DataFrame) -> pd.DataFrame:
    """
    Aggregates a daily series onto the dates of natural_df: for each date after the first, the first
    data column of calculated_df is summed over the rows dated after the previous natural_df date
    and up to (and including) the current one. Can also be used to convert precipitation into weekly.

    Both inputs are MODIFIED IN PLACE: their first column is parsed as dates, becomes the index and
    is removed from the columns.

    Args:
        calculated_df (pd.DataFrame): DataFrame containing daily flowrates with the date in the first column.
        natural_df (pd.DataFrame): DataFrame containing weekly flowrates with the date in the first column.

    Returns:
        pd.DataFrame: Single column 'WeeklyFlow' indexed by the dates of natural_df. The first row is
        left empty (NaN) because it has no preceding date to bound its window.

    """
    # Move the date column of each input into its index (in place, same order as before)
    for frame in (calculated_df, natural_df):
        frame.index = pd.to_datetime(frame.iloc[:, 0])
        frame.drop(frame.columns[0], axis=1, inplace=True)

    week_ends = natural_df.index
    results_df = pd.DataFrame(index=week_ends, columns=['WeeklyFlow'])

    # Walk over consecutive (previous date, current date) pairs
    for window_start, window_end in zip(week_ends[:-1], week_ends[1:]):
        in_window = (calculated_df.index > window_start) & (calculated_df.index <= window_end)

        # Only the first remaining column of calculated_df contributes to the total
        results_df.loc[window_end] = calculated_df[in_window].sum().values[0]

    return results_df


In [None]:
def plot_hydrograph_with_precipitation(df1: pd.DataFrame, df2: pd.DataFrame, df3: pd.DataFrame, station_id: int):
    """
    Plots two hydrographs on a shared left axis and precipitation as bars hanging from the top
    of a second (right) axis.

    The input DataFrames are not modified; their indexes are parsed as dates locally. Precipitation
    is drawn with its actual (positive) values on an axis that is inverted exactly once, so the bars
    hang from the top and the tick labels stay positive.

    Args:
        df1 (pd.DataFrame): Hydrograph data; index holds the dates, first column the values.
        df2 (pd.DataFrame): Hydrograph data; index holds the dates, first column the values.
        df3 (pd.DataFrame): Precipitation data; index holds the dates, first column the values.
        station_id (int): Station ID to be added to the plot title.

    Returns:
        None. The figure is shown with plt.show().
    """
    # Parse the indexes into locals so the caller's DataFrames keep their own indexes
    dates1 = pd.to_datetime(df1.index)
    dates2 = pd.to_datetime(df2.index)
    dates3 = pd.to_datetime(df3.index)

    # Create a new figure and axis with a larger size
    fig, ax = plt.subplots(figsize=(12, 8))

    # Both hydrographs share the left axis, so its label is not tied to either line colour
    ax.plot(dates1, df1.iloc[:, 0], color='blue', label='Hydrograph 1')
    ax.plot(dates2, df2.iloc[:, 0], color='red', label='Hydrograph 2')
    ax.set_ylabel('Hydrograph (m$^3$/s)')

    # Set the labels and title
    ax.set_xlabel('Date')
    ax.set_title(f'Hydrograph with Precipitation (Station ID: {station_id})')

    # One tick per year; the rotation is set through tick_params so it also
    # applies to ticks created by the locator after this call
    ax.xaxis.set_major_locator(mdates.YearLocator())
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
    ax.tick_params(axis='x', labelrotation=45)

    # Second y-axis for precipitation; a single inversion makes the bars hang from the top
    ax2 = ax.twinx()
    ax2.bar(dates3, df3.iloc[:, 0], width=0.8, color='gray', alpha=0.5, label='Precipitation')
    ax2.set_ylabel('Precipitation (mm/week)', color='gray')
    ax2.tick_params(axis='y', labelcolor='gray')
    ax2.invert_yaxis()

    # Combine the legends of both axes into one box
    lines1, labels1 = ax.get_legend_handles_labels()
    lines2, labels2 = ax2.get_legend_handles_labels()
    ax.legend(lines1 + lines2, labels1 + labels2, loc='upper left')

    # Adjust layout to prevent overlapping
    plt.tight_layout()

    # Show the plot
    plt.show()

In [None]:
# ID analysed in the cells below: its upstream segments are traced, its
# precipitation column is included in the sum, and it appears in the plot title
target_id= 58308

In [None]:
# Natural-flow series: first column plus column 4 of the sheet, limited to 1980-01-01 .. 2018-12-31
nat = read_excel_to_dataframe(
    file_path='/home/paulc600/local/Nat_flow_update_Prabin_2023_03_13.xlsx',
    sheet_name='Nat flow_1909-2021',
    start_date='1980-01-01',
    end_date='2018-12-31',
    column_index=4,
)

In [None]:
# Display the trimmed table returned by read_excel_to_dataframe
nat

In [None]:
# Load the HYPE output file 0058308.txt
data_frame = import_HYPE_output(
    file_path='/home/paulc600/local/02_HYPE_outputs/0058308.txt',
)

In [None]:
# Display the table returned by import_HYPE_output
data_frame

In [None]:
# Load the precipitation input file (row 0 of the result repeats the header with int IDs)
pobs = import_Temp_Prec(
    file_path='/home/paulc600/local/02_HYPE_inputs/Pobs.txt',
)

In [None]:
# Merge the river shapefile with the extra rows of the modified catchment shapefile
ID = shapefile_ID(
    modifiedcat='/home/paulc600/SMM/SMM HYPE files/Modified_SMMcat.shp',
    riv='/home/paulc600/github/StMaryMilk2023-UofC/modified_TGF/smm_tgf_modified/smm_riv.shp',
)

In [None]:
# Set of segment IDs that drain into target_id
upstream = find_upstream(
    gdf=ID,
    target_id=target_id,
    main_id='seg_nhm',
    ds_main_id='ds_seg_nhm',
)

In [None]:
# Per-date precipitation summed over target_id and all of its upstream IDs
summed_precipitation = upstream_precipitation(
    df=pobs,
    columns=upstream,
    target_id=target_id,
)

In [None]:
# Sum the HYPE output onto the dates of nat.
# Side effect: data_frame and nat both lose their date column to the index.
weekly_calc = update_weekly_flowrates(
    calculated_df=data_frame,
    natural_df=nat,
)

In [None]:
# update_weekly_flowrates moved nat's date column into the index and dropped it;
# put the dates back as the first column so nat can be passed to that function again.
# NOTE: not idempotent - re-running this cell fails because 'Dates' already exists.
nat.insert(0, 'Dates', nat.index)

In [None]:
# Sum the precipitation onto the dates of nat.
# Side effect: summed_precipitation and nat both lose their date column to the index.
weekly_prec = update_weekly_flowrates(
    calculated_df=summed_precipitation,
    natural_df=nat,
)

In [None]:
# Plot the aggregated series: pass weekly_prec (computed in the previous cell) rather than
# the un-aggregated summed_precipitation, matching the 'mm/week' label of the precipitation axis
plot_hydrograph_with_precipitation(weekly_calc, nat, weekly_prec, target_id)