In [None]:
# ==============================================================================
#                  MONTHLY OBSERVED DISCHARGE DATA PROCESSOR
# ==============================================================================
#
# Description:
#   This script reads a directory of CSV files containing daily observed
#   discharge data for multiple subbasins. It aggregates the daily data into
#   a monthly format based on a user-defined calculation (e.g., mean, sum),
#   creates a 'Year_Month' column, and exports the processed data for each
#   subbasin into a corresponding CSV file in a new directory.
#
# ==============================================================================

# ------------------------------------------------------------------------------
#   1. DEPENDENCY IMPORTS
# ------------------------------------------------------------------------------

import pandas as pd
import os
from tqdm import tqdm
import glob

# ------------------------------------------------------------------------------
#   2. USER-DEFINABLE OPTIONS
# ------------------------------------------------------------------------------
# Modify the variables in this section to control the script's behavior.

# --- File and Directory Paths ---
# Folder searched for the daily input files. It is joined with
# 'input_file_pattern' (below) to build the glob search pattern.
# NOTE(review): "dscharge" looks like a typo for "discharge" — confirm against
# the real folder name on disk before changing it.
input_directory_path = "Observed_daily_dscharge/Raw_netcdf/02_Final_Merged_Streamflow"
# Folder that receives one monthly CSV per input file, saved under the same
# file name as the input. The folder is created if it does not exist.
output_directory_path = "Observed_monthly_discharge"

# --- Input File and Column Names ---
# The glob pattern used to find input files. "*.csv" finds all CSVs.
# You could change this to "Subbasin_*.csv" to only find files starting with "Subbasin_".
input_file_pattern = "*.csv"

# The exact name of the column in your source files that contains the date.
# This column is parsed as dates on load and used as the time index for the
# monthly resampling.
source_date_column = "DATE"

# The exact name of the column in your source files that contains the discharge data.
# Only this column is aggregated; any other columns in the file are ignored.
source_data_column = "DISCHARGE"

# --- Output Formatting and Calculation ---
# The name you want for the data column in the final output files.
# Each output file contains exactly two columns: 'Year_Month' and this one.
output_data_column_name = "OBSERVED"

# The strftime format for the 'Year_Month' column.
#   - '%Y_%b'  ->  '1995_Dec'
#   - '%Y-%m'  ->  '1995-12'
#   - '%b-%Y'  ->  'Dec-1995'
# Note: '%b' is the locale's abbreviated month name, so the text can differ
# on machines that do not use an English locale.
output_date_format = '%Y_%b'

# The method for aggregating daily data to monthly.
# Common options (must be in quotes): 'mean', 'sum', 'median', 'min', 'max'
# The string is looked up by name as a method of the pandas resampler and
# called without arguments.
aggregation_method = 'mean'


# ------------------------------------------------------------------------------
#   3. SCRIPT EXECUTION
# ------------------------------------------------------------------------------
# Do not modify the code below unless you intend to change the script's logic.

class InvalidAggregationError(AttributeError):
    """Raised when 'aggregation_method' is not a callable resampler method.

    A dedicated type keeps the "invalid aggregation method" message from being
    printed for unrelated AttributeErrors raised elsewhere in the pipeline.
    """


def resample_month_end(series):
    """Return a month-end resampler for *series* on both new and old pandas.

    Args:
        series: A pandas Series indexed by a DatetimeIndex.

    Returns:
        The resampler object produced by ``series.resample(...)``.

    Raises:
        TypeError: If the index is not datetime-like (raised by pandas).
    """
    try:
        return series.resample('ME')
    except ValueError:
        # pandas < 2.2 does not know the 'ME' alias ("Invalid frequency");
        # 'M' is the equivalent month-end alias in those versions.
        return series.resample('M')


def aggregate_daily_to_monthly(daily_df, date_column, data_column,
                               output_column, date_format, method):
    """Aggregate one table of daily values into monthly values.

    The input frame is not modified.

    Args:
        daily_df: DataFrame holding the daily records; ``date_column`` must
            already be parsed as datetimes.
        date_column: Name of the column holding the dates.
        data_column: Name of the column holding the values to aggregate.
        output_column: Name given to the aggregated column in the result.
        date_format: strftime format used to build the 'Year_Month' labels.
        method: Name of the resampler method to apply (e.g. 'mean', 'sum').

    Returns:
        A DataFrame with exactly two columns: 'Year_Month' and
        ``output_column``, one row per calendar month in the resampled range.

    Raises:
        KeyError: If ``date_column`` or ``data_column`` is not in ``daily_df``.
        InvalidAggregationError: If ``method`` does not name a callable
            method of the resampler.
    """
    daily_series = daily_df.set_index(date_column)[data_column]
    resampler = resample_month_end(daily_series)

    # Look the method up defensively so that only a bad method name is
    # reported as such.
    aggregate = getattr(resampler, method, None) if isinstance(method, str) else None
    if not callable(aggregate):
        raise InvalidAggregationError(method)

    monthly_df = aggregate().reset_index()
    monthly_df = monthly_df.rename(columns={data_column: output_column})
    monthly_df['Year_Month'] = monthly_df[date_column].dt.strftime(date_format)
    return monthly_df[['Year_Month', output_column]]


print("\n--- Starting Observed Daily Discharge Processing Script ---")

# Name of the input file currently being processed; lets the error handlers
# report which file triggered a failure.
current_file = None

try:
    # --- Step A: Locate Input Files ---

    print(f"\nSearching for '{input_file_pattern}' files in: '{input_directory_path}'...")
    search_pattern = os.path.join(input_directory_path, input_file_pattern)
    # glob returns matches in arbitrary order; sort so every run processes
    # (and fails, if it fails) in the same order.
    csv_files = sorted(glob.glob(search_pattern))

    if not csv_files:
        raise FileNotFoundError(f"No files matching the pattern '{input_file_pattern}' were found in the specified input directory.")

    print(f"Found {len(csv_files)} files to process.")


    # --- Step B: Ensure Output Directory Exists ---

    print(f"\nEnsuring output directory exists at: '{output_directory_path}'")
    os.makedirs(output_directory_path, exist_ok=True)


    # --- Step C: Process Each CSV File ---

    print(f"\nProcessing and exporting files using '{aggregation_method}' aggregation...")
    for file_path in tqdm(csv_files, total=len(csv_files), desc="Files Processed"):
        file_name = os.path.basename(file_path)
        current_file = file_name

        # --- Checking the required columns ---
        # read_csv raises ValueError (not KeyError) when a 'parse_dates'
        # column is absent, so inspect the header first and raise the
        # KeyError that the handler below is written for.
        header_columns = pd.read_csv(file_path, nrows=0).columns
        missing_columns = [
            column for column in (source_date_column, source_data_column)
            if column not in header_columns
        ]
        if missing_columns:
            raise KeyError(", ".join(map(str, missing_columns)))

        # --- Loading the daily data ---
        daily_df = pd.read_csv(file_path, parse_dates=[source_date_column])

        # --- Aggregating daily data to monthly and formatting the output ---
        output_df = aggregate_daily_to_monthly(
            daily_df,
            source_date_column,
            source_data_column,
            output_data_column_name,
            output_date_format,
            aggregation_method,
        )

        # --- Saving the processed data ---
        output_filename = os.path.join(output_directory_path, file_name)
        output_df.to_csv(output_filename, index=False)


    print("\nExport process completed successfully.")


except FileNotFoundError as e:
    print(f"\n[ERROR] {e}")
    print("        Please check the path and pattern in the 'USER-DEFINABLE OPTIONS' section and try again.")
except InvalidAggregationError:
    print(f"\n[ERROR] Invalid 'aggregation_method': '{aggregation_method}'.")
    print("        Please choose a valid pandas resampling method like 'mean', 'sum', 'median', 'min', or 'max'.")
except KeyError as e:
    print(f"\n[ERROR] A column was not found: {e}.")
    print("        Please ensure 'source_date_column' and 'source_data_column' are set correctly in the options.")
    if current_file:
        print(f"        File being processed: '{current_file}'")
except Exception as e:
    # Top-level boundary of the script: report anything unexpected instead of
    # showing a raw traceback to the user.
    print(f"\n[ERROR] An unexpected error occurred: {e}")
    print("        Please review the error message to diagnose the issue.")
    if current_file:
        print(f"        File being processed: '{current_file}'")


finally:
    print("\n--- Script has finished execution. ---\n")

