# tRIBS-Sandbox: Generate Met Forcing
The purpose of this script is to convert the raw observation data from the stations into a format that tRIBS can ingest. For point station data tRIBS requires a meteorological data file (`*.mdf`) and a station descriptor file (`*.sdf`). The `*.sdf` essentially contains metadata for the stations and the file paths to the `*.mdf` files. The `*.mdf` contains the actual time series data.

This notebook prepares those files and performs the required unit conversions. Note that running this script is not required to complete the example, but if you would like to change the simulation length or model a different storm event entirely, then this script will need to be rerun.

In [1]:
# Import Packages we might need
import pandas as pd
import numpy as np
import os
from pytRIBS.shared.inout import InOut

### Define Functions
Before we start processing our data we will define some functions that we can call to complete the processing we need: for example, one that executes the pytRIBS functions that write the `*.mdf` and `*.sdf` files, and one that does the general processing of the raw observational data. The functions are not required, but they do make the code simpler to apply and read.

In [None]:
# Meteorological Data: Import Station Data

class TribsInputWriter(InOut):
    """
    A lightweight wrapper around pytRIBS InOut that turns observed station
    data into .mdf (time-series data) files and .sdf (station descriptor)
    files.

    The actual file writing is delegated to the writer methods called on
    ``self`` (``write_met_station``, ``write_precip_station``,
    ``write_met_sdf``, ``write_precip_sdf``), which are not defined in this
    class and therefore come from the ``InOut`` base class.
    """
    def __init__(self):
        super().__init__()

    def process_station_data(self,
                             met_df,
                             rain_df,
                             metadata,
                             output_dir,
                             file_prefix="Station"):
        """
        Write one station's meteorological and precipitation .mdf files and
        return the matching descriptor entries for the .sdf files.

        Neither input DataFrame is modified; all work is done on copies.

        Parameters
        ----------
        met_df : pd.DataFrame
            DataFrame indexed by datetime. Must contain the columns:
            - 'TA': Air Temperature [deg C]
            - 'RH': Relative Humidity [%]
            - 'PA': Atmospheric Pressure [hPa / mb]
            - 'US': Wind Speed [m/s]
            - 'IS': Incoming Shortwave Radiation [W/m^2]
            The columns 'XC', 'TS' and 'NR' are optional; any that are
            missing are filled with the placeholder value 9999.99.

        rain_df : pd.DataFrame
            DataFrame indexed by datetime. Must contain the column:
            - 'R':  Precipitation [mm/hr]

        metadata : dict
            Dictionary containing station location info:
            {'id': int, 'lat': float, 'lon': float,
             'x': float, 'y': float, 'z': float}
            An optional 'gmt' key gives the GMT offset written to the met
            descriptor; it defaults to -7 when absent.

        output_dir : str
            Path to save the files.

        file_prefix : str
            Prefix for filenames (e.g., 'Met_Obs').

        Returns
        -------
        tuple of (dict, dict)
            ``(met_sdf_entry, precip_sdf_entry)`` -- the descriptor records
            for this station, ready to be collected into lists and passed
            to ``write_sdf_files``.
        """
        # Prepare File Paths
        # Using the station ID in the filename prevents overwriting when processing multiple stations
        station_id = metadata['id']
        met_path = os.path.join(output_dir, f"met_{file_prefix}_{station_id}.mdf")
        precip_path = os.path.join(output_dir, f"precip_{file_prefix}_{station_id}.mdf")

        # Copy FIRST so that nothing below (dummy columns, 'date' column)
        # leaks back into the caller's DataFrames. The same met DataFrame may
        # be passed in for several stations.
        met_df = met_df.copy()
        rain_df = rain_df.copy()

        # Add Dummy Columns for Met if missing
        # tRIBS expects specific columns in the .mdf
        for col in ['XC', 'TS', 'NR']:
            if col not in met_df.columns:
                met_df[col] = 9999.99

        # Ensure 'date' column exists for the pytRIBS writer
        # pytRIBS looks for a column named 'date' to extract Year, Month, Day, Hour
        met_df['date'] = met_df.index
        rain_df['date'] = rain_df.index

        # Write the .mdf Data Files
        print(f"Writing station {metadata['id']} data files to {output_dir}...")

        # Write Precip (.mdf), This will have 4 rows per hour for 15-min data
        self.write_precip_station(rain_df[['R', 'date']].copy(), precip_path)

        # Write Meteorology (.mdf)
        met_cols = ['PA', 'RH', 'XC', 'TS', 'NR', 'TA', 'US', 'IS', 'date']
        self.write_met_station(met_df[met_cols].copy(), met_path)

        # Prepare the Metadata Dictionaries for the .sdf (Header) files
        # record_length is simply the number of rows in the .mdf file
        # NOTE(review): num_parameters presumably counts the 4 date fields
        # (Year, Month, Day, Hour) plus the data columns (8 for met, 1 for
        # precip) -- confirm against the tRIBS file format documentation.
        met_sdf_entry = {
            'station_id': metadata['id'],
            'file_path': met_path,
            'lat_dd': metadata['lat'],
            'long_dd': metadata['lon'],
            'x': metadata['x'],
            'y': metadata['y'],
            'GMT': metadata.get('gmt', -7),
            'record_length': len(met_df),
            'num_parameters': 12,
            'other': metadata['z']
        }

        precip_sdf_entry = {
            'station_id': metadata['id'],
            'file_path': precip_path,
            'x': metadata['x'],
            'y': metadata['y'],
            'record_length': len(rain_df),
            'num_parameters': 5,
            'elevation': metadata['z']
        }

        return met_sdf_entry, precip_sdf_entry

    def write_sdf_files(self, met_sdf_list, precip_sdf_list, output_dir, file_prefix="Station"):
        """
        Writes the compiled list of station metadata to .sdf files.

        Parameters
        ----------
        met_sdf_list : list of dict
            Met descriptor entries as returned by ``process_station_data``.
        precip_sdf_list : list of dict
            Precipitation descriptor entries as returned by
            ``process_station_data``.
        output_dir : str
            Directory in which the two .sdf files are written.
        file_prefix : str
            Prefix for the output names: ``<prefix>_Met.sdf`` and
            ``<prefix>_Precip.sdf``.
        """
        met_sdf_path = os.path.join(output_dir, f"{file_prefix}_Met.sdf")
        precip_sdf_path = os.path.join(output_dir, f"{file_prefix}_Precip.sdf")

        # NOTE(review): the two writers are called with opposite argument
        # orders (path, list) vs (list, path). This matches the original call
        # pattern -- confirm against the pytRIBS InOut signatures before
        # "tidying" it.
        self.write_met_sdf(met_sdf_path, met_sdf_list)
        self.write_precip_sdf(precip_sdf_list, precip_sdf_path)

        print(f"Successfully wrote SDF headers: \n  {met_sdf_path}\n  {precip_sdf_path}")

Now that we have defined the class above for writing the data in the correct format for tRIBS, we will make a second function that takes our observation data and converts it into the required DataFrame format, ready for unit conversion.

In [3]:

def process_raw_observations(excel_path, sheet_mapping, start_date, end_date, freq='h'):
    """
    Reads a multi-sheet Excel file with separate Date/Time columns and
    specific headers starting on Row 7, and returns the requested variables
    resampled onto a regular datetime index.

    Sheets that cannot be processed are reported with a printed error and
    skipped (best effort); the remaining sheets are still returned.

    Parameters
    ----------
    excel_path : str
        Path to the .xlsx file.
    sheet_mapping : dict
        Dictionary mapping { 'Excel_Sheet_Name': ('tRIBS_Var', 'Excel_Column_Name') }

        Example:
        {
          'Precip_Data': ('R', 'Incremental Inches'),
          'Temp_Data':   ('TA', 'Avg Temp C')
        }

    start_date : str
        Start of simulation (e.g., '2010-06-01 00:00:00')
    end_date : str
        End of simulation.
    freq : str
        Resampling frequency applied to the precipitation variable 'R'
        ('h' for hourly, '15min' for sub-hourly). All other variables are
        always resampled hourly.

    Returns
    -------
    pd.DataFrame
        DataFrame indexed by datetime, restricted to start_date..end_date,
        with one column per tRIBS variable. 'R' is summed per `freq`
        interval with gaps filled with 0.0; every other variable is the
        hourly mean with gaps linearly interpolated in both directions.

    Raises
    ------
    ValueError
        If no sheet in `sheet_mapping` could be processed.
    """
    print(f"--- Processing File: {os.path.basename(excel_path)} (Freq: {freq}) ---")
    processed_series = []

    for sheet_name, (tribs_var, target_col) in sheet_mapping.items():
        try:
            # header=6 -> the 7th spreadsheet row holds the column names
            raw_df = pd.read_excel(excel_path, sheet_name=sheet_name, header=6)
            # Headers are not guaranteed to be strings (e.g. numeric cells),
            # so coerce before stripping stray whitespace.
            raw_df.columns = [str(c).strip() for c in raw_df.columns]

            # Locate the Date/Time columns case-insensitively
            date_col = next((c for c in raw_df.columns if c.lower() == 'date'), None)
            time_col = next((c for c in raw_df.columns if c.lower() == 'time'), None)
            if date_col is None or time_col is None:
                raise KeyError("sheet has no 'Date' and/or 'Time' column")

            dt_index = pd.to_datetime(raw_df[date_col].astype(str) + ' ' + raw_df[time_col].astype(str))
            raw_df.set_index(dt_index, inplace=True)

            data_series = raw_df[target_col]

            # Use the passed frequency (freq) for resampling
            if tribs_var == 'R':
                # Rain is an incremental depth, so it accumulates over the interval
                data_resampled = data_series.resample(freq).sum()
            else:
                data_resampled = data_series.resample('h').mean() # Met is always hourly

            data_resampled.name = tribs_var
            processed_series.append(data_resampled)

        except Exception as e:
            # Deliberate best effort: report the failing sheet and keep going
            print(f"    ERROR on sheet '{sheet_name}': {e}")
            continue

    # Without this guard pd.concat([]) raises an opaque
    # "No objects to concatenate" that hides the per-sheet errors above.
    if not processed_series:
        raise ValueError(
            f"No sheets could be processed from '{excel_path}' "
            f"(tried: {list(sheet_mapping)}); see the errors printed above."
        )

    df_final = pd.concat(processed_series, axis=1)
    df_final = df_final.loc[start_date:end_date]

    # Missing rain means "no rain"; it must not be interpolated between events
    if 'R' in df_final.columns:
        df_final['R'] = df_final['R'].fillna(0.0)
    df_final = df_final.interpolate(method='linear', limit_direction='both')

    return df_final

### Process Data
Before we can execute the functions above we need to define the required metadata and describe how our Excel file with the observations is set up. For this workflow we are going to provide data for the South Mountain Fan and South Mountain Park HQ gages.

In [None]:
# Simulation window (inclusive) used to slice every observation series,
# plus the locations of the input workbook and the output folder.
start_sim = '2014-08-01 00:00:00'
end_sim   = '2014-08-20 23:00:00'
output_folder = '../smf_init_data/met/'
excel_path = '../smf_init_data/met/SMF_Observations_1993-2025.xlsx'

# Variable Name Mapping for South Mountain Fan Gage
# Each entry is: 'Excel sheet name': (tRIBS variable, Excel column header)
map_rain_st1 = {
    # Sheet Name        # (tRIBS Var,  Excel Column Header)
    'SMF Rain':         ('R',          'Incremental inches')
}
map_weather_st1 = {
    # Sheet Name        # (tRIBS Var,  Excel Column Header)
    'Temperature':      ('TA',         'Degrees F'),
    'Humidity':         ('RH',         'percent'),
    'Wind':             ('US',         'miles per hour'),
    'Solar Radiation':  ('IS',         'watts/sqm'),
    'Pressure':         ('PA',         'millibars')
}

# Mapping for South Mountain Park HQ Gage, note we are using the same data from SMF gage except for the rainfall
map_rain_st2 = {
    # Sheet Name        # (tRIBS Var,  Excel Column Header)
    'SMPHQ Rain':       ('R',          'Incremental inches')
}

#  Process Raw Data
# Get Met (Hourly)
df_st1_met = process_raw_observations(excel_path, map_weather_st1, start_sim, end_sim, freq='h')
# Get Rain (15-Min)
df_st1_rain = process_raw_observations(excel_path, map_rain_st1, start_sim, end_sim, freq='15min')
df_st2_rain = process_raw_observations(excel_path, map_rain_st2, start_sim, end_sim, freq='15min')

# Convert Units
# Only TA and US need converting: RH (percent), IS (watts/sqm) and
# PA (millibars) are already in the units named by their column headers.
# Air temperature: deg F -> deg C
df_st1_met['TA'] = (df_st1_met['TA'] - 32) * 5/9
# Wind speed: miles per hour -> m/s
df_st1_met['US'] = df_st1_met['US'] * 0.44704
# Convert Rain Depth (in) to Rate (mm/hr) for 15-min data
# Formula: (Inches * 25.4) / 0.25 hours
# NOTE: the 0.25 h divisor is tied to freq='15min' above; change both together.
df_st1_rain['R'] = (df_st1_rain['R'] * 25.4) / 0.25
df_st2_rain['R'] = (df_st2_rain['R'] * 25.4) / 0.25


# GENERATE FILES
writer = TribsInputWriter()

# Create lists to hold headers for the master files
# (one descriptor entry per station, later written to the two .sdf files)
all_met_headers = []
all_precip_headers = []

# Here we enter in the station metadata that is inserted into the station data file (sdf)
# Station 1: South Mountain Fan Gage
meta_station_1 = {
    'id': 1,
    'lat': 33.31518, 'lon': -112.13369,
    'x': 394483, 'y': 3686807, # UTM Zone 12N
    'z': 389,   # Elevation in meters
    'gmt': -7
}
# Station 2: South Mountain Park HQ Gage
meta_station_2 = {
    'id': 2,
    'lat': 33.34683, 'lon': -112.08456,
    'x': 398949, 'y': 3690196, # UTM Zone 12N
    'z': 431,   # Elevation in meters
    'gmt': -7
}

# Process Station 1 and collect metadata
m_meta1, p_meta1 = writer.process_station_data(
    df_st1_met, df_st1_rain, meta_station_1, output_dir=output_folder, file_prefix="SMF"
)
all_met_headers.append(m_meta1)
all_precip_headers.append(p_meta1)

# Process Station 2 and collect metadata
# Station 2 reuses the station 1 met DataFrame; only the rainfall differs.
m_meta2, p_meta2 = writer.process_station_data(
    df_st1_met, df_st2_rain, meta_station_2, output_dir=output_folder, file_prefix="SMPHQ"
)
all_met_headers.append(m_meta2)
all_precip_headers.append(p_meta2)


# WRITE MASTER SDF FILES
# Produces Master_Met.sdf and Master_Precip.sdf in output_folder
writer.write_sdf_files(all_met_headers, all_precip_headers, output_dir=output_folder, file_prefix="Master")

--- Processing File: SMF_Observations_1993-2025.xlsx (Freq: h) ---
--- Processing File: SMF_Observations_1993-2025.xlsx (Freq: 15min) ---
--- Processing File: SMF_Observations_1993-2025.xlsx (Freq: 15min) ---
Writing station 1 data files to ../smf_init_data/met/...
Writing station 2 data files to ../smf_init_data/met/...
Successfully wrote SDF headers: 
  ../smf_init_data/met/Master_Met.sdf
  ../smf_init_data/met/Master_Precip.sdf
