In [1]:
import os
from contextlib import redirect_stdout

import sys
sys.path.append('./coeqwalpackage')

import numpy as np
import pandas as pd
import datetime as dt
import re
from coeqwalpackage.metrics import *
import cqwlutils as cu
import plotting as pu

## Init

In [2]:
# Control workbook and sheet that hold the extraction settings.
CtrlFile = 'CalSim3DataExtractionInitFile_v4.xlsx'
CtrlTab = 'Init'

# read_init_file returns a flat sequence; the names below are unpacked
# strictly in the order it returns them, so do not reorder this list.
(
    ScenarioListFile, ScenarioListTab, ScenarioListPath,
    DVDssNamesOutPath, SVDssNamesOutPath, ScenarioIndicesOutPath, DssDirsOutPath,
    VarListPath, VarListFile, VarListTab,
    VarOutPath, DataOutPath, ConvertDataOutPath,
    ExtractionSubPath, DemandDeliverySubPath, ModelSubPath,
    GroupDataDirPath, ScenarioDir,
    DVDssMin, DVDssMax, SVDssMin, SVDssMax,
    NameMin, NameMax, DirMin, DirMax,
    IndexMin, IndexMax, StartMin, StartMax, EndMin, EndMax,
    VarMin, VarMax,
    DemandFilePath, DemandFileName, DemandFileTab, DemMin, DemMax,
    InflowOutSubPath, InflowFilePath, InflowFileName, InflowFileTab,
    InflowMin, InflowMax,
) = cu.read_init_file(CtrlFile, CtrlTab)

df, dss_names = read_in_df(ConvertDataOutPath, DVDssNamesOutPath)

# Collapse tuple column labels into single underscore-joined strings;
# labels that are not tuples are kept unchanged.
flat_column_names = []
for column_key in df.columns:
    if isinstance(column_key, tuple):
        flat_column_names.append('_'.join(str(part) for part in column_key))
    else:
        flat_column_names.append(column_key)
df.columns = flat_column_names


## Define variables

In [3]:
# Percentiles taken from the historical record.
hist_percentiles = [0.2, 0.333, 0.5]

# Tier assignment thresholds.
tier_thresholds = (0.9, 0.67, 0.3)

# Mapping between CDEC keys and CalSim variable names.
cdec_calsim_vars = dict(
    SHA="S_SHSTA",
    CLE="S_TRNTY",
    ORO="S_OROVL",
    FOL="S_FOLSM",
    NML="S_MELON",
    MIL="S_MLRTN",
    LUS="S_SLUIS_SWP",
    SLF="S_SLUIS_CVP",
)

# CDEC key file name.
cdec_file = "CDEC_Station_Key.csv"

# Reservoir metadata output file name.
metadata_file = "reservoir_metadata_table.csv"

## Aux functions

In [4]:
def find_calsim_model_root(start_dir=None, folder_name="CalSim3_Model_Runs"):
    """Locate ``folder_name`` in ``start_dir`` or the nearest ancestor directory.

    Parameters
    ----------
    start_dir : str or None
        Directory where the upward search begins. ``None`` (the default) means
        the working directory *at call time*; a default of ``os.getcwd()`` in
        the signature would be frozen when the function is defined and would
        ignore any later ``os.chdir``.
    folder_name : str
        Name of the directory to look for.

    Returns
    -------
    str
        Absolute path of the first matching directory found while walking up.

    Raises
    ------
    FileNotFoundError
        If the filesystem root is reached without finding ``folder_name``.
    """
    # abspath makes relative inputs such as "." walk upward correctly;
    # os.path.dirname(".") is "" and would otherwise break the search.
    current = os.path.abspath(os.getcwd() if start_dir is None else start_dir)
    while True:
        candidate = os.path.join(current, folder_name)
        # isdir ignores plain files that happen to share the name and does not
        # raise on directories that cannot be listed.
        if os.path.isdir(candidate):
            return candidate
        parent = os.path.dirname(current)
        if parent == current:
            # dirname() is a fixed point only at the filesystem root.
            raise FileNotFoundError(f"Could not find {folder_name} in any parent directories.")
        current = parent

def get_relative_folder(full_folder_path, known_tail):
    """Return ``known_tail`` (as it appears in ``full_folder_path``) prefixed by ``../..``.

    Both arguments are normalised to the platform separator before matching,
    so a tail written with backslashes is also recognised on POSIX systems and
    one written with forward slashes is recognised on Windows. The match is
    case-insensitive.

    Parameters
    ----------
    full_folder_path : str
        Path that contains ``known_tail`` somewhere inside it.
    known_tail : str
        Folder sub-path to locate, e.g. ``"CalSim3_Model_Runs/Scenarios"``.

    Returns
    -------
    str
        ``os.path.join("..", "..", <everything from the start of the match>)``.
        The two ``..`` levels are fixed by this function.

    Raises
    ------
    ValueError
        If ``known_tail`` does not occur in ``full_folder_path``.
    """
    def _normalize(path):
        # Unify both separator styles first; os.path.normpath alone does not
        # translate backslashes on POSIX.
        return os.path.normpath(path.replace("\\", os.sep).replace("/", os.sep))

    full_folder_path = _normalize(full_folder_path)
    known_tail = _normalize(known_tail)

    # Find where the known folder structure starts
    idx = full_folder_path.lower().find(known_tail.lower())

    if idx == -1:
        raise ValueError("Known tail not found in full path.")

    relative_suffix = full_folder_path[idx:]

    return os.path.join("..", "..", relative_suffix)

def generate_tier_assignment_matrix(
    df, cdec_df, start_date="1921-10-01",
    percentiles=[0.25, 0.5, 0.9], tier_thresholds=(0.9, 0.5, 0.2), 
    saveprobs = False, verbose = False
):
    """Assign a storage tier to every scenario for each reservoir listed in ``cdec_df``.

    For each row of ``cdec_df`` the historical CSV named in ``filename`` is read
    from ``<model root>/Scenarios/CDEC_Historical_Monthly_Storage``, quantiles of
    its May storage values are taken as thresholds, and the last April value of
    every year of each matching scenario column in ``df`` is classified against
    those thresholds. The fractions of years falling in each band determine the
    tier. The tier matrix is always written to ``tier_assignment.csv``; the
    probability matrix is written to ``level_probabilities.csv`` only when
    ``saveprobs`` is true.

    Parameters
    ----------
    df : pandas.DataFrame
        Scenario data. Columns whose name contains the CalSim variable and
        ``"_STORAGE_"`` (and not ``"LEVEL"``) are used; the scenario id is the
        first ``s`` + four digits found in the column name. ``df`` is not
        modified.
    cdec_df : pandas.DataFrame
        Must provide the columns ``CalSim_Variable`` and ``filename``
        (and ``ReservoirName`` when ``verbose`` is true).
    start_date : str
        Start of the monthly (``freq="MS"``) date index that is generated only
        when ``df.index`` is not already datetime-like.
    percentiles : sequence of three floats
        Low, mid and high quantiles of the historical record, in that order.
    tier_thresholds : sequence of three floats
        Cut-offs ``(tt1, tt2, tt3)``: tier 1 if ``top >= tt1``; tier 2 if
        ``top + mid >= tt2``; tier 3 if ``top + mid >= tt3``; otherwise tier 4.
    saveprobs : bool
        Also write the probability matrix to CSV.
    verbose : bool
        Print per-reservoir and per-scenario diagnostics.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        ``(tier_matrix, prob_matrix)``, both indexed by scenario id. Two empty
        frames are returned when the model root cannot be located, so callers
        can always unpack the result.

    Raises
    ------
    ValueError
        If ``percentiles`` or ``tier_thresholds`` does not hold exactly three
        values.
    """
    # Private copy: never alias the shared mutable default, and accept any sequence.
    percentiles = list(percentiles)
    # Unpacking up front fails fast on a mis-sized configuration instead of
    # failing once per reservoir inside the best-effort loop below.
    low_q, mid_q, high_q = percentiles
    tt1, tt2, tt3 = tier_thresholds

    tier_columns = ["Scenario", "Variable", "TopProb", "MidProb", "LowProb", "BotProb", "StorageTier"]

    def load_historical_storage_csv(filepath):
        """Read a historical storage CSV and return its valid DATE/STORAGE rows."""
        df_raw = pd.read_csv(filepath, header=None)
        has_marker = df_raw.apply(
            lambda row: row.astype(str).str.contains('RESERVOIR STORAGE').any(), axis=1
        )
        if not has_marker.any():
            raise ValueError(f"no 'RESERVOIR STORAGE' header row found in {filepath}")
        # The first row mentioning the marker is used as the header row.
        start_row = has_marker[has_marker].index[0]

        df_data = pd.read_csv(filepath, skiprows=start_row)
        df_data.columns = df_data.columns.str.strip()
        storage_col = next((col for col in df_data.columns if "RESERVOIR STORAGE" in col.upper()), None)
        if storage_col is None:
            raise ValueError(f"no 'RESERVOIR STORAGE' column found in {filepath}")

        # Build a fresh frame rather than adding columns to a filtered one;
        # unparseable dates or storage values become NaT/NaN and are dropped.
        parsed = pd.DataFrame({
            "DATE": pd.to_datetime(df_data.iloc[:, 0], format="%Y-%m-%d", errors="coerce"),
            "STORAGE": pd.to_numeric(df_data[storage_col], errors="coerce"),
        })
        return parsed.dropna(subset=["DATE", "STORAGE"])

    def extract_historical_thresholds(hist_df, quantiles):
        """Quantiles of the first May storage value of each year, divided by 1000."""
        may = hist_df[hist_df["DATE"].dt.month == 5]
        if may.empty:
            # Quantiles of an empty series are NaN; every comparison against
            # them is False, which would silently push all scenarios to tier 4.
            raise ValueError("historical record contains no May observations")
        may_1 = may.groupby(may["DATE"].dt.year).first()
        thresholds = may_1["STORAGE"].quantile(quantiles)
        return thresholds / 1000  # Convert AF to TAF

    def extract_variable_by_scenario(data, variable):
        """Select the storage columns of ``variable`` (level columns excluded)."""
        return data[
            [col for col in data.columns
             if variable in col and "_STORAGE_" in col and "LEVEL" not in col.upper()]
        ]

    def assign_tiers_from_calsim(var_df, thresholds, date_index, var):
        """Classify each scenario column of ``var_df`` and return one row per scenario."""
        # The thresholds do not depend on the scenario; look them up once.
        low_thresh = thresholds[low_q]
        mid_thresh = thresholds[mid_q]
        high_thresh = thresholds[high_q]

        tier_rows = []

        for col in var_df.columns:
            match = re.search(r's\d{4}', col)
            if not match:
                continue
            sid = match.group(0)

            series = var_df[col].copy()
            if not pd.api.types.is_datetime64_any_dtype(series.index):
                series.index = date_index

            april_series = series[series.index.month == 4]
            april_by_year = april_series.groupby(april_series.index.year).last()
            print(f"\n Scenario {sid} ({var})")

            if verbose:
                print("  April-end values:")
                print(april_by_year.head())

            if april_by_year.empty:
                print(f" No April data found for {var} in scenario {sid}")
                continue

            top = (april_by_year >= high_thresh).sum()
            mid = ((april_by_year >= mid_thresh) & (april_by_year < high_thresh)).sum()
            low = ((april_by_year >= low_thresh) & (april_by_year < mid_thresh)).sum()
            bot = (april_by_year < low_thresh).sum()
            total = len(april_by_year)

            top_frac = top / total
            mid_frac = mid / total
            low_frac = low / total
            bot_frac = bot / total

            if top_frac >= tt1:
                tier = 1
            elif (top_frac + mid_frac) >= tt2:
                tier = 2
            elif (top_frac + mid_frac) >= tt3:
                tier = 3
            else:
                tier = 4

            tier_rows.append({
                "Scenario": sid,
                "Variable": var,
                "TopProb": round(top_frac, 3),
                "MidProb": round(mid_frac, 3),
                "LowProb": round(low_frac, 3),
                "BotProb": round(bot_frac, 3),
                "StorageTier": tier
            })

        if not tier_rows:
            # drop_duplicates(subset=...) raises KeyError on a frame without
            # those columns, so return an explicitly-shaped empty frame.
            return pd.DataFrame(columns=tier_columns)
        return pd.DataFrame(tier_rows).drop_duplicates(subset=["Scenario", "Variable"])

    def ensure_directory(path):
        """Create ``path`` if it is missing, announcing it first."""
        if not os.path.exists(path):
            print("Warning: directory " + path + " does not exists and will be created")
            os.makedirs(path, exist_ok=True)

    try:
        base_model_dir = find_calsim_model_root()
    except FileNotFoundError as e:
        print(e)
        # Same arity as the success path so `a, b = generate_...()` never breaks.
        return pd.DataFrame(), pd.DataFrame()

    hist_data_dir = os.path.join(base_model_dir, "Scenarios", "CDEC_Historical_Monthly_Storage")
    tiers_output_dir = os.path.join(
        base_model_dir, "Scenarios", "Performance_Metrics", "Tiered_Outcome_Measures", "Reservoir_Storage", "Tiers"
    )
    metrics_output_dir = os.path.join(
        base_model_dir, "Scenarios", "Performance_Metrics", "Metrics", "Reservoir_Storage"
    )

    # Derive the date index without touching the caller's frame (no index
    # replacement, no extra "DATE" column).
    if pd.api.types.is_datetime64_any_dtype(df.index):
        date_index = df.index
    else:
        date_index = pd.date_range(start=start_date, periods=len(df), freq="MS")

    tier_matrix = pd.DataFrame()
    prob_matrix = pd.DataFrame()

    for _, row in cdec_df.iterrows():
        var = row["CalSim_Variable"]
        file = row["filename"]
        label = f"{var}_Storage"

        if verbose:
            print(f"\n Processing reservoir: {row['ReservoirName']}")
            print(f"  ↳ CalSim variable: {var}")
            print(f"  ↳ Historical file: {file}")

        # Best effort per reservoir: one unreadable file must not abort the
        # whole run, so the failure is reported and the loop moves on.
        try:
            hist_path = os.path.join(hist_data_dir, file)
            hist_df = load_historical_storage_csv(hist_path)
            thresholds = extract_historical_thresholds(hist_df, percentiles)
            if verbose:
                print(f"  ↳ Historical thresholds: {thresholds.to_dict()}")

            var_df = extract_variable_by_scenario(df, var)
            if verbose:
                print(f"  ↳ Matched CalSim columns: {var_df.columns.tolist()}")

            if var_df.empty:
                print(f" No CalSim data found for variable {var}")
                continue

            tier_df = assign_tiers_from_calsim(var_df, thresholds, date_index, var)

            for _, r in tier_df.iterrows():
                sid = r["Scenario"]
                prob_matrix.loc[sid, f"{label}_TopProb"] = r["TopProb"]
                prob_matrix.loc[sid, f"{label}_MidProb"] = r["MidProb"]
                prob_matrix.loc[sid, f"{label}_LowProb"] = r["LowProb"]
                prob_matrix.loc[sid, f"{label}_BotProb"] = r["BotProb"]
                tier_matrix.loc[sid, f"{label}_Tier"] = r["StorageTier"]

        except Exception as e:
            print(f" Failed to process {var}: {e}")
            continue

    tier_matrix.index.name = "Scenario"
    prob_matrix.index.name = "Scenario"

    if verbose:
        print("tier_matrix:")
        print(tier_matrix.head(2))
        print("prob_matrix:")
        print(prob_matrix.head(2))

    # Make sure both output directories exist before writing.
    ensure_directory(tiers_output_dir)
    ensure_directory(metrics_output_dir)

    tiers_output_path = os.path.join(tiers_output_dir, "tier_assignment.csv")
    metrics_output_path = os.path.join(metrics_output_dir, "level_probabilities.csv")
    tier_matrix.to_csv(tiers_output_path)
    print(f"\n Tier assignment CSV saved to:\n{tiers_output_path}")
    if saveprobs:
        prob_matrix.to_csv(metrics_output_path)
        print(f"\n Level probabilities CSV saved to:\n{metrics_output_path}")

    return tier_matrix, prob_matrix


In [5]:
base_model_dir = find_calsim_model_root()

hist_full_subdir = os.path.join(base_model_dir, "Scenarios", "CDEC_Historical_Monthly_Storage")
# Build the tail with os.path.join so its separators match the platform;
# a hard-coded backslash literal only matches on Windows.
known_tail = os.path.join("CalSim3_Model_Runs", "Scenarios", "CDEC_Historical_Monthly_Storage")

hist_subdir = get_relative_folder(hist_full_subdir, known_tail)
print("hist_subdir: " + hist_subdir)

# Read the CDEC station key table and tidy its column names.
key_file = os.path.join(hist_subdir, cdec_file)
cdec_df = pd.read_csv(key_file)
cdec_df.columns = cdec_df.columns.str.strip()

# Keep only the stations that have a CalSim counterpart. The explicit copy
# makes the column assignments below operate on an independent frame.
cdec_df["CalSim_Variable"] = cdec_df["CDEC_Key"].map(cdec_calsim_vars)
cdec_df = cdec_df.dropna(subset=["CalSim_Variable"]).copy()

# Name and location of each station's historical storage file.
file_prefix = "20250506_MonthlyResData_CDEC-"
cdec_df["filename"] = file_prefix + cdec_df["CDEC_Key"] + ".csv"
cdec_df["file_path"] = cdec_df["filename"].apply(lambda f: os.path.join(hist_subdir, f))

final_table = cdec_df[["ReservoirName", "CDEC_Key", "CalSim_Variable", "filename", "file_path"]]
print(final_table)

# hist_subdir is known to exist here: the key file was just read from it,
# so no directory creation is needed before writing the metadata table.
output_path = os.path.join(hist_subdir, metadata_file)
final_table.to_csv(output_path, index=False)
print(f"\n Metadata table saved to: {output_path}")



hist_subdir: ..\..\CalSim3_Model_Runs\Scenarios\CDEC_Historical_Monthly_Storage
        ReservoirName CDEC_Key CalSim_Variable  \
0              Shasta      SHA         S_SHSTA   
1             Trinity      CLE         S_TRNTY   
2            Oroville      ORO         S_OROVL   
3              Folsom      FOL         S_FOLSM   
4         New Melones      NML         S_MELON   
5           Millerton      MIL         S_MLRTN   
6  San Luis - Federal      SLF     S_SLUIS_CVP   
7    San Luis - State      LUS     S_SLUIS_SWP   

                               filename  \
0  20250506_MonthlyResData_CDEC-SHA.csv   
1  20250506_MonthlyResData_CDEC-CLE.csv   
2  20250506_MonthlyResData_CDEC-ORO.csv   
3  20250506_MonthlyResData_CDEC-FOL.csv   
4  20250506_MonthlyResData_CDEC-NML.csv   
5  20250506_MonthlyResData_CDEC-MIL.csv   
6  20250506_MonthlyResData_CDEC-SLF.csv   
7  20250506_MonthlyResData_CDEC-LUS.csv   

                                           file_path  
0  ..\..\CalSim3_Model_Run

In [6]:
# The historical-data directory is resolved inside the function, so it is not
# an argument. The third positional parameter is `start_date`; passing a path
# there would be used as the start of the generated date index whenever `df`
# does not already have a datetime index.
tier_df, prob_df = generate_tier_assignment_matrix(
    df, cdec_df,
    percentiles=hist_percentiles,
    tier_thresholds=tier_thresholds
)



 Scenario s0001 (S_SHSTA)

 Scenario s0002 (S_SHSTA)

 Scenario s0003 (S_SHSTA)

 Scenario s0004 (S_SHSTA)

 Scenario s0005 (S_SHSTA)

 Scenario s0006 (S_SHSTA)

 Scenario s0007 (S_SHSTA)

 Scenario s0008 (S_SHSTA)

 Scenario s0009 (S_SHSTA)

 Scenario s0010 (S_SHSTA)

 Scenario s0011 (S_SHSTA)

 Scenario s0012 (S_SHSTA)

 Scenario s0013 (S_SHSTA)

 Scenario s0014 (S_SHSTA)

 Scenario s0015 (S_SHSTA)

 Scenario s0016 (S_SHSTA)

 Scenario s0018 (S_SHSTA)

 Scenario s0019 (S_SHSTA)

 Scenario s0020 (S_SHSTA)

 Scenario s0021 (S_SHSTA)

 Scenario s0022 (S_SHSTA)

 Scenario s0023 (S_SHSTA)

 Scenario s0024 (S_SHSTA)

 Scenario s0025 (S_SHSTA)

 Scenario s0027 (S_SHSTA)

 Scenario s0029 (S_SHSTA)

 Scenario s0030 (S_SHSTA)

 Scenario s0039 (S_SHSTA)

 Scenario s0040 (S_SHSTA)

 Scenario s0041 (S_SHSTA)

 Scenario s0042 (S_SHSTA)

 Scenario s0044 (S_SHSTA)

 Scenario s0046 (S_SHSTA)

 Scenario s0047 (S_SHSTA)

 Scenario s0051 (S_SHSTA)

 Scenario s0056 (S_SHSTA)

 Scenario s0062 (S_SHSTA)



In [7]:
# Display the tier matrix: one row per scenario, one "<variable>_Storage_Tier" column per reservoir.
tier_df

Unnamed: 0_level_0,S_SHSTA_Storage_Tier,S_TRNTY_Storage_Tier,S_OROVL_Storage_Tier,S_FOLSM_Storage_Tier,S_MELON_Storage_Tier,S_MLRTN_Storage_Tier,S_SLUIS_CVP_Storage_Tier,S_SLUIS_SWP_Storage_Tier
Scenario,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
s0001,2.0,3.0,2.0,3.0,2.0,3.0,3.0,3.0
s0002,2.0,3.0,3.0,3.0,2.0,3.0,3.0,3.0
s0003,3.0,3.0,3.0,3.0,2.0,2.0,3.0,3.0
s0004,3.0,3.0,3.0,3.0,2.0,2.0,3.0,3.0
s0005,3.0,3.0,3.0,3.0,2.0,3.0,3.0,3.0
s0006,2.0,3.0,2.0,3.0,2.0,3.0,3.0,3.0
s0007,2.0,3.0,3.0,3.0,2.0,3.0,3.0,3.0
s0008,2.0,3.0,3.0,3.0,2.0,3.0,3.0,3.0
s0009,2.0,3.0,3.0,3.0,2.0,3.0,3.0,2.0
s0010,2.0,3.0,3.0,3.0,2.0,3.0,2.0,2.0


In [8]:
# Display the probability matrix: Top/Mid/Low/Bot fractions per scenario for each reservoir variable.
prob_df

Unnamed: 0_level_0,S_SHSTA_Storage_TopProb,S_SHSTA_Storage_MidProb,S_SHSTA_Storage_LowProb,S_SHSTA_Storage_BotProb,S_TRNTY_Storage_TopProb,S_TRNTY_Storage_MidProb,S_TRNTY_Storage_LowProb,S_TRNTY_Storage_BotProb,S_OROVL_Storage_TopProb,S_OROVL_Storage_MidProb,...,S_MLRTN_Storage_LowProb,S_MLRTN_Storage_BotProb,S_SLUIS_CVP_Storage_TopProb,S_SLUIS_CVP_Storage_MidProb,S_SLUIS_CVP_Storage_LowProb,S_SLUIS_CVP_Storage_BotProb,S_SLUIS_SWP_Storage_TopProb,S_SLUIS_SWP_Storage_MidProb,S_SLUIS_SWP_Storage_LowProb,S_SLUIS_SWP_Storage_BotProb
Scenario,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
s0001,0.55,0.25,0.06,0.14,0.42,0.1,0.24,0.24,0.17,0.51,...,0.15,0.53,0.56,0.04,0.22,0.18,0.31,0.09,0.04,0.56
s0002,0.5,0.23,0.11,0.16,0.4,0.09,0.22,0.29,0.15,0.51,...,0.14,0.54,0.52,0.07,0.16,0.25,0.33,0.08,0.08,0.51
s0003,0.39,0.25,0.12,0.24,0.42,0.08,0.19,0.31,0.1,0.43,...,0.07,0.23,0.41,0.11,0.1,0.38,0.32,0.06,0.05,0.57
s0004,0.34,0.23,0.14,0.29,0.38,0.09,0.14,0.39,0.08,0.42,...,0.1,0.21,0.38,0.11,0.09,0.42,0.31,0.08,0.08,0.53
s0005,0.31,0.21,0.15,0.33,0.33,0.13,0.1,0.44,0.08,0.41,...,0.1,0.23,0.37,0.07,0.1,0.46,0.3,0.09,0.07,0.54
s0006,0.55,0.2,0.06,0.13,0.42,0.1,0.19,0.23,0.38,0.3,...,0.07,0.45,0.5,0.14,0.23,0.07,0.28,0.07,0.05,0.54
s0007,0.55,0.21,0.06,0.12,0.42,0.1,0.18,0.24,0.38,0.29,...,0.07,0.45,0.52,0.11,0.26,0.05,0.53,0.07,0.06,0.28
s0008,0.55,0.21,0.06,0.12,0.42,0.1,0.19,0.23,0.38,0.29,...,0.07,0.45,0.54,0.11,0.2,0.09,0.58,0.07,0.06,0.23
s0009,0.54,0.22,0.05,0.13,0.41,0.11,0.19,0.23,0.38,0.29,...,0.07,0.45,0.55,0.11,0.2,0.08,0.62,0.05,0.08,0.19
s0010,0.54,0.22,0.05,0.13,0.41,0.11,0.19,0.23,0.38,0.29,...,0.07,0.45,0.55,0.12,0.19,0.08,0.62,0.05,0.09,0.18


In [9]:
# Completion marker: reaching this cell means every cell above ran.
print("Done!")

Done!
