Import

In [None]:
import os
from contextlib import redirect_stdout

import sys
sys.path.append('./coeqwalpackage')

import numpy as np
import pandas as pd
import datetime as dt
import re
from coeqwalpackage.metrics import *
import cqwlutils as cu
import plotting as pu
from collections import OrderedDict

### Initialize

In [None]:
# Control workbook and tab handed to cu.read_init_file below.
CtrlFile, CtrlTab = 'CalSim3DataExtractionInitFile_v4.xlsx', 'Init'

# read_init_file returns one flat sequence of settings; the names below are
# bound positionally, so their order must mirror the helper's return order.
(
    ScenarioListFile, ScenarioListTab, ScenarioListPath,
    DVDssNamesOutPath, SVDssNamesOutPath, ScenarioIndicesOutPath, DssDirsOutPath,
    VarListPath, VarListFile, VarListTab,
    VarOutPath, DataOutPath, ConvertDataOutPath,
    ExtractionSubPath, DemandDeliverySubPath, ModelSubPath,
    GroupDataDirPath, ScenarioDir,
    DVDssMin, DVDssMax, SVDssMin, SVDssMax,
    NameMin, NameMax, DirMin, DirMax,
    IndexMin, IndexMax, StartMin, StartMax, EndMin, EndMax,
    VarMin, VarMax,
    DemandFilePath, DemandFileName, DemandFileTab, DemMin, DemMax,
    InflowOutSubPath, InflowFilePath, InflowFileName, InflowFileTab,
    InflowMin, InflowMax,
) = cu.read_init_file(CtrlFile, CtrlTab)

### Read scenario indices

In [None]:
# Read the scenario-index cell range (header included) from the scenario list.
indexhdr, index_name = cu.read_from_excel(
    ScenarioListPath, ScenarioListTab, IndexMin, IndexMax, hdr=True
)
# Keep only the first field of every returned row. Positional indexing is
# retained because the container type returned by read_from_excel is not
# visible here.
index_names = [index_name[i][0] for i in range(len(index_name))]
index_names

### Read dataset

In [None]:
# Load the dataset and the DSS names, then add a water-year column.
# read_in_df / add_water_year_column are not defined in this notebook;
# presumably they come from the `coeqwalpackage.metrics` star import.
df, dss_names = read_in_df(ConvertDataOutPath,DVDssNamesOutPath)
df = add_water_year_column(df)
# Bare expression: notebook display of the loaded frame.
df

### Define variables

In [None]:
# Variables used for the tier calculations.
in_delta_vars = [
    "EM_EC_MONTH",
    "JP_EC_MONTH",
]
export_vars = [
    "TRACYEC_MAX14DAY",
    "BANKSEC_MAX14DAY",
]

# Salinity thresholds; the in-delta and export sets are kept as separate
# objects so either one can be tuned independently.
indelta_thresholds = dict(Top=2500, Mid=1600, Low=900)
export_thresholds = dict(Top=2500, Mid=1600, Low=900)

# Station tokens used to recognise columns by name.
indelta_station_list = ["EM", "JP"]
export_station_list = ["BANKSEC", "TRACYEC"]

# Tier rules passed to calc_indelta_tier; insertion order is the evaluation
# order (first matching tier wins).
indelta_rules = OrderedDict()
indelta_rules[1] = dict(LT_A=0.75, LT_B=None, GT_C=0.05)
indelta_rules[2] = dict(LT_A=0.65, LT_B=0.75, GT_C=0.12)
indelta_rules[3] = dict(LT_A=0.55, LT_B=0.65, GT_C=0.20)

# Variable name used for the X2 metrics.
x2 = 'X2_PRV_KM'

# Variables exported as raw salinity series at the compliance points.
compliance_points_indelta = [
    "EM_EC_MONTH",
    "JP_EC_MONTH",
    "RS_EC_MONTH",
    "CO_EC_MONTH",
]
compliance_points_export = ["BANKSEC", "TRACYEC"]

### Subset data set

In [None]:
# Subset of `df` for the in-delta variables (in_delta_vars); this frame is the
# input to calc_indelta_tier further down.
# create_subset_list is not defined in this notebook - presumably it comes
# from the `coeqwalpackage.metrics` star import.
in_delta_df = create_subset_list(df, in_delta_vars)
# Bare expression: notebook display.
in_delta_df

In [None]:
# Subset of `df` for the export variables (export_vars); this frame is the
# input to generate_salinity_tier_assignment_matrix further down.
export_df = create_subset_list(df, export_vars)
# Bare expression: notebook display.
export_df

### X2

In [None]:
# Annual averages of the X2 variable, restricted to a single month each
# (months=[4] for April, months=[9] for September), labelled with units "KM".
# NOTE(review): compute_annual_means / compute_cv are external helpers; the
# exact shape of what they return is not visible here.
april_x2_ann_avg = compute_annual_means(df, x2, units="KM", months=[4])
september_x2_ann_avg = compute_annual_means(df, x2, units="KM", months=[9])

# Coefficient of variation for the same two months. The third positional
# argument looks like the name given to the output column - TODO confirm.
# The index is then labelled 'Scenario' so the exported CSV has a header for it.
april_x2_ann_cv = compute_cv(df, x2, "April_X2_CV", months=[4], units="KM")
april_x2_ann_cv.index.name = 'Scenario'
september_x2_ann_cv = compute_cv(df, x2, "September_X2_CV", months=[9], units="KM")
september_x2_ann_cv.index.name = 'Scenario'

In [None]:
# Flatten the column index of both annual-average frames for export: keep only
# the 4th underscore-separated token of column level 'B', prefixed with the
# month label, and drop the columns' name.
# NOTE(review): for x2 = 'X2_PRV_KM' that 4th token is presumably the scenario
# ID; the fixed position 3 stops being valid if the variable name changes.
for _x2_frame, _label_prefix in (
    (april_x2_ann_avg, 'AprilX2_'),
    (september_x2_ann_avg, 'SeptemberX2_'),
):
    _x2_frame.columns = _label_prefix + _x2_frame.columns.get_level_values('B').str.split('_').str[3]
    _x2_frame.columns.name = None

### Salinity at compliance points

In [None]:
# Raw salinity series at the compliance points, subset from the full dataset;
# these frames are written to CSV in the "Save Files" section.
# NOTE(review): compliance_points_export holds bare station tokens
# ("BANKSEC", "TRACYEC") whereas export_vars uses full variable names
# ("BANKSEC_MAX14DAY", ...). Confirm create_subset_list matches the bare
# tokens as intended.
salinity_compliance_points_indelta_df = create_subset_list(df, compliance_points_indelta)
salinity_compliance_points_export_df = create_subset_list(df, compliance_points_export)

In [None]:
# Flatten the column index of both compliance-point frames for export: keep
# only column level 'B' and drop the columns' name.
for _salinity_frame in (
    salinity_compliance_points_indelta_df,
    salinity_compliance_points_export_df,
):
    _salinity_frame.columns = _salinity_frame.columns.get_level_values('B')
    _salinity_frame.columns.name = None

### Tier calculation functions

In [None]:
def calc_indelta_tier(
    df,
    scenID,
    stations=["EM_EC_MONTH", "JP_EC_MONTH"],
    thresholds={"Top": 2500, "Mid": 1600, "Low": 900},
    tier_rules=OrderedDict([
        (1, {"LT_A": 0.75, "LT_B": None, "GT_C": 0.05}),
        (2, {"LT_A": 0.65, "LT_B": 0.75, "GT_C": 0.12}),
        (3, {"LT_A": 0.55, "LT_B": 0.65, "GT_C": 0.20}),
    ])
):
    """
    Calculate the in-delta tier designation for a given scenario.

    For every variable in `stations`, the fraction of time steps below the
    "Low" threshold (LT_A), below the "Mid" threshold (LT_B) and above the
    "Top" threshold (GT_C) is computed. The worst case across variables
    (smallest LT_A, smallest LT_B, largest GT_C) is then checked against
    `tier_rules` in iteration order, and the first tier whose conditions all
    hold is returned.

    Parameters
    ----------
    df : pd.DataFrame
        Salinity data. Columns must be a MultiIndex whose second level holds
        labels of the form "<variable>_<scenID>".
    scenID : str
        Scenario identifier.
    stations : list of str, optional
        Variable names to evaluate (default: ["EM_EC_MONTH", "JP_EC_MONTH"]).
    thresholds : dict, optional
        Salinity thresholds with keys "Top", "Mid" and "Low"
        (default: {"Top": 2500, "Mid": 1600, "Low": 900}).
    tier_rules : mapping, optional
        Maps tier -> {"LT_A": ..., "LT_B": ..., "GT_C": ...}. A tier matches
        when min LT_A >= rule["LT_A"], min LT_B >= rule["LT_B"] and
        max GT_C < rule["GT_C"]; a rule value of None disables that condition.
        Example (default):
        ([
            (1, {"LT_A": 0.75, "LT_B": None, "GT_C": 0.05}),
            (2, {"LT_A": 0.65, "LT_B": 0.75, "GT_C": 0.12}),
            (3, {"LT_A": 0.55, "LT_B": 0.65, "GT_C": 0.20}),
        ])

    Returns
    -------
    The key of the first matching tier, or np.nan if no rule matches.

    Raises
    ------
    ValueError
        If `stations` is empty, if fewer columns than stations are found for
        the scenario, or if one variable matches more than one column.
    KeyError
        If a "<variable>_<scenID>" column is missing (raised by pandas).
    ZeroDivisionError
        If the selected data has no rows.
    """
    import pandas as pd
    idx = pd.IndexSlice

    # The default arguments are shared objects; they are only read here.
    if len(stations) == 0:
        raise ValueError("stations must name at least one salinity variable")

    tA, tB, tC = thresholds["Low"], thresholds["Mid"], thresholds["Top"]

    # get the data for this scenario (substring match on the 2nd column level)
    selcols = [c for c in df.columns if scenID in c[1]]
    if len(selcols) < len(stations):
        raise ValueError(f"Didn't find the salinity columns for scenario {scenID}")

    thisdat = df.loc[:, selcols]

    # store fractions for each variable
    fracs = {}
    for var in stations:
        selected = np.asarray(thisdat.loc[:, idx[:, f"{var}_{scenID}"]])
        if selected.ndim == 2 and selected.shape[1] != 1:
            raise ValueError(
                f"Expected exactly one column for {var}_{scenID}, "
                f"found {selected.shape[1]}"
            )
        # Flatten the (n, 1) selection so every fraction is a plain scalar.
        values = selected.ravel()
        n_steps = values.size

        # NaN compares False both ways, so missing values count against
        # every fraction (they stay in the denominator).
        fracs[var] = {
            "LT_A": np.count_nonzero(values < tA) / n_steps,
            "LT_B": np.count_nonzero(values < tB) / n_steps,
            "GT_C": np.count_nonzero(values > tC) / n_steps,
        }

    # aggregate across vars: the worst-performing variable decides
    max_GT_C = max(v["GT_C"] for v in fracs.values())
    min_LT_A = min(v["LT_A"] for v in fracs.values())
    min_LT_B = min(v["LT_B"] for v in fracs.values())

    # apply tier rules in order; first match wins
    for tier, rule in tier_rules.items():
        cond_A = min_LT_A >= rule["LT_A"] if rule["LT_A"] is not None else True
        cond_B = min_LT_B >= rule["LT_B"] if rule["LT_B"] is not None else True
        cond_C = max_GT_C < rule["GT_C"] if rule["GT_C"] is not None else True

        if cond_A and cond_B and cond_C:
            return tier

    # default if no rule matches
    return np.nan

In [None]:
def generate_salinity_tier_assignment_matrix(
    df,
    station_list=["EM", "JP"],
    thresholds={"Top": 2500, "Mid": 1600, "Low": 900},
    start_date="1921-10-01"
):
    """
    Assign a salinity tier (1-4) to every scenario found in `df`.

    A column belongs to the scenario named by the first "s" + four digits
    token in its name, and to the first station in `station_list` whose token
    appears as a "<station>_" prefix or a "_<station>_" infix. Scenarios
    lacking a column for any station, or with no row where all stations are
    non-NaN, are skipped. Rows are grouped by the calendar year of the index.

    Tiers are tested in this order:

    * Tier 4 - some year has a station with two or more values above "Top"
      (the year scan stops at the first such year).
    * Tier 3 - some year has a station with two or more values above "Mid",
      or more than 5% of years contain at least one value above "Mid".
    * Tier 2 - no year has a value above "Mid" and, in at least 95% of years,
      every station has 10 or more values within ["Low", "Mid"].
    * Tier 1 - in at least 95% of years every station has exactly 12 values
      below "Low".

    Scenarios that match no tier are reported on stdout and left out of the
    result. Progress messages are printed for every scenario.

    Parameters
    ----------
    df : pd.DataFrame
        One column per scenario/station. Column names may be strings or
        tuples (MultiIndex). The input frame is not modified.
    station_list : list of str, optional
        Station tokens to look for in the column names.
    thresholds : dict, optional
        Salinity thresholds with keys "Top", "Mid" and "Low".
    start_date : str, optional
        Used only when `df` has no datetime index: the rows are then assumed
        to be consecutive month-start steps beginning at this date.

    Returns
    -------
    pd.DataFrame
        Indexed by "Scenario" with a single "Salinity_Tier" column. When no
        scenario could be assigned, an empty frame with the same layout.
    """
    # Compile once; the pattern is applied to every column.
    scenario_pattern = re.compile(r's\d{4}')
    top, mid, low = thresholds["Top"], thresholds["Mid"], thresholds["Low"]

    def column_label(colname):
        # MultiIndex columns arrive as tuples; flatten them into one string
        # (str() guards against non-string level values).
        if isinstance(colname, tuple):
            return "_".join(str(part) for part in colname)
        return str(colname)

    def extract_scenario_id(colname):
        match = scenario_pattern.search(column_label(colname))
        return match.group(0) if match else None

    def extract_station_name(colname):
        name = column_label(colname)
        for st in station_list:
            if name.startswith(st + "_") or f"_{st}_" in name:
                return st
        return None

    def assign_tiers_by_scenario(df, date_series):
        tier_rows = []

        # scenario id -> {station -> column}; a later column for the same
        # scenario/station pair replaces an earlier one.
        scenario_map = {}
        for col in df.columns:
            sid = extract_scenario_id(col)
            station = extract_station_name(col)
            if sid and station:
                scenario_map.setdefault(sid, {})[station] = col

        print(f"Found {len(scenario_map)} scenarios: {list(scenario_map.keys())}")

        for sid, col_dict in scenario_map.items():
            if not all(st in col_dict for st in station_list):
                print(f" Skipping {sid}: missing one or more station columns")
                continue

            df_scenario = pd.DataFrame(
                {st: df[col_dict[st]] for st in station_list},
                index=date_series
            )
            df_scenario["Year"] = df_scenario.index.year

            # Only rows where every station has a value take part.
            valid_rows = df_scenario.dropna(subset=station_list)
            if valid_rows.empty:
                print(f" Skipping {sid}: all data is NaN")
                continue

            yearly = valid_rows.groupby("Year")
            total_years = len(yearly.groups)

            tier4_flag = False
            tier3_flag = False
            tier3_years_with_1month_over_mid = 0
            tier2_valid_years = 0
            tier1_valid_years = 0
            any_year_exceeds_mid = False

            for year, group in yearly:
                readings = [group[st] for st in station_list]

                if any((r > top).sum() >= 2 for r in readings):
                    tier4_flag = True
                    break

                # Count the values above Mid once per station and reuse the
                # counts for both Tier-3 criteria and the Tier-2 exclusion.
                over_mid_counts = [(r > mid).sum() for r in readings]

                if any(count >= 2 for count in over_mid_counts):
                    tier3_flag = True

                if any(count >= 1 for count in over_mid_counts):
                    tier3_years_with_1month_over_mid += 1
                    any_year_exceeds_mid = True
                else:
                    in_range_counts = [((r >= low) & (r <= mid)).sum() for r in readings]
                    if all(count >= 10 for count in in_range_counts):
                        tier2_valid_years += 1

                if all((r < low).sum() == 12 for r in readings):
                    tier1_valid_years += 1

            # Guards the ratios below against a scenario with no year groups.
            if total_years == 0:
                print(f" Scenario {sid}: No valid years with complete data.")
                continue

            if tier4_flag:
                tier = 4
            elif tier3_flag or (tier3_years_with_1month_over_mid / total_years > 0.05):
                tier = 3
            elif not any_year_exceeds_mid and (tier2_valid_years / total_years >= 0.95):
                tier = 2
            elif tier1_valid_years / total_years >= 0.95:
                tier = 1
            else:
                print(f" Scenario {sid} did not match any tier.")
                print(f"   Summary: tier3_flag={tier3_flag}, tier3_pct={tier3_years_with_1month_over_mid / total_years:.2f}, "
                      f"tier2_pct={tier2_valid_years / total_years:.2f}, tier1_pct={tier1_valid_years / total_years:.2f}, "
                      f"any_year_exceeds_mid={any_year_exceeds_mid}")
                continue

            print(f"→ Scenario {sid} assigned Tier {tier}")
            tier_rows.append({
                "Scenario": sid,
                "Salinity_Tier": tier
            })

        return pd.DataFrame(tier_rows, columns=["Scenario", "Salinity_Tier"])

    # Work on a copy so the caller's index is left untouched.
    df = df.copy()
    if not pd.api.types.is_datetime64_any_dtype(df.index):
        df.index = pd.date_range(start=start_date, periods=len(df), freq="MS")

    date_series = df.index
    tier_df = assign_tiers_by_scenario(df, date_series)

    if tier_df.empty:
        print(" No valid scenario-station pairs were found.")
        # Same layout as the normal result so downstream CSV headers agree.
        return pd.DataFrame(
            columns=["Salinity_Tier"],
            index=pd.Index([], name="Scenario"),
        )

    return tier_df.set_index("Scenario")


### In-Delta tier assignment (new version):

In [None]:
# Classify every scenario with calc_indelta_tier, keyed by scenario ID.
tiers = {}
for scenID in index_names:
    tiers[scenID] = calc_indelta_tier(
        df=in_delta_df,
        scenID=scenID,
        stations=in_delta_vars,
        thresholds=indelta_thresholds,
        tier_rules=indelta_rules,
    )
    # progress report
    print(f"assigned tier {tiers[scenID]} to scenario {scenID}")

# One row per scenario: the dict keys become the index (named 'ScenarioID'),
# the tier value is the single column.
# NOTE(review): the export tier table built by
# generate_salinity_tier_assignment_matrix names its index 'Scenario' -
# confirm the two CSV headers are meant to differ.
tier_indelta_df = pd.DataFrame.from_dict(
    tiers, orient='index', columns=['Salinity_Tier']
).rename_axis('ScenarioID')

In [None]:
# Bare expression: notebook display of the in-delta tier table.
tier_indelta_df

### In-Delta tier assignment (old version):

In [None]:
# tier_indeltaold_df = generate_salinity_tier_assignment_matrix(
#     df=in_delta_df,
#     station_list=indelta_station_list,
#     thresholds=indelta_thresholds,
#     start_date="1921-10-01"
# )

In [None]:
# tier_indeltaold_df

### Export tier assignment:

In [None]:
# Export tiers still use the year-by-year classifier
# (generate_salinity_tier_assignment_matrix), applied to the export subset.
# start_date only matters if export_df has no datetime index.
tier_export_df = generate_salinity_tier_assignment_matrix(
    df=export_df,
    station_list=export_station_list,
    thresholds=export_thresholds,
    start_date="1921-10-01"
)

In [None]:
# Bare expression: notebook display of the export tier table.
tier_export_df

### Save Files

In [None]:
def find_calsim_base_path(start_path, target_folder="CalSim3_Model_Runs"):
    """
    Locate `target_folder` in the directory two levels above `start_path`.

    Parameters
    ----------
    start_path : str
        Starting directory; it is made absolute before walking up.
    target_folder : str, optional
        Name of the folder expected inside the grandparent directory.

    Returns
    -------
    str
        Absolute path of the located folder.

    Raises
    ------
    FileNotFoundError
        If the grandparent directory has no such sub-directory.
    """
    # Climb two levels (per the original note: notebooks -> coeqwal -> DSP).
    grandparent = os.path.abspath(start_path)
    for _ in range(2):
        grandparent = os.path.dirname(grandparent)

    located = os.path.join(grandparent, target_folder)
    if not os.path.isdir(located):
        raise FileNotFoundError(f"{target_folder} not found alongside {grandparent}")
    return located


# Resolve the CalSim run folder relative to the current working directory.
base_dir = os.path.abspath(os.curdir)
calsim_base_path = find_calsim_base_path(base_dir)

# All salinity outputs go into one folder, created on first use.
_salinity_subfolders = (
    "Scenarios",
    "Performance_Metrics",
    "Tiered_Outcome_Measures",
    "Salinity",
)
salinity_output_dir = os.path.join(calsim_base_path, *_salinity_subfolders)
os.makedirs(salinity_output_dir, exist_ok=True)

# Destination files, one per exported table.
april_x2_ann_avg_path = os.path.join(salinity_output_dir, "AprilX2_AnnualAverage.csv")
april_x2_ann_cv_path = os.path.join(salinity_output_dir, "AprilX2_AnnualCV.csv")
september_x2_ann_avg_path = os.path.join(salinity_output_dir, "SeptemberX2_AnnualAverage.csv")
september_x2_ann_cv_path = os.path.join(salinity_output_dir, "SeptemberX2_AnnualCV.csv")
salinity_compliance_points_indelta_path = os.path.join(salinity_output_dir, "InDeltaSalinity.csv")
salinity_compliance_points_export_path = os.path.join(salinity_output_dir, "ExportSalinity.csv")

# Write the tables (index included) in the same order as before.
april_x2_ann_avg.to_csv(april_x2_ann_avg_path, index=True)
april_x2_ann_cv.to_csv(april_x2_ann_cv_path, index=True)
september_x2_ann_avg.to_csv(september_x2_ann_avg_path, index=True)
september_x2_ann_cv.to_csv(september_x2_ann_cv_path, index=True)
salinity_compliance_points_indelta_df.to_csv(salinity_compliance_points_indelta_path, index=True)
salinity_compliance_points_export_df.to_csv(salinity_compliance_points_export_path, index=True)

In [None]:
# Write the in-delta tier table (index included) to the salinity output folder.
salinity_output_path = os.path.join(salinity_output_dir, "InDeltaTierAssignment.csv")
tier_indelta_df.to_csv(salinity_output_path, index=True)

# salinity_output_path is rebound here, so after this cell it points at the
# export tier file only.
salinity_output_path = os.path.join(salinity_output_dir, "ExportTierAssignment.csv")
tier_export_df.to_csv(salinity_output_path, index=True)

In [None]:
# Show the folder the CSV files were written to.
print(salinity_output_dir)

In [None]:
# End-of-notebook marker: reaching this cell means every cell above ran.
print("Done!")