Import

In [1]:
import os
from contextlib import redirect_stdout

import sys
sys.path.append('./coeqwalpackage')

import numpy as np
import pandas as pd
import datetime as dt
import re
from coeqwalpackage.metrics import *
import cqwlutils as cu
import plotting as pu
from collections import OrderedDict

## Paths 

In [2]:
def find_calsim_base_path(start_path, target_folder="CalSim3_Model_Runs"):
    """
    Locate ``target_folder`` inside the directory two levels above ``start_path``.

    Parameters
    ----------
    start_path : str
        Directory to start from; it is made absolute before climbing.
    target_folder : str, optional
        Name of the folder expected inside the grandparent directory.

    Returns
    -------
    str
        Absolute path of the folder that was found.

    Raises
    ------
    FileNotFoundError
        If the grandparent directory does not contain ``target_folder``.
    """
    # Climb two levels: notebooks → coeqwal → DSP
    dsp_root = os.path.abspath(start_path)
    for _ in range(2):
        dsp_root = os.path.dirname(dsp_root)

    candidate = os.path.join(dsp_root, target_folder)
    if not os.path.isdir(candidate):
        raise FileNotFoundError(f"{target_folder} not found alongside {dsp_root}")
    return candidate

# Resolve the CalSim3 model-run folder relative to the current working directory.
base_dir = os.path.abspath(".")
calsim_base_path = find_calsim_base_path(base_dir)

# Both output folders share the <base>/Scenarios/Performance_Metrics prefix.
performance_metrics_dir = os.path.join(calsim_base_path, "Scenarios", "Performance_Metrics")

# Destination for the tier-assignment CSVs.
salinity_tiers_output_dir = os.path.join(
    performance_metrics_dir, "Tiered_Outcome_Measures", "Salinity", "Tiers"
)

# Destination for the underlying salinity metric CSVs.
salinity_data_output_dir = os.path.join(performance_metrics_dir, "Metrics", "Salinity")

# Create both folders up front so the save cell cannot fail on a missing directory.
for output_dir in (salinity_tiers_output_dir, salinity_data_output_dir):
    os.makedirs(output_dir, exist_ok=True)


## File names

In [3]:
# Metric CSVs written to the salinity data folder.
(
    april_x2_ann_avg_path,
    april_x2_ann_cv_path,
    september_x2_ann_avg_path,
    september_x2_ann_cv_path,
    salinity_compliance_points_indelta_path,
    salinity_compliance_points_export_path,
) = (
    os.path.join(salinity_data_output_dir, csv_name)
    for csv_name in (
        "AprilX2_AnnualAverage.csv",
        "AprilX2_AnnualCV.csv",
        "SeptemberX2_AnnualAverage.csv",
        "SeptemberX2_AnnualCV.csv",
        "InDeltaSalinity.csv",
        "ExportSalinity.csv",
    )
)

# Tier-assignment CSVs written to the salinity tiers folder.
indelta_salinity_output_path, export_salinity_output_path = (
    os.path.join(salinity_tiers_output_dir, csv_name)
    for csv_name in ("InDeltaTierAssignment.csv", "ExportTierAssignment.csv")
)


### Initialize

In [4]:
# Excel control workbook and the sheet in it that hold the extraction settings.
# The file name is relative — presumably resolved against the notebook's working
# directory by cu.read_init_file; confirm if the notebook is run from elsewhere.
CtrlFile = 'CalSim3DataExtractionInitFile_v4.xlsx'
CtrlTab = 'Init'

# The settings are unpacked positionally, so the order of names below must match
# the order in which cu.read_init_file returns them. This notebook only uses
# ScenarioListPath, ScenarioListTab, IndexMin, IndexMax, ConvertDataOutPath and
# DVDssNamesOutPath; the rest are bound but unused here.
ScenarioListFile, ScenarioListTab, ScenarioListPath, DVDssNamesOutPath, SVDssNamesOutPath, ScenarioIndicesOutPath, DssDirsOutPath, VarListPath, VarListFile, VarListTab, VarOutPath, DataOutPath, ConvertDataOutPath, ExtractionSubPath, DemandDeliverySubPath, ModelSubPath, GroupDataDirPath, ScenarioDir, DVDssMin, DVDssMax, SVDssMin, SVDssMax, NameMin, NameMax, DirMin, DirMax, IndexMin, IndexMax, StartMin, StartMax, EndMin, EndMax, VarMin, VarMax, DemandFilePath, DemandFileName, DemandFileTab, DemMin, DemMax, InflowOutSubPath, InflowFilePath, InflowFileName, InflowFileTab, InflowMin, InflowMax = cu.read_init_file(CtrlFile, CtrlTab)

### Read scenario indices

In [5]:
# Read the scenario-index range from the scenario list workbook, then keep the
# first entry of each returned row as the scenario ID (e.g. 's0001').
indexhdr, index_name = cu.read_from_excel(ScenarioListPath, ScenarioListTab, IndexMin, IndexMax, hdr=True)
index_names = [index_name[row][0] for row in range(len(index_name))]
index_names

['s0001',
 's0002',
 's0003',
 's0004',
 's0005',
 's0006',
 's0007',
 's0008',
 's0009',
 's0010',
 's0011',
 's0012',
 's0013',
 's0014',
 's0015',
 's0016',
 's0018',
 's0019',
 's0020',
 's0021',
 's0022',
 's0023',
 's0024',
 's0025',
 's0027',
 's0029',
 's0030',
 's0039',
 's0042',
 's0046',
 's0047',
 's0051',
 's0056',
 's0062',
 's0063']

### Read dataset

In [6]:
# Load the converted CalSim output and the DSS names list. read_in_df and
# add_water_year_column are not defined in this notebook — presumably they come
# from the wildcard import of coeqwalpackage.metrics.
df, dss_names = read_in_df(ConvertDataOutPath,DVDssNamesOutPath)
# Rebinds df to the helper's result; the displayed frame shows an added
# WaterYear column. Every later cell reads this rebound df.
df = add_water_year_column(df)
df

  return df_copy.drop(["Date", "Year", "Month"], axis=1)


A,CALSIM,CALSIM,CALSIM,CALSIM,CALSIM,CALSIM,CALSIM,CALSIM,CALSIM,CALSIM,...,CALCULATED,CALCULATED,CALCULATED,CALCULATED,CALCULATED,CALCULATED,CALCULATED,CALCULATED,CALCULATED,WaterYear
B,AWOANN_64_XADV_s0001,AWOANN_72_XA1DV_s0001,AWOANN_72_XA2DV_s0001,AWOANN_72_XA3DV_s0001,AWOANN_73_XADV_s0001,BANKSEC_MAX14DAY_s0001,COREQSACDV_s0001,CO_EC_MONTH_s0001,C_AMR004_s0001,C_AMR004_ADD_s0001,...,DEL_SOD_AG_s0063,DEL_SOD_MI_s0063,DEL_NOD_MI_s0063,TOTAL_EXPORTS_s0063,DEL_CVP_TOTAL_s0063,DEL_CVPSWP_TOTAL_s0063,DEL_CVP_PAG_TOTAL_s0063,DEL_CVP_PSCEX_TOTAL_s0063,DEL_CVP_PRF_TOTAL_s0063,Unnamed: 21_level_1
C,ANNUAL-APPLIED-WATER,ANNUAL-APPLIED-WATER,ANNUAL-APPLIED-WATER,ANNUAL-APPLIED-WATER,ANNUAL-APPLIED-WATER,SALINITY-APPROX,FLOW,SALINITY,CHANNEL,FLOW-ADDITIONAL-INSTREAM,...,DELIVERY-CALC,DELIVERY-CALC,DELIVERY-CALC,EXPORTS-CALC,DELIVERY-CALC,DELIVERY-CALC,DELIVERY-CALC,DELIVERY-CALC,DELIVERY-CALC,Unnamed: 21_level_2
D,1MON,1MON,1MON,1MON,1MON,1MON,1MON,1MON,1MON,1MON,...,1MON,1MON,1MON,1MON,1MON,1MON,1MON,1MON,1MON,Unnamed: 21_level_3
E,L2020A,L2020A,L2020A,L2020A,L2020A,L2020A,L2020A,L2020A,L2020A,L2020A,...,L2020A,L2020A,L2020A,L2020A,L2020A,L2020A,L2020A,L2020A,L2020A,Unnamed: 21_level_4
F,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,...,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,Unnamed: 21_level_5
Units,TAF,TAF,TAF,TAF,TAF,UMHOS/CM,CFS,UMHOS/CM,CFS,CFS,...,TAF,TAF,TAF,TAF,TAF,TAF,TAF,TAF,TAF,Unnamed: 21_level_6
1921-10-31,59.04970,136.83063,331.60556,128.51282,65.573940,284.73438,0.0000,2173.8489,2164.72950,1664.729500,...,246.803854,299.437513,13.164266,200.569021,393.096087,639.899978,165.176618,112.716413,94.618369,1922
1921-11-30,59.04970,136.83063,331.60556,128.51282,65.573940,281.05875,0.0000,2621.5984,2206.79600,1706.795900,...,52.768843,306.344024,12.206396,571.304029,97.698616,150.467459,14.390856,13.762068,48.402718,1922
1921-12-31,59.04970,136.83063,331.60556,128.51282,65.573940,376.29092,4017.6848,5590.7207,2024.89980,1524.899800,...,55.886206,287.185031,8.269689,501.429565,81.315318,137.201524,30.843228,5.839966,23.663360,1922
1922-01-31,59.04970,136.83063,331.60556,128.51282,65.573940,623.95900,0.0000,10744.2570,1704.54830,1454.548300,...,37.640688,41.292789,7.812246,446.810163,75.163374,112.804062,20.581829,14.773176,21.941681,1922
1922-02-28,59.04970,136.83063,331.60556,128.51282,65.573940,416.49713,0.0000,3148.9802,5005.48340,4755.483400,...,15.771729,37.389351,7.360072,521.281358,44.724558,60.496284,9.339586,3.413107,16.061511,1922
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-05-31,60.19477,140.76320,343.78314,130.04979,69.374054,393.38092,0.0000,4102.8306,645.44150,380.476620,...,93.062250,22.457865,21.318074,67.324400,445.562997,538.625216,14.376823,392.908992,10.777886,2021
2021-06-30,60.19477,140.76320,343.78314,130.04979,69.374054,383.89062,0.0000,6367.1660,1839.48110,1574.516100,...,113.279773,26.234891,25.974124,88.781996,455.382520,568.662353,10.325558,395.408740,11.337964,2021
2021-07-31,60.19477,140.76320,343.78314,130.04979,69.374054,446.86966,0.0000,8060.9453,1755.98750,1491.022700,...,137.978670,27.306032,27.190712,92.256000,520.294872,658.273499,10.236709,456.045518,10.687415,2021
2021-08-31,60.19477,140.76320,343.78314,130.04979,69.374054,464.96625,0.0000,8370.9795,1914.82500,1649.860000,...,78.763480,28.757776,25.332778,100.732672,357.876609,436.640083,7.713760,304.033953,9.119533,2021


### Define variables

In [7]:
# Salinity variables used for the tier assignments.
in_delta_vars = ["EM_EC_MONTH", "JP_EC_MONTH"]
export_vars = ["TRACYEC_MAX14DAY", "BANKSEC_MAX14DAY"]

# Salinity thresholds compared against the variables above. The two dicts are
# deliberately separate objects so either can be tuned on its own.
indelta_thresholds = dict(Top=2500, Mid=1600, Low=900)
export_thresholds = dict(Top=2500, Mid=1600, Low=900)

# Station prefixes used to match columns in the rule-based tier function.
indelta_station_list = ["EM", "JP"]
export_station_list = ["BANKSEC", "TRACYEC"]

# Ordered tier rules for calc_indelta_tier; tiers are tested in insertion order.
indelta_rules = OrderedDict()
indelta_rules[1] = dict(LT_A=0.75, LT_B=None, GT_C=0.05)
indelta_rules[2] = dict(LT_A=0.65, LT_B=0.75, GT_C=0.12)
indelta_rules[3] = dict(LT_A=0.55, LT_B=0.65, GT_C=0.20)

# X2 variable name, and the variables exported as raw compliance-point salinity.
x2 = "X2_PRV_KM"
compliance_points_indelta = ["EM_EC_MONTH", "JP_EC_MONTH", "RS_EC_MONTH", "CO_EC_MONTH"]
compliance_points_export = ["BANKSEC", "TRACYEC"]

### Subset data set

In [8]:
# Subset of df for the variables in in_delta_vars; the displayed result holds
# one EM_EC_MONTH and one JP_EC_MONTH column per scenario.
in_delta_df = create_subset_list(df, in_delta_vars)
in_delta_df

A,CALSIM,CALSIM,CALSIM,CALSIM,CALSIM,CALSIM,CALSIM,CALSIM,CALSIM,CALSIM,CALSIM,CALSIM,CALSIM,CALSIM,CALSIM,CALSIM,CALSIM,CALSIM,CALSIM,CALSIM,CALSIM
B,EM_EC_MONTH_s0001,JP_EC_MONTH_s0001,EM_EC_MONTH_s0002,JP_EC_MONTH_s0002,EM_EC_MONTH_s0003,JP_EC_MONTH_s0003,EM_EC_MONTH_s0004,JP_EC_MONTH_s0004,EM_EC_MONTH_s0005,JP_EC_MONTH_s0005,...,EM_EC_MONTH_s0047,JP_EC_MONTH_s0047,EM_EC_MONTH_s0051,JP_EC_MONTH_s0051,EM_EC_MONTH_s0056,JP_EC_MONTH_s0056,EM_EC_MONTH_s0062,JP_EC_MONTH_s0062,EM_EC_MONTH_s0063,JP_EC_MONTH_s0063
C,SALINITY,SALINITY,SALINITY,SALINITY,SALINITY,SALINITY,SALINITY,SALINITY,SALINITY,SALINITY,...,SALINITY,SALINITY,SALINITY,SALINITY,SALINITY,SALINITY,SALINITY,SALINITY,SALINITY,SALINITY
D,1MON,1MON,1MON,1MON,1MON,1MON,1MON,1MON,1MON,1MON,...,1MON,1MON,1MON,1MON,1MON,1MON,1MON,1MON,1MON,1MON
E,L2020A,L2020A,L2020A,L2020A,L2020A,L2020A,L2020A,L2020A,L2020A,L2020A,...,L2020A,L2020A,L2020A,L2020A,L2020A,L2020A,L2020A,L2020A,L2020A,L2020A
F,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,...,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER
Units,UMHOS/CM,UMHOS/CM,UMHOS/CM,UMHOS/CM,UMHOS/CM,UMHOS/CM,UMHOS/CM,UMHOS/CM,UMHOS/CM,UMHOS/CM,...,UMHOS/CM,UMHOS/CM,UMHOS/CM,UMHOS/CM,UMHOS/CM,UMHOS/CM,UMHOS/CM,UMHOS/CM,UMHOS/CM,UMHOS/CM
1921-10-31,420.35240,308.33957,420.35240,308.33957,378.06560,269.90536,403.16763,253.26294,403.16763,253.26294,...,378.06560,269.90536,378.06560,269.90536,403.16763,253.26294,378.06560,269.90536,378.06560,269.90536
1921-11-30,561.92170,298.96088,560.35170,299.63480,478.07477,248.80586,436.82724,245.62279,433.17044,246.07486,...,483.56130,250.49168,297.15817,222.52359,435.04657,246.98073,486.28450,250.52190,474.33484,251.75038
1921-12-31,1132.24480,1211.59310,1200.42440,1283.97570,1018.15460,836.14660,998.01874,863.27500,1009.44750,866.01990,...,1047.98740,845.89764,734.70776,539.09840,1023.14557,865.28357,1045.47830,851.51337,1055.86150,856.56335
1922-01-31,713.35815,1374.92550,724.70953,1500.18680,379.06232,840.27026,338.66785,742.52374,380.75513,871.08923,...,387.52400,875.51654,361.37730,748.34940,385.75858,876.94510,388.06503,874.85110,413.50037,909.57635
1922-02-28,359.12146,654.43500,345.48660,656.80850,214.80246,287.58038,243.60349,300.92972,268.10992,347.91302,...,217.32722,290.82214,208.26457,284.65042,274.61520,356.35434,217.31633,291.19208,233.41467,305.95435
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-05-31,849.52420,517.29944,838.77576,506.96332,875.91693,493.00034,1032.20580,498.76810,1138.69620,588.47820,...,2313.99680,874.16900,910.20636,501.19516,2624.86080,1078.36300,2353.72560,872.90210,2474.64480,902.98770
2021-06-30,1616.85250,842.68225,1669.95700,872.56220,2112.61400,846.22380,2514.34940,973.80164,2580.97020,1075.26390,...,2710.41850,1023.69990,1932.40280,727.71850,1452.52800,861.85596,2644.41040,1009.74084,2476.85450,965.92190
2021-07-31,2524.26300,1285.16800,2613.50560,1316.08480,2823.19630,1496.07540,2750.93070,1495.42990,2718.73780,1464.44200,...,3284.37900,1571.02190,2819.55740,1425.52840,2801.52200,946.54400,3335.29170,1572.84330,2884.49460,1386.73010
2021-08-31,2431.64900,1367.02320,2499.81080,1346.60570,2629.35770,1401.00840,2567.57520,1231.75630,2655.68460,1206.59100,...,2873.74500,1706.55320,2690.18800,1501.77270,3365.83300,1411.34890,2882.13570,1717.47800,3084.61450,1699.88490


In [9]:
# Subset of df for the variables in export_vars; the displayed result holds
# one BANKSEC_MAX14DAY and one TRACYEC_MAX14DAY column per scenario.
export_df = create_subset_list(df, export_vars)
export_df

A,CALSIM,CALSIM,CALSIM,CALSIM,CALSIM,CALSIM,CALSIM,CALSIM,CALSIM,CALSIM,CALSIM,CALSIM,CALSIM,CALSIM,CALSIM,CALSIM,CALSIM,CALSIM,CALSIM,CALSIM,CALSIM
B,BANKSEC_MAX14DAY_s0001,TRACYEC_MAX14DAY_s0001,BANKSEC_MAX14DAY_s0002,TRACYEC_MAX14DAY_s0002,BANKSEC_MAX14DAY_s0003,TRACYEC_MAX14DAY_s0003,BANKSEC_MAX14DAY_s0004,TRACYEC_MAX14DAY_s0004,BANKSEC_MAX14DAY_s0005,TRACYEC_MAX14DAY_s0005,...,BANKSEC_MAX14DAY_s0047,TRACYEC_MAX14DAY_s0047,BANKSEC_MAX14DAY_s0051,TRACYEC_MAX14DAY_s0051,BANKSEC_MAX14DAY_s0056,TRACYEC_MAX14DAY_s0056,BANKSEC_MAX14DAY_s0062,TRACYEC_MAX14DAY_s0062,BANKSEC_MAX14DAY_s0063,TRACYEC_MAX14DAY_s0063
C,SALINITY-APPROX,SALINITY-APPROX,SALINITY-APPROX,SALINITY-APPROX,SALINITY-APPROX,SALINITY-APPROX,SALINITY-APPROX,SALINITY-APPROX,SALINITY-APPROX,SALINITY-APPROX,...,SALINITY-APPROX,SALINITY-APPROX,SALINITY-APPROX,SALINITY-APPROX,SALINITY-APPROX,SALINITY-APPROX,SALINITY-APPROX,SALINITY-APPROX,SALINITY-APPROX,SALINITY-APPROX
D,1MON,1MON,1MON,1MON,1MON,1MON,1MON,1MON,1MON,1MON,...,1MON,1MON,1MON,1MON,1MON,1MON,1MON,1MON,1MON,1MON
E,L2020A,L2020A,L2020A,L2020A,L2020A,L2020A,L2020A,L2020A,L2020A,L2020A,...,L2020A,L2020A,L2020A,L2020A,L2020A,L2020A,L2020A,L2020A,L2020A,L2020A
F,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,...,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER,PER-AVER
Units,UMHOS/CM,UMHOS/CM,UMHOS/CM,UMHOS/CM,UMHOS/CM,UMHOS/CM,UMHOS/CM,UMHOS/CM,UMHOS/CM,UMHOS/CM,...,UMHOS/CM,UMHOS/CM,UMHOS/CM,UMHOS/CM,UMHOS/CM,UMHOS/CM,UMHOS/CM,UMHOS/CM,UMHOS/CM,UMHOS/CM
1921-10-31,284.73438,357.93190,284.73438,357.93190,329.17500,394.31543,360.45148,419.92150,360.45148,419.92150,...,329.17500,394.31543,329.17500,394.31543,360.45148,419.92150,329.17500,394.31543,329.17500,394.31543
1921-11-30,281.05875,354.92264,280.69608,354.62573,296.69278,367.72226,358.37662,418.22280,356.22980,416.46520,...,297.14044,368.08875,293.37952,365.00967,353.59570,414.30870,298.37338,369.09814,290.73495,362.84457
1921-12-31,376.29092,432.88925,400.25623,452.50967,411.21050,461.47794,345.13815,407.38450,343.65073,406.16672,...,407.54718,458.47876,291.39407,363.38420,343.75415,406.25140,412.18370,462.27466,408.64716,459.37933
1922-01-31,623.95900,635.65515,686.99084,687.25934,565.80280,588.04266,447.98657,491.58650,460.42834,501.77258,...,567.32446,589.28845,411.23196,461.49550,462.33432,503.33300,572.61080,593.61633,580.52167,600.09296
1922-02-28,416.49713,465.80610,419.23160,468.04480,298.34177,369.07227,297.62354,368.48425,328.37253,393.65848,...,298.46136,369.17017,297.25986,368.18652,329.90387,394.91217,298.93350,369.55673,301.95110,372.02725
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-05-31,393.38092,446.88086,395.85843,448.90918,366.52634,424.89500,382.08102,437.62964,381.19556,436.90470,...,388.65176,443.00910,377.07236,433.52902,446.42690,490.30960,389.50647,443.70883,392.83798,446.43634
2021-06-30,383.89062,439.11115,383.63202,438.89940,373.53330,430.63160,417.02010,466.23425,417.58517,466.69687,...,465.07578,505.57742,364.19476,422.98615,514.09357,545.70830,466.80276,506.99130,463.98532,504.68466
2021-07-31,446.86966,490.67210,459.99188,501.41525,482.44656,519.79890,537.46344,564.84125,528.66693,557.63950,...,519.45380,550.09674,466.37808,506.64365,414.26834,463.98138,521.57750,551.83540,480.77103,518.42710
2021-08-31,464.96625,505.48776,473.36250,512.36176,493.06670,528.49360,493.62520,528.95087,490.47433,526.37120,...,575.02310,595.59130,485.32825,522.15814,467.29202,507.39188,589.30286,607.28217,565.28735,587.62067


### X2

In [10]:
# Annual average of the X2 variable, restricted to month 4 (April) and
# month 9 (September) respectively.
april_x2_ann_avg = compute_annual_means(df, x2, units="KM", months=[4])
september_x2_ann_avg = compute_annual_means(df, x2, units="KM", months=[9])

# Coefficient of variation of X2 for the same two months. The index is named
# 'Scenario' so the saved CSV has a labelled index column.
# NOTE(review): assumes compute_cv returns one row per scenario — confirm.
april_x2_ann_cv = compute_cv(df, x2, "April_X2_CV", months=[4], units="KM")
april_x2_ann_cv.index.name = 'Scenario'
september_x2_ann_cv = compute_cv(df, x2, "September_X2_CV", months=[9], units="KM")
september_x2_ann_cv.index.name = 'Scenario'

In [11]:
# Clean up dataframes for export: replace the multi-level column header with a
# flat label built from the 4th '_'-separated token of the level-'B' name.
# With x2 = 'X2_PRV_KM' a name like 'X2_PRV_KM_s0001' yields 'AprilX2_s0001'.
# NOTE(review): token index 3 is tied to the current x2 name having exactly
# three '_'-separated parts; revisit if x2 changes.
# NOTE(review): this cell is not idempotent — after it runs the columns no
# longer have a level 'B', so re-running it without first re-running the X2
# cell is expected to fail.
april_x2_ann_avg.columns = 'AprilX2_' + april_x2_ann_avg.columns.get_level_values('B').str.split('_').str[3]
april_x2_ann_avg.columns.name = None

september_x2_ann_avg.columns = 'SeptemberX2_' + september_x2_ann_avg.columns.get_level_values('B').str.split('_').str[3]
september_x2_ann_avg.columns.name = None

### Salinity at compliance points

In [12]:
# Raw salinity series at the compliance points, exported as-is (no tiering).
salinity_compliance_points_indelta_df = create_subset_list(df, compliance_points_indelta)
# NOTE(review): compliance_points_export holds the short names "BANKSEC" and
# "TRACYEC", whereas export_vars uses the full "..._MAX14DAY" names — confirm
# that create_subset_list matches on these shorter names as intended.
salinity_compliance_points_export_df = create_subset_list(df, compliance_points_export)

In [13]:
# Clean up dataframes for export: keep only the level-'B' names (variable plus
# scenario, e.g. 'EM_EC_MONTH_s0001') as a flat column header.
# NOTE(review): like the X2 clean-up cell, this is not idempotent — a second run
# without re-creating the subsets is expected to fail, because level 'B' is gone.
salinity_compliance_points_indelta_df.columns = salinity_compliance_points_indelta_df.columns.get_level_values('B')
salinity_compliance_points_indelta_df.columns.name = None

salinity_compliance_points_export_df.columns = salinity_compliance_points_export_df.columns.get_level_values('B')
salinity_compliance_points_export_df.columns.name = None

### Tier calculation functions

In [14]:
def calc_indelta_tier(
    df,
    scenID,
    stations=["EM_EC_MONTH", "JP_EC_MONTH"],
    thresholds={"Top": 2500, "Mid": 1600, "Low": 900},
    tier_rules=OrderedDict([
        (1, {"LT_A": 0.75, "LT_B": None, "GT_C": 0.05}),
        (2, {"LT_A": 0.65, "LT_B": 0.75, "GT_C": 0.12}),
        (3, {"LT_A": 0.55, "LT_B": 0.65, "GT_C": 0.20}),
    ])
):
    """
    Calculate the in-delta tier designation for a given scenario.

    For every station variable, the fraction of readings below the "Low"
    threshold (LT_A), below the "Mid" threshold (LT_B) and above the "Top"
    threshold (GT_C) is computed. The worst case across stations (smallest
    LT_A, smallest LT_B, largest GT_C) is then tested against ``tier_rules``
    in order, and the first tier whose rule is satisfied is returned.

    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe with salinity variables. Column labels must be tuples
        whose second element is ``"<variable>_<scenID>"``.
    scenID : str
        Scenario identifier.
    stations : list of str, optional
        Variables to include (default: ["EM_EC_MONTH", "JP_EC_MONTH"]).
    thresholds : dict, optional
        Thresholds for salinity with keys "Top", "Mid" and "Low"
        (default: {"Top": 2500, "Mid": 1600, "Low": 900}).
    tier_rules : dict, optional
        Ordered mapping of tier -> rule, each rule having keys "LT_A", "LT_B"
        and "GT_C". A rule matches when min LT_A >= rule["LT_A"],
        min LT_B >= rule["LT_B"] and max GT_C < rule["GT_C"]; a ``None`` entry
        disables that condition.
        Example (default):
        ([
            (1, {"LT_A": 0.75, "LT_B": None, "GT_C": 0.05}),
            (2, {"LT_A": 0.65, "LT_B": 0.75, "GT_C": 0.12}),
            (3, {"LT_A": 0.55, "LT_B": 0.65, "GT_C": 0.20}),
        ])

    Returns
    -------
    The key of the first matching rule, or ``np.nan`` if no rule matches.

    Raises
    ------
    ValueError
        If fewer columns mention ``scenID`` than there are stations, or if a
        station column has no rows.
    KeyError
        If a specific ``"<station>_<scenID>"`` column is missing.

    Notes
    -----
    Comparisons are strict, so a reading exactly equal to a threshold counts
    in neither fraction. NaN readings fail every comparison but still count in
    the denominator.
    """
    tA, tB, tC = thresholds["Low"], thresholds["Mid"], thresholds["Top"]

    # Second element of each column tuple carries "<variable>_<scenario>".
    level_b = [c[1] for c in df.columns]

    # Cheap sanity check that the scenario is present at all.
    n_scenario_cols = sum(1 for name in level_b if scenID in name)
    if n_scenario_cols < len(stations):
        raise ValueError(f"Didn't find the salinity columns for scenario {scenID}")

    # Fractions for each station variable. The caller-supplied `stations` is
    # used here; the notebook global is never consulted.
    fracs = {}
    for var in stations:
        target = f"{var}_{scenID}"
        positions = [i for i, name in enumerate(level_b) if name == target]
        if not positions:
            raise KeyError(target)

        # Positional selection avoids any dependence on the column index being sorted.
        values = df.iloc[:, positions].to_numpy().ravel()
        n_values = values.size
        if n_values == 0:
            raise ValueError(f"No readings available for {target}")

        fracs[var] = {
            "LT_A": np.count_nonzero(values < tA) / n_values,
            "LT_B": np.count_nonzero(values < tB) / n_values,
            "GT_C": np.count_nonzero(values > tC) / n_values,
        }

    # Aggregate across stations: the worst-performing station decides the tier.
    max_GT_C = max(v["GT_C"] for v in fracs.values())
    min_LT_A = min(v["LT_A"] for v in fracs.values())
    min_LT_B = min(v["LT_B"] for v in fracs.values())

    # Apply tier rules in order; the first match wins.
    for tier, rule in tier_rules.items():
        cond_A = rule["LT_A"] is None or min_LT_A >= rule["LT_A"]
        cond_B = rule["LT_B"] is None or min_LT_B >= rule["LT_B"]
        cond_C = rule["GT_C"] is None or max_GT_C < rule["GT_C"]

        if cond_A and cond_B and cond_C:
            return tier

    # Default if no rule matches.
    return np.nan

In [15]:
def generate_salinity_tier_assignment_matrix(
    df,
    station_list=["EM", "JP"],
    thresholds={"Top": 2500, "Mid": 1600, "Low": 900},
    start_date="1921-10-01"
):
    """
    Assign one salinity tier (1-4) per scenario from monthly station readings.

    Columns are matched to a scenario by the first ``sNNNN`` pattern in their
    name and to a station by a ``"<station>_"`` prefix or ``"_<station>_"``
    substring. Readings are grouped by the calendar year of the index, after
    dropping rows where any station is NaN. Tiers are tested in this order:

    * Tier 4 - some year has >= 2 readings above "Top" at any station
      (evaluation of that scenario stops at the first such year).
    * Tier 3 - some year has >= 2 readings above "Mid" at any station, or more
      than 5% of years have at least one reading above "Mid".
    * Tier 2 - no year has a reading above "Mid" and at least 95% of years have
      >= 10 readings within ["Low", "Mid"] at every station.
    * Tier 1 - at least 95% of years have exactly 12 readings below "Low" at
      every station.

    Scenarios matching none of these are reported on stdout and left out.

    Parameters
    ----------
    df : pd.DataFrame
        Readings with one column per station and scenario.
    station_list : list of str, optional
        Station prefixes that must all be present for a scenario.
    thresholds : dict, optional
        Salinity thresholds with keys "Top", "Mid" and "Low".
    start_date : str, optional
        Only used when ``df`` has no datetime index: a month-start index
        beginning at this date is substituted.

    Returns
    -------
    pd.DataFrame
        Indexed by "Scenario" with a single "Salinity_Tier" column. The frame
        is empty, with the same layout, when no scenario was assigned a tier.
    """
    low, mid, top = thresholds["Low"], thresholds["Mid"], thresholds["Top"]

    def column_label(colname):
        # Flatten a MultiIndex column tuple into one searchable string.
        return "_".join(colname) if isinstance(colname, tuple) else str(colname)

    def extract_scenario_id(colname):
        match = re.search(r's\d{4}', column_label(colname))
        return match.group(0) if match else None

    def extract_station_name(colname):
        name = column_label(colname)
        for st in station_list:
            if name.startswith(st + "_") or f"_{st}_" in name:
                return st
        return None

    def assign_tiers_by_scenario(df, date_series):
        tier_rows = []
        scenario_map = {}

        # scenario id -> {station -> column label}
        for col in df.columns:
            sid = extract_scenario_id(col)
            station = extract_station_name(col)
            if sid and station:
                scenario_map.setdefault(sid, {})[station] = col

        print(f"Found {len(scenario_map)} scenarios: {list(scenario_map.keys())}")

        for sid, col_dict in scenario_map.items():
            if not all(st in col_dict for st in station_list):
                print(f" Skipping {sid}: missing one or more station columns")
                continue

            df_scenario = pd.DataFrame(
                {st: df[col_dict[st]] for st in station_list},
                index=date_series
            )
            df_scenario["Year"] = df_scenario.index.year

            valid_rows = df_scenario.dropna(subset=station_list)
            if valid_rows.empty:
                print(f" Skipping {sid}: all data is NaN")
                continue

            yearly = valid_rows.groupby("Year")
            total_years = len(yearly.groups)

            # Guard the divisions below before doing any per-year work.
            if total_years == 0:
                print(f" Scenario {sid}: No valid years with complete data.")
                continue

            tier4_flag = False
            tier3_flag = False
            tier3_years_with_1month_over_mid = 0
            tier2_valid_years = 0
            tier1_valid_years = 0
            any_year_exceeds_mid = False

            for _, group in yearly:
                readings = [group[st] for st in station_list]

                if any((r > top).sum() >= 2 for r in readings):
                    tier4_flag = True
                    break

                # Count readings above Mid once; every Mid-based test reuses it.
                over_mid_counts = [(r > mid).sum() for r in readings]

                if any(count >= 2 for count in over_mid_counts):
                    tier3_flag = True

                if any(count >= 1 for count in over_mid_counts):
                    tier3_years_with_1month_over_mid += 1
                    any_year_exceeds_mid = True
                else:
                    in_range_counts = [((r >= low) & (r <= mid)).sum() for r in readings]
                    if all(count >= 10 for count in in_range_counts):
                        tier2_valid_years += 1

                if all((r < low).sum() == 12 for r in readings):
                    tier1_valid_years += 1

            if tier4_flag:
                tier = 4
            elif tier3_flag or (tier3_years_with_1month_over_mid / total_years > 0.05):
                tier = 3
            elif not any_year_exceeds_mid and (tier2_valid_years / total_years >= 0.95):
                tier = 2
            elif tier1_valid_years / total_years >= 0.95:
                tier = 1
            else:
                print(f" Scenario {sid} did not match any tier.")
                print(f"   Summary: tier3_flag={tier3_flag}, tier3_pct={tier3_years_with_1month_over_mid / total_years:.2f}, "
                      f"tier2_pct={tier2_valid_years / total_years:.2f}, tier1_pct={tier1_valid_years / total_years:.2f}, "
                      f"any_year_exceeds_mid={any_year_exceeds_mid}")
                continue

            print(f"→ Scenario {sid} assigned Tier {tier}")
            tier_rows.append({
                "Scenario": sid,
                "Salinity_Tier": tier
            })

        return pd.DataFrame(tier_rows, columns=["Scenario", "Salinity_Tier"])

    # Work on a copy so the caller's index is never replaced.
    df = df.copy()
    if not pd.api.types.is_datetime64_any_dtype(df.index):
        df.index = pd.date_range(start=start_date, periods=len(df), freq="MS")

    tier_df = assign_tiers_by_scenario(df, df.index)

    if tier_df.empty:
        print(" No valid scenario-station pairs were found.")

    # Same layout whether or not any tier was assigned, so saved CSVs always
    # carry the "Scenario" index header.
    return tier_df.set_index("Scenario")


### In-Delta tier assignment (new version):

In [16]:
# Tier per scenario, keyed by scenario ID.
tiers = {}
for scenID in index_names:
    # Classify this scenario with the fraction-based in-Delta rules.
    tiers[scenID] = calc_indelta_tier(
        df=in_delta_df,
        scenID=scenID,
        stations=in_delta_vars,
        thresholds=indelta_thresholds,
        tier_rules=indelta_rules,
    )
    # Report progress.
    print(f"assigned tier {tiers[scenID]} to scenario {scenID}")

# One row per scenario: the index holds the scenario ID (named 'ScenarioID'),
# and the single column holds the tier value.
tier_indelta_df = pd.DataFrame.from_dict(
    tiers, orient='index', columns=['Salinity_Tier']
).rename_axis('ScenarioID')

assigned tier 2 to scenario s0001
assigned tier 2 to scenario s0002
assigned tier 3 to scenario s0003
assigned tier 3 to scenario s0004
assigned tier 3 to scenario s0005
assigned tier 2 to scenario s0006
assigned tier 2 to scenario s0007
assigned tier 2 to scenario s0008
assigned tier 2 to scenario s0009
assigned tier 2 to scenario s0010
assigned tier 2 to scenario s0011
assigned tier 2 to scenario s0012
assigned tier 3 to scenario s0013
assigned tier 3 to scenario s0014
assigned tier 2 to scenario s0015
assigned tier 3 to scenario s0016
assigned tier 1 to scenario s0018
assigned tier 2 to scenario s0019
assigned tier 2 to scenario s0020
assigned tier 2 to scenario s0021
assigned tier 2 to scenario s0022
assigned tier 2 to scenario s0023
assigned tier 3 to scenario s0024
assigned tier 2 to scenario s0025
assigned tier 2 to scenario s0027
assigned tier 1 to scenario s0029
assigned tier 1 to scenario s0030
assigned tier 1 to scenario s0039
assigned tier 1 to scenario s0042
assigned tier 

In [17]:
# Show the in-Delta tier table (index: ScenarioID, column: Salinity_Tier).
tier_indelta_df

Unnamed: 0_level_0,Salinity_Tier
ScenarioID,Unnamed: 1_level_1
s0001,2
s0002,2
s0003,3
s0004,3
s0005,3
s0006,2
s0007,2
s0008,2
s0009,2
s0010,2


### In-Delta tier assignment (old version):

In [18]:
# tier_indeltaold_df = generate_salinity_tier_assignment_matrix(
#     df=in_delta_df,
#     station_list=indelta_station_list,
#     thresholds=indelta_thresholds,
#     start_date="1921-10-01"
# )

In [19]:
# tier_indeltaold_df

### Export tier assignment:

In [20]:
# Export tiers still use the year-by-year rule function rather than
# calc_indelta_tier. start_date only matters when export_df has no datetime
# index, in which case the function substitutes a month-start index.
tier_export_df = generate_salinity_tier_assignment_matrix(
    df=export_df,
    station_list=export_station_list,
    thresholds=export_thresholds,
    start_date="1921-10-01"
)

Found 35 scenarios: ['s0001', 's0002', 's0003', 's0004', 's0005', 's0006', 's0007', 's0008', 's0009', 's0010', 's0011', 's0012', 's0013', 's0014', 's0015', 's0016', 's0018', 's0019', 's0020', 's0021', 's0022', 's0023', 's0024', 's0025', 's0027', 's0029', 's0030', 's0039', 's0042', 's0046', 's0047', 's0051', 's0056', 's0062', 's0063']
→ Scenario s0001 assigned Tier 1
→ Scenario s0002 assigned Tier 1
→ Scenario s0003 assigned Tier 1
→ Scenario s0004 assigned Tier 1
→ Scenario s0005 assigned Tier 1
→ Scenario s0006 assigned Tier 1
→ Scenario s0007 assigned Tier 1
→ Scenario s0008 assigned Tier 1
→ Scenario s0009 assigned Tier 1
→ Scenario s0010 assigned Tier 1
→ Scenario s0011 assigned Tier 1
→ Scenario s0012 assigned Tier 1
→ Scenario s0013 assigned Tier 1
→ Scenario s0014 assigned Tier 1
→ Scenario s0015 assigned Tier 1
→ Scenario s0016 assigned Tier 1
→ Scenario s0018 assigned Tier 1
→ Scenario s0019 assigned Tier 1
→ Scenario s0020 assigned Tier 1
→ Scenario s0021 assigned Tier 1
→ Sc

In [21]:
# Show the export tier table (index: Scenario, column: Salinity_Tier).
tier_export_df

Unnamed: 0_level_0,Salinity_Tier
Scenario,Unnamed: 1_level_1
s0001,1
s0002,1
s0003,1
s0004,1
s0005,1
s0006,1
s0007,1
s0008,1
s0009,1
s0010,1


## Save files

In [22]:
# Each result table paired with its destination CSV, written in this order.
csv_exports = [
    (april_x2_ann_avg, april_x2_ann_avg_path),
    (april_x2_ann_cv, april_x2_ann_cv_path),
    (september_x2_ann_avg, september_x2_ann_avg_path),
    (september_x2_ann_cv, september_x2_ann_cv_path),
    (salinity_compliance_points_indelta_df, salinity_compliance_points_indelta_path),
    (salinity_compliance_points_export_df, salinity_compliance_points_export_path),
    (tier_indelta_df, indelta_salinity_output_path),
    (tier_export_df, export_salinity_output_path),
]

# The index (dates or scenario IDs) is always kept in the file.
for table, csv_path in csv_exports:
    table.to_csv(csv_path, index=True)

In [23]:
# Echo where the CSVs were written. The paths are absolute and machine-specific,
# so the saved output of this cell reflects whoever last ran the notebook.
print(salinity_tiers_output_dir)
print(salinity_data_output_dir)

C:\Dino\COEQWAL\COEQWAL_GIT\CalSim3_Model_Runs\Scenarios\Performance_Metrics\Tiered_Outcome_Measures\Salinity\Tiers
C:\Dino\COEQWAL\COEQWAL_GIT\CalSim3_Model_Runs\Scenarios\Performance_Metrics\Metrics\Salinity


In [24]:
# Completion marker, so a Run All visibly reaches the end of the notebook.
print("Done!")

Done!
