## Import standard libraries

In [None]:
import os
import sys
# append coeqwal packages to path
sys.path.append('./coeqwalpackage')
import datetime as dt
import pandas as pd
import numpy as np
import cqwlutils as cu
import re
import matplotlib.pyplot as plt
import math
import geopandas as gpd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

%matplotlib inline

## Import custom modules

In [None]:
# Import custom modules - NEED WINDOWS OS (NOTE: I had to run this twice, must check why this happens!)
from coeqwalpackage.DataExtraction import *
from coeqwalpackage.metrics import *


## Define control file name

In [None]:
CtrlFile = 'CalSim3GroundWaterDataExtractionInitFile_v1.xlsx'
CtrlTab = 'Init'

## Read from control file

In [None]:
ScenarioListFile, ScenarioListTab, ScenarioListPath, GW1DssNamesOutPath, GW2DssNamesOutPath, ScenarioIndicesOutPath, DssDirsOutPath, VarListPath, VarListFile, VarListTab, VarOutPath, DataOutPath, ConvertDataOutPath, ExtractionSubPath, DemandDeliverySubPath, ModelSubPath, GroupDataDirPath, ScenarioDir, GW1DssMin, GW1DssMax, GW2DssMin, GW2DssMax, NameMin, NameMax, DirMin, DirMax, IndexMin, IndexMax, StartMin, StartMax, EndMin, EndMax, VarMin, VarMax, DemandFilePath, DemandFileName, DemandFileTab, DemMin, DemMax, InflowOutSubPath, InflowFilePath, InflowFileName, InflowFileTab, InflowMin, InflowMax = cu.read_init_file(CtrlFile, CtrlTab)

In [None]:
print([ScenarioListFile, ScenarioListTab, ScenarioListPath, GW1DssNamesOutPath, GW2DssNamesOutPath, ScenarioIndicesOutPath, DssDirsOutPath, VarListPath, VarListFile, VarListTab, VarOutPath, DataOutPath, ConvertDataOutPath, ExtractionSubPath, DemandDeliverySubPath, ModelSubPath, GroupDataDirPath, ScenarioDir, GW1DssMin, GW1DssMax, GW2DssMin, GW2DssMax, NameMin, NameMax, DirMin, DirMax, IndexMin, IndexMax, StartMin, StartMax, EndMin, EndMax, VarMin, VarMax, DemandFilePath, DemandFileName, DemandFileTab, DemMin, DemMax, InflowOutSubPath, InflowFilePath, InflowFileName, InflowFileTab, InflowMin, InflowMax])


## Check for output directory and create if necessary (not necessary)

In [None]:
# Make sure the output directory exists before any results are written to it.
if not os.path.exists(GroupDataDirPath):
    print("Warning: directory " + GroupDataDirPath + " does not exists and will be created")

# exist_ok=True keeps this cell safe to re-run and avoids a failure if the
# directory appears between the check above and the creation call.
os.makedirs(GroupDataDirPath, exist_ok=True)


## Define Nan Values

In [None]:
# NaN values as defined by CalSim3
Nan1 = -901
Nan2 = -902

## Read indices, DSS names, directory names, start and end dates, time range (not necessary)

In [None]:
gw1dsshdr, gw1dssname = cu.read_from_excel(ScenarioListPath, ScenarioListTab, GW1DssMin, GW1DssMax, hdr=True)
# Keep the first entry of every returned row
# (assumes read_from_excel returns a sequence of rows - TODO confirm).
gw1dss_names = [row[0] for row in gw1dssname]
gw1dss_names

In [None]:
gw2dsshdr, gw2dssname = cu.read_from_excel(ScenarioListPath, ScenarioListTab, GW2DssMin, GW2DssMax, hdr=True)
# Keep the first entry of every returned row
# (assumes read_from_excel returns a sequence of rows - TODO confirm).
gw2dss_names = [row[0] for row in gw2dssname]
gw2dss_names

In [None]:
indexhdr, index_name = cu.read_from_excel(ScenarioListPath, ScenarioListTab, IndexMin, IndexMax, hdr=True)
# Keep the first entry of every returned row
# (assumes read_from_excel returns a sequence of rows - TODO confirm).
index_names = [row[0] for row in index_name]
index_names

In [None]:
studyhdr, study_name = cu.read_from_excel(ScenarioListPath, ScenarioListTab, NameMin, NameMax, hdr=True)
# Keep the first entry of every returned row
# (assumes read_from_excel returns a sequence of rows - TODO confirm).
study_names = [row[0] for row in study_name]
study_names

In [None]:
dirhdr, dir_name = cu.read_from_excel(ScenarioListPath, ScenarioListTab, DirMin, DirMax, hdr=True)
# Keep the first entry of every returned row
# (assumes read_from_excel returns a sequence of rows - TODO confirm).
dir_names = [row[0] for row in dir_name]
dir_names

In [None]:
starthdr, start_date = cu.read_from_excel(ScenarioListPath, ScenarioListTab, StartMin, StartMax, hdr=True)
start_dates = [row[0] for row in start_date]
datetime_start_dates = pd.to_datetime(start_dates)
# The DSS reading library wants datetime.datetime objects, not pandas Timestamps.
# The loop variable is deliberately not named `dt`: that is this notebook's
# alias for the datetime module.
dt_datetime_start_dates = [stamp.to_pydatetime() for stamp in datetime_start_dates]


In [None]:
endhdr, end_date = cu.read_from_excel(ScenarioListPath, ScenarioListTab, EndMin, EndMax, hdr=True)
end_dates = [row[0] for row in end_date]
datetime_end_dates = pd.to_datetime(end_dates)
# The DSS reading library wants datetime.datetime objects, not pandas Timestamps.
# The loop variable is deliberately not named `dt`: that is this notebook's
# alias for the datetime module.
dt_datetime_end_dates = [stamp.to_pydatetime() for stamp in datetime_end_dates]


In [None]:
# Overall time window: earliest scenario start through latest scenario end.
min_datetime = min(dt_datetime_start_dates)
print('Min time: ', min_datetime, sep='\n')
max_datetime = max(dt_datetime_end_dates)
print('Max time: ', max_datetime, sep='\n')


## Read variables list (not necessary)

In [None]:
# Read the variable list from the control workbook: `hdr` becomes the column
# labels and `vars` the rows of the table shown below.
# NOTE(review): `vars` shadows the Python builtin of the same name for the rest
# of the notebook session; consider renaming once all later cells are checked.
hdr, vars = cu.read_from_excel(VarListPath, VarListTab,VarMin,VarMax,hdr=True)
gw1var_df = pd.DataFrame(data=vars, columns=hdr)
# Bare expression so the notebook displays the table.
gw1var_df

## Read the compound data from CSV to df

In [None]:
# read the dataframe from CSV
print('Reading ' + DataOutPath)
gw1_df, gw1dss_names = read_in_df(DataOutPath,GW1DssNamesOutPath)

In [None]:
print("gw1dss_names:")
gw1dss_names

In [None]:
print("gw1_df:")
gw1_df

## Drop the LT:E999 columns

In [None]:
# Expand the column index into a frame (one row per column of gw1_df, one
# column per header level), flag every row in which any level contains the
# text 'LT:E999', and negate so that True means "keep this column".
mask = ~gw1_df.columns.to_frame().apply(lambda col: col.astype(str).str.contains('LT:E999')).any(axis=1)
# Positional boolean selection of the columns to keep.
gw1_df = gw1_df.loc[:, mask.values]

In [None]:
print("new gw1_df:")
gw1_df

## Add water year column to df

In [None]:
def add_water_year_column(df):
    """Return an index-sorted copy of ``df`` with an added ``WaterYear`` column.

    Rows dated October through December are assigned to the following calendar
    year; all other rows keep their calendar year.

    Parameters:
        df (pd.DataFrame): Frame whose index can be parsed by ``pd.to_datetime``.

    Returns:
        pd.DataFrame: Copy of ``df`` sorted by index, with ``WaterYear`` added.
            The input frame is left untouched.
    """
    df_copy = df.copy().sort_index()
    dates = pd.to_datetime(df_copy.index)
    # Derive the water year straight from the parsed index. Going through
    # temporary "Date"/"Year"/"Month" columns would overwrite, and then drop,
    # any columns of the caller that already use those names.
    df_copy.loc[:, 'WaterYear'] = np.where(dates.month >= 10, dates.year + 1, dates.year)
    return df_copy

In [None]:
gw1_df = add_water_year_column(gw1_df)

In [None]:
print("gw1_df with water year column:")
gw1_df

In [None]:
base_dir = os.path.abspath(".")

wresl_path = os.path.join(base_dir, "CalSim3GWregionIndex.wresl")
wba_area_path = os.path.join(base_dir, "CalSim3_WBA.csv")

# Read the area data (CSV)
wba_df = pd.read_csv(wba_area_path)

# Check what columns are present
print("Columns in WBA Area CSV:", wba_df.columns)

# Preview key columns
print(wba_df[['fid', 'GIS_Acres']].head())

# === Step 3: Parse WRESL File to Map SRxx to WBAxx ===

with open(wresl_path, 'r') as f:
    wresl_lines = f.readlines()

# Build a dict from subregion key (e.g. "SR12") to WBA label by scanning the
# WRESL lines for assignments written as `indxWBA_<digits> = SR<digits>`.
# NOTE(review): the later cells of this notebook parse the same file with a
# `define indxWBA_<id> {value <n> }` pattern instead. A line can only match one
# of the two formats, so confirm which one the WRESL file really uses; if
# nothing matches here, the mapping (and mapping_df) is simply empty.
sr_to_wba_map = {}
for line in wresl_lines:
    # Standard WBA form: indxWBA_XX = SRYY (leading whitespace allowed).
    match = re.match(r'\s*indxWBA_(\d+)\s*=\s*(SR\d+)', line)
    if match:
        wba_num, sr_num = match.groups()
        sr_to_wba_map[sr_num] = f'WBA{wba_num}'
    else:
        # Special DETAW form: indxDETAW = SRYY.
        match_detaw = re.match(r'\s*indxDETAW\s*=\s*(SR\d+)', line)
        if match_detaw:
            sr_num = match_detaw.group(1)
            sr_to_wba_map[sr_num] = 'DETAW'

# Two-column table (SR_number, WBA_name) for easier viewing.
mapping_df = pd.DataFrame(list(sr_to_wba_map.items()), columns=['SR_number', 'WBA_name'])

# Preview result
print("\n=== SR to WBA Mapping Preview ===")
print(mapping_df.head())

# Save mapping to CSV if needed
# mapping_df.to_csv("sr_to_wba_mapping.csv", index=False)


In [None]:
base_dir = os.path.abspath(".")

wresl_path = os.path.join(base_dir, "CalSim3GWregionIndex.wresl")
wba_csv_path = os.path.join(base_dir, "CalSim3_WBA.csv")
with open(wresl_path, 'r') as f:
    lines = f.readlines()

# Build the subregion -> WBA lookup from the WRESL "define" lines.
wba_define = re.compile(r'define\s+indxWBA_([0-9A-Za-z]+)\s+\{value\s+(\d+)\s+\}')
detaw_define = re.compile(r'define\s+indxDETAW\s+\{value\s+(\d+)\s+\}')

sr_to_wba_map = {}
for raw_line in lines:
    text = raw_line.strip()

    # Standard form, e.g. "define indxWBA_2 {value 1 }" -> SR01: WBA2
    wba_hit = wba_define.match(text)
    if wba_hit is not None:
        wba_id, sr_num = wba_hit.groups()
        sr_to_wba_map[f"SR{int(sr_num):02d}"] = f"WBA{wba_id}"
    else:
        # Special form, e.g. "define indxDETAW {value 42 }" -> SR42: DETAW
        detaw_hit = detaw_define.match(text)
        if detaw_hit is not None:
            sr_to_wba_map[f"SR{int(detaw_hit.group(1)):02d}"] = "DETAW"

# Two-column table of the lookup; the tail is printed to check the DETAW row.
mapping_df = pd.DataFrame(list(sr_to_wba_map.items()), columns=["SR_number", "WBA_name"])
print(mapping_df.tail(10))

# Optionally save
# mapping_df.to_csv("sr_to_wba_mapping.csv", index=False)


## End of initialization

In [None]:
print('Done Initializing!')

In [None]:
base_dir = os.path.abspath(".")

wresl_path = os.path.join(base_dir, "CalSim3GWregionIndex.wresl")
wba_csv_path = os.path.join(base_dir, "CalSim3_WBA.csv")

with open(wresl_path, "r") as file:
    lines = file.readlines()

# Collect one {"Subregion_ID", "WBA_ID"} record per WRESL index definition.
wba_define = re.compile(r'define\s+indxWBA_([0-9A-Za-z]+)\s+\{value\s+(\d+)\s+\}')
detaw_define = re.compile(r'define\s+indxDETAW\s+\{value\s+(\d+)\s+\}')

mapping_records = []
for line in lines:
    line = line.strip()

    # Standard form: define indxWBA_<id> {value <n> }
    match_wba = wba_define.match(line)
    if match_wba:
        wba_id, sr_num = match_wba.groups()
        mapping_records.append({
            "Subregion_ID": f"SR{int(sr_num):02d}",
            "WBA_ID": f"WBA{wba_id}"
        })
        continue

    # Special form: define indxDETAW {value <n> }
    match_detaw = detaw_define.match(line)
    if match_detaw:
        sr_num = match_detaw.group(1)
        mapping_records.append({
            "Subregion_ID": f"SR{int(sr_num):02d}",
            "WBA_ID": "DETAW"
        })

# Naming the columns explicitly keeps the sort from raising a KeyError when no
# definition line matched and the record list is empty.
wresl_df = pd.DataFrame(mapping_records, columns=["Subregion_ID", "WBA_ID"]).sort_values("Subregion_ID")
display(wresl_df)

output_csv_path = os.path.join(base_dir, "groundwater_wresl_mapping.csv")
wresl_df.to_csv(output_csv_path, index=False)
print(f"Saved WRESL mapping to: {output_csv_path}")


## Monthly, Annual Data and Trend 

In [None]:
# === Setup base paths ===
base_dir = os.path.abspath(".")
wresl_path = os.path.join(base_dir, "CalSim3GWregionIndex.wresl")
wba_csv_path = os.path.join(base_dir, "CalSim3_WBA.csv")
#GroupDataDirPath = os.path.join(base_dir, "output")

# === Load mapping files ===
wba_df = pd.read_csv(wba_csv_path)
sr_to_fid = {f"SR{int(fid):02d}": fid for fid in wba_df['fid']}
fid_to_acres = dict(zip(wba_df['fid'], wba_df['GIS_Acres']))

# Map each subregion key ("SRnn") to its WBA label, as declared by the
# `define indxWBA_<id> {value <n> }` and `define indxDETAW {value <n> }` lines
# of the WRESL file.
wba_define = re.compile(r'define\s+indxWBA_([0-9A-Za-z]+)\s+\{value\s+(\d+)\s+\}')
detaw_define = re.compile(r'define\s+indxDETAW\s+\{value\s+(\d+)\s+\}')

sr_to_wba = {}
with open(wresl_path, "r") as file:
    for line in file:
        # Strip first, as the other WRESL-parsing cells of this notebook do, so
        # that indented definition lines are recognised here as well.
        line = line.strip()

        match_wba = wba_define.match(line)
        if match_wba:
            wba_id, sr_num = match_wba.groups()
            sr_to_wba[f"SR{int(sr_num):02d}"] = f"WBA{wba_id}"
            continue

        match_detaw = detaw_define.match(line)
        if match_detaw:
            sr_num = match_detaw.group(1)
            sr_to_wba[f"SR{int(sr_num):02d}"] = "DETAW"
        
# print("sr_to_wba:")
# print(sr_to_wba)

# === Process data ===
def _trend_slope(series):
    """Return the least-squares slope of ``series`` per step, ignoring NaNs.

    The positions (0, 1, 2, ...) of the remaining points are used as x, so gaps
    keep their true spacing. Returns NaN when fewer than two points remain.
    """
    y = np.asarray(series, dtype=float)
    keep = ~np.isnan(y)
    if keep.sum() < 2:
        return np.nan
    # Fit only the non-NaN points: np.polyfit has no NaN handling of its own.
    return np.polyfit(np.flatnonzero(keep), y[keep], 1)[0]


converted_data = {}   # new 5-level column tuple -> converted monthly series
monthly_trend = {}    # scenario -> {"<WBA>:TOT": slope per monthly step}
annual_trend = {}     # scenario -> {"<WBA>:TOT": slope per annual step}
scenario_drops = {}   # scenario -> (first dropped date, last date of the series)
start_year = 1960

for col in gw1_df.columns:
    # Only full 5-level column tuples describe model variables.
    if not isinstance(col, tuple) or len(col) < 5:
        continue

    model, var_tag, var_type, timestep, unit = col[:5]
    # Keep only total-storage variables named like "SR12:TOT_s0011".
    if not re.match(r'^SR\d+:TOT_s\d{4}$', var_tag):
        continue

    sr, rest = var_tag.split(":")
    scenario_raw = rest.split("_s")[-1]
    scenario = f"s{int(scenario_raw):04d}"
    if sr not in sr_to_wba:
        continue

    wba = sr_to_wba[sr]
    fid = sr_to_fid.get(sr)
    area = fid_to_acres.get(fid, None)
    # Without a positive area the conversion below is undefined.
    if area is None or area == 0:
        continue

    new_var_tag = f"{wba}_{scenario}"
    values = gw1_df[col]
    values = values[values.index >= pd.Timestamp(f"{start_year}-01-01")]
    # NOTE(review): labelled "FT" below; presumably storage is in thousand
    # acre-feet, so value / acres * 1000 gives feet - confirm the source unit.
    values_ft = (values / area) * 1000

    # Truncate the series at its first negative value and remember the dropped
    # range. NOTE(review): negatives are presumably CalSim missing-value
    # markers (see Nan1/Nan2) - confirm. The range is stored per scenario, so a
    # later WBA of the same scenario overwrites an earlier one.
    drop_indices = np.where(values_ft < 0)[0]
    if len(drop_indices) > 0:
        cutoff_idx = drop_indices[0]
        drop_range = (values_ft.index[cutoff_idx].date(), values_ft.index[-1].date())
        scenario_drops[scenario] = drop_range
        values_ft = values_ft.iloc[:cutoff_idx]

    new_col = (model, new_var_tag, var_type, timestep, "FT")
    converted_data[new_col] = values_ft

    monthly_trend.setdefault(scenario, {})[f"{wba}:TOT"] = _trend_slope(values_ft)

    # Annual means by calendar year, then the same slope estimate per year.
    df_annual = values_ft.resample("YE").mean()
    df_annual.index = df_annual.index.year
    annual_trend.setdefault(scenario, {})[f"{wba}:TOT"] = _trend_slope(df_annual)

# print("annual_trend:")
# print(annual_trend)

# === Save processed monthly data ===
gw1_df_filtered = pd.concat(converted_data, axis=1)
gw1_df_filtered.columns = pd.MultiIndex.from_tuples(
    gw1_df_filtered.columns, names=["Model", "Variable", "Type", "Timestep", "Unit"]
)
gw1_df_filtered = gw1_df_filtered[gw1_df_filtered.index >= pd.Timestamp("1960-01-01")]
monthly_csv_path = os.path.join(GroupDataDirPath, f"GroundWater_DataMonthly.csv")
gw1_df_filtered.to_csv(monthly_csv_path)

# === Save processed annual data ===
annual_df = gw1_df_filtered.resample("YE").mean()
annual_df.index = annual_df.index.year
annual_df.columns = pd.MultiIndex.from_tuples(
    [(model, var, typ, "1YEAR", unit) for model, var, typ, _, unit in annual_df.columns],
    names=["Model", "Variable", "Type", "Timestep", "Unit"]
)
annual_csv_path = os.path.join(GroupDataDirPath, f"GroundWater_DataAnnual.csv")
annual_df.to_csv(annual_csv_path)

# === Save monthly trend slopes ===
monthly_df = pd.DataFrame(monthly_trend).T.sort_index()
monthly_df.columns.name = "Variable"
monthly_units = pd.DataFrame([["FT/MON"] * monthly_df.shape[1]], columns=monthly_df.columns)
monthly_combined = pd.concat([monthly_units, monthly_df], ignore_index=True)
monthly_combined.index = ["Unit"] + list(monthly_df.index)
monthly_trend_path = os.path.join(GroupDataDirPath, f"GroundWater_TrendMonthly.csv")
monthly_combined.to_csv(monthly_trend_path)

# === Save annual trend slopes ===
annual_df_trend = pd.DataFrame(annual_trend).T.sort_index()
annual_df_trend.columns.name = "Variable"

# print("annual_df_trend:")
# print(annual_df_trend)

annual_units = pd.DataFrame([["FT/YEAR"] * annual_df_trend.shape[1]], columns=annual_df_trend.columns)
annual_combined = pd.concat([annual_units, annual_df_trend], ignore_index=True)
annual_combined.index = ["Unit"] + list(annual_df_trend.index)
annual_trend_path = os.path.join(GroupDataDirPath, f"GroundWater_TrendAnnual.csv")
annual_combined.to_csv(annual_trend_path)

# === Print drop summary ===
print("\n--- Summary of Dropped Data by Scenario ---")
for scen, (start, end) in scenario_drops.items():
    print(f"Scenario {scen} dropped from {start} to {end}")

# === Calculate differences from baseline ===
baseline_scenario = "s0011"
diff_data = {}

for col in gw1_df_filtered.columns:
    model, var, typ, timestep, unit = col
    if baseline_scenario not in var:
        scenario_code = var.split("_")[-1]
        base_var = var.replace(f"_{scenario_code}", f"_{baseline_scenario}")
        baseline_col = (model, base_var, typ, timestep, unit)

        if baseline_col in gw1_df_filtered.columns:
            series_other = gw1_df_filtered[col]
            series_base = gw1_df_filtered[baseline_col]
            valid_index = series_other.dropna().index.intersection(series_base.dropna().index)
            if len(valid_index) > 0:
                avg_diff = (series_other.loc[valid_index] - series_base.loc[valid_index]).mean()
                diff_key = var.replace(f"_{scenario_code}", "")
                diff_data.setdefault(scenario_code, {})[diff_key] = avg_diff

# === Save AvgDiff output ===
diff_df = pd.DataFrame(diff_data).T.sort_index()
diff_df.columns.name = "Variable"

note_row = pd.DataFrame(
    [["Average difference: scenario X minus scenario s0011"] + [""] * (diff_df.shape[1] - 1)],
    columns=diff_df.columns
)
unit_row = pd.DataFrame([["FT"] * diff_df.shape[1]], columns=diff_df.columns)
diff_combined = pd.concat([note_row, unit_row, diff_df], ignore_index=True)
diff_combined.index = ["Note", "Unit"] + list(diff_df.index)

diff_output_path = os.path.join(GroupDataDirPath, f"GroundWater_AvgDiff.csv")
diff_combined.to_csv(diff_output_path)

print(f"\n✓ All files saved to:\n{GroupDataDirPath}")


# Plot histograms to find a natural break in the trends of baseline scenario

## Specify baseline index, quantiles and number of bins

In [None]:
baseline_index = "s0011"
lQuant = 0.05
hQuant = 0.4
nBins = 50

## Plot baseline trend histogram

In [None]:
# Select the row
baseline_data = annual_df_trend.loc[baseline_index]
# print("Baseline Data:")
# print(baseline_data)
# Plot histogram
bins = nBins
plt.figure(figsize=(10, 6))
counts, bin_edges, _ = plt.hist(baseline_data, bins=bins, edgecolor='black')# Add more x-axis ticks using bin edges
plt.xticks(np.round(bin_edges, 3), rotation=45)  # Round for readability
plt.title('Histogram of Annual Trends for ' + baseline_index)
plt.xlabel('Slope Value')
plt.ylabel('Frequency')
plt.grid(True)
plt.show()

## Plot clipped baseline trend histogram

In [None]:
# get quantiles
lVal, hVal = np.quantile(baseline_data.values, [lQuant, hQuant])

# clip data
clipped_data = baseline_data.values.clip(lVal, hVal)
# print("Clipped Data:")
# print(clipped_data)

# Plot histogram
bins = nBins
plt.figure(figsize=(10, 6))
counts, bin_edges, _ = plt.hist(clipped_data, bins=bins, edgecolor='black')# Add more x-axis ticks using bin edges
plt.xticks(np.round(bin_edges, 3), rotation=45)  # Round for readability
plt.title('Histogram of Annual Trends for ' + baseline_index + " (after clipping data)")
plt.xlabel('Slope Value')
plt.ylabel('Frequency')
plt.grid(True)
plt.show()

## Notes: Where to put the break? What quantiles to use to distinguish moderate from severe? Propose: -0.015

In [None]:
def find_calsim_base_path(start_path, sibling_name="CalSim3_Model_Runs"):
    """Return the ``sibling_name`` directory located two levels above ``start_path``.

    Parameters:
        start_path (str): Path to start from (e.g. the notebooks folder).
        sibling_name (str): Name of the directory expected in the grandparent
            of ``start_path``.

    Returns:
        str: Absolute path of the directory that was found.

    Raises:
        FileNotFoundError: If the grandparent holds no such directory.
    """
    two_levels_up = os.path.abspath(start_path)
    for _ in range(2):  # notebooks -> coeqwal -> DSP
        two_levels_up = os.path.dirname(two_levels_up)

    runs_dir = os.path.join(two_levels_up, sibling_name)
    if not os.path.isdir(runs_dir):
        raise FileNotFoundError(f"{sibling_name} not found under {two_levels_up}")
    return runs_dir

dsp_root = os.path.dirname(os.path.dirname(os.path.abspath(".")))
csv_path = os.path.join(
    dsp_root,
    "CalSim3_Model_Runs",
    "Scenarios",
    "Group_Data_Extraction",
    "GroundWater_DataMonthly.csv"
)

gw1_df_filtered = pd.read_csv(
    csv_path,
    header=[0, 1, 2, 3, 4],
    index_col=0,
    parse_dates=True
)

end_year_override = {"s0006", "s0007", "s0008", "s0009", "s0010"}
drop_threshold = 1000
start_year = 1960

plot_output_dir = os.path.join(
    dsp_root,
    "CalSim3_Model_Runs",
    "Scenarios",
    "Performance_Metrics",
    "Groundwater"
)
os.makedirs(plot_output_dir, exist_ok=True)

# For every "<WBA>_<scenario>" column: trim the series, fit a straight line
# against time in years, then plot the data with its trend line and save a PNG.
for col in gw1_df_filtered.columns:
    model, var, typ, timestep, unit = col
    # NOTE(review): only purely numeric WBA ids pass this filter, while the
    # WRESL parsing elsewhere in this notebook accepts alphanumeric ids and
    # "DETAW"; such columns are skipped here - confirm this is intended.
    if not re.match(r'^(WBA\d+)_s\d{4}$', var):
        continue

    wba_id, scenario = var.split("_")
    print("wba_id: " + str(wba_id) + ", scenario: " + scenario)
    ts = gw1_df_filtered[col]
    ts = ts[ts.index >= pd.Timestamp(f"{start_year}-01-01")].dropna()

    if scenario in end_year_override:
        # Scenarios in the override set are cut at the end of 2015.
        ts = ts[ts.index < pd.Timestamp("2016-01-01")]
        drop_year = 2015
    else:
        # Otherwise look for the first step-to-step fall larger than
        # drop_threshold and end the series there.
        diffs = ts.diff()
        drop_indices = diffs[diffs < -drop_threshold].index
        if not drop_indices.empty:
            cutoff_idx = drop_indices[0]
            # NOTE(review): `<=` keeps the first post-drop value in the fitted
            # data; `<` may have been intended - confirm.
            ts = ts[ts.index <= cutoff_idx]
            drop_year = cutoff_idx.year
        else:
            # No drop detected: label the plot with the hard-coded year 2021.
            drop_year = 2021

    if len(ts) < 2:
        continue  # skip if not enough data

    # Elapsed time since the first sample in years, so the slope is per year.
    x = (ts.index - ts.index[0]).days / 365.25
    y = ts.values
    slope, intercept = np.polyfit(x, y, 1)
    trend = slope * x + intercept

    plt.figure(figsize=(10, 4))
    plt.plot(ts.index, y, label="Observed", marker='o')
    plt.plot(ts.index, trend, linestyle='--', label=f"Trend (slope={slope:.6f})")
    plt.title(f"{wba_id} under {scenario} (end year: {drop_year})")
    plt.xlabel("Date")
    plt.ylabel("Groundwater Storage (FT)")
    plt.grid(True)
    plt.legend()
    plt.tight_layout()

    # One image per scenario/WBA pair; the figure is closed after display.
    save_name = f"{scenario}_{wba_id}.png"
    save_path = os.path.join(plot_output_dir, save_name)
    plt.savefig(save_path)
    plt.show() 
    plt.close()
    print(f"✓ Saved: {save_path}")


In [None]:
print(plot_output_dir)

In [None]:
target_variable = "SR10:TOT"

matching_cols = [col for col in gw1_df.columns
                 if isinstance(col, tuple) and col[1].startswith(target_variable)]

df_2015 = gw1_df[gw1_df.index.year == 2015]

result_df = df_2015[matching_cols]

result_df.columns = [f"{col[1]}" for col in result_df.columns]

print("Groundwater variable values for", target_variable, "in 2015:\n")
print(result_df)


## Tier Assignment

## Find start point

In [None]:
from collections import Counter
import pandas as pd
import numpy as np

start_date = pd.Timestamp("1990-01-01")
end_date = pd.Timestamp("2000-12-31")

min_date_records = []

for col in gw1_df_filtered.columns:
    model, var, typ, timestep, unit = col
    if not re.match(r'^WBA\d+_s\d{4}$', var):
        continue

    wba_id, scenario = var.split("_")
    ts = gw1_df_filtered[col]
    ts = ts[(ts.index >= start_date) & (ts.index <= end_date)].dropna()

    if not ts.empty:
        min_idx = ts.idxmin()
        min_date_records.append(min_idx)

date_counts = Counter(min_date_records)
mode_date, mode_count = date_counts.most_common(1)[0]

print(f"✓ Found {len(min_date_records)} minimum dates from 1990–2000.")
print(f" The most frequent minimum date (mode) is: {mode_date} ({mode_count} occurrences)")


### Function to calculate slope

In [None]:
def compute_scenario_slope_df(gw1_df_filtered, scenario, start_date=None, end_date=None):
    """
    Computes slope for each WBA under the specified scenario over an optional date range.

    The slope is fitted against the position (0, 1, 2, ...) of the non-NaN
    samples that remain after date filtering, i.e. it is expressed per sample.

    Parameters:
        gw1_df_filtered (pd.DataFrame): Time series data with 5-level MultiIndex
            columns (model, variable, type, timestep, unit) and a datetime index.
        scenario (str or int): Scenario number, e.g. '11', 11 or 's0011'.
        start_date (str or pd.Timestamp, optional): Start of range (inclusive).
        end_date (str or pd.Timestamp, optional): End of range (inclusive).

    Returns:
        pd.DataFrame: DataFrame with columns ['scenario', 'WBA', 'slope']; the
            columns are present even when no variable matches the scenario.
            'slope' is NaN where fewer than two samples remain.
    """
    # Normalise once: accept a bare number as well as the full "s0011" code.
    scenario_tag = f"s{int(str(scenario).strip().lstrip('sS')):04d}"
    result_records = []

    for col in gw1_df_filtered.columns:
        model, var, typ, timestep, unit = col
        if not var.endswith(scenario_tag):
            continue

        wba = var.replace(f"_{scenario_tag}", "")
        ts = gw1_df_filtered[col]

        # Filter by date range
        if start_date:
            ts = ts[ts.index >= pd.to_datetime(start_date)]
        if end_date:
            ts = ts[ts.index <= pd.to_datetime(end_date)]

        ts = ts.dropna()
        if len(ts) < 2:
            # A line cannot be fitted through fewer than two points.
            slope = np.nan
        else:
            slope, _ = np.polyfit(np.arange(len(ts)), ts.values, 1)

        result_records.append({
            "scenario": scenario_tag,
            "WBA": wba,
            "slope": slope
        })

    # Explicit columns keep the documented shape for an empty result.
    return pd.DataFrame(result_records, columns=["scenario", "WBA", "slope"])


In [None]:
# Compute slopes for scenario 11 from 1992-09-30 to the end of the record
# (only a start date is passed; no end date is applied).
slope_df = compute_scenario_slope_df(gw1_df_filtered, scenario="11", start_date="1992-09-30")
print(slope_df)


In [None]:
import os
import re
import numpy as np
import pandas as pd

# === Corrected function to find top-level CalSim3_Model_Runs ===
def find_calsim_base_path(start_path, target_folder="CalSim3_Model_Runs"):
    """Search the ancestors of ``start_path`` for a ``target_folder`` directory.

    Starting with the parent of ``start_path`` and moving towards the
    filesystem root, the first ancestor containing ``target_folder`` wins.

    Returns:
        str: Path of the directory that was found.

    Raises:
        FileNotFoundError: If the root is reached without finding it.
    """
    here = os.path.abspath(start_path)
    above = os.path.dirname(here)
    while True:
        hit = os.path.join(above, target_folder)
        if os.path.isdir(hit):
            return hit
        # dirname() of the root is the root itself: nothing left to climb.
        if above == here:
            raise FileNotFoundError(f"{target_folder} not found above {start_path}")
        here, above = above, os.path.dirname(above)

# === Main function ===
def _classify_tier(trend, diff, baseline_slope, severe_decline_threshold):
    """Return the tier (1-4) of one scenario/WBA pair, or NaN if an input is missing."""
    if pd.isna(trend) or pd.isna(diff) or pd.isna(baseline_slope):
        return np.nan
    if trend >= 0:
        return 1 if diff >= 0 else 2
    return 3 if trend >= severe_decline_threshold else 4


def assign_tiers(base_dir, severe_decline_threshold=-0.015):
    """Assign a groundwater tier to every scenario/WBA pair from annual trends.

    Reads ``GroundWater_TrendAnnual.csv`` and ``GroundWater_AvgDiff.csv`` from
    ``<CalSim3_Model_Runs>/Scenarios/Group_Data_Extraction`` and writes
    ``GroundwaterTier_Annual.csv`` to the same folder.

    Tiers:
        0   baseline scenario (s0011)
        1   trend >= 0 and average difference to baseline >= 0
        2   trend >= 0 and average difference to baseline < 0
        3   severe_decline_threshold <= trend < 0
        4   trend < severe_decline_threshold
        NaN trend, difference or baseline trend is missing

    Parameters:
        base_dir (str): Directory from which the CalSim3_Model_Runs folder is
            searched (via ``find_calsim_base_path``).
        severe_decline_threshold (float): Trend below which a decline counts as
            severe (tier 4).

    Returns:
        pd.DataFrame: Tier matrix indexed by scenario with one column per WBA.
            All entries stay NaN if the baseline scenario is not in the trends.
    """
    # Find CalSim3_Model_Runs path
    calsim_base_path = find_calsim_base_path(base_dir)

    # Set paths
    group_data_path = os.path.join(
        calsim_base_path, "Scenarios", "Group_Data_Extraction"
    )
    trend_annual_path = os.path.join(group_data_path, "GroundWater_TrendAnnual.csv")
    avg_diff_path = os.path.join(group_data_path, "GroundWater_AvgDiff.csv")
    tier_output_path = os.path.join(group_data_path, "GroundwaterTier_Annual.csv")

    # Load data
    trend_df = pd.read_csv(trend_annual_path, index_col=0)
    diff_df = pd.read_csv(avg_diff_path, index_col=0)

    # Drop the descriptive header rows and coerce the rest to numbers.
    trend_df = trend_df.drop(index=["Unit"], errors="ignore")
    diff_df = diff_df.drop(index=["Note", "Unit"], errors="ignore")

    trend_df = trend_df.apply(pd.to_numeric, errors="coerce")
    diff_df = diff_df.apply(pd.to_numeric, errors="coerce")

    # Bring both tables to the same column labels (plain WBA names).
    trend_df.columns = [col.replace(":TOT", "") for col in trend_df.columns]
    diff_df.columns = [col.strip() for col in diff_df.columns]

    trend_df.index.name = "Scenario"
    diff_df.index.name = "Scenario"

    # Tier assignment
    baseline_scenario = "s0011"
    wba_cols = trend_df.columns.tolist()
    tier_matrix = pd.DataFrame(index=trend_df.index, columns=wba_cols)

    if baseline_scenario in trend_df.index:
        for wba_col in wba_cols:
            baseline_slope = trend_df.loc[baseline_scenario, wba_col]

            for scenario in trend_df.index:
                if scenario == baseline_scenario:
                    tier_matrix.loc[scenario, wba_col] = 0
                    continue

                trend = trend_df.loc[scenario, wba_col]
                # A scenario can be missing from the difference table; treat
                # that as "no data" instead of failing with a KeyError.
                has_diff = wba_col in diff_df.columns and scenario in diff_df.index
                diff = diff_df.loc[scenario, wba_col] if has_diff else np.nan

                tier_matrix.loc[scenario, wba_col] = _classify_tier(
                    trend, diff, baseline_slope, severe_decline_threshold
                )

    # Save output
    tier_matrix.to_csv(tier_output_path)
    print(f"✓ Tier assignment saved to {tier_output_path}")
    print(f"✓ Used severe_decline_threshold = {severe_decline_threshold}")
    return tier_matrix

# === Run script directly ===
# Run the annual tier assignment once for the current working directory and
# keep the resulting matrix. Calling it both under a __main__ guard and again
# below it computed and wrote the same file twice when the cell was executed.
base_dir = os.path.abspath(".")
tier_df = assign_tiers(base_dir, severe_decline_threshold=-0.015)


In [None]:
import os
import re
import numpy as np
import pandas as pd

# === Corrected function to find top-level CalSim3_Model_Runs ===
def find_calsim_base_path(start_path, target_folder="CalSim3_Model_Runs"):
    """Search the ancestors of ``start_path`` for a ``target_folder`` directory.

    Starting with the parent of ``start_path`` and moving towards the
    filesystem root, the first ancestor containing ``target_folder`` wins.

    Returns:
        str: Path of the directory that was found.

    Raises:
        FileNotFoundError: If the root is reached without finding it.
    """
    here = os.path.abspath(start_path)
    above = os.path.dirname(here)
    while True:
        hit = os.path.join(above, target_folder)
        if os.path.isdir(hit):
            return hit
        # dirname() of the root is the root itself: nothing left to climb.
        if above == here:
            raise FileNotFoundError(f"{target_folder} not found above {start_path}")
        here, above = above, os.path.dirname(above)

# === Main function ===
def _classify_tier(trend, diff, baseline_slope, severe_decline_threshold):
    """Return the tier (1-4) of one scenario/WBA pair, or NaN if an input is missing."""
    if pd.isna(trend) or pd.isna(diff) or pd.isna(baseline_slope):
        return np.nan
    if trend >= 0:
        return 1 if diff >= 0 else 2
    return 3 if trend >= severe_decline_threshold else 4


def assign_monthly_tiers(base_dir, severe_decline_threshold=-0.015):
    """Assign a groundwater tier to every scenario/WBA pair from monthly trends.

    Reads ``GroundWater_TrendMonthly.csv`` and ``GroundWater_AvgDiff.csv`` from
    ``<CalSim3_Model_Runs>/Scenarios/Group_Data_Extraction`` and writes
    ``GroundwaterTier_Monthly.csv`` to the same folder.

    Tiers:
        0   baseline scenario (s0011)
        1   trend >= 0 and average difference to baseline >= 0
        2   trend >= 0 and average difference to baseline < 0
        3   severe_decline_threshold <= trend < 0
        4   trend < severe_decline_threshold
        NaN trend, difference or baseline trend is missing

    Parameters:
        base_dir (str): Directory from which the CalSim3_Model_Runs folder is
            searched (via ``find_calsim_base_path``).
        severe_decline_threshold (float): Trend below which a decline counts as
            severe (tier 4).
            NOTE(review): the default equals the one used for annual trends,
            although the monthly trend file is labelled FT/MON rather than
            FT/YEAR - confirm the threshold is meant to be the same.

    Returns:
        pd.DataFrame: Tier matrix indexed by scenario with one column per WBA.
            All entries stay NaN if the baseline scenario is not in the trends.
    """
    # Find CalSim3_Model_Runs path
    calsim_base_path = find_calsim_base_path(base_dir)

    # Set paths
    group_data_path = os.path.join(
        calsim_base_path, "Scenarios", "Group_Data_Extraction"
    )
    trend_monthly_path = os.path.join(group_data_path, "GroundWater_TrendMonthly.csv")
    avg_diff_path = os.path.join(group_data_path, "GroundWater_AvgDiff.csv")
    tier_output_path = os.path.join(group_data_path, "GroundwaterTier_Monthly.csv")

    # Load data
    trend_df = pd.read_csv(trend_monthly_path, index_col=0)
    diff_df = pd.read_csv(avg_diff_path, index_col=0)

    # Drop the descriptive header rows and coerce the rest to numbers.
    trend_df = trend_df.drop(index=["Unit"], errors="ignore")
    diff_df = diff_df.drop(index=["Note", "Unit"], errors="ignore")

    trend_df = trend_df.apply(pd.to_numeric, errors="coerce")
    diff_df = diff_df.apply(pd.to_numeric, errors="coerce")

    # Bring both tables to the same column labels (plain WBA names).
    trend_df.columns = [col.replace(":TOT", "") for col in trend_df.columns]
    diff_df.columns = [col.strip() for col in diff_df.columns]

    trend_df.index.name = "Scenario"
    diff_df.index.name = "Scenario"

    # Tier assignment
    baseline_scenario = "s0011"
    wba_cols = trend_df.columns.tolist()
    tier_matrix = pd.DataFrame(index=trend_df.index, columns=wba_cols)

    if baseline_scenario in trend_df.index:
        for wba_col in wba_cols:
            baseline_slope = trend_df.loc[baseline_scenario, wba_col]

            for scenario in trend_df.index:
                if scenario == baseline_scenario:
                    tier_matrix.loc[scenario, wba_col] = 0
                    continue

                trend = trend_df.loc[scenario, wba_col]
                # A scenario can be missing from the difference table; treat
                # that as "no data" instead of failing with a KeyError.
                has_diff = wba_col in diff_df.columns and scenario in diff_df.index
                diff = diff_df.loc[scenario, wba_col] if has_diff else np.nan

                tier_matrix.loc[scenario, wba_col] = _classify_tier(
                    trend, diff, baseline_slope, severe_decline_threshold
                )

    # Save output
    tier_matrix.to_csv(tier_output_path)
    print(f"✓ Monthly tier assignment saved to {tier_output_path}")
    print(f"✓ Used severe_decline_threshold = {severe_decline_threshold}")
    return tier_matrix

# === Run script directly ===
# Runs only when this code executes as the main program.
if __name__ == "__main__":
    base_dir = os.path.abspath(".")
    # The returned tier matrix is not kept here; the function itself writes it
    # to GroundwaterTier_Monthly.csv.
    assign_monthly_tiers(base_dir, severe_decline_threshold=-0.015)


## Trend Comparison

In [None]:
import os
import re
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# === Helper to find CalSim3_Model_Runs path ===
def find_calsim_base_path(start_path, sibling_name="CalSim3_Model_Runs"):
    """Find a directory named ``sibling_name`` beside ``start_path`` or an ancestor.

    Starting at ``start_path`` and moving up one level at a time, the
    directory containing the current level is checked for a sub-directory
    called ``sibling_name``. ``start_path`` itself is never searched.

    Parameters:
        start_path: Path at which the upward walk begins.
        sibling_name: Name of the directory to look for.

    Returns:
        The path of the first matching directory.

    Raises:
        FileNotFoundError: If the filesystem root is reached without a match.
    """
    here = os.path.abspath(start_path)
    above = os.path.dirname(here)
    while not os.path.isdir(os.path.join(above, sibling_name)):
        # dirname() of the root is the root itself, so this marks the top.
        if above == here:
            raise FileNotFoundError(f"{sibling_name} not found above {start_path}")
        here, above = above, os.path.dirname(above)
    return os.path.join(above, sibling_name)

# === Paths ===
# All inputs and outputs live under the CalSim3_Model_Runs folder that is
# located by walking up from the current working directory.
base_dir = os.path.abspath(os.curdir)
calsim_base_path = find_calsim_base_path(base_dir)

# Groundwater CSV to load
csv_path = os.path.join(
    calsim_base_path, "Scenarios", "Group_Data_Extraction", "GroundWater_DataMonthly.csv"
)

# Folder that receives the trendline plots (created if missing)
plot_output_dir = os.path.join(
    calsim_base_path, "Scenarios", "Performance_Metrics", "Groundwater"
)
os.makedirs(plot_output_dir, exist_ok=True)

# === Load data ===
# The first five rows form the column MultiIndex (unpacked later as
# model, variable, type, timestep, unit); the first column is parsed as dates.
gw1_df_filtered = pd.read_csv(
    csv_path, header=list(range(5)), index_col=0, parse_dates=True
)

# === Constants ===
baseline_scenario = "s0011"  # scenario every other run is compared against
end_year_override = {  # scenarios whose series are cut off before 2016-01-01
    "s0006",
    "s0007",
    "s0008",
    "s0009",
    "s0010",
}
drop_threshold = 1000  # a step-to-step fall larger than this ends the fitted window
start_year = 1960  # samples before January 1 of this year are ignored

# === Function ===
def _trim_series(series, scenario_code):
    """Cut one groundwater series down to the window used for the trend fit.

    Samples before January 1 of ``start_year`` and missing values are removed.
    Scenarios listed in ``end_year_override`` are then cut off before
    2016-01-01; every other series is cut at the first fall between
    consecutive samples that exceeds ``drop_threshold``.
    """
    window = series[series.index >= pd.Timestamp(f"{start_year}-01-01")].dropna()
    if scenario_code in end_year_override:
        return window[window.index < pd.Timestamp("2016-01-01")]

    steps = window.diff()
    drops = steps[steps < -drop_threshold].index
    if not drops.empty:
        # NOTE(review): "<=" keeps the sample on which the drop was detected
        # (i.e. the post-drop value) inside the fitted window - confirm that
        # this is intended rather than "<".
        window = window[window.index <= drops[0]]
    return window


def _fit_trend(series):
    """Least-squares line through ``series``; returns ``(slope, fitted values)``.

    The x axis is elapsed time since the first sample, in years of 365.25 days.
    """
    years = (series.index - series.index[0]).days / 365.25
    slope, intercept = np.polyfit(years, series.values, 1)
    return slope, slope * years + intercept


def plot_trendline_comparison(scenario_code, baseline_code=baseline_scenario, save_dir=None):
    """Plot linear trendlines of one scenario against the baseline, per WBA.

    Columns of ``gw1_df_filtered`` whose variable name looks like
    ``WBA<digits>_s<4 digits>`` are split into scenario and baseline series.
    For every WBA present in both, each series is trimmed, a straight line is
    fitted, and both lines are drawn in one subplot together with their slopes.

    Parameters:
        scenario_code: Scenario identifier to compare, e.g. ``"s0001"``.
        baseline_code: Scenario identifier used as the reference.
        save_dir: If given, the figure is also written there as
            ``<scenario>_vs_<baseline>_trendlines.png``.

    Returns:
        None. The figure is shown and then closed.
    """
    scenario_data = {}
    baseline_data = {}

    for col in gw1_df_filtered.columns:
        var = col[1]  # second header level carries "<WBA>_<scenario>"
        if not re.match(r'^(WBA\d+)_s\d{4}$', var):
            continue
        wba_id, scenario = var.split("_")
        if scenario == scenario_code:
            scenario_data[wba_id] = gw1_df_filtered[col]
        elif scenario == baseline_code:
            baseline_data[wba_id] = gw1_df_filtered[col]

    shared_wbas = sorted(set(scenario_data) & set(baseline_data))
    if not shared_wbas:
        print(f"No shared WBAs for {scenario_code} and {baseline_code}")
        return

    ncols = 3
    nrows = math.ceil(len(shared_wbas) / ncols)
    fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(5 * ncols, 3 * nrows))
    axes = axes.flatten()

    for ax, wba in zip(axes, shared_wbas):
        ts = _trim_series(scenario_data[wba], scenario_code)
        ts_base = _trim_series(baseline_data[wba], baseline_code)
        ax.set_title(wba)

        # A line needs two points; trimming can leave fewer, which used to
        # crash on ts.index[0] / np.polyfit and abort the whole figure.
        if len(ts) < 2 or len(ts_base) < 2:
            ax.text(0.5, 0.5, "insufficient data", transform=ax.transAxes,
                    fontsize=8, ha="center", va="center")
            print(f"Skipping {wba}: not enough data for {scenario_code} vs {baseline_code}")
            continue

        slope, trend = _fit_trend(ts)
        slope_base, trend_base = _fit_trend(ts_base)

        ax.plot(ts.index, trend, color="red", linestyle="--", label=f"{scenario_code} trend")
        ax.plot(ts_base.index, trend_base, color="black", linestyle="--", label=f"{baseline_code} trend")
        ax.set_xlim(pd.Timestamp("1960-01-01"), pd.Timestamp("2021-12-31"))
        ax.set_ylabel("FT")
        ax.grid(True)

        # Display slope values inside each subplot
        ax.text(0.01, 0.95, f"{scenario_code} slope: {slope:.6f}", transform=ax.transAxes, fontsize=8, color="red", verticalalignment='top')
        ax.text(0.01, 0.85, f"{baseline_code} slope: {slope_base:.6f}", transform=ax.transAxes, fontsize=8, color="black", verticalalignment='top')

        # Legend goes bottom right so it does not cover the slope text
        ax.legend(loc="lower right", fontsize=7, frameon=True)

    # Hide the grid cells left over when the WBA count is not a multiple of ncols
    for spare_ax in axes[len(shared_wbas):]:
        spare_ax.axis("off")

    fig.suptitle(f"Trendline Comparison: {scenario_code} vs {baseline_code}", fontsize=15)
    fig.tight_layout(rect=[0, 0.04, 1, 0.96])  # extra space for title

    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
        save_path = os.path.join(save_dir, f"{scenario_code}_vs_{baseline_code}_trendlines.png")
        fig.savefig(save_path, dpi=300)
        print(f"✓ Saved: {save_path}")

    plt.show()
    # Close explicitly on every path so repeated calls do not accumulate figures.
    plt.close(fig)

# === Call the function for each scenario ===
# Scenario numbers are zero-padded to four digits ("s0001", ...).
# 11 is left out because s0011 is the baseline itself.
# NOTE(review): 17 is also absent from this list - confirm that is intentional.
all_scenarios = [
    f"s{number:04d}"
    for number in (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 18, 19, 20, 21)
]
for sc in all_scenarios:
    plot_trendline_comparison(sc, save_dir=plot_output_dir)


In [None]:
import os

# Build the folder path with os.path.join instead of a hard-coded backslash:
# the result is still ".\shapefiles (1)" on Windows, but the cell also works
# on other platforms and matches how the later cells locate the shapefile.
folder_path = os.path.join(".", "shapefiles (1)")

# List every shapefile in the folder so the exact file name can be checked.
shp_files = [f for f in os.listdir(folder_path) if f.endswith(".shp")]
for f in shp_files:
    print(f)


In [None]:
pip install geopandas


In [None]:
import os
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

# === Load shapefile using relative path ===
# The shapefile folder is expected next to the notebook (current working directory).
notebook_dir = os.path.abspath(os.curdir)
shapefile_path = os.path.join(
    notebook_dir,
    "shapefiles (1)",
    "i12_CalSim3Model_WaterBudgetAreas_20221021.shp",
)
wba_shp = gpd.read_file(shapefile_path)

# Remove surrounding whitespace so WBA_ID values can be matched exactly
# against the tier table's column names further down.
wba_shp["WBA_ID"] = wba_shp["WBA_ID"].str.strip()

# === Locate CalSim3_Model_Runs directory ===
def find_calsim_base_path(start_path, target_folder="CalSim3_Model_Runs"):
    """Walk up from ``start_path`` until a folder named ``target_folder`` is found.

    At each level the parent directory is checked for a sub-directory called
    ``target_folder``; ``start_path`` itself is never searched.

    Parameters:
        start_path: Path at which the upward walk begins.
        target_folder: Name of the directory to look for.

    Returns:
        The path of the first matching directory.

    Raises:
        FileNotFoundError: If the filesystem root is reached without a match.
    """
    level = os.path.abspath(start_path)
    while True:
        parent = os.path.dirname(level)
        match = os.path.join(parent, target_folder)
        found = os.path.isdir(match)
        at_root = parent == level  # dirname() of the root is the root itself
        if not found and at_root:
            raise FileNotFoundError(f"{target_folder} not found above {start_path}")
        if found:
            return match
        level = parent

# === Resolve the tier output directory ===
# NOTE(review): no tier assignment CSV is actually read in this cell. The
# `tier_df` used below must already exist in the notebook namespace - confirm
# where it is created, or load it here.
base_dir = os.path.abspath(".")
calsim_base_path = find_calsim_base_path(base_dir)
tier_output_dir = os.path.join(
    calsim_base_path,
    "Scenarios",
    "Performance_Metrics",
    "Tiered_Outcome_Measures",
    "Groundwater"
)
# Create the folder up front (as the other plotting cells do) so that saving
# the maps cannot fail on a missing directory.
os.makedirs(tier_output_dir, exist_ok=True)


# === Fix WBA column names to match shapefile format ===
# "WBA<n>" becomes "<n>" zero-padded to two digits; for any other suffix the
# digits are collected and padded, followed by the letters it contains.
# Columns that do not start with "WBA" keep their name.
new_columns = {}
for col in tier_df.columns:
    if not col.startswith("WBA"):
        continue
    suffix = col[len("WBA"):]
    if suffix.isdigit():
        new_columns[col] = suffix.zfill(2)
        continue
    digits = "".join(ch for ch in suffix if ch.isdigit()).zfill(2)
    letter = "".join(ch for ch in suffix if ch.isalpha())
    new_columns[col] = digits + letter
tier_df.rename(columns=new_columns, inplace=True)

# === Define atlas-style muted tier colors ===
# Keys are the tier numbers 1-4, in the order the colours are listed.
tier_colors = dict(
    enumerate(
        (
            "#8FBBD9",  # tier 1: soft dusty blue
            "#B5CDA3",  # tier 2: olive green
            "#E6C27A",  # tier 3: warm khaki
            "#D97B6D",  # tier 4: soft brick red (worst)
        ),
        start=1,
    )
)


# === Vertical shifts for selected WBA labels ===
# Offset added to a label's y coordinate, keyed by WBA_ID; any WBA not listed
# here is labelled at its centroid without an offset.
label_shifts = {
    **dict.fromkeys(("17STOT", "71TOT", "22TOT"), 0.015),
    **dict.fromkeys(("50TOT", "21TOT", "12TOT"), -0.015),
}

# === Plot and save maps for each scenario (except baseline) ===
# One PNG per scenario row of tier_df: every WBA polygon is filled with the
# colour of the tier assigned to it and labelled with its WBA_ID.
for scenario in tier_df.index:
    if scenario == 's0011':
        continue

    # Map tier data to shapefile (WBAs without a matching column get NaN)
    tier_series = tier_df.loc[scenario]
    tier_map = wba_shp.copy()
    tier_map["Tier"] = tier_map["WBA_ID"].map(tier_series.to_dict())

    # Plot base: one layer per tier so each gets its own fill colour
    fig, ax = plt.subplots(figsize=(8, 10))
    for tier_val, color in tier_colors.items():
        subset = tier_map[tier_map["Tier"] == tier_val]
        if not subset.empty:
            subset.plot(
                ax=ax,
                color=color,
                edgecolor='black',
                linewidth=0.3,
                label=f"Tier {tier_val}"
            )

    # === Add WBA_ID labels at centroids with optional vertical shift ===
    # Only polygons that received a tier are labelled.
    tiered = tier_map[tier_map["Tier"].notna()]
    for wba_id, geom in zip(tiered["WBA_ID"], tiered.geometry):
        center = geom.centroid  # computed once per polygon
        shift = label_shifts.get(wba_id, 0)
        ax.text(center.x, center.y + shift, wba_id, fontsize=7, weight='bold', ha='center')

    ax.set_title(f"Groundwater Tiers for Scenario {scenario}", fontweight="bold")
    ax.axis("off")

    # Add legend (always lists all four tiers, even those absent from this map)
    legend_handles = [mpatches.Patch(color=color, label=f"Tier {tier}")
                      for tier, color in tier_colors.items()]
    ax.legend(handles=legend_handles, title="Tier", loc="lower left", frameon=True)

    # Save and show
    output_path = os.path.join(tier_output_dir, f"GroundwaterTiers_{scenario}.png")
    fig.tight_layout()
    fig.savefig(output_path, dpi=300)
    plt.show()
    # Release the figure so a long scenario list does not pile up open figures.
    plt.close(fig)

    print(f"✓ Saved map for {scenario} to: {output_path}")



In [None]:
import os
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from matplotlib.ticker import FixedLocator, FixedFormatter

def pad_index(idx):
    """Zero-pad a WBA label to two digits, e.g. ``"7N"`` -> ``"07N"``.

    Only strings made of one or two digits followed by optional upper-case
    letters are rewritten. Any other value, including non-strings, is
    returned unchanged.
    """
    import re

    if not isinstance(idx, str):
        return idx

    parsed = re.match(r'^(\d{1,2})([A-Z]*)$', idx)
    if parsed is None:
        return idx

    digits, letters = parsed.groups()
    return f"{int(digits):02d}{letters}"

# Load the WBA polygons from the shapefile folder next to the notebook.
notebook_dir = os.path.abspath(os.curdir)
shapefile_path = os.path.join(
    notebook_dir,
    "shapefiles (1)",
    "i12_CalSim3Model_WaterBudgetAreas_20221021.shp",
)
wba_shp = gpd.read_file(shapefile_path)

# Normalise the IDs (trimmed, upper case) so they compare equal to the
# cleaned baseline_data labels.
wba_ids = wba_shp["WBA_ID"].str.strip()
wba_shp["WBA_ID"] = wba_ids.str.upper()

# print (" Shapefile contents:")
# print(wba_shp)
# print("Baseline data:")
# print(baseline_data)


# === Bring baseline slope labels into the shapefile's WBA_ID format ===
# NOTE(review): `baseline_data` is not created in this cell; it is assumed to
# be a pandas object of baseline slopes indexed by WBA label - confirm.
# Strip the "WBA" prefix and any "TOT" suffix, drop leading zeros, then
# re-pad to two digits with pad_index so "WBA07:TOT" and "7" both become "07".
baseline_data.index = (
    baseline_data.index
    .str.replace("WBA", "", regex=False)
    .str.replace(":TOT", "", regex=False)
    .str.replace("TOT", "", regex=False)
    .str.lstrip("0")
    .str.strip()
    .str.upper()
)
baseline_data.index = baseline_data.index.map(pad_index)

# If no label matches the shapefile the map below would be silently blank,
# so say so explicitly.
intersect = set(wba_shp["WBA_ID"]) & set(baseline_data.index)
if not intersect:
    print("Warning: no WBA IDs are shared between the shapefile and baseline_data")

# Dense ranking yields consecutive integers 1..num_ranks even when slopes tie,
# which is what the colour normalisation below assumes; the default "average"
# ranking cast to int leaves gaps on ties. Missing slopes are dropped first
# because NaN ranks cannot be cast to int.
slope_rank = baseline_data.dropna().rank(method="dense").astype(int)
slope_map = wba_shp.copy()
slope_map["Slope"] = slope_map["WBA_ID"].map(baseline_data.squeeze().to_dict())
slope_map["SlopeRank"] = slope_map["WBA_ID"].map(slope_rank.squeeze().to_dict())


# One discrete colour per distinct rank, spanning rank 1 to the highest rank.
num_ranks = slope_rank.nunique()
cmap = plt.colormaps.get_cmap("coolwarm_r").resampled(num_ranks)
norm = mcolors.Normalize(vmin=1, vmax=num_ranks)

def find_calsim_base_path(start_path, target_folder="CalSim3_Model_Runs"):
    """Return the nearest ``target_folder`` that sits beside an ancestor of ``start_path``.

    The search climbs one directory at a time; at every step the directory
    above the current one is checked for a sub-directory named
    ``target_folder``. ``start_path`` itself is never searched.

    Parameters:
        start_path: Path at which the upward walk begins.
        target_folder: Name of the directory to look for.

    Returns:
        The path of the first matching directory.

    Raises:
        FileNotFoundError: If the filesystem root is reached without a match.
    """
    cursor = os.path.abspath(start_path)
    upper = os.path.dirname(cursor)
    while not os.path.isdir(os.path.join(upper, target_folder)):
        # Once dirname() stops changing the path we are at the root.
        if upper == cursor:
            raise FileNotFoundError(f"{target_folder} not found above {start_path}")
        cursor, upper = upper, os.path.dirname(upper)
    return os.path.join(upper, target_folder)

# Output folder for the slope-rank map, under the located CalSim3_Model_Runs
# directory; it is created when it does not exist yet.
base_dir = os.path.abspath(os.curdir)
calsim_base_path = find_calsim_base_path(base_dir)
output_dir = os.path.join(
    calsim_base_path, "Scenarios", "Performance_Metrics", "Tiered_Outcome_Measures", "Groundwater"
)
os.makedirs(output_dir, exist_ok=True)

# === Plot map ===
# Polygons are coloured by the rank of their baseline slope.
fig, ax = plt.subplots(figsize=(8, 10))
slope_map.plot(
    column="SlopeRank",
    cmap=cmap,
    linewidth=0.3,
    edgecolor='black',
    ax=ax,
    legend=False,
    norm=norm
)

# === Label WBAs ===
# Only polygons that received a rank are labelled. `label_shifts` comes from
# the tier-map cell and nudges selected labels vertically.
ranked = slope_map[slope_map["SlopeRank"].notna()]
for wba_id, geom in zip(ranked["WBA_ID"], ranked.geometry):
    center = geom.centroid  # computed once per polygon
    shift = label_shifts.get(wba_id, 0)
    ax.text(center.x, center.y + shift, wba_id, fontsize=7, weight='bold', ha='center')

# === Colorbar with slope values ===
sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
sm.set_array([])  # public API; replaces assignment to the private `_A` attribute
# Ticks sit at the rank positions but are labelled with the mean slope of that rank.
rank_to_slope = slope_map.dropna(subset=["Slope", "SlopeRank"]).groupby("SlopeRank")["Slope"].mean().sort_index()
tick_locs = list(rank_to_slope.index)
tick_labels = [f"{s:.6f}" for s in rank_to_slope.values]

cbar = fig.colorbar(sm, ax=ax, orientation="vertical", ticks=tick_locs)
cbar.set_label("Slope Value")
cbar.ax.yaxis.set_major_locator(FixedLocator(tick_locs))
cbar.ax.yaxis.set_major_formatter(FixedFormatter(tick_labels))

# === Finalize layout ===
ax.set_title("Slope Rank Map for Scenario s0011 (Baseline)", fontweight="bold")
ax.axis("off")
fig.tight_layout()

# === Save and show ===
output_path = os.path.join(output_dir, "SlopeRank_s0011.png")
fig.savefig(output_path, dpi=300)
plt.show()
plt.close(fig)  # release the figure once it has been saved and displayed

print(f" Saved slope rank map to: {output_path}")

In [None]:
# Final cell: prints a completion marker once execution reaches this point.
print("Done!")