In [None]:
pip install openpyxl

In [None]:
pip install plotly

In [None]:
pip install pandas

In [None]:
pip install seaborn

In [None]:
pip install dash

In [None]:
# Import standard libraries
import os
from contextlib import redirect_stdout

import sys
# append coeqwal packages to path
sys.path.append('./coeqwalpackage')

import numpy as np
import pandas as pd
import datetime as dt
import re
import plotly.graph_objects as go
import dash
from dash import html, dcc
from dash.dependencies import Input, Output

In [None]:
# Import custom libraries
# Note: on my computer the next import doesn't work the first time I call it, why? If I re-run the cell, then it is ok. MUST DEBUG
from coeqwalpackage.metrics import *
import cqwlutils as cu
import plotting as pu

In [None]:
# Control workbook and worksheet passed to cu.read_init_file.
CtrlFile = 'CalSim3DataExtractionInitFile_v4.xlsx'
CtrlTab = 'Init'
# The return value of cu.read_init_file is unpacked positionally into the names below,
# so their order must match what the helper returns.
# Later cells in this notebook use ConvertDataOutPath, DVDssNamesOutPath and GroupDataDirPath.
ScenarioListFile, ScenarioListTab, ScenarioListPath, DVDssNamesOutPath, SVDssNamesOutPath, ScenarioIndicesOutPath, DssDirsOutPath, VarListPath, VarListFile, VarListTab, VarOutPath, DataOutPath, ConvertDataOutPath, ExtractionSubPath, DemandDeliverySubPath, ModelSubPath, GroupDataDirPath, ScenarioDir, DVDssMin, DVDssMax, SVDssMin, SVDssMax, NameMin, NameMax, DirMin, DirMax, IndexMin, IndexMax, StartMin, StartMax, EndMin, EndMax, VarMin, VarMax, DemandFilePath, DemandFileName, DemandFileTab, DemMin, DemMax, InflowOutSubPath, InflowFilePath, InflowFileName, InflowFileTab, InflowMin, InflowMax = cu.read_init_file(CtrlFile, CtrlTab)

In [None]:
# Load the main data frame and the DSS names from the paths read out of the init file.
# read_in_df is not defined in this notebook; presumably it comes from the star import of coeqwalpackage.metrics.
df, dss_names = read_in_df(ConvertDataOutPath,DVDssNamesOutPath)

In [None]:
# Add the water-year information to the loaded frame.
# NOTE(review): this rebinds `df`, so re-running the cell feeds the already-augmented frame back into the helper.
df = add_water_year_column(df)

In [None]:
# Output folders for metrics and plots, both placed under the group data directory.
# exist_ok=True makes directory creation one idempotent call, so there is no gap
# between an existence check and the makedirs call and the cell can be re-run freely.
metrics_path = GroupDataDirPath + "/metrics_output"
os.makedirs(metrics_path, exist_ok=True)

plots_path = GroupDataDirPath + "/plots_output"
os.makedirs(plots_path, exist_ok=True)
    

In [None]:
# Years flagged as drought years, grouped one decade per row for easier scanning.
drought_wys = [
    1924, 1925, 1926, 1929,
    1930, 1931, 1932, 1933, 1934, 1939,
    1944, 1945, 1947, 1948, 1949,
    1950, 1955,
    1960, 1961, 1962, 1964,
    1976, 1977, 1979,
    1981, 1987, 1988, 1989,
    1990, 1991, 1992, 1994,
    2001, 2008, 2009,
    2013, 2014, 2015,
    2020, 2021,
]

In [None]:
def get_scenario_styles(studies):
    """
    Build the per-scenario line styling used when comparing studies.

    Parameters
    ----------
    studies : list or tuple
        Scenario identifiers. The integer pairs (2, 11), (11, 12) and (11, 13)
        have hand-tuned presets keyed by "sNNNN" strings; any other input gets
        a generated style per element, keyed and labelled by the element itself.

    Returns
    -------
    dict
        Maps a scenario key to a dict with 'color', 'linestyle' and 'label'.
    """
    # Presets are rebuilt on every call so callers always receive a fresh dict.
    known_comparisons = {
        (2, 11): {
            "s0002": {'color': 'black', 'linestyle': 'solid', 'label': 's0002 Baseline'},
            "s0011": {'color': 'red', 'linestyle': 'dash', 'label': 's0011 TUCP'},
        },
        (11, 12): {
            "s0011": {'color': 'black', 'linestyle': 'solid', 'label': 's0011 Baseline (TUCP)'},
            "s0012": {'color': 'red', 'linestyle': 'dash', 'label': 's0012 SGMA'},
        },
        (11, 13): {
            "s0011": {'color': 'black', 'linestyle': 'solid', 'label': 's0011 Baseline (TUCP)'},
            "s0013": {'color': 'red', 'linestyle': 'dot', 'label': 's0013 Future Baseline'},
        },
    }
    preset = known_comparisons.get(tuple(studies))
    if preset is not None:
        return preset

    # Fallback: cycle through a fixed palette and a fixed set of Plotly dash names.
    palette = ('black', 'red', 'blue', 'green', 'orange')
    dash_names = ('solid', 'dash', 'dot', 'longdash', 'dashdot')
    return {
        study: {
            'color': palette[position % len(palette)],
            'linestyle': dash_names[position % len(dash_names)],
            'label': study,
        }
        for position, study in enumerate(studies)
    }


In [None]:
# Variable-name stems of interest.
# NOTE: the dashboard cell further down rebinds `variables` to names parsed from the data columns.
variables = [
    "NOD_STORAGE_",
    "DEL_NOD_AG_",
    "DEL_NOD_MI_",
    "C_SAC041_",
    "C_SJR070_",
    "NDO_",
    "X2_PRV_KM_",
    "TOTAL_EXPORTS_",
    "DEL_SOD_AG_",
    "SOD_STORAGE_",
    "DEL_SOD_MI_",
]

# Scenario-number pairs to compare; each pair has a styling preset in get_scenario_styles.
scenario_comps = [
    [2, 11],
    [11, 12],
    [11, 13],
]

In [None]:
# Flatten tuple column keys into single underscore-joined strings.
# Only tuple keys are joined: if this cell is re-run after the columns are already
# flat strings, the names are left untouched instead of being split into characters
# and re-joined with underscores.
df.columns = [
    '_'.join(map(str, col)) if isinstance(col, tuple) else col
    for col in df.columns
]

# Quick look at the first few flattened names.
print(df.columns.tolist()[:20])


In [None]:
# Collect every scenario id (an "s" followed by four or more digits) found in the column names.
original_columns = df.columns.tolist()

s_numbers = {
    token
    for column_name in original_columns
    for token in re.findall(r's\d{4,}', column_name)
}

# Sorted list of the unique ids
s_numbers_list = sorted(s_numbers)

print(s_numbers_list)

## Interactive Dashboard

### Time Series
Shows how the selected variable changes over time for each scenario.

### Monthly-of-Year
Displays the monthly average for a selected year.

### Single Exceedance
Shows the probability that a value will be equaled or exceeded.

### Annual Exceedance
Shows how often the selected month's total value exceeds a given threshold across all years. Each year's data for the chosen month is summed (e.g., total flow in April each year), and the annual values are ranked from highest to lowest. From these ranks, exceedance probabilities are calculated to show how frequently high values occur.

### Month-of-Year Avg
Averages each calendar month across all years, optionally filtered by Water Year Type. Water Year Types classify each year based on how wet or dry it was. The scale ranges from 1 (wettest) to 5 (driest).

In [None]:
import re
import numpy as np
import pandas as pd
import plotly.graph_objs as go
import dash
from dash import dcc, html
from dash.dependencies import Input, Output

# --- Scenario discovery (same scan as the earlier preprocessing cell) ---
original_columns = df.columns.tolist()

s_numbers = {
    token
    for column_name in original_columns
    for token in re.findall(r's\d{4,}', column_name)
}

scenarios = sorted(s_numbers)

# --- Extract variables and units ---
# The variable name is the text between "CALSIM_" and the "_sNNNN" scenario tag;
# the unit is taken to be the last underscore-separated token of the column name.
pattern_var = re.compile(r'CALSIM_(.*?)_s\d{4}')
variable_units = {}

for column_name in df.columns:
    found = pattern_var.search(column_name)
    if found is None:
        continue
    # The first column seen for a variable decides its unit.
    variable_units.setdefault(found.group(1), column_name.split('_')[-1])

variables = sorted(variable_units)

# --- Dropdown options with units ---
variable_labels = [
    {"label": f"{name} ({variable_units.get(name, 'unknown unit')})", "value": name}
    for name in variables
]

# Preselect the first scenario when there is one.
default_scenario = scenarios[:1]

# --- Prepare water year DataFrame ---
def add_water_year_column(df):
    """
    Return a copy of ``df``, sorted by index, with a ``WaterYear`` column added.

    Rows dated October through December are assigned to the following calendar
    year; all other months keep their calendar year.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index can be converted with ``pd.to_datetime``.

    Returns
    -------
    pandas.DataFrame
        A new frame; the input is not modified.

    Notes
    -----
    NOTE(review): an ``add_water_year_column`` is also called earlier in the
    notebook, presumably from the star import; this definition replaces that
    name from here on.
    """
    df_copy = df.copy().sort_index()
    # Work from the index directly rather than through scratch Date/Year/Month
    # columns, so caller columns that already use those names are neither
    # overwritten nor dropped.
    stamps = pd.to_datetime(df_copy.index)
    df_copy['WaterYear'] = np.where(stamps.month >= 10, stamps.year + 1, stamps.year)
    return df_copy

# Copy of the data with a WaterYear column, used by the water-year-type filter.
water_year_df = add_water_year_column(df)

# Calendar years present in the index, as native ints so they compare and
# serialize like the plain ints the dropdown hands back to the callback.
years_in_data = sorted(int(y) for y in df.index.year.unique())

# Reuse the drought list defined earlier in the notebook instead of keeping a
# second hand-typed copy that could drift out of sync.
# NOTE(review): the list name suggests water years, while the dropdown lists calendar years — confirm.
drought_years = set(drought_wys)

# Drought years get a "(Drought)" suffix; other years are shown without a trailing space.
year_options = [
    {"label": f"{y} (Drought)" if y in drought_years else str(y), "value": y}
    for y in years_in_data
]
water_year_type_options = [{"label": str(i), "value": i} for i in range(1, 6)]
month_options = [{"label": "April", "value": 4}, {"label": "September", "value": 9}]

# Help text shown under the plot-type dropdown, keyed by the dropdown value.
plot_type_descriptions = {
    "time_series": "Shows how the selected variable changes over time for each scenario.",
    "monthly": "Displays the monthly average for a selected year.",
    "single_exceedance": "Shows the probability that a value will be equaled or exceeded.",
    "annual_exceedance": "Shows how often the selected month's total value exceeds a given threshold across all years.",
    "month_of_year_avg": "Averages each calendar month across all years, optionally filtered by Water Year Type."
}

def find_col(df, variable, scenario):
    """
    Return the name of the column holding ``variable`` for ``scenario``.

    A column whose name contains exactly ``CALSIM_<variable>_<scenario>`` (with
    no further digit after the scenario id) is preferred. This stops a variable
    whose name is a substring of another variable's name from picking up the
    wrong column. If no such column exists, the first column that merely
    contains both strings is returned, as before.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose column names are searched.
    variable : str
        Variable name as it appears between "CALSIM_" and the scenario tag.
    scenario : str
        Scenario id such as "s0002".

    Returns
    -------
    The matching column name, or None when nothing matches.
    """
    exact = re.compile(rf"CALSIM_{re.escape(variable)}_{re.escape(scenario)}(?!\d)")
    first_loose_match = None
    for col in df.columns:
        if isinstance(col, str) and exact.search(col):
            return col
        # Remember the first substring-only hit as the backward-compatible fallback.
        if first_loose_match is None and variable in col and scenario in col:
            first_loose_match = col
    return first_loose_match

def find_wyt_col(df, scenario):
    """
    Return the first column of ``df`` that holds the water-year-type series for ``scenario``.

    A column qualifies when its name contains both ``CALSIM_WYT_SAC__<scenario>``
    and ``WATERYEARTYPE``. Returns None when no column qualifies.
    """
    scenario_marker = f"CALSIM_WYT_SAC__{scenario}"
    for name in df.columns:
        if scenario_marker in name and "WATERYEARTYPE" in name:
            return name
    return None

def get_colors(scenarios):
    """
    Assign each scenario a color from a fixed ten-color list, wrapping around when the list runs out.

    Returns a dict mapping each scenario to its color name.
    """
    base = ("red", "blue", "green", "orange", "purple", "brown", "cyan", "magenta", "gray", "black")
    assigned = {}
    for position, scenario_name in enumerate(scenarios):
        assigned[scenario_name] = base[position % len(base)]
    return assigned

def get_line_styles():
    """Return the dash-style names used to tell variables apart, as a new list on each call."""
    dash_names = "solid dash dot dashdot longdash longdashdot"
    return dash_names.split()

def filter_by_wyt_annual(df_col, scenario, wyt_list, month=5):
    """
    Keep only the rows of ``df_col`` whose water year has one of the requested water-year types.

    The type of each water year is read from the scenario's water-year-type
    column in the notebook-level ``df``, using the first row of calendar month
    ``month`` within that water year. ``df_col`` is returned unchanged when
    ``wyt_list`` is empty or when the scenario has no water-year-type column.

    Parameters
    ----------
    df_col : pandas.DataFrame
        Data to filter; its index is aligned against ``water_year_df``.
    scenario : str
        Scenario id used to locate the water-year-type column.
    wyt_list : list
        Water-year-type values to keep.
    month : int, default 5
        Calendar month whose value represents the water year's type.
    """
    if not wyt_list:
        return df_col

    wyt_col = find_wyt_col(df, scenario)
    if wyt_col is None:
        return df_col

    # Water-year-type series alongside each row's water year and calendar month.
    lookup = df[[wyt_col]].assign(
        WaterYear=water_year_df['WaterYear'],
        Month=df.index.month,
    )
    in_reference_month = lookup.loc[lookup['Month'] == month]
    type_per_year = in_reference_month.groupby('WaterYear').first()
    selected_years = type_per_year.index[type_per_year[wyt_col].isin(wyt_list)]

    # Tag the data with its water year, filter on it, then remove the tag again.
    tagged = df_col.copy()
    tagged['WaterYear'] = water_year_df['WaterYear']
    keep_rows = tagged['WaterYear'].isin(selected_years)
    return tagged[keep_rows].drop(columns='WaterYear')

# --- Dash Layout & Callback ---
# --- Dash application and page layout ---
app = dash.Dash(__name__)


def _hidden_section(container_id, caption, control):
    """Wrap a labelled control in a Div that starts hidden; the callback toggles its style."""
    return html.Div(
        [html.Label(caption), control],
        id=container_id,
        style={"display": "none"},
    )


scenario_options = [{"label": s, "value": s} for s in scenarios]

plot_type_options = [
    {"label": "Time Series", "value": "time_series"},
    {"label": "Monthly-of-Year", "value": "monthly"},
    {"label": "Month-of-Year Avg", "value": "month_of_year_avg"},
    {"label": "Single Exceedance", "value": "single_exceedance"},
    {"label": "Annual Exceedance", "value": "annual_exceedance"},
]

# Controls that are always visible.
variable_picker = dcc.Dropdown(
    id="variable-dropdown",
    options=variable_labels,
    value=variables[:1],
    multi=True,
)
scenario_picker = dcc.Dropdown(
    id="scenario-dropdown",
    options=scenario_options,
    value=default_scenario,
    multi=True,
)
plot_type_picker = dcc.Dropdown(
    id="plot-type-dropdown",
    options=plot_type_options,
    value="time_series",
)
description_box = html.Div(
    id="plot-type-description",
    style={"marginTop": "10px", "fontStyle": "italic", "color": "#555"},
)

# Controls that only apply to one plot type each.
year_section = _hidden_section(
    "year-container",
    "Select Year (for Monthly Plot)",
    dcc.Dropdown(
        id="year-dropdown",
        options=year_options,
        value=years_in_data[0] if years_in_data else None,
    ),
)
wyt_section = _hidden_section(
    "wyt-container",
    "Select Water Year Type (1-5)",
    dcc.Dropdown(id="wyt-dropdown", options=water_year_type_options, value=[], multi=True),
)
month_section = _hidden_section(
    "month-container",
    "Select Month for Annual Exceedance",
    dcc.Dropdown(id="month-dropdown", options=month_options, value=9),
)

app.layout = html.Div([
    html.H2("Water Data Dashboard", style={'textAlign': 'center'}),
    html.Label("Select Variables (must have same unit)"),
    variable_picker,
    html.Label("Select Scenarios"),
    scenario_picker,
    html.Label("Select Plot Type"),
    plot_type_picker,
    description_box,
    year_section,
    wyt_section,
    month_section,
    dcc.Graph(id="dynamic-plot"),
])

@app.callback(
    [Output("dynamic-plot", "figure"),
     Output("year-container", "style"),
     Output("wyt-container", "style"),
     Output("month-container", "style"),
     Output("plot-type-description", "children")],
    [Input("variable-dropdown", "value"),
     Input("scenario-dropdown", "value"),
     Input("plot-type-dropdown", "value"),
     Input("year-dropdown", "value"),
     Input("wyt-dropdown", "value"),
     Input("month-dropdown", "value")]
)
def update_plot(vars_selected, scenarios, plot_type, year, wyt, month):
    """
    Rebuild the figure and the visibility of the optional controls after any dropdown change.

    Parameters
    ----------
    vars_selected : list
        Variable names chosen in the variable dropdown.
    scenarios : list
        Scenario ids chosen in the scenario dropdown.
    plot_type : str
        One of "time_series", "monthly", "month_of_year_avg",
        "single_exceedance" or "annual_exceedance"; any other value is drawn
        as a time series.
    year : int
        Calendar year used by the "monthly" plot.
    wyt : list
        Water-year types used to filter the "month_of_year_avg" plot.
    month : int
        Calendar month used by the "annual_exceedance" plot.

    Returns
    -------
    tuple
        (figure, year-container style, wyt-container style,
        month-container style, description text), in the order of the
        callback outputs.
    """
    # Each optional control is shown only for the plot type that uses it.
    show_year = {"display": "block"} if plot_type == "monthly" else {"display": "none"}
    show_wyt = {"display": "block"} if plot_type == "month_of_year_avg" else {"display": "none"}
    show_month = {"display": "block"} if plot_type == "annual_exceedance" else {"display": "none"}
    description = plot_type_descriptions.get(plot_type, "")

    # Nothing selected: return an empty figure but still update the control visibility.
    if not scenarios or not vars_selected:
        return go.Figure(), show_year, show_wyt, show_month, description

    # All selected variables share one y-axis, so they must share one unit.
    selected_units = {variable_units.get(v, "unknown unit") for v in vars_selected}
    if len(selected_units) > 1:
        fig = go.Figure()
        fig.update_layout(
            title="Unit Mismatch Detected",
            annotations=[{
                "text": "Selected variables have different units. Please select variables with the same unit.",
                "xref": "paper", "yref": "paper",
                "showarrow": False, "font": {"size": 16}
            }]
        )
        return fig, show_year, show_wyt, show_month, description

    fig = go.Figure()
    line_styles = get_line_styles()
    colors = get_colors(scenarios)

    # Color identifies the scenario; dash style identifies the variable.
    for idx, v in enumerate(vars_selected):
        style = line_styles[idx % len(line_styles)]
        for s in scenarios:
            col = find_col(df, v, s)
            if not col:
                continue  # this variable/scenario combination has no column
            df_copy = df[[col]].copy()
            if plot_type == "monthly":
                # Mean per calendar month within the selected calendar year.
                df_copy['Year'] = df_copy.index.year
                df_copy['Month'] = df_copy.index.month
                df_copy = df_copy[df_copy['Year'] == year]
                monthly_avg = df_copy.groupby('Month')[col].mean()
                fig.add_trace(go.Scatter(
                    x=monthly_avg.index, y=monthly_avg.values,
                    mode='lines+markers', name=f"{s} - {v}",
                    line=dict(color=colors[s], dash=style)
                ))
            elif plot_type == "month_of_year_avg":
                # Mean per calendar month over all years, optionally limited to the chosen water-year types.
                df_copy = water_year_df[[col]].copy()
                df_copy = filter_by_wyt_annual(df_copy, s, wyt, month=5)
                df_copy['Month'] = df_copy.index.month
                monthly_avg = df_copy.groupby('Month')[col].mean()
                fig.add_trace(go.Scatter(
                    x=monthly_avg.index, y=monthly_avg.values,
                    mode='lines', name=f"{s} - {v}",
                    line=dict(color=colors[s], dash=style)
                ))
            elif plot_type == "single_exceedance":
                # Values sorted high to low, plotted against rank / (n + 1).
                series = df_copy[col].dropna().sort_values(ascending=False)
                exceedance_probs = np.arange(1, len(series)+1) / (len(series)+1)
                fig.add_trace(go.Scatter(
                    x=exceedance_probs, y=series.values,
                    mode='lines', name=f"{s} - {v}",
                    line=dict(color=colors[s], dash=style)
                ))
            elif plot_type == "annual_exceedance":
                # One value per year (the selected month), ranked high to low.
                df_copy = df_copy[df_copy.index.month == month]
                annual_sum = df_copy.resample("YE").sum(min_count=1)
                sorted_vals = annual_sum[col].dropna().sort_values(ascending=False)
                exceed_probs = sorted_vals.rank(method="first", ascending=False) / (1 + len(sorted_vals))
                fig.add_trace(go.Scatter(
                    x=exceed_probs, y=sorted_vals,
                    mode='lines', name=f"{s} - {v}",
                    line=dict(color=colors[s], dash=style)
                ))
            else:  # time_series
                fig.add_trace(go.Scatter(
                    x=df.index, y=df_copy[col],
                    mode='lines', name=f"{s} - {v}",
                    line=dict(color=colors[s], dash=style)
                ))

    # The two month-based plots use month numbers on the x-axis, so only the
    # time series is labelled "Date".
    if plot_type == "time_series":
        x_title = "Date"
    elif plot_type in ("monthly", "month_of_year_avg"):
        x_title = "Month"
    else:
        x_title = "Exceedance Probability"

    # Exactly one unit remains in the set after the mismatch check above.
    unit = selected_units.pop()
    fig.update_layout(
        title=f"{plot_type.replace('_', ' ').title()} Plot for Selected Variables",
        xaxis_title=x_title,
        yaxis_title=f"Value ({unit})"
    )
    return fig, show_year, show_wyt, show_month, description

# Start the Dash server when this code runs as the main module.
if __name__ == '__main__':
    # debug=True is passed straight to app.run to enable Dash's debug mode.
    app.run(debug=True)

