## Importing packages

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path

## Reading files for each site

In [None]:
## change working directory to access files
# Leave data_path empty to stay in the current directory: os.chdir('') raises
# FileNotFoundError, so the change is only attempted when a path is given.
data_path = ''
if data_path:
    os.chdir(data_path)
cwd = os.getcwd()
# The directory is interpolated into the message (the original passed the
# result of format(cwd) as a second print argument, leaving '{}' unfilled).
print(f'Current Working Directory: {cwd}')


## Loading CSV

### Upstream water level

In [None]:
# 1) describe each file in one place as tuples
#    Every entry is a 4-tuple (name, file, date_fmt, value_col); the entries
#    are unpacked in exactly that order when the series are loaded below.
#    The list is empty as written: the row below is a commented-out example.
SPEC = [
    # name        file                        date_fmt              value_col
    # ("USL_a",  "USL_VAL.csv",      "%d/%m/%Y %H:%M",     "Event Value"),
]

# 2) one small loader that returns a Series indexed by datetime
def load_series(path, date_fmt, value_col, date_col="Date/time", skiprows=2):
    """
    Read one CSV file and return its value column as a time-indexed Series.

    path: CSV file to read; the file stem becomes the Series name
    date_fmt: strptime-style format used to parse `date_col`
    value_col: name of the column holding the values
    date_col: name of the column holding the timestamps
    skiprows: number of leading rows skipped before the header row
    Returns a pd.Series sorted by its datetime index.
    Parsing uses errors="raise", so a timestamp that does not match
    `date_fmt` raises instead of becoming NaT.
    """
    frame = pd.read_csv(path, skiprows=skiprows)
    timestamps = pd.to_datetime(
        frame[date_col],
        format=date_fmt,
        dayfirst=True,
        errors="raise",
    )
    values = frame[value_col].to_numpy()
    series = pd.Series(values, index=timestamps, name=Path(path).stem)
    return series.sort_index()

# 3) build the {name: Series} lookup, one load_series call per SPEC entry
ser = {
    key: load_series(csv_file, date_fmt, column)
    for key, csv_file, date_fmt, column in SPEC
}

# 4) post-processing, e.g. apply site elevation offsets

# 5) draw only the upstream water level series on a single figure
fig, ax = plt.subplots(figsize=(10, 6))
for name in ["USL_a"]:
    ax.plot(ser[name].index, ser[name].values, marker="o", markersize=1, label=name)

ax.set_title("Upstream water level")
ax.set_xlabel("Date/Time")
ax.set_ylabel("Value")
ax.legend()
ax.grid(True)
plt.show()

### Flow Comparison

In [None]:
def plot_group(ser_dict, key_prefix="FLOW_", title="Flow comparison", resample=None, smooth=None):
    """
    Plot, on one figure, every series whose key starts with `key_prefix`.

    ser_dict: dict[str, pd.Series] (datetime index)
    key_prefix: keys are matched with str.startswith and drawn in sorted order
    title: figure title
    resample: optional pandas offset alias (e.g. 'D'); each bin is averaged
    smooth: optional rolling window size (e.g. 5); rolling mean, min_periods=1
    Resampling, when requested, is applied before smoothing.
    """
    fig, ax = plt.subplots(figsize=(10, 6))

    matching = sorted(name for name in ser_dict if name.startswith(key_prefix))
    for name in matching:
        series = ser_dict[name]
        if resample:
            series = series.resample(resample).mean()
        if smooth:
            series = series.rolling(smooth, min_periods=1).mean()
        ax.plot(series.index, series.values, marker='o', markersize=1, label=name)

    ax.set_title(title)
    ax.set_xlabel("Date/Time")
    ax.set_ylabel("Value")
    ax.legend()
    ax.grid(True)
    plt.show()

# draw every FLOW_* series from the loaded data on one figure
plot_group(ser, "FLOW_", "Flow comparison")

### Downstream water level

In [None]:
def plot_selected(ser_dict, names, title, resample=None, smooth=None, ylabel="Value"):
    """
    Plot an explicit list of series from `ser_dict` on one figure.

    ser_dict: mapping of key -> series with a datetime index
    names: keys to plot, drawn in the order given
    title: figure title
    resample: optional pandas offset alias (e.g. 'D'); each bin is averaged
    smooth: optional rolling window (int); rolling mean with min_periods=1
    ylabel: y-axis label
    Resampling, when requested, is applied before smoothing.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    for name in names:
        series = ser_dict[name]
        if resample:
            series = series.resample(resample).mean()
        if smooth:
            series = series.rolling(smooth, min_periods=1).mean()
        ax.plot(series.index, series.values, marker="o", markersize=1, label=name)
    ax.set_title(title)
    ax.set_xlabel("Date/Time")
    ax.set_ylabel(ylabel)
    ax.legend()
    ax.grid(True)
    plt.show()

# downstream water level figure (single series)
plot_selected(ser_dict=ser, names=["DSL_a"], title="Downstream water level")

## Zoomed

In [None]:
def plot_zoom(ser_dict, names, title, start=None, end=None, ylabel="Value"):
    """
    Plot the listed series, optionally restricted to a date window.

    ser_dict: mapping of key -> series with a datetime index
    names: keys to plot, drawn in the order given
    title: figure title
    start, end: optional bounds used as a label slice (.loc[start:end]);
                the slice is applied when at least one of them is truthy
    ylabel: y-axis label
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    for name in names:
        series = ser_dict[name]
        window = series.loc[start:end] if (start or end) else series
        ax.plot(window.index, window.values, marker="o", markersize=1, label=name)
    ax.set_title(title)
    ax.set_xlabel("Date/Time")
    ax.set_ylabel(ylabel)
    ax.legend()
    ax.grid(True)
    plt.show()

# zoomed upstream water level (arguments: data, keys, title, start, end)
plot_zoom(ser, ["USL_a"], "Upstream water level", "2023-12-10", "2024-01-25")


In [None]:
# zoomed flow series (arguments: data, keys, title, start, end)
plot_zoom(ser, ["FLOW_a"], "Flow comparison", "2023-12-10", "2024-01-25")


In [None]:
# zoomed downstream water level (arguments: data, keys, title, start, end)
plot_zoom(ser, ["DSL_a"], "Downstream water level", "2023-12-10", "2024-01-25")

## Linear Interpolation

In [57]:
def resample_interp(df, value_col="Event Value", rule="1min", method="linear"):
    """
    Resample to a regular grid (1-minute by default) and interpolate the gaps.

    df: DataFrame with a datetime index
    value_col: not used by the computation (every column is resampled);
               kept in the signature so existing calls keep working
    rule: pandas offset alias for the target grid. The legacy minute alias
          "T" (e.g. "T", "1T", "15T") is translated to "min" before use,
          because pandas 2.2 deprecated it and emits a FutureWarning.
    method: interpolation method passed to DataFrame.interpolate
    Returns a DataFrame with the same columns on the new index: each bin holds
    the mean of its samples, and empty bins are filled by interpolation.
    """
    if isinstance(rule, str) and rule.endswith("T"):
        multiple = rule[:-1]
        # Only rewrite a bare "T" or "<digits>T"; anything else is left for
        # pandas to interpret (or reject) unchanged.
        if not multiple or multiple.isdigit():
            rule = f"{multiple}min"
    out = df.resample(rule).mean()
    return out.interpolate(method=method)

def plot_series(named_series, title, ylabel="Value"):
    """
    Draw several already-prepared series on one figure.

    named_series: iterable of (label, series) pairs; each series is plotted
                  against its own index and labelled with `label`
    title: figure title
    ylabel: y-axis label
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    for legend_label, series in named_series:
        ax.plot(series.index, series.values, marker="o", markersize=1, label=legend_label)
    ax.set_title(title)
    ax.set_xlabel("Date/Time")
    ax.set_ylabel(ylabel)
    ax.legend()
    ax.grid(True)
    plt.show()

def zoom(s, start=None, end=None):
    """
    Return `s` limited to the label range start..end.

    When neither bound is truthy the input object itself is returned;
    otherwise the result is the label slice s.loc[start:end].
    """
    if not (start or end):
        return s
    return s.loc[start:end]

In [None]:
# resample + interpolate upstream series ---------------------
# NOTE: apply any offsets BEFORE interpolation
# "1min" is used instead of the legacy "1T" minute alias, which pandas 2.2
# deprecated; both describe the same 1-minute grid.

usl_a = resample_interp(ser["USL_a"].to_frame("Value"), value_col="Value", rule="1min")["Value"]
usl_b = resample_interp(ser["USL_b"].to_frame("Event Value"), value_col="Event Value", rule="1min")["Event Value"]

# quick plot of the interpolated upstream series ------------------------
plot_series(
    [
        ("USL_b", usl_b),
        ("USL_a", usl_a),
    ],
    title="Upstream water level (interpolated)"
)

In [None]:
# zoom window reused by the zoomed plots (replaces per-plot start/end blocks)
start_date, end_date = "2023-12-10", "2024-01-25"

plot_series(
    named_series=[
        ("USL_a", zoom(usl_a, start=start_date, end=end_date)),
        ("USL_b", zoom(usl_b, start=start_date, end=end_date)),
    ],
    title="Upstream water level (zoomed, interpolated)",
)

In [None]:
# --- 3) difference between the two interpolated upstream series (b minus a) ---
# The subtraction pairs the samples of usl_b and usl_a by timestamp.
diff_b_minus_a = usl_b.sub(usl_a)
plot_series(
    named_series=[
        ("difference (b - a)", zoom(diff_b_minus_a, start=start_date, end=end_date)),
    ],
    title="Water level difference between regulator and outlet (zoomed)",
)

In [None]:
# second zoom window: the same difference series over March 2023
start_date2, end_date2 = "2023-03-01 00:00:00", "2023-03-24 00:00:00"

plot_series(
    named_series=[
        ("difference (b - a)", zoom(diff_b_minus_a, start=start_date2, end=end_date2)),
    ],
    title="Water level difference between regulator and outlet (Mar, 2023)",
)

## Sensor reading alarms

### Visualizing alarms

In [None]:
# --- obtain the difference series ----------------------------------------------
# Reuse merged_df['difference'] when merged_df is already defined; otherwise
# build it from the interpolated usl_a / usl_b series (the interpolation cell
# must have been run) and create merged_df from it.
try:
    diff = merged_df["difference"]
except NameError:
    diff = usl_b.sub(usl_a).dropna()
    merged_df = pd.DataFrame({"difference": diff})

# Optional zoom window
start, end = None, None  # e.g. "2023-12-10", "2024-01-25"
if start or end:
    diff = diff.loc[start:end]

# --- thresholds: an alarm is any sample at or below the threshold --------------
thresholds = [-0.02, -0.03, -0.04, -0.05, -0.06]  # least to most severe

# --- per-threshold alarm samples and their counts ------------------------------
alarms = {}
alarm_counts = {}
for t in thresholds:
    alarms[t] = diff[diff <= t]
    alarm_counts[t] = len(alarms[t])

# --- figure: difference curve plus one row of markers per threshold ------------
fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(diff.index, diff.values, label="Difference", linestyle="--")

for t in thresholds:
    ax.axhline(t, linestyle="--", alpha=0.3)
    # markers sit on the threshold line (not on the data) at each alarm time
    alarm_times = alarms[t].index
    ax.scatter(alarm_times, [t] * len(alarm_times), s=12, label=f"alarm ≤ {t}")

ax.set_xlabel("Date")
ax.set_ylabel("Difference (b - a)")
ax.set_title("Water level difference and alarms")
ax.legend(ncol=2)
ax.grid(True)
plt.show()

print("Sample alarm counts (diff ≤ threshold):")
for t in thresholds:
    print(f"  {t}: {alarm_counts[t]} samples")

### Zoomed

In [None]:
# plot alarms with zoom option -------------------------------------
def plot_alarms(diff_df, thresholds, start=None, end=None, title="Alarms",
                ylabel="Difference (b - a)"):
    """
    Plot the difference series with alarm markers and print the alarm counts.

    diff_df : DataFrame with a 'difference' column indexed by datetime
    thresholds : list of thresholds to check; a sample is an alarm for
                 threshold t when its difference is <= t
    start, end : optional date strings to zoom (label slice .loc[start:end],
                 applied when at least one of them is truthy)
    title : figure title
    ylabel : y-axis label; the default matches the label used by the other
             alarm figures in this notebook instead of a site-specific name
    """
    # zoom if requested
    data = diff_df.loc[start:end] if (start or end) else diff_df

    # collect the alarm rows and how many there are per threshold
    alarms = {t: data[data["difference"] <= t] for t in thresholds}
    alarm_counts = {t: len(hits) for t, hits in alarms.items()}

    # plot
    plt.figure(figsize=(14, 7))
    plt.plot(data.index, data["difference"], label="Difference", linestyle="--")

    for t in thresholds:
        times = alarms[t].index
        plt.axhline(t, linestyle="--", alpha=0.3)  # threshold reference line
        # markers are drawn on the threshold line at each alarm time
        plt.scatter(times, [t] * len(times), s=12, label=f"≤ {t}")

    plt.xlabel("Date")
    plt.ylabel(ylabel)
    plt.title(title)
    plt.legend(ncol=2)
    plt.grid(True)
    plt.show()

    # print counts
    print("Alarm counts (samples ≤ threshold):")
    for t, c in alarm_counts.items():
        print(f"  {t}: {c} samples")

# --- alarms for a short window of merged_df (22-24 Jan 2024) ---
plot_alarms(
    diff_df=merged_df,
    thresholds=[-0.02, -0.03, -0.04, -0.05, -0.06],
    start="2024-01-22",
    end="2024-01-24",
    title="Sensor Readings and Alarms: Y4R7 (Zoomed)",
)


### Setting thresholds

In [None]:
# --- Config (merged_df must already exist from the cells above) ----------------
thresholds = [-0.05, -0.04, -0.03, -0.02, 0.02, 0.03, 0.04, 0.05]  # both sides
start_date, end_date = "2023-01-31", "2024-01-24"   # either may be None
inclusive = True   # True => use <= / >= ; False => use < / >

# Restrict merged_df to the window, or use all of it when no bound is set ------
if start_date or end_date:
    data = merged_df.loc[start_date:end_date]
else:
    data = merged_df
diff = data["difference"]

# Build alarms per threshold (both sides) -------------------------------
def pick_hits(series: pd.Series, thr: float, inclusive: bool = True) -> pd.Series:
    """
    Return the samples of `series` that trip the threshold `thr`.

    A negative threshold selects values at/below it; a zero or positive
    threshold selects values at/above it. With inclusive=False the comparison
    is strict, so values equal to the threshold are excluded.
    """
    if thr < 0:
        mask = (series <= thr) if inclusive else (series < thr)
    else:
        mask = (series >= thr) if inclusive else (series > thr)
    return series[mask]

# One Series of triggering samples per threshold, plus the sample counts
alarms = {t: pick_hits(diff, t, inclusive=inclusive) for t in thresholds}
alarm_counts = {t: int(s.shape[0]) for t, s in alarms.items()}

# Plot the difference + threshold lines + alarm markers -----------------
plt.figure(figsize=(14, 8))
plt.plot(diff.index, diff.values, label="Difference", linestyle="--")

for t, s in alarms.items():
    # threshold reference line
    plt.axhline(t, linestyle="--", alpha=0.3)
    # scatter actual difference values where alarm triggers
    # ("x" for negative thresholds, "o" for zero/positive ones)
    if not s.empty:
        marker = "x" if t < 0 else "o"
        plt.scatter(s.index, s.values, label=f"{'≤' if t<0 else '≥'} {t:+.2f}", s=14, marker=marker)

plt.axhline(0.0, linestyle="--", alpha=0.25)
plt.xlabel("Date")
plt.ylabel("Difference (b - a)")
plt.title("Water Level Sensor Differences and Alarms")
plt.legend(ncol=2)
plt.grid(True)
plt.show()

# Print counts ----------------------------------------------------------
print("Alarm counts (samples meeting each threshold):")
for t in thresholds:
    print(f"  {t:+.2f}: {alarm_counts[t]}")

# --- 5) Collect all alarms into one table; optionally save it to CSV ----------
rows = []
for t, s in alarms.items():
    if not s.empty:
        rows.append(
            pd.DataFrame({
                "timestamp": s.index,
                "difference": s.values,
                "threshold": t,
                "direction": "neg" if t < 0 else "pos",
            })
        )

if rows:
    all_alarms = pd.concat(rows, ignore_index=True)
else:
    # keep the expected columns even when no threshold was triggered
    all_alarms = pd.DataFrame(columns=["timestamp", "difference", "threshold", "direction"])

# Writing the file is opt-in (off by default, so no file is created unless
# requested), and the message reports what actually happened instead of
# always claiming the CSV was saved.
save_alarms_csv = False
alarms_csv_path = "alarms_comparison.csv"
if save_alarms_csv:
    all_alarms.to_csv(alarms_csv_path, index=False)
    print(f"\nSaved: {alarms_csv_path}")
else:
    print(f"\nNot saved: set save_alarms_csv = True to write {alarms_csv_path}")
print(all_alarms.head())
