In [1]:
import numpy as np
import pandas as pd

In [None]:
# import numpy as np
# import pandas as pd

# def temporal_anomaly_scoring(
#     monitoring_cube,
#     BaselineModel,
#     features=("NDVI", "NBR", "BSI", "B11", "B12"),
# ):
#     """
#     STEP 3 — Temporal Anomaly Scoring (Inference)

#     Parameters
#     ----------
#     monitoring_cube : xarray.Dataset
#         Dimensions: (time, y, x)
#         Variables : spectral indices / bands

#     BaselineModel : dict
#         {
#             "scaler": fitted StandardScaler,
#             "pca": fitted PCA
#         }

#     features : list or tuple
#         Feature names in correct order (MUST match baseline)

#     Returns
#     -------
#     monitor_df : pandas.DataFrame
#         Flattened monitoring cube with an added "anomaly_score" column.
#     anomaly_maps : dict[datetime -> xarray.DataArray]
#         One anomaly raster per monitoring date.
#     stats : dict
#         Summary statistics of the anomaly scores.
#     """

#     # ─────────────────────────────────────────────
#     # 1. Flatten monitoring cube
#     # ─────────────────────────────────────────────
#     monitor_df = (
#         monitoring_cube
#         .to_dataframe()
#         .reset_index()
#         .dropna()
#     )

#     # ─────────────────────────────────────────────
#     # 2. Load baseline model
#     # ─────────────────────────────────────────────
#     scaler = BaselineModel["scaler"]
#     pca    = BaselineModel["pca"]

#     # ─────────────────────────────────────────────
#     # 3. Normalize → project → reconstruct
#     # ─────────────────────────────────────────────
#     X_mon = scaler.transform(monitor_df[list(features)])
#     X_latent = pca.transform(X_mon)
#     X_recon = pca.inverse_transform(X_latent)

#     # ─────────────────────────────────────────────
#     # 4. Compute anomaly score
#     # ─────────────────────────────────────────────
#     monitor_df["anomaly_score"] = np.mean(
#         (X_mon - X_recon) ** 2,
#         axis=1
#     )

#     # ─────────────────────────────────────────────
#     # 5. Rebuild anomaly rasters (per date)
#     # ─────────────────────────────────────────────
#     anomaly_maps = {}

#     for t in monitor_df["time"].unique():
#         df_t = monitor_df[monitor_df["time"] == t]

#         anomaly_maps[pd.to_datetime(t)] = (
#             df_t
#             .set_index(["y", "x"])["anomaly_score"]
#             .to_xarray()
#         )

#     # ─────────────────────────────────────────────
#     # 6. Summary statistics (VERY IMPORTANT later)
#     # ─────────────────────────────────────────────
#     stats = {
#         "mean": float(monitor_df["anomaly_score"].mean()),
#         "std": float(monitor_df["anomaly_score"].std()),
#         "p90": float(monitor_df["anomaly_score"].quantile(0.90)),
#         "p95": float(monitor_df["anomaly_score"].quantile(0.95)),
#         "p99": float(monitor_df["anomaly_score"].quantile(0.99)),
#         "min": float(monitor_df["anomaly_score"].min()),
#         "max": float(monitor_df["anomaly_score"].max()),
#         "n_pixels": int(len(monitor_df))
#     }

#     return monitor_df, anomaly_maps, stats
    


In [1]:
# temporial_anomaly_pipeline_fixed.py

import numpy as np
import pandas as pd

def temporal_anomaly_scoring_safe(
    monitoringcube,
    BaselineModel,
    features=("NDVI", "NBR", "BSI", "B11", "B12"),
):
    """
    Score every pixel of a monitoring cube by its PCA reconstruction error.

    Unlike a global ``dropna()``, rows with unusable feature values are kept in
    the output table with a NaN score, so each per-date map retains every pixel
    present in the flattened table.

    Parameters
    ----------
    monitoringcube : object supporting ``cube[list_of_names].to_dataframe()``
        Presumably an ``xarray.Dataset`` with dimensions (time, y, x) — confirm
        against the caller. After flattening, the table must contain the
        columns "time", "y" and "x" plus every name in ``features``.
    BaselineModel : mapping
        Must provide ``"scaler"`` (object with ``transform``) and ``"pca"``
        (object with ``transform`` and ``inverse_transform``). Presumably a
        fitted scikit-learn StandardScaler / PCA pair.
    features : sequence of str
        Feature names, in the order the baseline model expects them.

    Returns
    -------
    df_all : pandas.DataFrame
        Long-format table of the selected features with an added
        "anomalyscore" column (NaN where a pixel had a missing or non-finite
        feature value).
    anomalymaps : dict
        Maps ``pd.to_datetime(time)`` to the (y, x) grid of scores for that
        date, named "anomalyscore".
    stats : dict
        "mean", "std", "p90", "p95", "p99", "min", "max" of the scored pixels
        and "npixels", the number of pixels that received a score. All values
        except "npixels" are NaN when nothing could be scored.
    """
    scaler = BaselineModel["scaler"]
    pca = BaselineModel["pca"]
    feats = list(features)

    # Long-format table: one row per (time, y, x) cell, NaNs preserved.
    df_all = monitoringcube[feats].to_dataframe().reset_index()

    # Every row starts unscored; only valid rows are overwritten below.
    df_all["anomalyscore"] = np.nan

    # Score one date at a time, so only a single slice is materialised as a
    # dense array at once.
    for _, dft in df_all.groupby("time"):
        Xall = dft[feats].to_numpy()

        # A row is usable only if every feature is finite. `notna()` alone
        # would let +/-inf through, and transformers that validate their input
        # (scikit-learn's do) raise ValueError on infinity, aborting the whole
        # run because of a handful of bad pixels.
        mask_valid = np.isfinite(Xall).all(axis=1)
        if not mask_valid.any():
            continue

        Xscaled = scaler.transform(Xall[mask_valid])
        Xlatent = pca.transform(Xscaled)
        Xrecon = pca.inverse_transform(Xlatent)

        # Mean squared reconstruction error per pixel, in scaled feature space.
        recon_err = np.mean((Xscaled - Xrecon) ** 2, axis=1)

        df_all.loc[dft.index[mask_valid], "anomalyscore"] = recon_err

    # Rebuild one (y, x) grid per date; dates with no valid pixel still get an
    # all-NaN map.
    anomalymaps = {}
    for t, dft in df_all.groupby("time"):
        anomalymaps[pd.to_datetime(t)] = (
            dft.set_index(["y", "x"])["anomalyscore"]
            .to_xarray()
            .rename("anomalyscore")
        )

    # pandas reductions and quantiles skip NaN, so unscored pixels do not
    # influence the summary.
    scores = df_all["anomalyscore"]
    stats = {
        "mean": float(scores.mean(skipna=True)),
        "std": float(scores.std(skipna=True)),
        "p90": float(scores.quantile(0.90)),
        "p95": float(scores.quantile(0.95)),
        "p99": float(scores.quantile(0.99)),
        "min": float(scores.min(skipna=True)),
        "max": float(scores.max(skipna=True)),
        "npixels": int(scores.notna().sum()),
    }

    return df_all, anomalymaps, stats


In [None]:
# Score the monitoring cube with the NaN-safe scorer. The older
# `temporal_anomaly_scoring` exists only as commented-out code in this
# notebook, so calling it raises NameError on a fresh kernel.
# Note: the safe scorer names its score column "anomalyscore" and its pixel
# count "npixels" (no underscores), and its first parameter is `monitoringcube`.
monitor_df, anomaly_maps, stats = temporal_anomaly_scoring_safe(
    monitoringcube=monitoring_cube,
    BaselineModel=BaselineModel,
)


