# Anomaly Detection Debug Notebook

This notebook helps you interactively debug the baseline z‑score anomaly detection flow for 15‑minute `consommation` data.

## 1. Load Dependencies and Configure Environment

Install and import the required libraries, then configure the environment defaults (Data API base URL and pandas display options) used in the debugging steps.

In [14]:
!pip install matplotlib pandas polars pendulum requests prefect --quiet

In [15]:
import json
import logging
import os
from typing import Dict, List, Tuple
import matplotlib.pyplot as plt
import pandas as pd
import pendulum
import requests
import polars as pl

# Base URL of the Data API; the port is the host-mapped one shown by `docker ps`.
DATA_API_URL = "http://localhost:8005"
# Mirror the URL into the process environment under the same name.
os.environ.update(DATA_API_URL=DATA_API_URL)

# pandas display defaults for this notebook.
pd.options.display.max_columns = 50
pd.options.display.width = 120


def build_url(path: str) -> str:
    """Return the full Data API URL for ``path``.

    Trailing slashes on ``DATA_API_URL`` and leading slashes on ``path`` are
    stripped, so the two parts are always separated by exactly one ``/``.

    Args:
        path: Endpoint path, with or without a leading slash.

    Returns:
        The base URL and the path joined by a single slash.
    """
    base = DATA_API_URL.rstrip("/")
    endpoint = path.lstrip("/")
    return base + "/" + endpoint


## 2. Load Data and Initialize Pipeline

Fetch historical `consommation` measurements from the Data API and confirm inputs are present.

In [16]:
import sys

# Directory added to the import path; presumably where the `anomaly_detection`
# and `db` modules imported further down live. The path is specific to the
# environment this notebook was written in -- adjust it for your checkout.
PREFECT_WORKER_DIR = '/root/demo_power_sense/services/prefect_worker'

# Keep the directory at the front of sys.path, but exactly once, so that
# re-running this cell does not pile up duplicate entries.
sys.path[:] = [PREFECT_WORKER_DIR] + [p for p in sys.path if p != PREFECT_WORKER_DIR]


In [17]:
def _to_iso(ts) -> str:
    """Convert a datetime-like object to ISO-8601 string using pendulum."""
    # Accept datetime or pendulum.DateTime
    try:
        p = pendulum.instance(ts)
    except Exception:
        # fallback: convert via str()
        return str(ts)
    # ensure UTC
    if p.tzinfo is None:
        p = p.in_timezone("UTC")
    else:
        p = p.in_timezone("UTC")
    return p.to_iso8601_string()

In [18]:
# Use the project's anomaly detection tasks to fetch history
from anomaly_detection.tasks import _chunk_time_ranges
from db import fetch_measurements
metric = 'consommation'
now = pendulum.now("UTC")
history_start = now - pendulum.duration(days=84)
ANOMALY_EVAL_HOURS = 24
eval_start = now - pendulum.duration(hours=ANOMALY_EVAL_HOURS)
chunk_hours=24

rows: List[dict] = []
for window_start, window_end in _chunk_time_ranges(history_start, now, chunk_hours):

    chunk = fetch_measurements(
                start_ts=_to_iso(window_start),
                end_ts=_to_iso(window_end),
                metric=metric,
                order="asc",
            )
    rows.extend(chunk)

df_history = pl.from_dicts(rows)
df_history

ts,source,metric,value,ukey,version,inserted_at
str,str,str,f64,str,i64,str
"""2025-10-30T13:15:00+00:00""","""France""","""consommation""",53222.0,"""{""date"":""20251030"",""metric"":""c…",1,"""2026-01-21T17:59:53.602000+00:…"
"""2025-10-30T13:30:00+00:00""","""France""","""consommation""",52729.0,"""{""date"":""20251030"",""metric"":""c…",1,"""2026-01-21T17:59:53.602000+00:…"
"""2025-10-30T13:45:00+00:00""","""France""","""consommation""",52230.0,"""{""date"":""20251030"",""metric"":""c…",1,"""2026-01-21T17:59:53.602000+00:…"
"""2025-10-30T14:00:00+00:00""","""France""","""consommation""",52295.0,"""{""date"":""20251030"",""metric"":""c…",1,"""2026-01-21T17:59:53.602000+00:…"
"""2025-10-30T14:15:00+00:00""","""France""","""consommation""",51986.0,"""{""date"":""20251030"",""metric"":""c…",1,"""2026-01-21T17:59:53.602000+00:…"
…,…,…,…,…,…,…
"""2026-01-22T11:45:00+00:00""","""France""","""consommation""",68074.0,"""{""date"":""20260122"",""metric"":""c…",1,"""2026-01-22T12:04:22.665000+00:…"
"""2026-01-22T12:00:00+00:00""","""France""","""consommation""",67192.0,"""{""date"":""20260122"",""metric"":""c…",1,"""2026-01-22T12:19:22.260000+00:…"
"""2026-01-22T12:15:00+00:00""","""France""","""consommation""",67412.0,"""{""date"":""20260122"",""metric"":""c…",1,"""2026-01-22T12:34:22.227000+00:…"
"""2026-01-22T12:30:00+00:00""","""France""","""consommation""",66235.0,"""{""date"":""20260122"",""metric"":""c…",1,"""2026-01-22T12:49:21.845000+00:…"
