# Elering data pipeline (optimized)

See notebook laeb **ühe korraga** Eleringi tegeliku tarbimise **koos plaaniga** (with-plan API),
ja **lisab Eleringi plaani** juba jooksvalt kõikidesse eksporditavatesse CSV-desse.
Lisaks arvutab jooksvalt veerud prognooside vahede jaoks.

In [62]:
# ⚙️ Seaded
from pathlib import Path
from datetime import datetime, timedelta, timezone
import pandas as pd
import requests

# Date range: how many days back from today are fetched
DAYS_BACK = 7  # adjust as needed

# Output directory (created on demand, including missing parents)
OUT_DIR = Path("scripts")
OUT_DIR.mkdir(exist_ok=True, parents=True)

# Path of our existing comparison CSV (if any); the Elering plan and the
# difference columns are merged into this file
FORECAST_COMPARISON_PATH = OUT_DIR.joinpath("forecast_vs_actual_comparison_hourly.csv")

# Date helper
def utc_today():
    """Return the current calendar date in UTC."""
    now_utc = datetime.now(tz=timezone.utc)
    return now_utc.date()

# Take one snapshot of today's UTC date so both ends of the range come from
# the same day, even when the cell happens to run across midnight UTC.
_today_utc = utc_today()
START_DATE = _today_utc - timedelta(days=DAYS_BACK)
END_DATE = _today_utc  # inclusive: today is part of the range

print(f"Laen andmed vahemikule: {START_DATE} ... {END_DATE}")

Laen andmed vahemikule: 2025-09-16 ... 2025-09-23


## 1) Lae **tegelik tarbimine koos plaaniga** (Elering with-plan CSV)

In [63]:
# Remove every previously exported CSV whose name starts with
# 'elering_actual_with_plan' before the fresh export is written.
for stale_csv in OUT_DIR.glob('elering_actual_with_plan*.csv'):
    try:
        stale_csv.unlink()
    except OSError as e:
        # Best effort: report the failure and continue with the remaining files.
        print(f'Viga faili kustutamisel: {stale_csv} — {e}')
    else:
        print(f'Kustutatud vana fail: {stale_csv}')

# Elering with-plan CSV: actual consumption and the plan in a single download
from urllib.parse import urlencode
import requests
from io import StringIO

BASE_URL = "https://dashboard.elering.ee/api/system/with-plan/csv"

# NB: the Elering API expects the 'fields' parameter to be repeated,
# not passed as one comma-separated value.
params = [
    ("start", f"{START_DATE}T00:00:00Z"),
    ("end", f"{END_DATE}T23:00:00Z"),
    ("fields", "consumption"),
    ("fields", "consumptionPlan"),
]
url = f"{BASE_URL}?{urlencode(params)}"
print("WITH-PLAN URL:", url)

# Download the CSV with requests and parse it with pandas.
# requests has no default timeout; without one the cell can hang forever
# when the server stops responding.
resp = requests.get(url, timeout=30)
if resp.status_code != 200:
    raise RuntimeError(f"API päring ebaõnnestus! HTTP kood: {resp.status_code}, sisu: {resp.text}")
with_plan_df = pd.read_csv(StringIO(resp.text), sep=';', encoding='utf-8')

# Standardize the column names
# (both Estonian and English header names are supported)
rename_map = {
    'Ajatempel (UTC)': 'timestamp',
    'Kuupäev (Eesti aeg)': 'date_local',
    'Tarbimine': 'consumption',
    'Planeeritud tarbimine': 'plan',
    'consumptionPlan': 'plan',
}
with_plan_df = with_plan_df.rename(columns=rename_map)

# Locate the time column ...
time_col = next(
    (cand for cand in ("timestamp", "time", "datetime", "ts") if cand in with_plan_df.columns),
    None,
)
if time_col is None:
    raise ValueError(f"Aja veeru nimi on teadmata. Veerud: {list(with_plan_df.columns)}")

# ... and normalize its name: every later step addresses the column as
# 'timestamp', so an alternative name would otherwise end in a KeyError.
if time_col != "timestamp":
    with_plan_df = with_plan_df.rename(columns={time_col: "timestamp"})
    time_col = "timestamp"

# Parse the timestamp column: either epoch seconds or ISO date strings
ts_raw = with_plan_df["timestamp"]
ts_parsed = None
if not ts_raw.empty:
    try:
        # Probe the first value; if it is numeric, treat the whole column as
        # epoch seconds.
        float(ts_raw.iloc[0])
        ts_parsed = pd.to_datetime(ts_raw.astype(float), unit='s', utc=True)
    except (TypeError, ValueError, OverflowError):
        ts_parsed = None
if ts_parsed is None:
    # Fallback: parse as date strings. This branch also handles an empty
    # response and a column that is already datetime (cell re-run).
    ts_parsed = pd.to_datetime(ts_raw, utc=True)
with_plan_df["timestamp"] = ts_parsed

# Clean the consumption and plan columns and convert them to numbers:
# drop double quotes, turn decimal commas into dots, trim whitespace.
numeric_cols = [name for name in ("consumption", "plan") if name in with_plan_df.columns]
for name in numeric_cols:
    as_text = with_plan_df[name].astype(str)
    as_text = as_text.str.replace('"', '', regex=False)
    as_text = as_text.str.replace(',', '.', regex=False)
    as_text = as_text.str.strip()
    # Anything that still is not a number becomes NaN
    with_plan_df[name] = pd.to_numeric(as_text, errors="coerce")

# Sort chronologically and keep only the last row for each timestamp
with_plan_df = with_plan_df.sort_values("timestamp").drop_duplicates(
    subset=["timestamp"], keep="last"
)

# Control column: plan minus actual. It needs both input columns, so it is
# skipped (instead of raising KeyError) when either of them is missing.
if {"plan", "consumption"}.issubset(with_plan_df.columns):
    with_plan_df["elering_plan_minus_actual"] = with_plan_df["plan"] - with_plan_df["consumption"]

print("with_plan_df veerud:", list(with_plan_df.columns))
print(with_plan_df.head())

# Export the frame twice:
#   * under a fixed name, overwritten on every run, and
#   * under the legacy dated name elering_actual_with_plan_from_YYYYMMDD.csv
#     (as the old script did).
export_stamp = datetime.now(timezone.utc).strftime('%Y%m%d')
wplan_out = OUT_DIR / "elering_actual_with_plan.csv"
wplan_out_dated = OUT_DIR / f"elering_actual_with_plan_from_{export_stamp}.csv"

for export_path, export_msg in ((wplan_out, "Salvestasin:"), (wplan_out_dated, "Salvestasin ka:")):
    with_plan_df.to_csv(export_path, sep=";", index=False, encoding="utf-8")
    print(export_msg, export_path)

Kustutatud vana fail: scripts\elering_actual_with_plan.csv
Kustutatud vana fail: scripts\elering_actual_with_plan_from_20250920.csv
Kustutatud vana fail: scripts\elering_actual_with_plan_from_20250923.csv
WITH-PLAN URL: https://dashboard.elering.ee/api/system/with-plan/csv?start=2025-09-16T00%3A00%3A00Z&end=2025-09-23T23%3A00%3A00Z&fields=consumption&fields=consumptionPlan
with_plan_df veerud: ['timestamp', 'date_local', 'consumption', 'plan', 'elering_plan_minus_actual']
                  timestamp        date_local  consumption     plan  \
0 2025-09-16 00:00:00+00:00  16.09.2025 03:00        662.4  679.350   
1 2025-09-16 01:00:00+00:00  16.09.2025 04:00        665.4  673.775   
2 2025-09-16 02:00:00+00:00  16.09.2025 05:00        690.4  712.700   
3 2025-09-16 03:00:00+00:00  16.09.2025 06:00        799.4  757.050   
4 2025-09-16 04:00:00+00:00  16.09.2025 07:00        907.6  911.425   

   elering_plan_minus_actual  
0                     16.950  
1                      8.375  
2  

In [64]:
# List every CSV output file found in the scripts directory
import os
print("Väljundfailid kaustas 'scripts':")
csv_names = [entry for entry in os.listdir(OUT_DIR) if entry.endswith('.csv')]
for csv_name in csv_names:
    print("-", csv_name)

Väljundfailid kaustas 'scripts':
- dq_report.csv
- elering_actual_from_20250920.csv
- elering_actual_with_plan.csv
- elering_actual_with_plan_from_20250923.csv
- elering_forecast_from_20250916.csv
- elering_forecast_from_20250919.csv
- elering_forecast_from_20250920.csv
- forecast_skill_summary.csv
- forecast_vs_actual_comparison_hourly.csv
- forecast_vs_actual_comparison_hourly_filtered.csv


In [65]:
# Check that the output files really exist on disk (and so show up in
# Windows Explorer) and that the output directory is writable.
import os
from pathlib import Path

print('Kontrollin, kas väljundfailid eksisteerivad ja on kirjutatavad:')
for fname in os.listdir(OUT_DIR):
    if not fname.endswith('.csv'):
        continue
    fpath = OUT_DIR / fname
    try:
        # A single stat() call supplies both the size and the modification time
        info = fpath.stat()
    except OSError:
        # The entry cannot be stat'ed (e.g. removed meanwhile or a dangling link)
        print(f'- {fname}:', 'PUUDUB')
        continue
    print(f'- {fname}:', 'OLEMAS', '| Suurus:', info.st_size, 'baiti', '| Muudetud:', info.st_mtime)

# Try to write a small test file
test_path = OUT_DIR / 'test_write_check.txt'
try:
    with open(test_path, 'w', encoding='utf-8') as f:
        f.write('Test kirjutamine õnnestus!')
    print('Testfail kirjutatud:', test_path)
except OSError as e:
    print('Testfaili kirjutamine ebaõnnestus:', e)

# Delete the test file again
try:
    test_path.unlink()
except FileNotFoundError:
    # The write above failed, so there is nothing to clean up
    pass
except OSError as e:
    # Report instead of silently leaving the test file behind
    print('Testfaili kustutamine ebaõnnestus:', e)
else:
    print('Testfail kustutatud.')

Kontrollin, kas väljundfailid eksisteerivad ja on kirjutatavad:
- dq_report.csv: OLEMAS | Suurus: 352 baiti | Muudetud: 1758544789.6412964
- elering_actual_from_20250920.csv: OLEMAS | Suurus: 2702 baiti | Muudetud: 1758603567.156417
- elering_actual_with_plan.csv: OLEMAS | Suurus: 13747 baiti | Muudetud: 1758603634.5729966
- elering_actual_with_plan_from_20250923.csv: OLEMAS | Suurus: 13747 baiti | Muudetud: 1758603634.5886455
- elering_forecast_from_20250916.csv: OLEMAS | Suurus: 4042 baiti | Muudetud: 1758274005.7700217
- elering_forecast_from_20250919.csv: OLEMAS | Suurus: 4045 baiti | Muudetud: 1758544287.6714277
- elering_forecast_from_20250920.csv: OLEMAS | Suurus: 4014 baiti | Muudetud: 1758603567.4761894
- forecast_skill_summary.csv: OLEMAS | Suurus: 311 baiti | Muudetud: 1758544913.2940848
- forecast_vs_actual_comparison_hourly.csv: OLEMAS | Suurus: 2685 baiti | Muudetud: 1758603567.4842463
- forecast_vs_actual_comparison_hourly_filtered.csv: OLEMAS | Suurus: 7763 baiti | Muud

## 2) Lisa Eleringi plaan **meie jooksvale võrdlusfailile** (kui see eksisteerib)

In [66]:
# If 'forecast_vs_actual_comparison_hourly.csv' already exists, add the
# Elering plan and the difference columns to it.
import pandas as pd
from pathlib import Path


def _as_number(series):
    """Return *series* as numbers; text values may use a decimal comma."""
    if series.dtype.kind in "biufc":
        return pd.to_numeric(series, errors="coerce")
    return pd.to_numeric(
        series.astype(str).str.replace(" ", "", regex=False).str.replace(",", ".", regex=False),
        errors="coerce",
    )


if Path(FORECAST_COMPARISON_PATH).exists():
    df = pd.read_csv(FORECAST_COMPARISON_PATH, sep=";", encoding="utf-8")

    # Find the time column in our file (the Estonian header is supported too)
    time_col = next(
        (cand for cand in ("timestamp", "time", "datetime", "ts", "Ajatempel (UTC)") if cand in df.columns),
        None,
    )
    if time_col is None:
        raise ValueError(f"Ei leidnud ajaveergu võrdlusfailis. Veerud: {list(df.columns)}")
    df = df.rename(columns={time_col: "timestamp"})
    df["timestamp"] = pd.to_datetime(df["timestamp"], utc=True)

    # Drop an 'elering_plan' column left by an earlier run, otherwise the
    # merge below would produce suffixed duplicates.
    if "elering_plan" in df.columns:
        df = df.drop(columns=["elering_plan"])

    # Small with-plan subset used for the join
    wps = with_plan_df[["timestamp", "plan"]].rename(columns={"plan": "elering_plan"})

    merged = df.merge(wps, on="timestamp", how="left")

    # Our forecast column. The trailing names are the ones the plotting cell
    # of this notebook also recognizes as "our forecast".
    forecast_col = next(
        (
            cand
            for cand in (
                "forecast", "prediction", "our_forecast", "model_forecast",
                "consumption_hourly", "yhat_consumption", "yhat_base",
            )
            if cand in merged.columns
        ),
        None,
    )
    # Actual consumption column ('actual' is preferred over 'consumption';
    # 'Tarbimine' is the Estonian header)
    actual_col = next(
        (cand for cand in ("actual", "consumption", "Tarbimine") if cand in merged.columns),
        None,
    )

    forecast_vals = _as_number(merged[forecast_col]) if forecast_col is not None else None
    actual_vals = _as_number(merged[actual_col]) if actual_col is not None else None

    # Add the difference columns; a column stays NA when an input is missing
    if forecast_vals is not None:
        merged["diff_forecast_vs_elering_plan"] = forecast_vals - merged["elering_plan"]
    else:
        merged["diff_forecast_vs_elering_plan"] = pd.NA

    if forecast_vals is not None and actual_vals is not None:
        merged["diff_forecast_vs_actual"] = forecast_vals - actual_vals
    else:
        merged["diff_forecast_vs_actual"] = pd.NA

    if actual_vals is not None:
        merged["diff_elering_plan_vs_actual"] = merged["elering_plan"] - actual_vals
    else:
        merged["diff_elering_plan_vs_actual"] = pd.NA

    # Save back under the same name (overwrites the file)
    merged.to_csv(FORECAST_COMPARISON_PATH, sep=";", index=False, encoding="utf-8")
    print("Uuendasin võrdlusfaili:", FORECAST_COMPARISON_PATH)
    print("Uued veerud lisatud: 'elering_plan', 'diff_forecast_vs_elering_plan', 'diff_elering_plan_vs_actual' (ja 'diff_forecast_vs_actual' kui võimalik)")
else:
    print("Võrdlusfaili ei leitud — jätan selle sammu vahele. Kui fail tekib hiljem, jooksuta see samm uuesti.")

Uuendasin võrdlusfaili: scripts\forecast_vs_actual_comparison_hourly.csv
Uued veerud lisatud: 'elering_plan', 'diff_forecast_vs_elering_plan', 'diff_elering_plan_vs_actual' (ja 'diff_forecast_vs_actual' kui võimalik)


## 3) Abi-funktsioon: torustik ühe funktsioonikutsena

In [67]:
def elering_pipeline(days_back=7, forecast_csv_path=FORECAST_COMPARISON_PATH):
    """Download Elering actual consumption with the plan and export it.

    The with-plan CSV is fetched for the last ``days_back`` days (UTC),
    normalized the same way as the step-by-step cells of this notebook and
    written to a dated CSV in ``OUT_DIR``. If ``forecast_csv_path`` exists,
    the plan is merged into that file as ``elering_plan`` together with the
    difference columns, and the file is overwritten.

    Args:
        days_back: Number of days before today (UTC) where the range starts.
        forecast_csv_path: Path of the comparison CSV to update, if present.

    Returns:
        ``(with_plan_df, merged_df_or_None)``; the second item is ``None``
        when the comparison file does not exist.

    Raises:
        RuntimeError: The API answered with a non-200 status code.
        ValueError: No time column was found in the response or in the
            comparison file.
    """
    from datetime import datetime, timedelta, timezone
    from io import StringIO
    from pathlib import Path

    import pandas as pd
    import requests

    def first_present(candidates, columns):
        """Return the first candidate name present in *columns*, else None."""
        return next((name for name in candidates if name in columns), None)

    def to_number(series):
        """Convert a column to numbers; text may be quoted or use a decimal comma."""
        if series.dtype.kind in "biufc":
            return pd.to_numeric(series, errors="coerce")
        cleaned = (
            series.astype(str)
            .str.replace('"', "", regex=False)
            .str.replace(" ", "", regex=False)
            .str.replace(",", ".", regex=False)
            .str.strip()
        )
        return pd.to_numeric(cleaned, errors="coerce")

    def parse_timestamps(series):
        """Parse epoch seconds when the column is numeric, date strings otherwise."""
        if not series.empty:
            try:
                float(series.iloc[0])
                return pd.to_datetime(series.astype(float), unit="s", utc=True)
            except (TypeError, ValueError, OverflowError):
                pass
        return pd.to_datetime(series, utc=True)

    # One snapshot of "today" so both range ends refer to the same day
    today = datetime.now(timezone.utc).date()
    start = today - timedelta(days=days_back)

    base_url = "https://dashboard.elering.ee/api/system/with-plan/csv"
    # The API expects 'fields' to be repeated, not comma-separated
    params = [
        ("start", f"{start.isoformat()}T00:00:00Z"),
        ("end", f"{today.isoformat()}T23:00:00Z"),
        ("fields", "consumption"),
        ("fields", "consumptionPlan"),
    ]
    # requests has no default timeout; set one so the call cannot hang forever
    resp = requests.get(base_url, params=params, timeout=30)
    if resp.status_code != 200:
        raise RuntimeError(f"API päring ebaõnnestus! HTTP kood: {resp.status_code}, sisu: {resp.text}")
    # The CSV is semicolon-separated
    wpdf = pd.read_csv(StringIO(resp.text), sep=";")

    # Normalize the column names (Estonian and English headers)
    wpdf = wpdf.rename(columns={
        "Ajatempel (UTC)": "timestamp",
        "Kuupäev (Eesti aeg)": "date_local",
        "Tarbimine": "consumption",
        "Planeeritud tarbimine": "plan",
        "consumptionPlan": "plan",
    })
    time_col = first_present(["timestamp", "time", "datetime", "ts"], wpdf.columns)
    if time_col is None:
        raise ValueError("Puudub aja veerg (timestamp/time/datetime/ts)")
    wpdf = wpdf.rename(columns={time_col: "timestamp"})

    # Case-insensitive fallbacks for alternative header names
    fallbacks = (
        ("consumption", {"consumption", "actual", "consumed", "load"}),
        ("plan", {"plan", "planned", "forecast", "consumptionplan", "elering_plan"}),
    )
    for target, aliases in fallbacks:
        if target not in wpdf.columns:
            match = next((c for c in wpdf.columns if str(c).lower() in aliases), None)
            if match is not None:
                wpdf = wpdf.rename(columns={match: target})

    wpdf["timestamp"] = parse_timestamps(wpdf["timestamp"])
    for col in ("consumption", "plan"):
        if col in wpdf.columns:
            wpdf[col] = to_number(wpdf[col])

    # Sort chronologically and keep the last row for each timestamp
    wpdf = wpdf.sort_values("timestamp").drop_duplicates(subset=["timestamp"], keep="last")
    if {"plan", "consumption"}.issubset(wpdf.columns):
        wpdf["elering_plan_minus_actual"] = wpdf["plan"] - wpdf["consumption"]

    out_path = OUT_DIR / f"elering_actual_with_plan_{datetime.now(timezone.utc).strftime('%Y%m%d')}.csv"
    wpdf.to_csv(out_path, sep=";", index=False, encoding="utf-8")

    merged = None
    if Path(forecast_csv_path).exists():
        df = pd.read_csv(forecast_csv_path, sep=";", encoding="utf-8")
        tcol = first_present(["timestamp", "time", "datetime", "ts", "Ajatempel (UTC)"], df.columns)
        if tcol is None:
            raise ValueError("Ei leidnud ajaveergu võrdlusfailis")
        df = df.rename(columns={tcol: "timestamp"})
        df["timestamp"] = pd.to_datetime(df["timestamp"], utc=True)

        # A previous run may already have written 'elering_plan'; drop it so
        # the merge does not create suffixed duplicate columns.
        if "elering_plan" in df.columns:
            df = df.drop(columns=["elering_plan"])

        # reindex yields an all-NaN 'plan' column when the API returned none
        wps = wpdf.reindex(columns=["timestamp", "plan"]).rename(columns={"plan": "elering_plan"})
        merged = df.merge(wps, on="timestamp", how="left")

        # The trailing candidates are the names the plotting cell of this
        # notebook recognizes as well.
        fcol = first_present(
            ["forecast", "prediction", "our_forecast", "model_forecast",
             "consumption_hourly", "yhat_consumption", "yhat_base"],
            merged.columns,
        )
        acol = first_present(["actual", "consumption", "Tarbimine"], merged.columns)
        forecast_vals = to_number(merged[fcol]) if fcol is not None else None
        actual_vals = to_number(merged[acol]) if acol is not None else None

        # Difference columns; a column stays NA when one of its inputs is missing
        if forecast_vals is not None:
            merged["diff_forecast_vs_elering_plan"] = forecast_vals - merged["elering_plan"]
        else:
            merged["diff_forecast_vs_elering_plan"] = pd.NA
        if forecast_vals is not None and actual_vals is not None:
            merged["diff_forecast_vs_actual"] = forecast_vals - actual_vals
        else:
            merged["diff_forecast_vs_actual"] = pd.NA
        if actual_vals is not None:
            merged["diff_elering_plan_vs_actual"] = merged["elering_plan"] - actual_vals
        else:
            merged["diff_elering_plan_vs_actual"] = pd.NA

        merged.to_csv(forecast_csv_path, sep=";", index=False, encoding="utf-8")
    return wpdf, merged

print("✅ Pipeline on defineeritud. Käivita: with_plan_df, merged = elering_pipeline(DAYS_BACK, FORECAST_COMPARISON_PATH)")

✅ Pipeline on defineeritud. Käivita: with_plan_df, merged = elering_pipeline(DAYS_BACK, FORECAST_COMPARISON_PATH)


# Lisa meie forecasti võrdlus Eleringi actual+plan andmetega
Järgnev plokk otsib viimase forecast-faili, liidab selle Eleringi actual+plan andmetega ja arvutab vead.

## Forecast-only Data Quality Check (kolitud eraldi faili)
See QC plokk on eraldatud uude notebooki: `scripts/forecast_qc.ipynb`.
Käivita ja vaata QC raportid sealt.

7) Drift: viimane vs eelmine forecast (Code-rakk)

8) Kiired graafikud (ajalugu + jaotus) (Code-rakk)

## Visualiseerimine: Eleringi plaan vs meie prognoos (ja tegelik)
Alljärgnevad graafikud eeldavad, et fail `scripts/forecast_vs_actual_comparison_hourly.csv` on olemas.
Kasutame robustset veerunimede tuvastust (eng/est) ning kuvame prognoosivigade (prognoos − tegelik) ajarea ja veahistogrammi.
Pildid salvestatakse kausta `scripts/output/plots/`.

In [68]:
# Visualize errors vs actual: Eleringi plan vs Our forecast, with date filter and refresh button
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Interactive widgets are optional: assume they are available and switch the
# flag off when the import fails.
USE_WIDGETS = True
try:
    import ipywidgets as widgets
    from IPython.display import display, clear_output
except Exception:
    USE_WIDGETS = False

# Input comparison file and the directory where the plots are stored
cmp_path = Path("scripts", "forecast_vs_actual_comparison_hourly.csv")
plots_dir = Path("scripts", "output", "plots")
plots_dir.mkdir(exist_ok=True, parents=True)

# Robust CSV reader (tries several encodings and separators)
def _read_csv_robust(path: Path):
    """Read a CSV file whose encoding and separator are not known in advance.

    Encodings are tried in order; for each encoding ';' is tried before ','.
    A parse that yields more than one column is returned immediately. A parse
    that yields a single column usually means the separator was wrong, so the
    other separator is tried first; if none gives several columns, the first
    single-column result for that encoding is returned.

    Returns:
        ``(dataframe, encoding, separator)`` of the accepted parse.

    Raises:
        The last error raised by ``pd.read_csv`` when no combination could be
        read at all.
    """
    encodings = ["utf-8", "utf-8-sig", "cp1257", "windows-1257", "cp1252", "latin-1", "iso-8859-1"]
    seps = [";", ","]
    last_err = None
    for enc in encodings:
        single_column = None
        for sep in seps:
            try:
                df_local = pd.read_csv(path, encoding=enc, sep=sep)
            except Exception as e:
                # Deliberately broad: any failure just moves on to the next combination
                last_err = e
                continue
            if df_local.shape[1] > 1:
                return df_local, enc, sep
            if single_column is None:
                single_column = (df_local, enc, sep)
        if single_column is not None:
            # The encoding decodes fine; the file really has a single column
            return single_column
    raise last_err if last_err else RuntimeError("CSV reading failed for unknown reasons")

if not cmp_path.exists():
    print(f"Ei leidnud võrdlust: {cmp_path}. Käivita esmalt Eleringi with-plan pipeline.")
else:
    df_raw, used_enc, used_sep = _read_csv_robust(cmp_path)
    if df_raw.empty:
        print(f"Tühi fail: {cmp_path}")
    else:
        # Detect columns: the first candidate present in the file wins
        time_candidates = [
            "datetime_hour", "timestamp", "time", "datetime", "ts", "Ajatempel (UTC)", "time_utc"
        ]
        plan_candidates = [
            "elering_plan", "consumptionPlan", "Planeeritud tarbimine", "plan"
        ]
        our_candidates = [
            "consumption_hourly", "our_forecast", "forecast", "prediction", "model_forecast",
            "yhat_consumption", "yhat_base"
        ]
        actual_candidates = [
            "consumption", "actual", "Tarbimine"
        ]

        def pick(cols):
            """Return the first name in *cols* that is a column of df_raw, else None."""
            return next((c for c in cols if c in df_raw.columns), None)

        time_col = pick(time_candidates)
        plan_col = pick(plan_candidates)
        our_col = pick(our_candidates)
        actual_col = pick(actual_candidates)

        if time_col is None:
            raise ValueError(f"Ei leidnud ajaveergu. Veerud: {list(df_raw.columns)}")
        if actual_col is None:
            raise ValueError("'Tegelik' veerg puudub (nt 'consumption'). Vigade graafikuid ei saa koostada.")
        if (plan_col is None) and (our_col is None):
            raise ValueError(
                f"Puuduvad prognoosi veerud. Otsisin {plan_candidates+our_candidates}, olemas on {list(df_raw.columns)}"
            )

        # Prepare and clean dataframe once
        df = df_raw.copy()
        df["timestamp"] = pd.to_datetime(df[time_col], utc=True, errors="coerce")
        df = df.dropna(subset=["timestamp"]).sort_values("timestamp").reset_index(drop=True)

        def to_num(s):
            """Convert a column to numbers; text may contain spaces and a decimal comma."""
            if s.dtype.kind in "biufc":
                return pd.to_numeric(s, errors="coerce")
            return pd.to_numeric(
                s.astype(str).str.replace(" ", "", regex=False).str.replace(",", ".", regex=False),
                errors="coerce",
            )

        for c in [plan_col, our_col, actual_col]:
            if c is not None and c in df.columns:
                df[c] = to_num(df[c])
                # Values below 0 or above 3000 are treated as invalid and blanked
                df.loc[(df[c] < 0) | (df[c] > 3000), c] = np.nan

        # Keep rows where actual is present; forecasts may be NaN independently
        df = df[df[actual_col].notna()].copy()

        # Reporting
        print(f"Loetud: {cmp_path} (enc={used_enc}, sep='{used_sep}')")
        print({"aeg": time_col, "elering_plan": plan_col, "meie_prognoos": our_col, "tegelik": actual_col})
        print(f"Read pärast puhastust: {len(df)}")

        # Filtering helper
        def apply_filter(df_in: pd.DataFrame, range_key: str) -> tuple[pd.DataFrame, str]:
            """Restrict *df_in* to the chosen range, counted back from its latest timestamp.

            Returns the filtered frame and a short suffix ('last7d', 'last30d',
            'all', or 'empty' when the input has no rows).
            """
            if df_in.empty:
                return df_in, "empty"
            max_ts = df_in["timestamp"].max()
            if range_key == "Last 7 days":
                start = max_ts - pd.Timedelta(days=7)
                return df_in[df_in["timestamp"] >= start].copy(), "last7d"
            elif range_key == "Last 30 days":
                start = max_ts - pd.Timedelta(days=30)
                return df_in[df_in["timestamp"] >= start].copy(), "last30d"
            else:
                return df_in.copy(), "all"

        # Plotting helper: show only errors vs actual
        def make_plots(df_in: pd.DataFrame, suffix: str, label: str):
            """Draw and save the error time series and the error histogram.

            Errors are computed as forecast minus actual, separately for the
            Elering plan and for our forecast. Both figures are saved as PNG
            files in plots_dir; MAE and RMSE are printed at the end.
            """
            has_plan = plan_col is not None and plan_col in df_in.columns and df_in[plan_col].notna().any()
            has_our = our_col is not None and our_col in df_in.columns and df_in[our_col].notna().any()
            has_actual = actual_col is not None and actual_col in df_in.columns and df_in[actual_col].notna().any()

            if not has_actual:
                print("Tegelik tarbimine puudub, ei saa vigu arvutada.")
                return

            # Compute errors (forecast - actual)
            err_plan = None
            err_our = None
            if has_plan:
                err_plan = (df_in[plan_col] - df_in[actual_col]).dropna()
            if has_our:
                err_our = (df_in[our_col] - df_in[actual_col]).dropna()

            if (err_plan is None or err_plan.empty) and (err_our is None or err_our.empty):
                print("Pole andmeid vigade graafikute jaoks.")
                return

            # Metrics
            mae_plan = float(err_plan.abs().mean()) if (err_plan is not None and not err_plan.empty) else None
            rmse_plan = float(np.sqrt((err_plan ** 2).mean())) if (err_plan is not None and not err_plan.empty) else None
            mae_our = float(err_our.abs().mean()) if (err_our is not None and not err_our.empty) else None
            rmse_our = float(np.sqrt((err_our ** 2).mean())) if (err_our is not None and not err_our.empty) else None

            # Time series of errors
            fig, ax = plt.subplots(figsize=(12, 5))
            if has_plan and err_plan is not None and not err_plan.empty:
                # align err series to timestamp index for plotting vs time
                ax.plot(df_in.loc[err_plan.index, "timestamp"], err_plan, label="Eleringi plaan", color="#1f77b4", linewidth=1.5)
            if has_our and err_our is not None and not err_our.empty:
                ax.plot(df_in.loc[err_our.index, "timestamp"], err_our, label="Meie prognoos", color="#ff7f0e", linewidth=1.5)
            ax.axhline(0, color="gray", linestyle="--", linewidth=1)
            title = f"Viga (prognoos - tegelik) — {label}"
            parts = []
            if mae_our is not None: parts.append(f"MAE(meie)={mae_our:.1f}")
            if mae_plan is not None: parts.append(f"MAE(elering)={mae_plan:.1f}")
            if rmse_our is not None: parts.append(f"RMSE(meie)={rmse_our:.1f}")
            if rmse_plan is not None: parts.append(f"RMSE(elering)={rmse_plan:.1f}")
            if parts:
                title += " (" + ", ".join(parts) + ")"
            ax.set_title(title)
            ax.set_xlabel("Aeg (UTC)")
            ax.set_ylabel("Viga (MWh)")
            ax.legend()
            ax.grid(True, alpha=0.3)
            err_ts_path = plots_dir / f"errors_vs_actual_timeseries_{suffix}.png"
            fig.tight_layout(); fig.savefig(err_ts_path, dpi=150)
            plt.show()
            print(f"Salvestatud: {err_ts_path}")

            # Histogram overlay
            fig, ax = plt.subplots(figsize=(10, 4))
            bins = 40
            # Determine common bin edges for comparability
            vals = []
            if err_plan is not None and not err_plan.empty:
                vals.append(err_plan.values)
            if err_our is not None and not err_our.empty:
                vals.append(err_our.values)
            if vals:
                all_vals = np.concatenate(vals)
                rng = (np.nanmin(all_vals), np.nanmax(all_vals))
            else:
                rng = None
            if err_plan is not None and not err_plan.empty:
                ax.hist(err_plan.dropna(), bins=bins, range=rng, color="#1f77b4", alpha=0.5, label="Eleringi plaan")
            if err_our is not None and not err_our.empty:
                ax.hist(err_our.dropna(), bins=bins, range=rng, color="#ff7f0e", alpha=0.7, label="Meie prognoos")
            ax.set_title(f"Vea jaotus (prognoos - tegelik) — {label}")
            ax.set_xlabel("Viga (MWh)")
            ax.set_ylabel("Sagedus")
            ax.legend()
            ax.grid(True, axis="y", alpha=0.3)
            err_hist_path = plots_dir / f"errors_vs_actual_hist_{suffix}.png"
            fig.tight_layout(); fig.savefig(err_hist_path, dpi=150)
            plt.show()
            print(f"Salvestatud: {err_hist_path}")

            # Print metrics summary
            print({
                "MAE": {"meie": None if mae_our is None else round(mae_our, 2), "elering": None if mae_plan is None else round(mae_plan, 2)},
                "RMSE": {"meie": None if rmse_our is None else round(rmse_our, 2), "elering": None if rmse_plan is None else round(rmse_plan, 2)},
            })

        # Widgets UI or fallback
        def run_with_range(range_key: str):
            """Filter the cleaned frame by *range_key* and draw the plots for it."""
            filtered, suffix = apply_filter(df, range_key)
            if suffix == "empty":
                # apply_filter reports an empty frame with a suffix that has no
                # label below; stop here instead of failing with a KeyError.
                print("Pole andmeid vigade graafikute jaoks.")
                return
            label = {"last7d": "viimased 7 päeva", "last30d": "viimased 30 päeva", "all": "kogu periood"}[suffix]
            make_plots(filtered, suffix, label)

        if USE_WIDGETS:
            range_dd = widgets.Dropdown(
                options=["Last 7 days", "Last 30 days", "All"],
                value="Last 7 days",
                description="Ajavahemik:",
            )
            refresh_btn = widgets.Button(description="Refresh plots", icon="refresh", button_style="")
            out = widgets.Output()

            def on_click(_):
                """Redraw the plots for the currently selected range."""
                with out:
                    clear_output(wait=True)
                    run_with_range(range_dd.value)

            refresh_btn.on_click(on_click)
            ui = widgets.HBox([range_dd, refresh_btn])
            display(ui, out)

            # initial draw
            with out:
                clear_output(wait=True)
                run_with_range(range_dd.value)
        else:
            print("ipywidgets puudub. Joonistan vaikimisi viimased 7 päeva. (Võid paigaldada ipywidgets interaktiivsuseks)")
            run_with_range("Last 7 days")

Loetud: scripts\forecast_vs_actual_comparison_hourly.csv (enc=utf-8, sep=';')
{'aeg': 'datetime_hour', 'elering_plan': 'elering_plan', 'meie_prognoos': 'consumption_hourly', 'tegelik': 'Tarbimine'}
Read pärast puhastust: 7


HBox(children=(Dropdown(description='Ajavahemik:', options=('Last 7 days', 'Last 30 days', 'All'), value='Last…

Output()