In [None]:
# Analysis of the output of experiments

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import datetime
import os
import pathlib

import pandas as pd
import altair as alt
import numpy as np

import plotly.express as px

# Lift Altair's cap on the number of rows a chart's dataset may contain.
alt.data_transformers.disable_max_rows()


def _(df, *args, **kwargs):
    """Print the number of rows in `df`, then render its first rows.

    Any extra positional or keyword arguments are forwarded to `df.head`.
    """
    n_rows = len(df)
    print(n_rows)
    preview = df.head(*args, **kwargs)
    display(preview)

In [None]:
# Setting the working directory by hand is tedious, so the Makefile exports it in the
# `CWD` environment variable. When the variable is unset (or empty) we stay put.
CWD = os.getenv("CWD")
if CWD:
    os.chdir(CWD)

In [None]:
%pwd

In [None]:
# Metadata table read from disk and indexed by its `ss_id` column.
inferred_meta = pd.read_csv("data/meta_inferred.csv", index_col="ss_id")
_(inferred_meta)

In [None]:
def round_to(x, to):
    """Round `x` to the nearest multiple of `to`.

    The rounding itself is delegated to the built-in `round`, so exact ties follow
    its rules (e.g. `round_to(10, 4)` is 8, not 12).
    """
    n_steps = round(x / to)
    return n_steps * to

In [None]:
# Forecast horizons are binned to the nearest multiple of this value. The binning is
# applied after `future` has been divided by 60 (minutes -> hours).
ROUND_FUTURE_TO = 4
# The minute-of-hour of the predicted timestamp is binned to the nearest multiple of
# this value.
ROUND_PRED_TO = 30
# Width of one facet of the line chart; its height is 1.5 times this value.
WIDTH = 65


def error_chart(names):
    """Build two Altair charts comparing the weighted MAE of several models.

    For every entry of `names`, `exp_results/<name>/errors.csv` is read; entries
    without such a file are skipped. The rows whose `metric` is "mae" are kept,
    joined to the `factor` column of `inferred_meta` (renamed to `capacity`), and
    each error is multiplied by that capacity. Errors are then averaged per model,
    per time of day of the predicted timestamp (binned with `ROUND_PRED_TO`) and
    per forecast horizon (binned with `ROUND_FUTURE_TO`).

    Parameters
    ----------
    names : iterable of str
        Sub-directory names under `exp_results/`. Their order is also used to
        order the colour scale.

    Returns
    -------
    tuple
        `(chart, chart2)`: a line chart of the mean weighted error against the
        time of day of the prediction, faceted by horizon, and a bar chart of the
        mean weighted error per model, faceted by horizon.

    Raises
    ------
    ValueError
        If no `errors.csv` could be read for any of `names`.
    """
    # Materialise once: `names` is iterated here and reused for the colour order.
    names = list(names)

    dfs = []
    for name in names:
        try:
            df = pd.read_csv(f"exp_results/{name}/errors.csv")
        except FileNotFoundError:
            # Best effort: an experiment directory without results is ignored.
            continue
        df["model"] = name
        dfs.append(df)

    if not dfs:
        # Fail with an actionable message instead of pandas' generic
        # "No objects to concatenate".
        raise ValueError(
            f"No errors.csv could be read under exp_results/ for any of: {names}"
        )

    # Each file carries its own 0..n index; drop them to avoid duplicate labels.
    df = pd.concat(dfs, ignore_index=True)

    _(df)

    df["ts"] = pd.to_datetime(df["ts"])
    df = df[df["metric"] == "mae"]
    df = df.join(inferred_meta[["factor"]], on="pv_id")
    df = df.rename(columns={"factor": "capacity"})
    # `future` is a horizon in minutes: the predicted timestamp is `ts` + horizon.
    df["pred_ts"] = df["ts"] + pd.to_timedelta(df["future"], unit="minute")
    df["weighted_error"] = df["capacity"] * df["error"]
    df["future"] = df["future"] / 60.0
    df = df[~df["error"].isnull()]

    # Time of day of the prediction, in minutes since midnight, binned.
    df["pred_hour"] = df["pred_ts"].dt.hour * 60 + round_to(
        df["pred_ts"].dt.minute, ROUND_PRED_TO
    )

    df["future"] = round_to(df["future"], ROUND_FUTURE_TO)

    df = (
        df[["model", "future", "weighted_error", "pred_hour"]]
        .groupby(["model", "pred_hour", "future"])
        .mean()
        .reset_index()
    )

    # Anchor the minutes-since-midnight on an arbitrary date so that Altair can
    # treat `pred_hour` as a timestamp and extract hours/minutes from it.
    df["pred_hour"] = pd.to_timedelta(df["pred_hour"], unit="minute")
    df["date"] = pd.Timestamp(2023, 1, 1)
    df["pred_hour"] = df["date"] + df["pred_hour"]

    print(f"Mean Error: {df.groupby(['model', 'future'])['weighted_error'].mean()}")

    chart = (
        alt.Chart(df)
        .mark_line()
        .encode(
            x=alt.X("hoursminutes(pred_hour)", title="Time *at prediction*"),
            y=alt.Y("weighted_error", title="Error"),
            color=alt.Color("model", sort=names),
            column=alt.Column("future:O", spacing=1),
        )
        .properties(
            height=WIDTH * 1.5,
            width=WIDTH,
        )
    )

    chart2 = (
        alt.Chart(df)
        .mark_bar()
        .encode(
            x=alt.X("model"),
            y=alt.Y("mean(weighted_error)", title="Error"),
            color=alt.Color("model", sort=names),
            column=alt.Column("future:O"),
        )
        .properties(
            height=125,
            width=30,
        )
    )
    return chart, chart2

In [None]:
# Every non-hidden sub-directory of `exp_results` is treated as one experiment.
# The full directory name is used rather than `.stem`, which would drop everything
# after the last dot (e.g. "lr_0.01" -> "lr_0") and make `error_chart` look for the
# wrong directory. Sorting keeps the model order (and colours) stable between runs,
# since `iterdir()` yields entries in arbitrary order.
names = sorted(
    p.name
    for p in pathlib.Path("exp_results").iterdir()
    if p.is_dir() and not p.name.startswith(".")
)
# To compare a hand-picked subset instead:
# names = ['10_days', '5_days', '1_days']

c1, c2 = error_chart(names)
display(c1)
display(c2)

In [None]:
# NOTE(review): no notebook-level `df` is assigned in the cells above (the `df` inside
# `error_chart` is local to that function), so on a fresh kernel this presumably raises
# NameError. Looks like a stale cell; confirm and move it below the cell that loads `df`.
df

In [None]:
# Raw (un-aggregated) MAE rows of a single experiment, prepared with the same steps
# as in `error_chart`, for closer inspection.
# `name` used to be read here without ever being assigned at notebook level (it is
# only a loop variable inside `error_chart`), which fails on Restart & Run All.
# NOTE(review): defaulting to the first discovered experiment; set it by hand to
# inspect another one.
name = names[0]

df = pd.read_csv(f"exp_results/{name}/errors.csv")
df["ts"] = pd.to_datetime(df["ts"])
df = df[df["metric"] == "mae"]
df = df.join(inferred_meta[["factor"]], on="pv_id")
df = df.rename(columns={"factor": "capacity"})
# `future` is a horizon in minutes: the predicted timestamp is `ts` + horizon.
df["pred_ts"] = df["ts"] + pd.to_timedelta(df["future"], unit="minute")
df["weighted_error"] = df["capacity"] * df["error"]
df["future"] = df["future"] / 60.0
df = df[~df["error"].isnull()]
_(df, 20)

In [None]:
# Rows whose predicted timestamp falls after the 20:xx hour (i.e. 21:00 onwards).
late = df.loc[df["pred_ts"].dt.hour.gt(20)]

In [None]:
# Display the rows of `df` whose predicted timestamp has an hour greater than 20.
late

In [None]:
# Scatter of `pred` against `y` on a random subset of the rows, overlaid with the
# y = x diagonal from 0 to 300 as a reference.
# NOTE(review): presumably `y` is the observed value and `pred` the model output, so
# points on the diagonal would be perfect predictions; confirm against errors.csv.
# The sample size is capped at the number of rows (`DataFrame.sample` raises when
# asked for more rows than exist) and seeded so the figure is reproducible.
chart = (
    alt.Chart(df.sample(n=min(len(df), 1000), random_state=0))
    .mark_circle(opacity=0.5)
    .encode(x="y", y="pred")
    .properties(width=400, height=400)
) + (
    alt.Chart(pd.DataFrame(dict(x=[0, 300], y=[0, 300])))
    .mark_line(color="black")
    .encode(x="x", y="y")
)
chart