In [None]:
# Run the notebook from the repository root. The Makefile exports that path as `CWD`.
import os

_repo_root = os.environ["CWD"]
os.chdir(_repo_root)
# Show the directory we ended up in (same value the `%pwd` magic reports).
os.getcwd()

In [None]:
import datetime as dt

import sqlalchemy as sa
from sqlalchemy import select
from sqlalchemy.orm import sessionmaker
import pandas as pd

import altair as alt

# Configure Altair's "default" data transformer with `max_rows=None`, i.e. without a
# row limit, so that the larger frames built further down can be passed to charts.
alt.data_transformers.enable("default", max_rows=None)

import pvsite_datamodel.sqlmodels as models

from forecast_inference._db_helpers import get_generation, get_forecasts, rows_to_df
from forecast_inference.scripts.live_eval_model import _resample_df

In [None]:
# This is the host given my ssh tunnel. The password is set in a .pgpass file.
HOST = "postgresql://main@localhost:9997/pvsitedevelopment"

# Take a single "now" and derive both window bounds from it, so that the window is
# exactly 1 day back and 36 hours ahead of the same instant. The value is kept as a
# *naive* UTC datetime (what `utcnow()` used to return), built without the deprecated
# `datetime.utcnow()`.
NOW_UTC = dt.datetime.now(dt.timezone.utc).replace(tzinfo=None)
START_UTC = NOW_UTC - dt.timedelta(days=1)
END_UTC = NOW_UTC + dt.timedelta(hours=36)
# Forecast horizon passed to `get_forecasts`, in minutes.
HORIZON_MINUTES = 2 * 60  # alternative: 60 * 1
# Resample both time series (forecast and ground truth) to this number of minutes,
# so that we can easily calculate the cost between them.
RESAMPLE_MINUTES = 15

In [None]:
# Opt in to sqlalchemy's 2.0-style API with `future=True` (it also works on 1.4).
engine = sa.create_engine(HOST, future=True)
# Session factory bound to that engine; used as `with Session() as session:` below.
Session = sessionmaker(bind=engine)

In [None]:
# Count the rows of the sites table.
site_count_query = select(sa.func.count()).select_from(models.SiteSQL)
with Session() as session:
    # The query yields a single row with a single column: the count.
    num_sites = session.scalars(site_count_query).one()
print(num_sites)

In [None]:
# Find the sites that have at least one forecast.
# An explicit ORDER BY keeps the list stable between runs: an `order_by()` without
# arguments adds no ordering, and later cells slice / truncate the sites by position.
with Session() as session:
    site_uuids = session.scalars(
        select(models.ForecastSQL.site_uuid)
        .distinct()
        .order_by(models.ForecastSQL.site_uuid)
    ).all()
print(len(site_uuids))

In [None]:
# Optional: uncomment to work on a smaller slice of the sites.
# offset = 0
# limit = 100
# site_uuids = site_uuids[offset : offset + limit]
# site_uuids

In [None]:
# Load the generation of the selected sites over the [START_UTC, END_UTC] window.
with Session() as session:
    # The rows are turned into a DataFrame inside the `with` block, i.e. while the
    # session is still open.
    df_generation = rows_to_df(
        get_generation(
            session,
            site_uuids=site_uuids,
            start_utc=START_UTC,
            end_utc=END_UTC,
        )
    )
df_generation.head()

In [None]:
# Load the forecasts for the same sites and window (this one takes a few seconds).
with Session() as session:
    # The rows are turned into a DataFrame inside the `with` block, i.e. while the
    # session is still open.
    df_forecast = rows_to_df(
        get_forecasts(
            session,
            site_uuids=site_uuids,
            horizon_minutes=HORIZON_MINUTES,
            start_utc=START_UTC,
            end_utc=END_UTC,
        )
    )
df_forecast.head()

In [None]:
# Calculate some costs.
# We do the same sort of calculations that we do in the live_eval_model *script*.

# Number of sites shown in the chart below, counted from the highest error downwards.
KEEP_N_WORSE = 100

# Bring forecast and generation onto the same RESAMPLE_MINUTES grid.
df_f = _resample_df(
    df_forecast.set_index(["site_uuid", "start_utc"])[["forecast_power_kw"]], RESAMPLE_MINUTES
)
# display(df_f.head())

df_g = _resample_df(
    df_generation.set_index(["site_uuid", "start_utc"])[["generation_power_kw"]], RESAMPLE_MINUTES
)
# display(df_g.head())
# Keep only rows with non-negligible generation. This also guarantees that the
# per-site generation sums used as denominators below are strictly positive.
df_g = df_g[df_g["generation_power_kw"] > 0.001]

# Put both series side by side and keep only the rows where both have a value.
df = pd.concat([df_g, df_f], axis=1).dropna()

abs_diff = (df["forecast_power_kw"] - df["generation_power_kw"]).abs()
sum_abs_diff = abs_diff.groupby(level="site_uuid").sum()
sum_generation = df["generation_power_kw"].groupby(level="site_uuid").sum()

# Error with all the sites pooled together. This is a single number, so it is
# printed once as-is (no averaging needed).
errors_flat = abs_diff.sum() / df["generation_power_kw"].sum()
print("flat", errors_flat)

# Error per site, sorted from the worst (highest) to the best (lowest), so that the
# first KEEP_N_WORSE entries really are the worst sites.
error = (sum_abs_diff / sum_generation).sort_values(ascending=False)
print(error.mean())
display(error.to_frame(name="error"))

sorted_ids = list(error.index.unique())[:KEEP_N_WORSE]
# Long format (one row per site / timestamp / variable) for Altair.
data = df.melt(ignore_index=False).reset_index()
data = data[data["site_uuid"].isin(sorted_ids)]

# One small facet per site, ordered like `sorted_ids`, each with its own y scale.
chart = (
    alt.Chart(data)
    .mark_line()
    .encode(
        x="start_utc",
        y="value",
        color="variable",
        facet=alt.Facet("site_uuid", columns=10, spacing=0, sort=sorted_ids),
    )
    .properties(width=130, height=60)
    .resolve_scale(
        y="independent",
    )
)
display(chart)

In [None]:
def _as_long(frame, value_column, label):
    """Return a copy of `frame` where `value_column` is replaced by `which` and `power_kw`.

    `which` holds `label` on every row and `power_kw` holds the values that were in
    `value_column`. The input frame is left untouched.
    """
    long_frame = frame.copy()
    long_frame["which"] = label
    long_frame["power_kw"] = long_frame.pop(value_column)
    return long_frame


# Same layout for both sources, so they can be stacked into a single frame.
df_g = _as_long(df_generation, "generation_power_kw", "generation")
df_f = _as_long(df_forecast, "forecast_power_kw", "forecast")
df = pd.concat([df_f, df_g])

In [None]:
# Restrict the plot to the first 100 distinct sites found in `df`.
ids = df["site_uuid"].unique()[:100]
data = (
    df[df["site_uuid"].isin(ids)]
    .sort_values("which", ascending=False)
    # Plot each value at the middle of its [start_utc, end_utc] interval.
    .assign(timestamp=lambda d: d["start_utc"] + (d["end_utc"] - d["start_utc"]) / 2)
)


def _power_line(which, color):
    """Build a data-less line layer of `power_kw` over `timestamp` for one `which` value."""
    return (
        alt.Chart()
        .mark_line(color=color)
        .encode(x="timestamp", y="power_kw")
        .transform_filter(alt.datum.which == which)
    )


# Generation in black (this layer also carries the facet size), forecast in orange.
base_generation = _power_line("generation", "black").properties(height=100, width=200)
base_forecast = _power_line("forecast", "orange")

# The data is attached to the layered chart, then split into one facet per site,
# each facet with its own y scale.
chart = (
    alt.layer(base_generation, base_forecast, data=data)
    .facet("site_uuid", columns=10)
    .resolve_scale(y="independent")
)

chart