In [1]:
# Set the current working directory to the root of the repo (`CWD` is set in the Makefile)
import os

os.chdir(os.environ["CWD"])
%pwd

'/home/simon/code/pv-site-production/pv-site'

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import datetime as dt

import sqlalchemy as sa
from sqlalchemy import select
from sqlalchemy.orm import sessionmaker
import pandas as pd

import altair as alt

alt.data_transformers.enable("default", max_rows=None)

import pvsite_datamodel.sqlmodels as models
from pv_site._db_helpers import rows_to_df as _rows_to_df, get_forecasts, get_generation

In [4]:
# This is the host given my ssh tunnel. The password is set in a .pgpass file.
HOST = "postgresql://main@localhost:9997/pvsitedevelopment"

START_UTC = dt.datetime.utcnow() - dt.timedelta(days=1)
END_UTC = dt.datetime.utcnow() + dt.timedelta(hours=36)
HORIZON_MINUTES = 2 * 60  # 60 * 1

In [5]:
# `future=True` to try out sqlalchemy's 2.0 syntax (that also work in 1.4)
engine = sa.create_engine(HOST, future=True)
Session = sessionmaker(engine)

In [6]:
# How many sites in the DB
with Session() as session:
    num_sites = session.scalars(select(sa.func.count()).select_from(models.SiteSQL)).one()
print(num_sites)

739


In [7]:
# Find sites with some predictions
with Session() as session:
    site_uuids = session.scalars(select(models.ForecastSQL.site_uuid).distinct().order_by()).all()
print(len(site_uuids))

668


In [15]:
offset = 0
limit = 100
site_uuids = site_uuids[offset : offset + limit]
# site_uuids

In [16]:
def rows_to_df(rows):
    df = _rows_to_df(rows)
    for col in df.columns:
        if col.endswith("_power_kw"):
            df["power_kw"] = df[col]
            del df[col]
    return df

In [17]:
# Get the recent generation for the selected sites
with Session() as session:
    rows = get_generation(session, site_uuids=site_uuids, start_utc=START_UTC, end_utc=END_UTC)

    df_generation = rows_to_df(rows)
df_generation.head()

Unnamed: 0,site_uuid,start_utc,end_utc,power_kw
0,013ce6ee-5b75-4f76-ab0e-ef788eb237e9,2023-04-17 01:10:00,2023-04-17 01:15:00,0.0
1,013ce6ee-5b75-4f76-ab0e-ef788eb237e9,2023-04-17 01:15:00,2023-04-17 01:20:00,0.0
2,013ce6ee-5b75-4f76-ab0e-ef788eb237e9,2023-04-17 01:20:00,2023-04-17 01:25:00,0.0
3,013ce6ee-5b75-4f76-ab0e-ef788eb237e9,2023-04-17 01:25:00,2023-04-17 01:30:00,0.0
4,013ce6ee-5b75-4f76-ab0e-ef788eb237e9,2023-04-17 01:30:00,2023-04-17 01:35:00,0.0


In [18]:
# Get the corresponding forecasts
# This query still takes a few seconds to run.
# TODO: understand why and make it faster.
with Session() as session:
    rows = get_forecasts(
        session,
        site_uuids=site_uuids,
        horizon_minutes=HORIZON_MINUTES,
        start_utc=START_UTC,
        end_utc=END_UTC,
    )
    df_forecast = rows_to_df(rows)
df_forecast.head()

Unnamed: 0,site_uuid,start_utc,end_utc,power_kw
0,627af66c-637a-48c8-a21e-d659c853c5e2,2023-04-17 02:00:00,2023-04-17 02:15:00,0.0
1,bdf532da-6fed-4a08-8e0a-bec360c5043e,2023-04-17 02:00:00,2023-04-17 02:15:00,0.0
2,c34e6f81-be12-461c-a3fc-c81e842e56d0,2023-04-17 02:00:00,2023-04-17 02:15:00,0.0
3,f2b70ebc-fca8-466e-a051-bbb89c42ceab,2023-04-17 02:00:00,2023-04-17 02:15:00,0.0
4,2b724227-f316-49cc-938a-c66e079b4001,2023-04-17 02:15:00,2023-04-17 02:30:00,0.0


In [19]:
df_generation["which"] = "generation"
df_forecast["which"] = "forecast"
df = pd.concat([df_forecast, df_generation])

In [20]:
df.head()

Unnamed: 0,site_uuid,start_utc,end_utc,power_kw,which
0,627af66c-637a-48c8-a21e-d659c853c5e2,2023-04-17 02:00:00,2023-04-17 02:15:00,0.0,forecast
1,bdf532da-6fed-4a08-8e0a-bec360c5043e,2023-04-17 02:00:00,2023-04-17 02:15:00,0.0,forecast
2,c34e6f81-be12-461c-a3fc-c81e842e56d0,2023-04-17 02:00:00,2023-04-17 02:15:00,0.0,forecast
3,f2b70ebc-fca8-466e-a051-bbb89c42ceab,2023-04-17 02:00:00,2023-04-17 02:15:00,0.0,forecast
4,2b724227-f316-49cc-938a-c66e079b4001,2023-04-17 02:15:00,2023-04-17 02:30:00,0.0,forecast


In [21]:
data = df.copy().reset_index()  # [:1000]
ids = data["site_uuid"].unique()[:100]
data = data[data["site_uuid"].isin(ids)]
data = data.sort_values("which", ascending=False)
data["timestamp"] = data["start_utc"] + (data["end_utc"] - data["start_utc"]) / 2
# data = data[data['power_kw'] > 0]
base_generation = (
    alt.Chart()  # data[data['which'] == 'generation'])
    .mark_line(color="black")  # size=10, opacity=0.5, color='black')
    .encode(x="timestamp", y="power_kw")
    .transform_filter(alt.datum.which == "generation")
    .properties(height=100, width=200)
    #     .encode(
    #         x='timestamp',
    #         y='power_kw',
    # facet=alt.Facet('client_site_id', columns=6),
    # color=alt.Color('which', scale=alt.Scale(domain=['forecast', 'generation'], range=['red', 'black']))
)


# )

base_forecast = (
    alt.Chart()  # data[data['which'] == 'forecast'])
    .mark_line(color="orange")  # size=25, opacity=0.5, color='orange')
    .encode(x="timestamp", y="power_kw")
    .transform_filter(alt.datum.which == "forecast")
)

chart = (
    alt.layer(base_generation, base_forecast, data=data)
    .facet("site_uuid", columns=10)
    .resolve_scale(
        y="independent",
    )
)


chart