In [None]:
import pandas as pd
import plotly.graph_objects as go
import plotly_express as px

from sktime.transformations.series.clear_sky import ClearSky

In [None]:
# Two years of national PV generation estimates, downloaded manually from
# https://www.solar.sheffield.ac.uk/pvlive/ (an API is also available).
# Adjust this path to wherever the CSV lives on your machine.
PV_LIVE_CSV = "../../../../../Downloads/pvlive/PV_Live Historical Results 2021.csv"

# Index on (gsp_id, datetime_gmt), then drop the gsp_id level so that only the
# timestamp remains as a sorted DatetimeIndex.
df = (
    pd.read_csv(
        PV_LIVE_CSV,
        index_col=["gsp_id", "datetime_gmt"],
        parse_dates=["datetime_gmt"],
    )
    .droplevel(0)
    .sort_index()
)
# Conform to a regular half-hourly grid; timestamps missing from the file
# become NaN rows. "30min" is the non-deprecated spelling of the "30T" alias.
df = df.asfreq("30min")
df["yday"] = df.index.dayofyear
# Time of day in decimal hours: 60 minutes per hour, 3600 seconds per hour.
df["tod"] = df.index.hour + df.index.minute / 60 + df.index.second / 3600
# Per-unit generation: generation divided by capacity.
df["generation_pu"] = df["generation_mw"] / df["capacity_mwp"]

df.info()

In [None]:
# Fit a ClearSky model with default settings on the whole per-unit series.
# Expect this cell to run for roughly 1-2 minutes.
full_series = df["generation_pu"]
cs_model = ClearSky()
cs_model.fit(full_series)

In [None]:
# Heatmap of the clear sky power grid of the model currently held in
# `cs_model`. The grid carries a two-level index: level 0 is drawn on the
# x axis, level 1 on the y axis, and the values set the colour.
clearsky_grid = cs_model.clearskypower
fig = go.Figure(
    data=go.Heatmap(
        x=clearsky_grid.index.get_level_values(0),
        y=clearsky_grid.index.get_level_values(1),
        z=clearsky_grid,
        # no `type=` here: go.Heatmap already fixes the trace type itself
        colorscale="Viridis",
    )
)
# Label the axes with the index level names so the figure stands on its own.
fig.update_layout(
    title="Clear sky power",
    xaxis_title=clearsky_grid.index.names[0],
    yaxis_title=clearsky_grid.index.names[1],
)
fig.show()

In [None]:
def plot_clearsky_heatmap(model, title=None):
    """Build a heatmap figure of a fitted model's ``clearskypower`` grid.

    Parameters
    ----------
    model : fitted estimator exposing a ``clearskypower`` Series with a
        two-level index (as the ClearSky models in this notebook do).
    title : str, optional
        Figure title.

    Returns
    -------
    plotly.graph_objects.Figure
        Heatmap with index level 0 on x, level 1 on y and the grid values as
        colour. Axis titles are taken from the index level names.
    """
    grid = model.clearskypower
    fig = go.Figure(
        data=go.Heatmap(
            x=grid.index.get_level_values(0),
            y=grid.index.get_level_values(1),
            z=grid,
            # no `type=` here: go.Heatmap already fixes the trace type itself
            colorscale="Viridis",
        )
    )
    fig.update_layout(
        title=title,
        xaxis_title=grid.index.names[0],
        yaxis_title=grid.index.names[1],
    )
    return fig


# Refit on increasingly large parts of the year (rows whose day of year is
# below each cutoff) and draw the resulting clear sky power grid each time.
# Every iteration performs a full fit, so this cell is slow.
# `cs_model` is rebound on purpose: after the loop it holds the last fit.
for yday_cutoff in (100, 200, 300, 350):
    cs_model = ClearSky()
    cs_model.fit(df.loc[df["yday"] < yday_cutoff, "generation_pu"])
    plot_clearsky_heatmap(
        cs_model, title=f"Clear sky power, fitted on yday < {yday_cutoff}"
    ).show()

In [None]:
# Refit on the full series, this time with an explicit minimum threshold.
# To compare against the estimator's defaults, build the model with a plain
# `ClearSky()` instead.
MIN_THRESH = 0.15
cs_model = ClearSky(min_thresh=MIN_THRESH)
cs_model.fit(df["generation_pu"])
# Last expression of the cell: show the fitted clear sky power grid.
cs_model.clearskypower

In [None]:
# Flatten the fitted grid into long format: the index levels become ordinary
# columns and the grid values land in a column called "value".
clearsky_long = cs_model.clearskypower.rename("value").reset_index()

# One line per day of year, plotted against time of day.
fig = px.line(
    clearsky_long,
    x="tod",
    y="value",
    line_group="yday",
    title="Daily trajectories of clear sky power",
)

fig.update_yaxes(matches=None)
# Faint grey lines so that the overlap of many days stays readable.
fig.update_traces(opacity=0.2, line={"color": "grey"})
fig.update_layout(width=1500, height=550)
fig.show()

In [None]:
# Apply the fitted transform to the per-unit series, then feed the result
# through the inverse transform so the round trip can be inspected later.
generation_pu = df["generation_pu"]
df["generation_csi"] = cs_model.transform(generation_pu)
df["generation_inv"] = cs_model.inverse_transform(df["generation_csi"])

In [None]:
# Round-trip check: when the model was built with `min_thresh=None`, require
# that transform followed by inverse_transform reproduces the input series.
# `pd.testing` is used so no extra import is needed in the middle of the
# notebook.
if cs_model.min_thresh is None:
    pd.testing.assert_series_equal(
        df["generation_pu"], df["generation_inv"].rename("generation_pu")
    )

# Show the rows where the round trip does not give back the original value.
# Note: rows in which either value is NaN are listed too, since NaN != NaN.
roundtrip_differs = df["generation_pu"] != df["generation_inv"]
df.loc[roundtrip_differs, ["generation_pu", "generation_inv"]]

In [None]:
# Long-format copy of the generation columns: one row per (timestamp,
# variable), with `tod` and `yday` kept as identifier columns and the original
# datetime index preserved.
value_columns = [
    "generation_csi",
    "generation_pu",
    "generation_mw",
    "generation_inv",
]
id_columns = ["tod", "yday"]
df_plot = df[value_columns + id_columns].melt(
    id_vars=id_columns, ignore_index=False
)

# One facet per variable, one line per day of year.
fig = px.line(
    df_plot,
    x="tod",
    y="value",
    line_group="yday",
    facet_col="variable",
    title="Daily trajectories of solar generation",
)

# Each facet gets its own y range because the variables differ in scale.
fig.update_yaxes(matches=None)
fig.update_traces(opacity=0.2, line={"color": "grey"})
fig.update_layout(width=1500, height=550)
fig.show()

In [None]:
# Reuses the long-format `df_plot` frame built in the previous cell (columns
# `tod`, `yday`, `variable`, `value`) instead of recomputing the same melt.
# Only strictly positive values are kept for the box plots.
df_positive = df_plot.loc[df_plot["value"] > 0]

# One facet per variable, one box per time of day.
fig = px.box(
    data_frame=df_positive,
    x="tod",
    y="value",
    facet_col="variable",
    title="boxplots showing the changing distribution of solar generation > 0",
)

# Each facet gets its own y range because the variables differ in scale.
fig.update_yaxes(matches=None)
fig.update_layout(height=550, width=1500)
fig.show()

In [None]:
# Run sktime's `check_estimator` utility against the ClearSky class.
# Open question: why are multivariate tests being run here?
from sktime.utils import estimator_checks

estimator_checks.check_estimator(ClearSky)