In [1]:
import pickle
from tsflex.chunking import chunk_data
from tsflex.features import FeatureCollection

from typing import List

import pandas as pd
import catboost

### Deserialization

In [2]:
def pickle_load(file_name):
    """Read the file at ``file_name`` in binary mode and return the unpickled object.

    Unpickling can execute arbitrary code, so only point this at files from a
    trusted source.
    """
    with open(file_name, mode="rb") as handle:
        loaded = pickle.load(handle)
    return loaded

In [3]:
# Load the serialized artifacts that predict() relies on. The paths are relative,
# so the files are expected in the current working directory.
# NOTE: these are unpickled, which can run arbitrary code; use trusted files only.

# Feature extraction object; predict() calls its .calculate() on every data chunk.
fc_p: FeatureCollection = pickle_load("fc.pkl")
# Names of the feature columns that are selected before calling the model.
selected_cols_p: List[str] = pickle_load("cols.pkl")
# Fitted model; predict() calls its .predict() on the selected feature columns.
pipe_p: catboost.CatBoostRegressor = pickle_load("pipe.pkl")
# Processing step applied to the calculated features via .process(); its type is
# not visible here -- presumably a tsflex processing pipeline, TODO confirm.
sp_p = pickle_load("sp.pkl")


def predict(data: pd.DataFrame) -> pd.Series:
    """Run the deserialized feature-extraction + model pipeline on ``data``.

    The input is split with ``chunk_data`` (``chunk_range_margin="10 min"``).
    For every chunk the features are calculated with ``fc_p`` and then passed
    through ``sp_p``. The per-chunk feature frames are concatenated row-wise,
    restricted to ``selected_cols_p``, and rows containing any NaN are dropped
    before they are handed to ``pipe_p``.

    Parameters
    ----------
    data : pd.DataFrame
        The raw input data to compute features and predictions for.

    Returns
    -------
    pd.Series
        The model output, named ``"predictions"`` and indexed by the index of
        the NaN-free feature rows. Because incomplete feature rows are
        dropped, it can hold fewer rows than were extracted.

    Raises
    ------
    ValueError
        Raised by ``pd.concat`` when ``chunk_data`` yields no chunks at all.
    """
    # One processed feature frame per chunk.
    feature_frames = [
        sp_p.process(
            fc_p.calculate(
                chunk, show_progress=True, return_df=True, approve_sparsity=True
            ),
            return_df=True,
        )
        for chunk in chunk_data(data=data, chunk_range_margin="10 min")
    ]

    # Stack the chunks row-wise; copy=False asks pandas not to copy the chunk data.
    df_feat = pd.concat(feature_frames, axis=0, copy=False)

    # Keep only the selected feature columns and drop incomplete rows.
    df_feat = df_feat[selected_cols_p].dropna(how="any", axis=0)

    predictions = pipe_p.predict(df_feat)
    return pd.Series(predictions, index=df_feat.index, name="predictions")

In [4]:
# Read the test inputs (X_test) and the test targets (y_test) from parquet files;
# the paths are relative to the current working directory.
X_test = pd.read_parquet("X_test.parquet")
y_test = pd.read_parquet("y_test.parquet")

In [5]:
# Display the test inputs (the rendered frame is truncated by pandas).
X_test

Unnamed: 0_level_0,Sub_metering_1,Sub_metering_2,Sub_metering_3,time,Global_active_power,Global_reactive_power
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-02-08 04:28:00,0.0,0.0,0.0,2010-02-08 04:28:00,0.252,0.054
2010-02-08 04:29:00,0.0,0.0,1.0,2010-02-08 04:29:00,0.250,0.054
2010-02-08 04:30:00,0.0,0.0,1.0,2010-02-08 04:30:00,0.250,0.054
2010-02-08 04:31:00,0.0,0.0,1.0,2010-02-08 04:31:00,0.250,0.054
2010-02-08 04:32:00,0.0,0.0,0.0,2010-02-08 04:32:00,0.250,0.054
...,...,...,...,...,...,...
2010-11-26 20:58:00,0.0,0.0,0.0,2010-11-26 20:58:00,0.946,0.000
2010-11-26 20:59:00,0.0,0.0,0.0,2010-11-26 20:59:00,0.944,0.000
2010-11-26 21:00:00,0.0,0.0,0.0,2010-11-26 21:00:00,0.938,0.000
2010-11-26 21:01:00,0.0,0.0,0.0,2010-11-26 21:01:00,0.934,0.000


In [6]:
# Compute predictions for the test inputs with the predict() function defined above.
y_pred = predict(X_test)

100%|██████████| 230/230 [00:56<00:00,  4.10it/s]


In [7]:
# Put the predictions next to the targets (index join) and keep only the rows
# for which both a prediction and a target value are present.
df_preds = (
    y_pred.to_frame()
    .join(y_test)
    .dropna(axis=0, how="any")
)
df_preds

Unnamed: 0_level_0,predictions,GAP_avg15min_shift-18min
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2010-02-09 04:28:00,1.159084,1.537600
2010-02-09 04:43:00,1.137598,1.382000
2010-02-09 04:58:00,1.029496,1.420800
2010-02-09 05:13:00,1.130508,1.475867
2010-02-09 05:28:00,1.073466,0.618400
...,...,...
2010-11-26 19:28:00,1.580021,1.596933
2010-11-26 19:43:00,1.747098,1.606533
2010-11-26 19:58:00,1.796373,1.299067
2010-11-26 20:13:00,1.866325,0.998267


In [8]:
# Plotting dependencies, imported in this visualization cell.
import plotly.graph_objects as go
from plotly_resampler.downsamplers import LTTB
from plotly_resampler import FigureResampler

# Wrap an empty plotly figure in a FigureResampler with LTTB as default downsampler.
fig = FigureResampler(go.Figure(), default_downsampler=LTTB(interleave_gaps=True))

# The data is handed over via hf_x / hf_y rather than inside the trace objects.
# "target" shows the first column of y_test; "prediction" shows df_preds.predictions.
fig.add_trace(go.Scattergl(name="target"), hf_x=y_test.index, hf_y=y_test.iloc[:, 0])
fig.add_trace(go.Scattergl(name="prediction"), hf_x=df_preds.index, hf_y=df_preds.predictions)


# Centered title, fixed figure height and axis labels.
fig.update_layout(title="Power consumption predictions", title_x=0.5, height=600)
fig.update_xaxes(title="Time")
fig.update_yaxes(title="AVG power consumption (kW)")
# Serve the figure through a Dash app on local port 8062 (URL is printed below).
fig.show_dash(mode="external", port=8062)

The dash_html_components package is deprecated. Please replace
`import dash_html_components as html` with `from dash import html`
  import dash_html_components as html


Dash app running on http://127.0.0.1:8062/
