In [6]:
# potrebne knjiznice
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import os
import datetime

from tsfresh import extract_features, extract_relevant_features, select_features
from tsfresh.utilities.dataframe_functions import impute
from tsfresh.feature_extraction import ComprehensiveFCParameters
from tsfresh.utilities.dataframe_functions import roll_time_series

from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_absolute_percentage_error

## Uvoz podatkov **SEEDS_TP_A+_A-**

In [2]:
# Load the raw measurements. The first data row is skipped via .iloc[1:]
# (presumably a secondary header/units row -- TODO confirm against the CSV).
data_A = pd.read_csv("SEEDS_TP_A+_A-.csv", index_col=0).iloc[1:, :]
data_A = data_A.apply(pd.to_numeric)
data_A.index = pd.to_datetime(data_A.index, format="%d/%m/%Y %H:%M")
# Replace the parsed timestamps with a regular 15-minute grid starting at the
# first timestamp. "15min" is used instead of the deprecated "15T" alias.
# NOTE(review): this assumes the file has no gaps or duplicated timestamps.
data_A.index = pd.date_range(start=data_A.index[0], periods=len(data_A), freq="15min")

# One month of history plus 2 days reserved for the forecast.
in_window = (data_A.index >= "2023-02-01") & (data_A.index < "2023-03-03")
# Single .loc selection + explicit copy, so the column assignments below act on
# an independent frame (no chained indexing / SettingWithCopyWarning).
df = data_A.loc[in_window, ["TP BENCINSKI SERVIS 3 2781_A+_401"]].copy()
df["time"] = df.index
df["id"] = 2  # series id (the original comment labels it as the month)
df.columns = ["moc", "time", "id"]

## Recursive forecast

### tsfresh

$\dots, t-3, t-2, t-1, t$ za napoved $t+1$

$\dots, t-3, t-2, t-1, t$ in napovedano $t+1$ za napoved $t+2$

In [3]:
# Sampling layout: 15-minute data -> 4 * 24 samples per day.
STEPS_PER_DAY = 4 * 24
HORIZON_STEPS = 2 * STEPS_PER_DAY  # last 2 days are held out for the forecast
WINDOW_STEPS = STEPS_PER_DAY       # rolling-window time shift used for features

# train/test split: everything except the last 2 days is used for training
data_train = df[:len(df) - HORIZON_STEPS]
data_test = df[-HORIZON_STEPS:]

# target: the value one step ahead of each timestamp (the last row has no
# successor and is dropped)
y_train = data_train["moc"].shift(-1).dropna()

# features for training data; max and min time shifts are equal, so only
# full-length windows are kept.
# NOTE(review): per the tsfresh docs a window with max_timeshift=N can hold
# N + 1 samples -- confirm this is the intended "1 day" window.
data_rolled = roll_time_series(
    data_train,
    column_id="id",
    column_sort="time",
    max_timeshift=WINDOW_STEPS,
    min_timeshift=WINDOW_STEPS,
)
features_train = extract_features(
    data_rolled, column_id="id", column_sort="time", column_value="moc"
)
# the rolled ids are (id, timestamp) pairs; keep only the timestamp part
features_train.index = features_train.index.map(lambda window_id: window_id[1])
# drop every feature column containing at least one NaN
# (in the original run one feature consisted only of NaNs)
features_train_selected = features_train.dropna(axis="columns")

# Align target and features on one shared index so that both have the same
# rows in the same order; the model is later fitted on them positionally.
common_index = features_train_selected.index.intersection(y_train.index)
y_train = y_train.loc[common_index]
features_train_selected = features_train_selected.loc[common_index]

# feature selection with tsfresh
features_train_selected = select_features(features_train_selected, y_train)

Rolling: 100%|██████████| 20/20 [00:04<00:00,  4.49it/s]
Feature Extraction: 100%|██████████| 20/20 [00:50<00:00,  2.53s/it]


## Decision Tree Regressor

In [None]:
# Recursive forecast for the next 2 days

# Fit the model on 2023-02-02 00:15:00 to 2023-02-28 23:30:00
# (date range as stated by the original author).
model = DecisionTreeRegressor(random_state=42)
model.fit(features_train_selected, y_train)

....