# PyCaret

In [1]:
# Importe as bibliotecas necessárias
import pandas as pd
import holidays
import plotly.express as px
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
from pycaret.time_series import *

# Alternative experiment (disabled): load '../TCC/datasets/forecast_dap.csv'
# and use 'Day Ahead Price' as the target instead of the demand series below.

# Load the demand series, parse the timestamps and index the frame by them.
# asfreq('H') conforms the index to an hourly frequency; any hour missing from
# the CSV shows up as a NaN row.
data = pd.read_csv('../TCC/datasets/demanda.csv')
data['Timestamp'] = pd.to_datetime(data['Timestamp'])
data = data.set_index('Timestamp').asfreq('H')

# Target column to forecast
target = 'kVA fornecido'

# Keep only the target column. The explicit copy makes `data` an independent
# frame, so the feature columns added below are never written into a
# selection of the frame read from the CSV.
data = data[[target]].copy()

# Calendar features derived from the timestamp index
data['Hour'] = data.index.hour
data['Day'] = data.index.day
data['Dayofweek'] = data.index.dayofweek
data['Month'] = data.index.month
data['Quarter'] = data.index.quarter
data['Year'] = data.index.year

# Brazilian public-holiday flag (1 = holiday, 0 = regular day).
# The years are handed over as plain Python ints rather than a pandas Index.
br_holidays = holidays.Brazil(years=data.index.year.unique().tolist())
data['Holiday'] = data.index.map(lambda x: x in br_holidays).astype(int)

# Names of the numeric feature columns (every column except the target)
num_cols = data.columns.drop(target).tolist()

# Hold out the last 24 rows (1 day) to validate the forecast.
# Both parts are copied so that neither is a slice of the same parent frame;
# df_forecast receives an extra column further down the notebook.
df_forecast = data.iloc[-24:].copy()
data = data.iloc[:-24].copy()

In [2]:
# Show the training data (target plus calendar features, hold-out day removed)
display(data)

Unnamed: 0_level_0,kVA fornecido,Hour,Day,Dayofweek,Month,Quarter,Year,Holiday
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2022-01-01 00:00:00,2384.702608,0,1,5,1,1,2022,1
2022-01-01 01:00:00,2448.460757,1,1,5,1,1,2022,1
2022-01-01 02:00:00,2650.551481,2,1,5,1,1,2022,1
2022-01-01 03:00:00,2607.329616,3,1,5,1,1,2022,1
2022-01-01 04:00:00,2560.792438,4,1,5,1,1,2022,1
...,...,...,...,...,...,...,...,...
2023-02-27 20:00:00,1967.954857,20,27,0,2,1,2023,0
2023-02-27 21:00:00,2067.398727,21,27,0,2,1,2023,0
2023-02-27 22:00:00,2111.336125,22,27,0,2,1,2023,0
2023-02-27 23:00:00,2077.196880,23,27,0,2,1,2023,0


In [3]:
# Initialise the PyCaret time-series experiment.
s = setup(
    data=data,
    target=target,
    # Forecast horizon: 24 steps (one day of hourly data)
    fh=24,
    # Cross-validation: expanding window, 5 folds
    fold_strategy='expanding',
    fold=5,
    # Seasonality: 24-step period, seasonality type left to 'auto'
    seasonal_period=24,
    seasonality_type='auto',
    # Fixed session id for reproducible runs
    session_id=123,
    # Request GPU training for the estimators that support it
    use_gpu=True,
)

Unnamed: 0,Description,Value
0,session_id,123
1,Target,kVA fornecido
2,Approach,Univariate
3,Exogenous Variables,Present
4,Original data shape,"(10153, 8)"
5,Transformed data shape,"(10153, 8)"
6,Transformed train set shape,"(10129, 8)"
7,Transformed test set shape,"(24, 8)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] Using GPU Device: Intel(R) UHD Graphics 630, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 16 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Start training from score 0.500000
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] Using GPU Device: Intel(R) UHD Graphics 630, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 16 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Start training from score 0.500000
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] 

In [4]:
# Compare the available models with 3-fold cross-validation, rank them by
# RMSE and keep the single best one.
# `exclude` is documented as a list of model IDs, so 'auto_arima' is passed
# inside a list instead of as a bare string.
best = compare_models(fold=3, sort='RMSE', n_select=1, exclude=['auto_arima'])

Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
knn_cds_dt,K Neighbors w/ Cond. Deseasonalize & Detrending,0.4199,0.3594,255.0933,346.6075,0.5646,0.2204,0.141,1.96
lr_cds_dt,Linear w/ Cond. Deseasonalize & Detrending,0.4376,0.3617,265.8692,348.8345,0.5914,0.2264,0.1977,1.2333
ridge_cds_dt,Ridge w/ Cond. Deseasonalize & Detrending,0.4377,0.3618,265.906,348.886,0.5914,0.2264,0.1975,1.2133
xgboost_cds_dt,Extreme Gradient Boosting w/ Cond. Deseasonalize & Detrending,0.463,0.371,281.327,357.7731,0.7415,0.2533,0.1857,1.9
lightgbm_cds_dt,Light Gradient Boosting w/ Cond. Deseasonalize & Detrending,0.4957,0.3855,301.2296,371.7961,0.6898,0.2502,0.0168,5.4733
lasso_cds_dt,Lasso w/ Cond. Deseasonalize & Detrending,0.4789,0.3916,290.9622,377.6317,0.6508,0.2406,0.0709,5.8333
llar_cds_dt,Lasso Least Angular Regressor w/ Cond. Deseasonalize & Detrending,0.4789,0.3916,290.9622,377.6317,0.6508,0.2406,0.0709,1.17
gbr_cds_dt,Gradient Boosting w/ Cond. Deseasonalize & Detrending,0.5437,0.421,330.3015,405.9242,0.6356,0.258,0.0457,13.82
dt_cds_dt,Decision Tree w/ Cond. Deseasonalize & Detrending,0.5576,0.4278,338.6642,412.4798,0.4753,0.3938,0.0805,1.5633
en_cds_dt,Elastic Net w/ Cond. Deseasonalize & Detrending,0.5278,0.4281,320.6648,412.8734,0.7207,0.2571,-0.091,6.5233


In [5]:
# Tune the selected model: 100 candidate settings, each scored by RMSE over
# 5 cross-validation folds. choose_better=True asks PyCaret to fall back to
# the untuned model when tuning does not improve the metric (see PyCaret docs).
tuned_best = tune_model(
    estimator=best,
    optimize='RMSE',
    n_iter=100,
    fold=5,
    choose_better=True,
)

Unnamed: 0,cutoff,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2
0,2023-02-22 00:00,0.6728,0.6268,410.077,606.1609,0.2207,0.2874,-9.9216
1,2023-02-23 00:00,0.5392,0.4505,328.3294,435.292,0.1669,0.1749,-0.3001
2,2023-02-24 00:00,0.3269,0.25,198.7807,241.304,0.1309,0.1234,0.5023
3,2023-02-25 00:00,0.5268,0.4093,320.0159,394.7009,0.2756,0.2163,-0.1186
4,2023-02-26 00:00,0.6388,0.4914,387.752,473.4252,2.2937,0.4588,0.377
Mean,NaT,0.5409,0.4456,328.991,430.1766,0.6176,0.2522,-1.8922
SD,NaT,0.1208,0.1221,73.5711,118.1511,0.8395,0.1164,4.0258


Fitting 5 folds for each of 100 candidates, totalling 500 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 tasks      | elapsed:   30.2s
[Parallel(n_jobs=-1)]: Done 176 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 426 tasks      | elapsed:  2.7min
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:  3.1min finished


In [6]:
# Finalize the tuned model so it can be used for the out-of-sample forecast
model = finalize_model(estimator=tuned_best)

In [7]:
# Exogenous features for the 24 held-out hours.
# The feature columns are selected explicitly through num_cols instead of
# "everything except the target": a later cell adds a 'y_pred' column to
# df_forecast, and re-running this cell afterwards must not pass that column
# to the model as a feature.
future_data = df_forecast[num_cols]

In [8]:
# Forecast the next 24 time steps, using the held-out calendar features as
# the exogenous inputs.
future = 24
forecast = predict_model(
    estimator=model,
    fh=future,
    X=future_data,
)

In [9]:
# Attach the predictions next to the actual values.
# assign() returns a new frame rather than writing into df_forecast in place
# (df_forecast was created as an iloc slice of the full data set), and
# re-running the cell simply replaces the column.
# .values is used on purpose: the predictions are matched to the hold-out rows
# by position, not by index label.
df_forecast = df_forecast.assign(y_pred=forecast['y_pred'].values)

In [10]:
# Plot actual vs. predicted values for the held-out day with Plotly
fig = px.line(
    data_frame=df_forecast,
    x=df_forecast.index,
    y=[target, 'y_pred'],
    title='Forecast for next 24 hours',
)
fig.show()