# Dependências

In [1]:
!pip install dask



In [2]:
!pip install dask[dataframe]

Collecting dask-expr<1.2,>=1.1 (from dask[dataframe])
  Downloading dask_expr-1.1.15-py3-none-any.whl.metadata (2.5 kB)
INFO: pip is looking at multiple versions of dask-expr to determine which version is compatible with other requirements. This could take a while.
  Downloading dask_expr-1.1.14-py3-none-any.whl.metadata (2.5 kB)
  Downloading dask_expr-1.1.13-py3-none-any.whl.metadata (2.5 kB)
  Downloading dask_expr-1.1.12-py3-none-any.whl.metadata (2.5 kB)
  Downloading dask_expr-1.1.11-py3-none-any.whl.metadata (2.5 kB)
  Downloading dask_expr-1.1.10-py3-none-any.whl.metadata (2.5 kB)
Downloading dask_expr-1.1.10-py3-none-any.whl (242 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m242.2/242.2 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: dask-expr
Successfully installed dask-expr-1.1.10


In [3]:
!pip install sktime[all_extras]

Collecting sktime[all_extras]
  Downloading sktime-0.33.1-py3-none-any.whl.metadata (32 kB)
Collecting scikit-base<0.9.0,>=0.6.1 (from sktime[all_extras])
  Downloading scikit_base-0.8.3-py3-none-any.whl.metadata (8.5 kB)
Collecting arch<7.1.0,>=5.6 (from sktime[all_extras])
  Downloading arch-7.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Collecting autots<0.7,>=0.6.1 (from sktime[all_extras])
  Downloading autots-0.6.15-py3-none-any.whl.metadata (10 kB)
Collecting dash!=2.9.0 (from sktime[all_extras])
  Downloading dash-2.18.1-py3-none-any.whl.metadata (10 kB)
Collecting dtaidistance<2.4 (from sktime[all_extras])
  Downloading dtaidistance-2.3.12-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Collecting dtw-python (from sktime[all_extras])
  Downloading dtw_python-1.5.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (48 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.1/48.1 kB

In [13]:
import pandas as pd
import numpy as np
from pmdarima import auto_arima
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sktime.forecasting.arima import AutoARIMA
from sktime.performance_metrics.forecasting import mean_absolute_percentage_error
from sklearn.metrics import mean_squared_error

# Carregando Dataset

Essa tabela de produção de eletricidade possui as seguintes colunas:

- **DATE**: Esta coluna contém as datas no formato `MM/DD/AAAA` (mês/dia/ano), indicando o mês e o ano em que os dados de produção de eletricidade foram registrados.

- **IPG2211A2N**: Esta coluna contém valores numéricos que representam a produção de eletricidade nos Estados Unidos em bilhões de kilowatts-hora (kWh) para cada mês especificado na coluna `DATE`.

In [14]:
path = "/content/Electric_Production.csv"
df = pd.read_csv(path)

In [15]:
df.head()

Unnamed: 0,DATE,IPG2211A2N
0,1/1/1985,72.5052
1,2/1/1985,70.672
2,3/1/1985,62.4502
3,4/1/1985,57.4714
4,5/1/1985,55.3151


# Modelo Sktime

## Preparando os dados

In [16]:
df['DATE'] = pd.to_datetime(df['DATE'], format='%m/%d/%Y')

df = df.rename(columns={'IPG2211A2N': 'Electricity_Production'})

df.set_index('DATE', inplace=True)

In [17]:
train_size = int(len(df) * 0.8)
y_train = df['Electricity_Production'][:train_size]
y_test = df['Electricity_Production'][train_size:]

## Treinamento do Modelo

In [18]:
model = auto_arima(y_train, seasonal=True, m=12, suppress_warnings=True)

n_periods = len(y_test)
y_pred, conf_int = model.predict(n_periods=n_periods, return_conf_int=True)

mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error (MSE): {mse:.4f}")

Mean Squared Error (MSE): 60.7417


# Modelo do LSTM

## Preparando os dados

In [19]:
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(df[['Electricity_Production']])

In [20]:
def create_sequences(data, window_size):
    x = []
    y = []
    for i in range(window_size, len(data)):
        x.append(data[i-window_size:i, 0])
        y.append(data[i, 0])
    return np.array(x), np.array(y)

window_size = 60
x, y = create_sequences(scaled_data, window_size)

In [21]:
train_size = int(len(x) * 0.8)
x_train, y_train = x[:train_size], y[:train_size]
x_test, y_test = x[train_size:], y[train_size:]

x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

## Arquitetura e treinamento

In [22]:
model = Sequential()

model.add(LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1], 1)))
model.add(Dropout(0.2))

model.add(LSTM(units=50, return_sequences=False))
model.add(Dropout(0.2))

model.add(Dense(units=1))

model.compile(optimizer='adam', loss='mean_squared_error')

model.fit(x_train, y_train, epochs=20, batch_size=32)

  super().__init__(**kwargs)


Epoch 1/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 48ms/step - loss: 0.1826
Epoch 2/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 56ms/step - loss: 0.0354
Epoch 3/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 48ms/step - loss: 0.0193
Epoch 4/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 50ms/step - loss: 0.0173
Epoch 5/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 47ms/step - loss: 0.0144
Epoch 6/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step - loss: 0.0150
Epoch 7/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step - loss: 0.0130
Epoch 8/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step - loss: 0.0134
Epoch 9/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 69ms/step - loss: 0.0128
Epoch 10/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 75ms/step - loss: 0.0132
Epoch 11/20
[1m9/9

<keras.src.callbacks.history.History at 0x7f9af033a8c0>

In [23]:
y_pred_lstm = model.predict(x_test)
y_pred_lstm = scaler.inverse_transform(y_pred_lstm)

mse_lstm = mean_squared_error(scaler.inverse_transform(y_test.reshape(-1, 1)), y_pred_lstm)
print(f"Mean Squared Error (LSTM): {mse_lstm:.4f}")

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 408ms/step
Mean Squared Error (LSTM): 109.9873


# Comparação dos resultados e justificativa da métrica de erro

A escolha do Mean Squared Error (MSE) como métrica foi devido à sua capacidade de penalizar erros maiores, o que é crucial em previsões de séries temporais (Hyndman & Koehler, 2006). Nos resultados, o MSE do modelo LSTM foi de 109.9873, enquanto o do modelo AutoARIMA do sktime foi de 60.7417. Isso indica que o LSTM não capturou bem os padrões da série, resultando em previsões menos precisas. O modelo AutoARIMA se mostrou mais eficaz neste caso, sugerindo que métodos tradicionais de séries temporais podem ser mais adequados que modelos de aprendizado profundo sem um ajuste mais rigoroso.

### Referência:

Hyndman, R. J., & Koehler, A. B. (2006). "Another look at measures of forecast accuracy." International Journal of Forecasting, 22(4), 679-688. DOI: 10.1016/j.ijforecast.2006.03.001. Acesso em: 7 de jul. 2024.