In [75]:
import config
from src import load_data

import numpy as np
import pandas as pd

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

from tensorflow.keras import backend as K
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, SimpleRNN, LSTM, GRU

from tensorflow.keras.metrics import MeanAbsoluteError as mae
from tensorflow.keras.metrics import MeanAbsolutePercentageError as mape

In [76]:
data = load_data(config.PROCESSED_DATA_FILE)

Données chargées avec succès.


In [77]:
features = data.drop(columns=['date', 'Appliances']).values
target = data['Appliances'].values

In [78]:
scaler_x = MinMaxScaler()
scaler_y = MinMaxScaler()

features_scaled = scaler_x.fit_transform(features)
target_scaled = scaler_y.fit_transform(target.reshape(-1, 1))

In [79]:
# Fonction de création de fenêtres temporelles
def create_windows(features, target, window_size):
    X, y = [], []
    for i in range(len(features) - window_size):
        X.append(features[i:i+window_size])
        y.append(target[i+window_size])
    return np.array(X), np.array(y)

In [80]:
print(len(X))

19591


In [81]:
data[data['date'] == "2016-04-27 18:00:00"]

Unnamed: 0,date,Appliances,lights,T1,RH_1,T2,RH_2,T3,RH_3,T4,...,wednesday,thursday,friday,saturday,sunday,sin_hour,cos_hour,is_holiday,is_school_holiday,season
15414,2016-04-27 18:00:00,360,0,21.133333,34.966667,19.5,35.326667,21.23,34.0,20.29,...,1,0,0,0,0,-1.0,-1.83697e-16,0,0,0


In [82]:
train_size = 15415
print(train_size)

15415


In [83]:
test_size = len(X) - train_size
print(test_size)

4176


In [84]:
print(train_size + test_size)

19591


In [85]:
# Fenêtrage des données
window_size = 144  # Nombre de pas de temps
X, y = create_windows(features_scaled, target_scaled, window_size)

# Division en ensembles d'entraînement, validation et test
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

# X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
# X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

In [86]:
X_train.shape

(15415, 144, 39)

In [87]:
# RMSE (Root Mean Squared Error)
def rmse(y_true, y_pred):
    return K.sqrt(K.mean(K.square(y_true - y_pred), axis=-1))

# MAPE (Mean Absolute Percentage Error)
# def mape(y_true, y_pred):
#     return K.mean(K.abs((y_true - y_pred) / K.clip(y_true, K.epsilon(), None)), axis=-1)

In [88]:
# Modèle RNN
rnn_model = Sequential([
    Input(shape=(window_size, X_train.shape[2])),
    SimpleRNN(50, activation='tanh'),
    Dense(1)
])

rnn_model.compile(optimizer='adam', loss='mse', metrics=[mae, mape, rmse])

In [89]:
# Modèle LSTM
lstm_model = Sequential([
    Input(shape=(window_size, X_train.shape[2])),
    LSTM(50, activation='tanh'),
    Dense(1)
])

lstm_model.compile(optimizer='adam', loss='mse', metrics=[mae, mape, rmse])

In [90]:
# Modèle GRU
gru_model = Sequential([
    Input(shape=(window_size, X_train.shape[2])),
    GRU(50, activation='tanh'),
    Dense(1)
])

gru_model.compile(optimizer='adam', loss='mse', metrics=[mae, mape, rmse])

In [91]:
# Entraîner chaque modèle
batch_size = 32
epochs = 5

history_rnn = rnn_model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs)
history_lstm = lstm_model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs)
history_gru = gru_model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs)

Epoch 1/5
[1m482/482[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 14ms/step - loss: 0.0242 - mean_absolute_error: 0.1005 - mean_absolute_percentage_error: 102849.8750 - rmse: 0.1005
Epoch 2/5
[1m482/482[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 15ms/step - loss: 0.0092 - mean_absolute_error: 0.0586 - mean_absolute_percentage_error: 25380.8379 - rmse: 0.0586
Epoch 3/5
[1m482/482[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 16ms/step - loss: 0.0083 - mean_absolute_error: 0.0560 - mean_absolute_percentage_error: 28779.5645 - rmse: 0.0560
Epoch 4/5
[1m482/482[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 17ms/step - loss: 0.0085 - mean_absolute_error: 0.0561 - mean_absolute_percentage_error: 25529.6230 - rmse: 0.0561
Epoch 5/5
[1m482/482[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 17ms/step - loss: 0.0085 - mean_absolute_error: 0.0562 - mean_absolute_percentage_error: 36483.1445 - rmse: 0.0562
Epoch 1/5
[1m482/482[0m [32m━━━━━━━━━━━━━

In [108]:
# Évaluation des modèles
# rnn_loss, rnn_mae = rnn_model.evaluate(X_test, y_test)
# lstm_loss, lstm_mae = lstm_model.evaluate(X_test, y_test)
# gru_loss, gru_mae = gru_model.evaluate(X_test, y_test)

# print(f"RNN Loss: {rnn_loss}, MAE: {rnn_mae}")
# print(f"LSTM Loss: {lstm_loss}, MAE: {lstm_mae}")
# print(f"GRU Loss: {gru_loss}, MAE: {gru_mae}")

# Prédictions séquentielles
def predict_sequential(model, X_init, days_to_predict):
    predictions = []
    input_seq = X_init.copy()  # Commence avec la dernière fenêtre de l'entraînement
    for _ in range(days_to_predict):
        # Prédire un jour complet (144 points)
        print(input_seq.shape)
        # pred_day = model.predict(input_seq[np.newaxis, :, :])[0]
        pred_day = model.predict(input_seq)
        # print(pred_day.shape)
        predictions.append(pred_day)
        
        # Met à jour la séquence en décalant et ajoutant les 144 prédictions
        input_seq = np.roll(input_seq, -144, axis=0)  # Décale la séquence de 144 points
        input_seq[-144:] = pred_day  # Remplace les derniers points par les prédictions
    return np.array(predictions)

# Exemple de prédiction pour 100 pas de temps
initial_input = X_train[-1]
predictions = predict_sequential(model=rnn_model, X_init=initial_input, days_to_predict=1)

(144, 39)


ValueError: Exception encountered when calling Sequential.call().

[1mCannot take the length of shape with unknown rank.[0m

Arguments received by Sequential.call():
  • inputs=tf.Tensor(shape=<unknown>, dtype=float32)
  • training=False
  • mask=None

In [100]:
len(predictions)

10

In [103]:
min(y_test)
max(np.abs(y_test - y_p)

array([0.00934579])