In [None]:
import numpy as np
import pandas as pd 
from datetime import datetime
from prophet import Prophet 
from prophet.plot import plot_plotly, plot_components_plotly

In [None]:
# Read the semicolon-separated counts export and discard every row that has
# at least one missing value, then show the available column names.
df = pd.read_csv('data/comptages-routiers-permanents.csv', sep=';').dropna()
df.columns

In [None]:
def preprocessing(df):
    """Put every arc's measurements on a gap-free hourly grid.

    For each distinct 'Identifiant arc' the function:

    1. keeps the timestamp and the four measurement/node columns and orders
       the rows by the raw timestamp string;
    2. if the first timestamp string does not contain '00:00:00', adds a
       synthetic row at 00:00:00 UTC of that first date (first 10 characters of
       the string) carrying the first row's values, so the hourly grid starts
       at midnight;
    3. parses the timestamps to UTC, upsamples to one row per hour and
       forward-fills the gaps.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Identifiant arc',
        'Date et heure de comptage', 'Débit horaire', "Taux d'occupation",
        'Identifiant noeud amont' and 'Identifiant noeud aval'.
        'Date et heure de comptage' must hold strings (the midnight check and
        the date extraction are string operations).
        NOTE(review): presumably ISO-8601 strings starting with YYYY-MM-DD;
        confirm against the data export.

    Returns
    -------
    pandas.DataFrame
        Columns, in this order: 'index' (row position inside the arc's hourly
        series), 'Date et heure de comptage' (UTC), 'Identifiant arc',
        "Taux d'occupation", 'Débit horaire', 'Identifiant noeud amont',
        'Identifiant noeud aval'. The order is part of the contract: later
        code selects columns by position.
    """
    time_col = 'Date et heure de comptage'
    arc_col = 'Identifiant arc'
    value_cols = ['Débit horaire', "Taux d'occupation",
                  'Identifiant noeud amont', 'Identifiant noeud aval']
    out_cols = [time_col, arc_col, "Taux d'occupation", 'Débit horaire',
                'Identifiant noeud amont', 'Identifiant noeud aval']

    hourly_frames = []
    for arc in df[arc_col].unique():
        # .loc + sort_values (not in-place) yields an independent frame, so the
        # caller's df is never touched and no SettingWithCopyWarning is raised.
        frame = df.loc[df[arc_col] == arc, [time_col] + value_cols].sort_values(by=time_col)

        # Label-based access: positional `row[0]` on a label-indexed Series is
        # deprecated in recent pandas.
        first_row = frame.iloc[0]
        first_stamp = first_row[time_col]
        needs_midnight_row = '00:00:00' not in first_stamp

        # Parse before padding so the column never mixes strings and Timestamps.
        frame = frame.assign(**{time_col: pd.to_datetime(frame[time_col], utc=True)})

        if needs_midnight_row:
            padding = {time_col: [pd.to_datetime('{} 00:00:00+00:00'.format(first_stamp[:10]), utc=True)]}
            padding.update({col: [first_row[col]] for col in value_cols})
            frame = pd.concat([frame, pd.DataFrame(padding)], ignore_index=True)

        # A Timedelta rule avoids the deprecated 'H' frequency alias.
        frame = (
            frame.sort_values(by=time_col)
            .set_index(time_col)
            .resample(pd.Timedelta(hours=1))
            .ffill()
            .reset_index()
        )
        frame[arc_col] = arc
        hourly_frames.append(frame)

    if not hourly_frames:
        # No arcs at all: return an empty table with the documented layout.
        return pd.DataFrame({col: [] for col in out_cols}).reset_index()

    # Concatenate once (instead of growing a frame inside the loop) and keep the
    # per-arc row positions as the leading 'index' column.
    return pd.concat(hourly_frames)[out_cols].reset_index()


# frame.rename(columns={'Date et heure de comptage':'ds', "Taux d'occupation":'y'}, inplace=True)
# frame['ds'] = frame['ds'].dt.tz_localize(None)

In [None]:
# Build the hourly, per-arc table that the modelling cells below consume.
frame = preprocessing(df)

In [None]:
# Prophet only accepts a frame with a timezone-naive 'ds' column (timestamps)
# and a 'y' column (value to forecast); passing `frame` as-is raises because
# neither column exists and the timestamps are UTC-aware.
# NOTE(review): `frame` holds every arc, so this fits a single model on all
# arcs pooled together — filter to one 'Identifiant arc' if a per-arc
# forecast is intended.
prophet_frame = pd.DataFrame({
    'ds': pd.to_datetime(frame['Date et heure de comptage'], utc=True).dt.tz_localize(None),
    'y': frame["Taux d'occupation"],
})

# Initialize the model with a 95 % uncertainty interval and explicit weekly
# and daily seasonalities.
m = Prophet(interval_width=0.95, weekly_seasonality=True, daily_seasonality=True)
# Optional monthly seasonality (disabled):
# m.add_seasonality(name='monthly', period=30.5, fourier_order=5, prior_scale=0.02)

# Fit on the prepared data, extend the timeline and predict.
m.fit(prophet_frame)
# NOTE(review): make_future_dataframe defaults to daily steps; the data is
# hourly, so pass freq='h' if 114 hours (rather than 114 days) is intended.
future = m.make_future_dataframe(periods=114)
forecast = m.predict(future)
fig = m.plot_components(forecast)

In [None]:
from tensorflow.keras.layers import SimpleRNN, LSTM, Dense, BatchNormalization, Dropout
from tensorflow.keras import Sequential
from tensorflow.keras.losses import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt


# Create sequence data
def sequences(frame, window=24, target="Taux d'occupation", group='Identifiant arc', feature_start=2):
    """Cut a table into (one window of features -> next window of target) samples.

    Consecutive, non-overlapping windows of `window` rows are taken; sample i
    uses window i as input and the target values of window i+1 as output.

    The target is selected by column label by default. Selecting it by
    position 2 (the previous behaviour) picks 'Identifiant arc' when the table
    starts with an 'index' column followed by the timestamp, so the model
    would learn to predict the arc identifier instead of a measurement.

    Windows are built separately for each value of `group`, so that a sample
    never mixes rows belonging to two different arcs.

    Parameters
    ----------
    frame : pandas.DataFrame
        Rows ordered in time (within each group).
    window : int, default 24
        Number of rows per input window and per output window.
    target : str or int, default "Taux d'occupation"
        Column to predict: a label, or an integer column position.
    group : str or None, default 'Identifiant arc'
        Column whose values delimit independent series. Ignored when None or
        when the column is absent from `frame`.
    feature_start : int, default 2
        Position of the first feature column; every column from there to the
        end is used as an input feature.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        X with shape (n_samples, window, n_features) and y with shape
        (n_samples, window). When no complete sample can be built, empty arrays
        with those trailing dimensions are returned.

    Raises
    ------
    KeyError
        If `target` is a label that is not a column of `frame`.
    """
    if isinstance(target, (int, np.integer)):
        target_pos = int(target)
    else:
        target_pos = frame.columns.get_loc(target)

    if group is not None and group in frame.columns:
        # sort=False keeps the groups in order of first appearance.
        parts = [part for _, part in frame.groupby(group, sort=False)]
    else:
        parts = [frame]

    X = []
    y = []
    for part in parts:
        features = part.iloc[:, feature_start:].to_numpy()
        targets = part.iloc[:, target_pos].to_numpy()
        # The last complete window has no successor, hence the -1.
        for i in range(len(part) // window - 1):
            X.append(features[i * window:(i + 1) * window])
            y.append(targets[(i + 1) * window:(i + 2) * window])

    if not X:
        n_features = frame.iloc[:, feature_start:].shape[1]
        return np.empty((0, window, n_features)), np.empty((0, window))
    return np.array(X), np.array(y)

# Build the samples and hold out 20 % of them as a test set that is used
# only for the final score. The split is seeded so a re-run gives the same
# partition.
X, y = sequences(frame)
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# Define the model. Input and output sizes are read from the data instead of
# being hard-coded, so they always match what `sequences` produced.
model = Sequential()
model.add(LSTM(units=50, input_shape=x_train.shape[1:], return_sequences=True))
model.add(Dropout(0.1))
model.add(BatchNormalization())
model.add(SimpleRNN(units=10))
model.add(Dropout(0.2))
model.add(Dense(units=100))
model.add(Dropout(0.2))
model.add(Dense(units=y_train.shape[1]))

model.compile(optimizer='nadam', loss='mse')

early_stopping = EarlyStopping(
    monitor='val_loss',        # Track the loss on the validation set
    patience=200,              # Stop after 200 epochs without improvement
    restore_best_weights=True  # Roll back to the weights of the best epoch
)
# Validation data is carved out of the training set: using the test set for
# early stopping would select the weights on the very data used for the final
# score and make that score optimistic. `callbacks` takes a list.
history = model.fit(
    x_train,
    y_train,
    epochs=1000,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stopping],
)

y_pred = model.predict(x_test)

# Calculate RMSE over every predicted value: square root of the global mean
# squared error (not the mean of per-sample RMSEs).
rmse = float(np.sqrt(np.mean(np.square(y_test - y_pred))))
print(f'Root Mean Squared Error (RMSE): {rmse}')

# Optional: plot the loss curves
loss_fig, loss_ax = plt.subplots()
loss_ax.plot(history.history['loss'], label='Train Loss')
loss_ax.plot(history.history['val_loss'], label='Val Loss')
loss_ax.legend()
loss_ax.set_title('Training and Validation Loss')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
plt.show()

In [None]:
# Residuals (actual minus predicted) for the first test sample.
y_test[0]-y_pred[0]
# Pasted output, presumably from an earlier run — every residual is close to 202.
# NOTE(review): a near-constant offset like this suggests the predictions do not
# reach the level of the target; check which column is used as target and
# whether the inputs are scaled.
# array([202.10742188, 201.94824219, 202.00097656, 201.98681641,
#        201.86962891, 201.63769531, 201.55957031, 201.90234375,
#        201.91748047, 202.37451172, 202.08935547, 201.55175781,
#        201.81933594, 202.12646484, 201.62792969, 201.65869141,
#        202.93066406, 202.80322266, 203.04833984, 202.58984375,
#        202.88183594, 202.99560547, 202.87939453, 202.70166016])