# Projet Datascientest - Compagnon immo

<code>mar25_bds_compagnon_immo_1</code>

---

## Modélisation évolution des prix - v5.0

---

### Recherche optimisation R2

In [23]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import RobustScaler, OneHotEncoder, MinMaxScaler, StandardScaler
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Input, Dense, Dropout, BatchNormalization, Bidirectional, GRU, LSTM, LayerNormalization
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Conv1D,  Layer
from tensorflow.keras.layers import Activation

### Chargement des données nettoyées

In [2]:
output_path = '../data/processed/dep_75_cleaned.csv.gz'
df_dep75 = pd.read_csv(output_path, low_memory=False, index_col='date_mutation', parse_dates=True)

#### Ajout du taux d'inflation annuel, taux livret A et taux moyen bancaire

In [3]:
# Source INSEE
df_inflation = pd.read_csv('../data/raw/inflation-2020-2024.csv', index_col=0)

df_inflation = df_inflation.drop('mois',axis=1)
df_inflation.rename(columns={'index': 'mois'}, inplace=True)
df_inflation.columns = df_inflation.columns.astype(int)
df_inflation["mois"] = df_inflation.index.astype(int)

def get_inflation(row):
    mois = row['mois']
    annee = row['annee']
    try:
        return df_inflation.loc[mois, annee]
    except KeyError:
        return np.nan

df_dep75['taux_inflation'] = df_dep75.apply(get_inflation, axis=1)

# Source Banque de France
taux_livret_a = {
     2020: 0.50,
     2021: 0.50,
     2022: 1.38,
     2023: 2.50,
     2024: 3.00,
}
taux_moyen_bancaire = {
     2020: 0.48,
     2021: 0.47,
     2022: 0.78,
     2023: 1.37,
     2024: 1.80,
}
df_dep75["taux_livret_a"] = df_dep75["annee"].map(taux_livret_a)
df_dep75["taux_moyen_bancaire"] = df_dep75["annee"].map(taux_moyen_bancaire)

In [4]:
df_dep75 = df_dep75[df_dep75['prix_m2_vente'] < 1_000_000]
df_dep75 = df_dep75.drop(['numero_disposition', 
                          'lot2_numero', 
                          'lot2_surface_carrez',
                          'lot3_numero', 
                          'lot3_surface_carrez',
                          'lot4_numero', 
                          'lot4_surface_carrez', 
                          'lot5_numero', 
                          'lot5_surface_carrez'], axis=1)
df_dep75.sample(5)

Unnamed: 0_level_0,nature_mutation,valeur_fonciere,code_commune,code_departement,lot1_numero,lot1_surface_carrez,nombre_lots,code_type_local,type_local,surface_reelle_bati,...,code_nature_culture_speciale,surface_terrain,longitude,latitude,prix_m2_vente,annee,mois,taux_inflation,taux_livret_a,taux_moyen_bancaire
date_mutation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-04-10,Vente,515000.0,75120,75,0,0.0,0,2.0,Appartement,24.0,...,NS,145.0,2.39443,48.874568,21458.333333,2024,4,2.2,3.0,1.8
2022-03-25,Vente,14500000.0,75110,75,0,0.0,0,2.0,Appartement,44.0,...,NS,771.0,2.365641,48.880592,329545.454545,2022,3,4.5,1.38,0.78
2021-11-30,Vente,5310000.0,75111,75,0,0.0,0,4.0,Local industriel. commercial ou assimilé,134.0,...,NS,122.0,2.384223,48.851293,39626.865672,2021,11,2.8,0.5,0.47
2024-04-23,Vente,80150000.0,75106,75,0,0.0,0,2.0,Appartement,122.0,...,NS,1532.0,2.32526,48.850175,656967.213115,2024,4,2.2,3.0,1.8
2020-03-17,Adjudication,15400000.0,75103,75,0,0.0,0,4.0,Local industriel. commercial ou assimilé,421.0,...,NS,349.0,2.359087,48.863926,36579.572447,2020,3,0.7,0.5,0.48


### Deep Learning

In [89]:
def display_scores(y_test_seq, y_pred, model_name, scaler_name):
    y_pred = scaler_y.inverse_transform(y_pred)
    y_test_seq = scaler_y.inverse_transform(y_test_seq.reshape(-1, 1))
    r2 = r2_score(y_test_seq, y_pred)
    mae = mean_absolute_error(y_test_seq, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test_seq, y_pred))
    
    scores = pd.DataFrame([{
        'Modèle': model_name,
        'Scaler': scaler_name,
        'MAE': mae,
        'RMSE': rmse,
        'R2': r2
    }])

    display(scores)


#### Callbacks

In [129]:
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
callbacks = [
    EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True),
    ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=5, verbose=1),
]

#### Train test Split

In [11]:
df_train = df_dep75[df_dep75['annee'] < 2024]
df_test = df_dep75[df_dep75['annee'] == 2024]

X_train = df_train.drop(['prix_m2_vente'], axis=1)
X_test = df_test.drop(['prix_m2_vente'], axis=1)

y_train = df_train['prix_m2_vente'].values.reshape(-1, 1)
y_test = df_test['prix_m2_vente'].values.reshape(-1, 1)

#### Encodage

In [13]:
one_hot_cols = ['type_local', 'nature_mutation']
ohe = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
ohe.fit(X_train[one_hot_cols])

X_train_ohe = pd.DataFrame(ohe.transform(X_train[one_hot_cols]), 
                           columns=ohe.get_feature_names_out(one_hot_cols),
                           index=X_train.index)

X_test_ohe = pd.DataFrame(ohe.transform(X_test[one_hot_cols]), 
                          columns=ohe.get_feature_names_out(one_hot_cols),
                          index=X_test.index)

X_train = X_train.drop(columns=one_hot_cols)
X_test = X_test.drop(columns=one_hot_cols)

X_train = pd.concat([X_train, X_train_ohe], axis=1)
X_test = pd.concat([X_test, X_test_ohe], axis=1)

freq_cols = ['code_nature_culture', 'code_nature_culture_speciale', 'code_commune', 'code_departement']

for col in freq_cols:
    freq_encoding = X_train[col].value_counts(normalize=True)

    X_train[col] = X_train[col].map(freq_encoding).fillna(0)
    X_test[col] = X_test[col].map(freq_encoding).fillna(0)


X_train['lot1_numero'] = (X_train['lot1_numero'] != 0).astype(int)
X_test['lot1_numero'] = (X_test['lot1_numero'] != 0).astype(int)

print("Variables catégorielles restantes :", len(X_train.select_dtypes('object').columns))

Variables catégorielles restantes : 0


In [15]:
robust_x_scaler = RobustScaler()
X_train_scaled = robust_x_scaler.fit_transform(X_train)
X_test_scaled = robust_x_scaler.transform(X_test)

robust_y_scaler = RobustScaler()
y_train_scaled = robust_y_scaler.fit_transform(y_train)
y_test_scaled = robust_y_scaler.transform(y_test)

In [41]:
from tensorflow.keras.losses import Huber

timesteps = 12  
input_dim = X_train_scaled.shape[1]

inputs = Input(shape=(timesteps, input_dim))

x = Conv1D(filters=128, kernel_size=5, activation="relu")(inputs)
x = Dropout(0.35)(x)
x = LayerNormalization()(x)

x = Bidirectional(GRU(128, activation="tanh", return_sequences=True))(x)
x = Dropout(0.3)(x)
x = LayerNormalization()(x)

x = Bidirectional(LSTM(64, activation="tanh", return_sequences=True))(x)
x = Dropout(0.3)(x)
x = LayerNormalization()(x)

x = GRU(32, activation="tanh", return_sequences=False)(x)
x = Dropout(0.2)(x)

x = Dense(64, activation="swish")(x)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)

outputs = Dense(1)(x)

model_bi_gru_lstm = Model(inputs=inputs, outputs=outputs)

model_bi_gru_lstm.compile(optimizer="RMSprop", loss=Huber())

X_train_seq, y_train_seq = create_sequences(X_train_scaled, y_train_scaled, timesteps)
X_test_seq, y_test_seq = create_sequences(X_test_scaled, y_test_scaled, timesteps)

history_hybrid = model_bi_gru_lstm.fit(X_train_seq, y_train_seq, epochs=50, batch_size=16, validation_data=(X_test_seq, y_test_seq), callbacks=[callbacks])

y_pred_bi_gru_lstm = model_bi_gru_lstm.predict(X_test_seq)


Epoch 1/50
[1m694/694[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 13ms/step - loss: 0.5084 - val_loss: 0.3452 - learning_rate: 0.0010
Epoch 2/50
[1m 12/694[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m6s[0m 10ms/step - loss: 0.3126

  current = self.get_monitor_value(logs)
  callback.on_epoch_end(epoch, logs)


[1m694/694[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 11ms/step - loss: 0.2932 - val_loss: 0.3493 - learning_rate: 0.0010
Epoch 3/50
[1m694/694[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 11ms/step - loss: 0.2647 - val_loss: 0.3290 - learning_rate: 0.0010
Epoch 4/50
[1m694/694[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 11ms/step - loss: 0.2532 - val_loss: 0.2807 - learning_rate: 0.0010
Epoch 5/50
[1m694/694[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 11ms/step - loss: 0.2512 - val_loss: 0.2803 - learning_rate: 0.0010
Epoch 6/50
[1m694/694[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 13ms/step - loss: 0.2414 - val_loss: 0.2906 - learning_rate: 0.0010
Epoch 7/50
[1m694/694[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 11ms/step - loss: 0.2433 - val_loss: 0.2881 - learning_rate: 0.0010
Epoch 8/50
[1m694/694[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 11ms/step - loss: 0.2395 - val_loss: 0.2791 - learning_rate: 0.0

NameError: name 'display_scores' is not defined

In [47]:
scaler_y = robust_y_scaler
display_scores(y_test_seq, y_pred_bi_gru_lstm, 'Bidirectional GRU + LSTM', 'robust')

Unnamed: 0,Modèle,Scaler,MAE,RMSE,R2
0,Bidirectional GRU + LSTM,robust,124336.189735,185566.537182,0.28966


In [17]:
def fit_and_evaluate(model, train_set, test_set, learning_rate, epochs=50):
    opt = tf.keras.optimizers.SGD(learning_rate=learning_rate, momentum=0.9)
    model.compile(loss=tf.keras.losses.Huber(), optimizer=opt, metrics=["mae"])
    history = model.fit(train_set, validation_data=test_set, epochs=epochs,
                        callbacks=[callbacks])
    valid_loss, valid_mae = model.evaluate(test_set)
    return valid_mae * 1e6

def create_sequences(X, y, timesteps):
    X_seq, y_seq = [], []
    for i in range(len(X) - timesteps):
        X_seq.append(X[i:i+timesteps])
        y_seq.append(y[i+timesteps])
    return np.array(X_seq), np.array(y_seq)

In [85]:
X_train_seq, y_train_seq = create_sequences(X_train_scaled, y_train_scaled, timesteps=12)
X_test_seq, y_test_seq = create_sequences(X_test_scaled, y_test_scaled, timesteps=12)

tf.random.set_seed(42)
model = tf.keras.Sequential([
    tf.keras.layers.SimpleRNN(1, input_shape=[12, X_train_seq.shape[2]])
])

  super().__init__(**kwargs)


In [91]:
train_set = tf.data.Dataset.from_tensor_slices((X_train_seq, y_train_seq)).batch(16)
test_set = tf.data.Dataset.from_tensor_slices((X_test_seq, y_test_seq)).batch(16)

fit_and_evaluate(model, train_set, test_set, learning_rate=0.02)

Epoch 1/50
[1m694/694[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 0.3268 - mae: 0.6440 - val_loss: 0.3173 - val_mae: 0.6413 - learning_rate: 0.0200
Epoch 2/50
[1m694/694[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.2990 - mae: 0.6032 - val_loss: 0.3139 - val_mae: 0.6377 - learning_rate: 0.0200
Epoch 3/50
[1m694/694[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.2947 - mae: 0.5988 - val_loss: 0.3178 - val_mae: 0.6367 - learning_rate: 0.0200
Epoch 4/50
[1m694/694[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.3057 - mae: 0.6123 - val_loss: 0.3100 - val_mae: 0.6261 - learning_rate: 0.0200
Epoch 5/50
[1m694/694[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.2945 - mae: 0.5923 - val_loss: 0.3147 - val_mae: 0.6335 - learning_rate: 0.0200
Epoch 6/50
[1m694/694[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.2947 - mae: 0.5949 - val

583127.9754638672

In [93]:
y_pred = model.predict(test_set.map(lambda x, y: x))
y_true = test_set.map(lambda x, y: y).unbatch()
y_true = np.array([y.numpy() for y in y_true])

r2 = r2_score(y_true, y_pred)
mae = mean_absolute_error(y_true, y_pred)
rmse = np.sqrt(mean_squared_error(y_true, y_pred))
results_df = pd.DataFrame([{
    'Modèle': 'RNN',
    'Scaler': 'robust',
    'MAE': mae,
    'RMSE': rmse,
    'R2': r2
}])
results_df

[1m195/195[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step


Unnamed: 0,Modèle,Scaler,MAE,RMSE,R2
0,RNN,robust,0.583128,0.872974,0.270143


In [97]:
tf.random.set_seed(42)
deep_model = tf.keras.Sequential([
    tf.keras.layers.SimpleRNN(32, return_sequences=True, input_shape=[12, X_train_seq.shape[2]]),
    tf.keras.layers.SimpleRNN(32, return_sequences=True),
    tf.keras.layers.SimpleRNN(32),
    tf.keras.layers.Dense(1)
])
fit_and_evaluate(deep_model, train_set, test_set, learning_rate=0.01)

Epoch 1/50


  super().__init__(**kwargs)


[1m694/694[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - loss: 0.3623 - mae: 0.6890 - val_loss: 0.3213 - val_mae: 0.6227 - learning_rate: 0.0100
Epoch 2/50
[1m694/694[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: 0.2730 - mae: 0.5658 - val_loss: 0.3089 - val_mae: 0.6068 - learning_rate: 0.0100
Epoch 3/50
[1m694/694[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: 0.2588 - mae: 0.5422 - val_loss: 0.3008 - val_mae: 0.5984 - learning_rate: 0.0100
Epoch 4/50
[1m694/694[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: 0.2532 - mae: 0.5323 - val_loss: 0.3000 - val_mae: 0.5973 - learning_rate: 0.0100
Epoch 5/50
[1m694/694[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: 0.2499 - mae: 0.5282 - val_loss: 0.2997 - val_mae: 0.5967 - learning_rate: 0.0100
Epoch 6/50
[1m694/694[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: 0.2471 - mae: 0.5242 - val_loss: 0.29

595975.6374359131

In [99]:
y_pred_dm = deep_model.predict(test_set.map(lambda x, y: x))
y_true_dm = test_set.map(lambda x, y: y).unbatch()
y_true_dm = np.array([y.numpy() for y in y_true_dm])

r2 = r2_score(y_true_dm, y_pred_dm)
mae = mean_absolute_error(y_true_dm, y_pred_dm)
rmse = np.sqrt(mean_squared_error(y_true_dm, y_pred_dm))
results_df = pd.DataFrame([{
    'Modèle': 'RNN',
    'Scaler': 'robust',
    'MAE': mae,
    'RMSE': rmse,
    'R2': r2
}])
results_df

[1m195/195[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step


Unnamed: 0,Modèle,Scaler,MAE,RMSE,R2
0,RNN,robust,0.595976,0.880505,0.257495


---

In [143]:
from tensorflow.keras.optimizers import AdamW

timesteps = 12

X_train_seq, y_train_seq = create_sequences(X_train_scaled, y_train_scaled, timesteps=timesteps)
X_test_seq, y_test_seq = create_sequences(X_test_scaled, y_test_scaled, timesteps=timesteps)

train_set = tf.data.Dataset.from_tensor_slices((X_train_seq, y_train_seq)).batch(16)
test_set = tf.data.Dataset.from_tensor_slices((X_test_seq, y_test_seq)).batch(16)

model_rnn = Sequential([
    Input(shape=(timesteps, X_train_scaled.shape[1])),
    
    Bidirectional(LSTM(128, return_sequences=True, activation="tanh")),  # Capture les tendances avant/arrière
    Dropout(0.2),

    Bidirectional(GRU(64, return_sequences=True, activation="tanh")),  # GRU bidirectionnel
    Dropout(0.2),
    
    GRU(32, return_sequences=False, activation="tanh"),
    Dropout(0.2),

    Dense(16, activation="relu"),
    Dense(1)
])

model_rnn.compile(
    loss="mse",
    optimizer=AdamW(learning_rate=0.001, weight_decay=1e-4),
    metrics=["mae"]
)

history = model_rnn.fit(X_train_seq, y_train_seq, epochs=10, batch_size=32, validation_data=(X_test_seq, y_test_seq), callbacks=[callbacks])
y_pred_rnn = model_rnn.predict(X_test_seq)

Epoch 1/10
[1m347/347[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 18ms/step - loss: 0.6835 - mae: 0.5825 - val_loss: 0.6695 - val_mae: 0.5760 - learning_rate: 0.0010
Epoch 2/10
[1m347/347[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 17ms/step - loss: 0.6177 - mae: 0.5390 - val_loss: 0.7118 - val_mae: 0.5983 - learning_rate: 0.0010
Epoch 3/10
[1m347/347[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 16ms/step - loss: 0.5835 - mae: 0.5177 - val_loss: 0.6557 - val_mae: 0.5869 - learning_rate: 0.0010
Epoch 4/10
[1m347/347[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 17ms/step - loss: 0.6000 - mae: 0.5222 - val_loss: 0.7320 - val_mae: 0.5649 - learning_rate: 0.0010
Epoch 5/10
[1m347/347[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 17ms/step - loss: 0.5683 - mae: 0.5039 - val_loss: 0.7246 - val_mae: 0.5572 - learning_rate: 0.0010
Epoch 6/10
[1m347/347[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 16ms/step - loss: 0.5767 - mae: 0.508

In [145]:
scaler_y = robust_y_scaler
display_scores(y_test_seq, y_pred_rnn, 'RNN', 'robust')

Unnamed: 0,Modèle,Scaler,MAE,RMSE,R2
0,RNN,robust,126459.016422,174482.246974,0.371986


In [159]:
from tensorflow.keras.layers import Add, GlobalAveragePooling1D

input_layer = Input(shape=(timesteps, X_train_scaled.shape[1]))
x = Bidirectional(LSTM(128, return_sequences=True, activation="tanh"))(input_layer)
x = GRU(64, return_sequences=True, activation="tanh")(x)

residual = x  
x = LSTM(32, return_sequences=True, activation="tanh")(x)

residual = GlobalAveragePooling1D()(residual)
x = GlobalAveragePooling1D()(x)

residual = Dense(32, activation="tanh")(residual)
x = Add()([x, residual]) 

output_layer = Dense(1)(x)

model_rnn = Model(inputs=input_layer, outputs=output_layer)

model_rnn.compile(
    loss="mse",
    optimizer=tf.keras.optimizers.AdamW(learning_rate=0.001, weight_decay=1e-4),
    metrics=["mae"]
)

history = model_rnn.fit(X_train_seq, y_train_seq, epochs=50, batch_size=32, validation_data=(X_test_seq, y_test_seq), callbacks=[callbacks])
y_pred_rnn = model_rnn.predict(X_test_seq)

Epoch 1/50
[1m347/347[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 17ms/step - loss: 0.7217 - mae: 0.5991 - val_loss: 0.7307 - val_mae: 0.5645 - learning_rate: 0.0010
Epoch 2/50
[1m347/347[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 14ms/step - loss: 0.5987 - mae: 0.5309 - val_loss: 0.7215 - val_mae: 0.6203 - learning_rate: 0.0010
Epoch 3/50
[1m347/347[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 14ms/step - loss: 0.6025 - mae: 0.5262 - val_loss: 0.7351 - val_mae: 0.5718 - learning_rate: 0.0010
Epoch 4/50
[1m347/347[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 15ms/step - loss: 0.6085 - mae: 0.5276 - val_loss: 0.6619 - val_mae: 0.5823 - learning_rate: 0.0010
Epoch 5/50
[1m347/347[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 14ms/step - loss: 0.5717 - mae: 0.5072 - val_loss: 0.6830 - val_mae: 0.5605 - learning_rate: 0.0010
Epoch 6/50
[1m347/347[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 14ms/step - loss: 0.5686 - mae: 0.505

In [163]:
scaler_y = robust_y_scaler
display_scores(y_test_seq, y_pred_rnn, 'RNN + residu', 'robust')

Unnamed: 0,Modèle,Scaler,MAE,RMSE,R2
0,RNN,robust,125476.100855,175297.990699,0.3661


In [173]:
!pip install keras-tcn

In [169]:
from tensorflow.keras.optimizers import Adam
from tcn import TCN

timesteps = 12
features = X_train_scaled.shape[1]

input_layer = Input(shape=(timesteps, features))
x = TCN(nb_filters=64, kernel_size=3, dilations=[1, 2, 4, 8], activation="relu")(input_layer)
x = Dropout(0.2)(x)
x = Dense(32, activation="relu")(x)
output_layer = Dense(1)(x)

model_tcn = Model(inputs=input_layer, outputs=output_layer)
model_tcn.compile(
    loss="mse",
    optimizer=Adam(learning_rate=0.001),
    metrics=["mae"]
)

model_tcn.summary()

history = model_tcn.fit(
    X_train_seq, y_train_seq, 
    epochs=50, batch_size=32, 
    validation_data=(X_test_seq, y_test_seq), 
    callbacks=[callbacks]
)

y_pred_tcn = model_tcn.predict(X_test_seq)




Epoch 1/50
[1m347/347[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 11ms/step - loss: 5.1346 - mae: 1.1647 - val_loss: 1.2270 - val_mae: 0.7445 - learning_rate: 0.0010
Epoch 2/50
[1m347/347[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - loss: 1.1246 - mae: 0.7113 - val_loss: 0.8600 - val_mae: 0.7225 - learning_rate: 0.0010
Epoch 3/50
[1m347/347[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - loss: 0.8795 - mae: 0.6423 - val_loss: 0.7836 - val_mae: 0.6858 - learning_rate: 0.0010
Epoch 4/50
[1m347/347[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - loss: 0.7602 - mae: 0.6162 - val_loss: 0.7763 - val_mae: 0.6786 - learning_rate: 0.0010
Epoch 5/50
[1m347/347[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - loss: 0.9242 - mae: 0.6183 - val_loss: 0.7042 - val_mae: 0.6180 - learning_rate: 0.0010
Epoch 6/50
[1m347/347[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - loss: 0.8489 - mae: 0.6049 - va

In [175]:
scaler_y = robust_y_scaler
display_scores(y_test_seq, y_pred_tcn, 'TCN', 'robust')

Unnamed: 0,Modèle,Scaler,MAE,RMSE,R2
0,TCN,robust,119980.147568,175992.790121,0.361065
