# 04 — Modeling and Forecasting

Objectif : entraîner, comparer et évaluer plusieurs modèles de prévision de la
consommation électrique nationale à partir du dataset enrichi par feature engineering.

Les modèles sont évalués :
- sur des données réelles (baseline)
- sur des données reconstruites (Prophet-filled)
- avec une séparation temporelle stricte


In [14]:
!pip install prophet
from pathlib import Path
import pandas as pd
import numpy as np

from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.linear_model import LinearRegression, Ridge
from xgboost import XGBRegressor


Collecting prophet
  Downloading prophet-1.2.1-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.5 kB)
Collecting cmdstanpy>=1.0.4 (from prophet)
  Downloading cmdstanpy-1.3.0-py3-none-any.whl.metadata (4.2 kB)
Collecting holidays<1,>=0.25 (from prophet)
  Downloading holidays-0.87-py3-none-any.whl.metadata (50 kB)
Collecting importlib_resources (from prophet)
  Downloading importlib_resources-6.5.2-py3-none-any.whl.metadata (3.9 kB)
Collecting stanio<2.0.0,>=0.4.0 (from cmdstanpy>=1.0.4->prophet)
  Downloading stanio-0.5.1-py3-none-any.whl.metadata (1.6 kB)
Downloading prophet-1.2.1-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (12.1 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 12.1/12.1 MB 39.7 MB/s 0:00:00
Downloading holidays-0.87-py3-none-any.whl (1.3 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.3/1.3 MB 93.3 MB/s 0:00:00
Downloading cmdstan

## Chargement des datasets de features

Deux jeux sont utilisés :
- dataset de référence (sans interpolation)
- dataset reconstruit par Prophet (contrefactuel)


In [2]:
# Locations of the processed feature tables inside the project checkout.
PROJECT_ROOT = Path("/home/onyxia/france-grid-stress-prediction")
DATA_PROCESSED = PROJECT_ROOT.joinpath("data", "processed")

BASELINE_PATH = DATA_PROCESSED.joinpath("dataset_features.parquet")
PROPHET_PATH = DATA_PROCESSED.joinpath("dataset_features_prophetfilled.parquet")

# Read the reference table first, then the Prophet-filled one.
df_base, df_prophet = (
    pd.read_parquet(parquet_path)
    for parquet_path in (BASELINE_PATH, PROPHET_PATH)
)

# Preview the first rows of the reference dataset.
df_base.head()


Unnamed: 0,datetime,y,split,temperature_2m,wind_speed_10m,direct_radiation,diffuse_radiation,cloud_cover,hour,dayofweek,...,doy_sin,doy_cos,load_lag_1h,load_lag_24h,load_lag_48h,load_lag_168h,load_roll_mean_24h,load_roll_std_24h,load_roll_mean_168h,load_roll_std_168h
0,2010-01-08 00:00:00,74564.5,train,-2.365344,12.290582,0.0,0.0,67.0625,0,4,...,0.137185,0.990545,73921.5,73233.0,72064.5,52685.0,82903.416667,4661.83838,74125.791667,10498.593411
1,2010-01-08 01:00:00,77065.5,train,-2.537219,12.808883,0.0,0.0,70.78125,1,4,...,0.137185,0.990545,74564.5,75735.5,74674.5,52142.5,82958.895833,4548.289956,74256.026786,10365.896658
2,2010-01-08 02:00:00,82297.0,train,-2.552844,13.657961,0.0,0.0,73.9375,2,4,...,0.137185,0.990545,77065.5,80790.5,79808.5,52081.5,83014.3125,4463.770184,74404.377976,10224.908115
3,2010-01-08 03:00:00,87563.0,train,-2.551281,14.603605,0.0,0.0,77.5625,3,4,...,0.137185,0.990545,82297.0,85729.0,84932.0,52331.5,83077.083333,4441.676382,74584.232143,10094.816596
4,2010-01-08 04:00:00,89394.5,train,-2.530969,15.81296,0.0,0.0,80.96875,4,4,...,0.137185,0.990545,87563.0,86940.0,87177.5,52171.0,83153.5,4504.615445,74793.943452,9995.227806


## Séparation train / validation / test

La séparation temporelle a été définie lors du feature engineering.


In [3]:
# Column holding the value to predict.
TARGET = "y"
# Bookkeeping columns that must never be fed to a model.
META_COLS = ["datetime", "split"]

# Every remaining column of the reference table is a model input,
# kept in the table's own column order.
FEATURES = [
    column
    for column in df_base.columns
    if column != TARGET and column not in META_COLS
]

def split_data(df):
    """Slice a feature table into train / valid / test inputs and targets.

    Rows are selected by the value of the ``split`` column; inputs are the
    ``FEATURES`` columns and the target is the ``TARGET`` column.

    Args:
        df: Feature table with a ``split`` column plus the feature and
            target columns.

    Returns:
        A 6-tuple ``(X_train, y_train, X_valid, y_valid, X_test, y_test)``.
    """
    pieces = []
    for split_name in ("train", "valid", "test"):
        subset = df[df["split"] == split_name]
        pieces.append(subset[FEATURES])
        pieces.append(subset[TARGET])
    return tuple(pieces)


## Fonctions d’évaluation


In [4]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

def evaluate(y_true, y_pred):
    """Compute MAE and RMSE between observed and predicted values.

    Args:
        y_true: Observed target values.
        y_pred: Predicted values, compared with ``y_true`` by the
            scikit-learn metric functions.

    Returns:
        A dict with keys ``"MAE"`` and ``"RMSE"``, both built-in floats.
    """
    mae = mean_absolute_error(y_true, y_pred)
    # RMSE is the square root of the mean squared error.
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    # np.sqrt yields np.float64; cast both metrics to built-in float so the
    # score dict has a uniform type when printed or serialized.
    return {"MAE": float(mae), "RMSE": float(rmse)}


## Baseline naïve — persistance J-1


In [5]:
def naive_forecast(df):
    """Return the 24-hour persistence forecast.

    Args:
        df: Table containing a ``load_lag_24h`` column.

    Returns:
        The ``load_lag_24h`` column of ``df``, used as-is as the prediction.
    """
    lag_column = "load_lag_24h"
    return df[lag_column]


# Accumulator shared by every model run: one score dict per (model, dataset).
results = []


In [6]:
# Score the 24 h persistence baseline on the test split of each dataset.
for name, df in [("Baseline", df_base), ("ProphetFilled", df_prophet)]:
    test = df[df["split"] == "test"]
    # Use the shared helper rather than repeating the lag-column lookup.
    y_pred = naive_forecast(test)
    scores = evaluate(test[TARGET], y_pred)
    scores["model"] = "Naive (lag 24h)"
    scores["dataset"] = name
    results.append(scores)

print(results)


[{'MAE': 2631.883961502997, 'RMSE': np.float64(3856.414907245179), 'model': 'Naive (lag 24h)', 'dataset': 'Baseline'}, {'MAE': 2636.329732461374, 'RMSE': np.float64(3863.7568084372365), 'model': 'Naive (lag 24h)', 'dataset': 'ProphetFilled'}]



## Résultats du modèle naïf

Le modèle naïf, basé sur une persistance à 24 heures, constitue une référence
simple mais informative. Il atteint une erreur absolue moyenne d’environ 2,6 GW,
ce qui correspond à une erreur relative de l’ordre de quelques pour cent.
La performance très proche obtenue sur les données réelles et reconstruites
confirme que ce modèle ne bénéficie pas du remplissage artificiel de l’année 2020
et constitue un niveau de référence robuste pour la comparaison des modèles plus complexes.


## Régression linéaire (baseline interprétable)


In [7]:
def run_linear_models(df, dataset_name):
    """Fit LinearRegression and Ridge on the train split and score them on test.

    One score dict per model is appended to the module-level ``results`` list.

    Args:
        df: Feature table containing the ``split`` column used by
            ``split_data``.
        dataset_name: Label stored under the ``"dataset"`` key of each
            score dict.
    """
    # The validation split is discarded: both models use fixed settings.
    X_train, y_train, _, _, X_test, y_test = split_data(df)

    for model, label in [
        (LinearRegression(), "Linear"),
        (Ridge(alpha=1.0), "Ridge"),
    ]:
        model.fit(X_train, y_train)

        scores = evaluate(y_test, model.predict(X_test))
        scores["model"] = label
        scores["dataset"] = dataset_name
        results.append(scores)


In [8]:
# Fit and score the linear models on both datasets; scores go into `results`.
run_linear_models(df_base, "Baseline")
run_linear_models(df_prophet, "ProphetFilled")


In [9]:
import pandas as pd

# Tabulate the scores accumulated in `results` so far and display them.
results_df = pd.DataFrame(results)
results_df


Unnamed: 0,MAE,RMSE,model,dataset
0,2631.883962,3856.414907,Naive (lag 24h),Baseline
1,2636.329732,3863.756808,Naive (lag 24h),ProphetFilled
2,1511.442386,1928.734311,Linear,Baseline
3,1511.440392,1928.734931,Ridge,Baseline
4,1519.691016,1939.319966,Linear,ProphetFilled
5,1519.689455,1939.321286,Ridge,ProphetFilled


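Les modèles linéaires améliorent très nettement les performances par rapport au
modèle naïf, avec une réduction d’environ 40 % de la MAE et de 50 % de la RMSE.
Ce gain confirme que les variables issues du feature engineering contiennent une
information explicative forte sur la consommation électrique.
À noter : ces modèles disposent notamment de la variable `load_lag_1h`, alors que
le modèle naïf n’utilise que le retard de 24 heures ; la comparaison ne se fait
donc pas à horizon de prévision strictement identique.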

Les performances très proches entre la régression linéaire et le modèle Ridge
suggèrent une colinéarité modérée entre les variables, sans impact significatif
sur la stabilité des prédictions. Enfin, l’utilisation de données reconstruites
par Prophet n’apporte pas d’amélioration notable, ce qui confirme que ce
traitement n’est pas déterminant pour les modèles multivariés considérés ici.


## XGBoost — modèle principal


In [10]:
def run_xgboost(df, dataset_name):
    """Train an XGBoost regressor on the train split and score it on test.

    The score dict is appended to the module-level ``results`` list.

    Args:
        df: Feature table containing the ``split`` column used by
            ``split_data``.
        dataset_name: Label stored under the ``"dataset"`` key of the
            score dict.

    Returns:
        The fitted ``XGBRegressor``.
    """
    # The validation split is discarded: the number of trees is fixed and
    # no early stopping is configured.
    X_train, y_train, _, _, X_test, y_test = split_data(df)

    model = XGBRegressor(
        n_estimators=300,
        max_depth=6,
        learning_rate=0.05,
        subsample=0.8,
        colsample_bytree=0.8,
        objective="reg:squarederror",
        random_state=42,
        n_jobs=-1,
    )

    model.fit(X_train, y_train)

    scores = evaluate(y_test, model.predict(X_test))
    scores["model"] = "XGBoost"
    scores["dataset"] = dataset_name
    results.append(scores)

    return model


In [11]:
# Train one XGBoost model per dataset and keep the fitted estimators.
xgb_base = run_xgboost(df_base, "Baseline")
xgb_prophet = run_xgboost(df_prophet, "ProphetFilled")


## Prophet comme modèle de référence temporelle

Prophet est utilisé ici comme modèle univarié, sans variables exogènes,
afin de servir de point de comparaison avec les approches multivariées.


In [15]:
from prophet import Prophet


  from .autonotebook import tqdm as notebook_tqdm


In [16]:
def run_prophet(df, dataset_name):
    """Fit a univariate Prophet model on the train split and score it on test.

    Only the ``datetime`` and ``y`` columns are used (no exogenous features).
    The score dict is appended to the module-level ``results`` list.

    Args:
        df: Table with ``datetime``, ``y`` and ``split`` columns.
        dataset_name: Label stored under the ``"dataset"`` key of the
            score dict.
    """
    train = df[df["split"] == "train"][["datetime", "y"]]
    # NOTE(review): Prophet's predict is understood to return rows ordered by
    # `ds` with a fresh index; sorting the test rows by timestamp keeps
    # observations and predictions aligned position by position.
    test = df[df["split"] == "test"][["datetime", "y"]].sort_values("datetime")

    # Prophet expects the timestamp column to be named "ds"; the target
    # column is already named "y".
    prophet_df = train.rename(columns={"datetime": "ds"})

    m = Prophet(daily_seasonality=True, weekly_seasonality=True, yearly_seasonality=True)
    m.fit(prophet_df)

    future = test[["datetime"]].rename(columns={"datetime": "ds"})
    forecast = m.predict(future)

    # Compare as plain arrays: the two frames do not share an index.
    scores = evaluate(test["y"].to_numpy(), forecast["yhat"].to_numpy())
    scores["model"] = "Prophet"
    scores["dataset"] = dataset_name
    results.append(scores)


In [17]:
# Prophet is fitted on the reference dataset only in this cell.
run_prophet(df_base, "Baseline")


11:54:12 - cmdstanpy - INFO - Chain [1] start processing
11:55:26 - cmdstanpy - INFO - Chain [1] done processing


## Comparaison globale des modèles


In [20]:
# Rebuild the table from all accumulated scores and display it ordered by
# dataset, then by increasing MAE.
results_df = pd.DataFrame(results)
results_df.sort_values(["dataset", "MAE"])


Unnamed: 0,MAE,RMSE,model,dataset
6,675.305638,904.052547,XGBoost,Baseline
3,1511.440392,1928.734931,Ridge,Baseline
2,1511.442386,1928.734311,Linear,Baseline
0,2631.883962,3856.414907,Naive (lag 24h),Baseline
8,4475.610715,5803.667549,Prophet,Baseline
7,677.909657,909.030353,XGBoost,ProphetFilled
5,1519.689455,1939.321286,Ridge,ProphetFilled
4,1519.691016,1939.319966,Linear,ProphetFilled
1,2636.329732,3863.756808,Naive (lag 24h),ProphetFilled
