In [1]:
# %%
# 📚 Imports principaux
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import torch

# 📦 PyTorch Forecasting + Lightning
import pytorch_lightning as pl
from pytorch_forecasting import TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger

# 📦 PyTorch
from torch.utils.data import DataLoader

# 📂 Gestion des chemins
from pathlib import Path
import sys

# === PATHS ===
BASE_DIR = Path.cwd().parents[1]  # 🔥 remonte de 2 niveaux
DATA_DIR = BASE_DIR / "data" / "modified_data"
MODELS_DIR = BASE_DIR / "models" / "tft"

# Créer le dossier modèle si besoin
MODELS_DIR.mkdir(parents=True, exist_ok=True)

# === GPU Check ===
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"✅ Using device: {DEVICE}")

# Optionnel : Style graphique propre
sns.set_style("whitegrid")




  from .autonotebook import tqdm as notebook_tqdm


✅ Using device: cpu


In [2]:

def split_train_val_dl_global(df: pd.DataFrame, val_size: float = 0.2):
    """
    Split temporel global pour toutes les régions ensemble.
    """
    df = df.sort_values("date").reset_index(drop=True)
    
    n_total = len(df["date"].unique())
    n_val = int(n_total * val_size)

    date_val_start = df["date"].unique()[-n_val]

    df_train = df[df["date"] < date_val_start].copy()
    df_val = df[df["date"] >= date_val_start].copy()

    return df_train, df_val






In [3]:
# %%
# === Paths ===
BASE_DIR = Path.cwd().parents[1]
DATA_DIR = BASE_DIR / "data" / "modified_data"

# === Ajout du dossier data_processing pour pouvoir importer ===
sys.path.append(str(BASE_DIR))

# === Import fonctions ===
from data_processing.transformation import transform_dl

# === Paramètre fréquence (daily ou hourly) ===
FREQ = "daily"

# === Chargement et préparation des données ===

# 1. Charger le train complet
df_train = pd.read_csv(DATA_DIR / f"train_{FREQ}.csv")

# 2. Appliquer la transformation spéciale Deep Learning
df_train_transformed = transform_dl(df_train, filter_too_short=True)

# 3. Split en train/val
df_train_final, df_val_final = split_train_val_dl_global(df_train_transformed)

# 4. Charger aussi le test pour plus tard
df_test = pd.read_csv(DATA_DIR / f"test_{FREQ}.csv")
df_test_final = transform_dl(df_test, filter_too_short=False)





In [4]:
df_train_final.head()

Unnamed: 0,date,insee_region,conso_elec_mw,conso_gaz_mw,temperature_2m_max,temperature_2m_min,precipitation_sum,weather_code,apparent_temperature_max,apparent_temperature_min,...,sunrise,sunset,sunshine_duration,daylight_duration,wind_speed_10m_max,wind_gusts_10m_max,wind_direction_10m_dominant,shortwave_radiation_sum,et0_fao_evapotranspiration,time_idx
0,2013-01-01,11,389597.0,53348.0,8.7,3.8,8.7,61,5.7,0.3,...,35040,65040,24366.94,30049.78,25.0,48.6,248,3.55,0.59,0
1,2013-01-01,27,108098.0,65331.0,9.0,4.4,6.4,55,6.1,0.9,...,35460,65220,24513.07,29719.84,27.1,53.6,261,3.87,0.52,0
2,2013-01-01,28,152060.0,110271.0,8.9,3.9,8.1,55,5.8,0.6,...,34860,65400,25155.29,30508.68,25.5,49.3,246,3.82,0.52,0
3,2013-01-01,32,248073.0,165424.0,8.0,3.7,8.0,53,5.4,0.5,...,34020,64800,0.0,30781.38,25.0,50.0,213,0.96,0.61,0
4,2013-01-01,44,214344.0,161462.0,7.1,4.3,6.1,53,4.1,-0.1,...,33660,63840,0.0,30193.14,19.3,43.6,206,0.91,0.62,0


In [5]:
df_train_final.isnull().sum()

date                           0
insee_region                   0
conso_elec_mw                  0
conso_gaz_mw                   0
temperature_2m_max             0
temperature_2m_min             0
precipitation_sum              0
weather_code                   0
apparent_temperature_max       0
apparent_temperature_min       0
rain_sum                       0
snowfall_sum                   0
precipitation_hours            0
sunrise                        0
sunset                         0
sunshine_duration              0
daylight_duration              0
wind_speed_10m_max             0
wind_gusts_10m_max             0
wind_direction_10m_dominant    0
shortwave_radiation_sum        0
et0_fao_evapotranspiration     0
time_idx                       0
dtype: int64

In [6]:
df_train_final.shape

(31776, 23)

In [7]:
df_train_final["insee_region"].value_counts()

insee_region
11    2648
27    2648
28    2648
32    2648
44    2648
52    2648
75    2648
53    2648
76    2648
84    2648
93    2648
24    2648
Name: count, dtype: int64

In [8]:
df_val_final["insee_region"].value_counts()

insee_region
75    662
53    662
28    662
76    662
84    662
93    662
44    662
32    661
24    661
11    661
27    661
52    661
Name: count, dtype: int64

In [9]:
# === Vérification NaN/None après transformation ===
print("✅ df_train_final info:")
print(df_train_final.isnull().sum())
print("\n✅ df_val_final info:")
print(df_val_final.isnull().sum())

# Optionnel: Afficher lignes problématiques si NaN trouvés
print("\n=== Vérification lignes avec NaN dans train ===")
print(df_train_final[df_train_final.isnull().any(axis=1)])

print("\n=== Vérification lignes avec NaN dans val ===")
print(df_val_final[df_val_final.isnull().any(axis=1)])


✅ df_train_final info:
date                           0
insee_region                   0
conso_elec_mw                  0
conso_gaz_mw                   0
temperature_2m_max             0
temperature_2m_min             0
precipitation_sum              0
weather_code                   0
apparent_temperature_max       0
apparent_temperature_min       0
rain_sum                       0
snowfall_sum                   0
precipitation_hours            0
sunrise                        0
sunset                         0
sunshine_duration              0
daylight_duration              0
wind_speed_10m_max             0
wind_gusts_10m_max             0
wind_direction_10m_dominant    0
shortwave_radiation_sum        0
et0_fao_evapotranspiration     0
time_idx                       0
dtype: int64

✅ df_val_final info:
date                           0
insee_region                   0
conso_elec_mw                  0
conso_gaz_mw                   0
temperature_2m_max             0
temperature_2m_min

In [10]:
print("✅ Points par région dans le train :")
print(df_train_final["insee_region"].value_counts())

print("\n✅ Points par région dans la val :")
print(df_val_final["insee_region"].value_counts())

✅ Points par région dans le train :
insee_region
11    2648
27    2648
28    2648
32    2648
44    2648
52    2648
75    2648
53    2648
76    2648
84    2648
93    2648
24    2648
Name: count, dtype: int64

✅ Points par région dans la val :
insee_region
75    662
53    662
28    662
76    662
84    662
93    662
44    662
32    661
24    661
11    661
27    661
52    661
Name: count, dtype: int64


In [11]:
for col in df_train_final.columns:
    if df_train_final[col].isnull().any():
        print(f"❗ Attention : NaN dans colonne '{col}'")

for col in df_val_final.columns:
    if df_val_final[col].isnull().any():
        print(f"❗ Attention : NaN dans colonne '{col}'")

# Vérifions aussi les types au cas où
print("\n✅ Types des colonnes dans train :")
print(df_train_final.dtypes)

print("\n✅ Types des colonnes dans val :")
print(df_val_final.dtypes)



✅ Types des colonnes dans train :
date                           datetime64[ns]
insee_region                           object
conso_elec_mw                         float64
conso_gaz_mw                          float64
temperature_2m_max                    float64
temperature_2m_min                    float64
precipitation_sum                     float64
weather_code                            int64
apparent_temperature_max              float64
apparent_temperature_min              float64
rain_sum                              float64
snowfall_sum                          float64
precipitation_hours                   float64
sunrise                                 int32
sunset                                  int32
sunshine_duration                     float64
daylight_duration                     float64
wind_speed_10m_max                    float64
wind_gusts_10m_max                    float64
wind_direction_10m_dominant             int64
shortwave_radiation_sum               float64

In [12]:
# %%
from pytorch_forecasting.data.encoders import MultiNormalizer, TorchNormalizer

# === Définir paramètres ===
MAX_ENCODER_LENGTH = 24    # Nombre d'heures passées utilisées
MAX_PREDICTION_LENGTH = 24 # Nombre d'heures à prédire

# === Définir les variables ===
target_cols = ["conso_elec_mw", "conso_gaz_mw"]
time_idx = "time_idx"      # Colonne pour l'ordre temporel
group_id = "insee_region"  # Pour identifier une série temporelle unique

target_normalizer = MultiNormalizer(
    [TorchNormalizer()] * len(target_cols)  # un normalizer par target
)

# === Créer TimeSeriesDataSet pour le TRAIN ===
training = TimeSeriesDataSet(
    df_train_final,
    time_idx="time_idx",
    target=["conso_elec_mw", "conso_gaz_mw"],
    group_ids=["insee_region"],
    max_encoder_length=24,
    max_prediction_length=24,
    min_encoder_length=12,            # 👈 Autoriser des séquences plus courtes
    min_prediction_idx=1,             # 👈 Autoriser à prédire même à partir d'un point
    time_varying_known_reals=[
        "temperature_2m_max", "temperature_2m_min", "precipitation_sum", "weather_code", "apparent_temperature_max",
        "apparent_temperature_min", "rain_sum", "snowfall_sum", "precipitation_hours", "sunrise", "sunset",
        "sunshine_duration", "daylight_duration", "wind_speed_10m_max", "wind_gusts_10m_max",
        "wind_direction_10m_dominant", "shortwave_radiation_sum", "et0_fao_evapotranspiration"
    ],
    time_varying_unknown_reals=["conso_elec_mw", "conso_gaz_mw"],
    static_categoricals=["insee_region"],
    target_normalizer=target_normalizer,
    allow_missing_timesteps=True,
)

# === TimeSeriesDataSet pour la VALIDATION ===
validation = TimeSeriesDataSet.from_dataset(training, df_val_final)

# === DEBUG Prints ===
print(f"✅ Train samples: {len(training)}")
print(f"✅ Val samples: {len(validation)}")

# Optionnel : tester un sample
sample = training[0]
x, y = sample

print(f"✅ x keys: {list(x.keys())}")
print(f"✅ y type: {type(y)}")

# Afficher la “shape” ou la longueur de chaque élément de y
if isinstance(y, tuple):
    for i, yi in enumerate(y):
        if hasattr(yi, "shape"):
            print(f"✅ y[{i}] shape: {tuple(yi.shape)}")
        elif hasattr(yi, "__len__"):
            print(f"✅ y[{i}] length: {len(yi)}")
        else:
            print(f"✅ y[{i}]: {yi}")
else:
    # cas inattendu : y n’est pas un tuple
    print(f"✅ y shape: {getattr(y, 'shape', None)}")


✅ Train samples: 31500
✅ Val samples: 7663
✅ x keys: ['x_cat', 'x_cont', 'encoder_length', 'decoder_length', 'encoder_target', 'encoder_time_idx_start', 'groups', 'target_scale']
✅ y type: <class 'tuple'>
✅ y[0] length: 2
✅ y[1]: None


In [13]:
# à la place de `from pytorch_forecasting.data import TemporalDataLoader`
# et de la construction manuelle de DataLoader :

BATCH_SIZE = 128

train_dataloader = training.to_dataloader(
    train=True,
    batch_size=BATCH_SIZE,
    num_workers=0,
)

val_dataloader = validation.to_dataloader(
    train=False,
    batch_size=BATCH_SIZE,
    num_workers=0,
)



In [14]:
# %%
from pytorch_forecasting import TemporalFusionTransformer
from pytorch_forecasting.metrics import QuantileLoss

# === Définir le modèle TFT ===
tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=1e-3,
    hidden_size=32,
    attention_head_size=4,
    dropout=0.1,
    hidden_continuous_size=16,
    loss=QuantileLoss(quantiles=[0.1, 0.5, 0.9]),  # c’est ici qu’on précise les quantiles
    reduce_on_plateau_patience=4,
)

print(f"Nombre de paramètres du modèle : {tft.size()/1e3:.1f}k paramètres")


Nombre de paramètres du modèle : 139.1k paramètres


c:\Users\arnov\anaconda3\envs\ML_env\lib\site-packages\lightning\pytorch\utilities\parsing.py:209: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
c:\Users\arnov\anaconda3\envs\ML_env\lib\site-packages\lightning\pytorch\utilities\parsing.py:209: Attribute 'logging_metrics' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['logging_metrics'])`.


In [15]:
from lightning.pytorch import Trainer
from lightning.pytorch.callbacks import EarlyStopping, ModelCheckpoint

# === Callbacks ===
early_stop = EarlyStopping(
    monitor="val_loss",  # ou "val_quantile_loss" selon ce que vous logguez
    patience=7,
    mode="min",
)

checkpoint = ModelCheckpoint(
    dirpath="checkpoints/tft",
    filename="best_tft",
    monitor="val_loss",
    mode="min",
    save_top_k=1,
)

# === Trainer ===
trainer = Trainer(
    max_epochs=30,
    accelerator="gpu" if torch.cuda.is_available() else "cpu",
    devices=1,
    callbacks=[early_stop, checkpoint],
    gradient_clip_val=0.1,
    limit_train_batches=1.0,
    limit_val_batches=1.0,
    deterministic=True,
)

# === Lancement de l'entraînement ===
trainer.fit(
    tft,                    # votre TemporalFusionTransformer
    train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader,
)


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..
`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..
c:\Users\arnov\anaconda3\envs\ML_env\lib\site-packages\lightning\pytorch\callbacks\model_checkpoint.py:654: Checkpoint directory C:\Users\arnov\Desktop\Energy-prediction\model\DeepLearning\checkpoints\tft exists and is not empty.

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | MultiLoss                       | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 72     | train
3  | prescale

Sanity Checking:   0%|          | 0/2 [00:00<?, ?it/s]

c:\Users\arnov\anaconda3\envs\ML_env\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


                                                                           

c:\Users\arnov\anaconda3\envs\ML_env\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


Epoch 29: 100%|██████████| 246/246 [01:07<00:00,  3.63it/s, v_num=7, train_loss_step=7.76e+3, val_loss=1.12e+4, train_loss_epoch=7.85e+3]

`Trainer.fit` stopped: `max_epochs=30` reached.


Epoch 29: 100%|██████████| 246/246 [01:07<00:00,  3.62it/s, v_num=7, train_loss_step=7.76e+3, val_loss=1.12e+4, train_loss_epoch=7.85e+3]


In [None]:

from pytorch_lightning import Trainer

best_model_path = trainer.checkpoint_callback.best_model_path
print(f" Meilleur modèle TFT chargé depuis : {best_model_path}")

# 2. Charger le modèle depuis le checkpoint
from pytorch_forecasting.models import TemporalFusionTransformer
tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path)


✅ Meilleur modèle TFT chargé depuis : C:\Users\arnov\Desktop\Energy-prediction\model\DeepLearning\checkpoints\tft\best_tft-v1.ckpt


In [18]:
from pathlib import Path
import pandas as pd

# Remonter au dossier racine
BASE_DIR = Path(__file__).resolve().parents[2] if "__file__" in globals() else Path.cwd().parents[1]
DATA_DIR = BASE_DIR / "data" / "modified_data"

# Charger le fichier test
df_test = pd.read_csv(DATA_DIR / "test_daily.csv")
df_test["date"] = pd.to_datetime(df_test["date"])


In [20]:
from data_processing.transformation import transform_dl

df_test_transformed = transform_dl(df_test, filter_too_short=True)


In [23]:
test_dataset = TimeSeriesDataSet.from_dataset(
    training,                      # your original training TimeSeriesDataSet
    df_test_transformed,           # the DataFrame you preprocessed with transform_dl()
    predict=True,                  # prepare it for prediction
    stop_randomization=True        # turn off any shuffling/augmentation
)

# 2) Create a DataLoader (no shuffling)
test_dataloader = test_dataset.to_dataloader(
    train=False,
    batch_size=64,
    num_workers=4
)

In [24]:
from pytorch_forecasting import TemporalFusionTransformer

# load the best model you trained
best_model_path = trainer.checkpoint_callback.best_model_path  
tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path)

# produce raw predictions and the input batch x
raw_predictions, x = tft.predict(test_dataloader, mode="raw", return_x=True)

# visualize one example
tft.plot_prediction(x, raw_predictions, idx=0)

c:\Users\arnov\anaconda3\envs\ML_env\lib\site-packages\lightning\pytorch\utilities\parsing.py:209: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
c:\Users\arnov\anaconda3\envs\ML_env\lib\site-packages\lightning\pytorch\utilities\parsing.py:209: Attribute 'logging_metrics' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['logging_metrics'])`.
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\arnov\anaconda3\envs\ML_env\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:420: Consider setting `persistent_workers=True` in 

ValueError: too many values to unpack (expected 2)