In [1]:
!where python

C:\Users\usermine\Amine-Jupiter\SESSION6\tft\env_tft\Scripts\python.exe
C:\Users\usermine\AppData\Local\Programs\Python\Python311\python.exe
C:\Users\usermine\AppData\Local\Microsoft\WindowsApps\python.exe


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
from pytorch_forecasting import TimeSeriesDataSet, TemporalFusionTransformer
from pytorch_forecasting.metrics import QuantileLoss
from pytorch_lightning import Trainer
from torch.utils.data import DataLoader
from pytorch_lightning import LightningModule
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.callbacks import ModelCheckpoint

In [3]:
print("PyTorch version:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())

PyTorch version: 2.5.1+cpu
CUDA available: False


In [4]:
# Charger les données
df = pd.read_csv("MetroPT3_imputed_final.csv", delimiter=",", decimal=".", index_col=0)
df.reset_index(drop=True, inplace=True)

In [5]:
# Convertir la colonne 'timestamp' en type datetime
df['timestamp'] = pd.to_datetime(df['timestamp'])

In [6]:
# Définir les colonnes continues et catégoriques pour l'entrainement
continuous_features = ["TP2", "DV_pressure", "Oil_temperature", "Motor_current", "Reservoirs"]
categorical_features = ["COMP", "DV_eletric", "Towers"]

In [7]:
# Convertir les colonnes catégoriques en type "category"
for col in categorical_features:
    df[col] = df[col].astype('category')

In [8]:
# Liste des colonnes nécessaires
columns_to_keep = ["timestamp"] + continuous_features + ["panne"] + categorical_features

# Réduire le DataFrame
df = df[columns_to_keep]

# Afficher les premières lignes pour vérifier
display(df.head())


Unnamed: 0,timestamp,TP2,DV_pressure,Oil_temperature,Motor_current,Reservoirs,panne,COMP,DV_eletric,Towers
0,2020-02-01 00:00:00,-0.012,-0.024,53.6,0.04,9.358,0,1.0,0.0,1.0
1,2020-02-01 00:00:10,-0.014,-0.022,53.675,0.04,9.348,0,1.0,0.0,1.0
2,2020-02-01 00:00:20,-0.012,-0.022,53.6,0.0425,9.338,0,1.0,0.0,1.0
3,2020-02-01 00:00:30,-0.012,-0.022,53.425,0.04,9.328,0,1.0,0.0,1.0
4,2020-02-01 00:00:40,-0.012,-0.022,53.475,0.04,9.318,0,1.0,0.0,1.0


In [9]:
# Créer la colonne 'time_idx' basée sur les timestamps
df['time_idx'] = ((df['timestamp'] - df['timestamp'].min()).dt.total_seconds() // 10).astype(int)

# Ajouter une colonne 'group_id' pour identifier les groupes (utile pour un seul groupe)
df['group_id'] = "compresseur"

In [31]:
display(df.head())

Unnamed: 0,timestamp,TP2,DV_pressure,Oil_temperature,Motor_current,Reservoirs,panne,COMP,DV_eletric,Towers,time_idx,group_id
0,2020-02-01 00:00:00,-0.012,-0.024,53.6,0.04,9.358,0,1.0,0.0,1.0,0,compresseur
1,2020-02-01 00:00:10,-0.014,-0.022,53.675,0.04,9.348,0,1.0,0.0,1.0,1,compresseur
2,2020-02-01 00:00:20,-0.012,-0.022,53.6,0.0425,9.338,0,1.0,0.0,1.0,2,compresseur
3,2020-02-01 00:00:30,-0.012,-0.022,53.425,0.04,9.328,0,1.0,0.0,1.0,3,compresseur
4,2020-02-01 00:00:40,-0.012,-0.022,53.475,0.04,9.318,0,1.0,0.0,1.0,4,compresseur


In [11]:
# Diviser les données en ensembles d'entraînement et de validation
train_data = df[(df['timestamp'] >= '2020-02-01 00:00:00') & (df['timestamp'] <= '2020-06-07 14:30:00')]
val_data   = df[(df['timestamp'] >= '2020-06-07 14:30:10') & (df['timestamp'] <= '2020-09-01 03:59:50')]

In [12]:
# Vérifier les tailles des ensembles
print(f"Entraînement : {len(train_data)} lignes")
print(f"Validation : {len(val_data)} lignes")

Entraînement : 1102501 lignes
Validation : 739259 lignes


In [15]:
for col in categorical_features:
    train_data.loc[:, col] = train_data[col].astype(float).astype(int).astype(str).astype('category')
    val_data.loc[:, col] = val_data[col].astype(float).astype(int).astype(str).astype('category')

In [16]:
# Vérifier les types des colonnes
print(df.dtypes)

timestamp          datetime64[ns]
TP2                       float64
DV_pressure               float64
Oil_temperature           float64
Motor_current             float64
Reservoirs                float64
panne                       int64
COMP                     category
DV_eletric               category
Towers                   category
time_idx                    int64
group_id                   object
dtype: object


<ul style="text-align: center;font-family: times, serif; font-size:14pt; color:Red;">
<strong>#########################  Préparation des données pour le modèle ############################</strong>
</ul>

In [32]:
# Définir les longueurs des séquences

# Créer le dataset d'entraînement
training = TimeSeriesDataSet(
    train_data,
    time_idx="time_idx",
    target="panne",  # Cible définie comme 'panne'
    group_ids=["group_id"],
    min_encoder_length=90,  # Ajuster selon votre besoin
    max_encoder_length=180,
    max_prediction_length=1,
    time_varying_known_reals=["time_idx"],
    time_varying_unknown_reals=["TP2", "DV_pressure", "Oil_temperature", "Motor_current", "Reservoirs"],
    time_varying_unknown_categoricals=["COMP", "DV_eletric", "Towers"],
)

# Créer le dataset de validation
validation = TimeSeriesDataSet.from_dataset(training, val_data)

In [33]:
# Vérifiez les classes dans le jeu de données d'entraînement et de validation
print("Classes uniques dans le jeu de données d'entraînement :", train_data["panne"].unique())
print("Classes uniques dans le jeu de données de validation :", val_data["panne"].unique())


Classes uniques dans le jeu de données d'entraînement : [0 2 1]
Classes uniques dans le jeu de données de validation : [0 2 1]


In [34]:
# Créer le dataset de validation
validation = TimeSeriesDataSet.from_dataset(
    training,
    val_data,  # Données de validation
    stop_randomization=True  # Pour garantir que les séquences sont cohérentes
)


In [None]:
# Obtenir les prédictions
val_dataloader = validation.to_dataloader(train=False, batch_size=64)

# Récupérer les prédictions et les cibles
all_preds = []
all_targets = []

tft.eval()
with torch.no_grad():
    for batch in val_dataloader:
        x, y = batch
        if isinstance(y, tuple):
            y = y[0]

        y_hat = tft(x)["prediction"]
        preds = torch.argmax(y_hat, dim=-1).cpu().numpy()
        targets = y.cpu().numpy()

        all_preds.append(preds)
        all_targets.append(targets)

# Convertir en tableaux numpy
all_preds = np.concatenate(all_preds)
all_targets = np.concatenate(all_targets)

# Afficher les métriques
print("Rapport de classification par classe :")
print(classification_report(all_targets, all_preds))

print("Matrice de confusion :")
print(confusion_matrix(all_targets, all_preds))


<ul style="text-align: center;font-family: times, serif; font-size:14pt; color:Red;">
<strong>######################### Charger les DataLoaders ############################</strong>
</ul>

In [18]:
# Créer les DataLoaders pour l'entraînement et la validation
train_dataloader = training.to_dataloader(train=True, batch_size=64, num_workers=4, batch_sampler=None)
val_dataloader = validation.to_dataloader(train=False, batch_size=64, num_workers=4, batch_sampler=None)

<ul style="text-align: center;font-family: times, serif; font-size:14pt; color:Red;">
<strong>######################### Initialiser le modèle TFT ############################</strong>
</ul>

In [19]:
# Initialiser le modèle TFT
tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=0.03,     # Taux d'apprentissage pour l'optimisation
    hidden_size=16,         # Taille des couches cachées
    attention_head_size=4,  # Nombre de têtes d'attention
    dropout=0.1,            # Pour éviter le surapprentissage
    hidden_continuous_size=8,  # Taille des représentations continues
    loss=QuantileLoss(),       # Fonction de perte pour les séries temporelles
    log_interval=10,           # Journaliser toutes les 10 itérations
    reduce_on_plateau_patience=4,  # Réduire le learning rate si stagnation
)

C:\Users\usermine\Amine-Jupiter\SESSION6\tft\env_tft\Lib\site-packages\lightning\pytorch\utilities\parsing.py:209: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
C:\Users\usermine\Amine-Jupiter\SESSION6\tft\env_tft\Lib\site-packages\lightning\pytorch\utilities\parsing.py:209: Attribute 'logging_metrics' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['logging_metrics'])`.
  super().__init__(loss=loss, logging_metrics=logging_metrics, **kwargs)


In [21]:
# Encapsulation dans LightningModule
class CustomTFT(LightningModule):
    def __init__(self, tft_model):
        super().__init__()
        self.tft_model = tft_model

    def forward(self, x):
        return self.tft_model(x)

    def training_step(self, batch, batch_idx):
        x, y = batch
        if isinstance(y, tuple):
            y = y[0]
        y_hat = self.tft_model(x)
        predictions = y_hat["prediction"]
        loss = self.tft_model.loss(predictions, y)
        self.log("train_loss", loss, batch_size=y.size(0))
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        if isinstance(y, tuple):
            y = y[0]
        y_hat = self.tft_model(x)
        predictions = y_hat["prediction"]
        val_loss = self.tft_model.loss(predictions, y)
        self.log("val_loss", val_loss, batch_size=y.size(0))
        return val_loss

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=0.03)


# Créer un modèle encapsulé
model = CustomTFT(tft)

<ul style="text-align: center;font-family: times, serif; font-size:14pt; color:Red;">
<strong>######################### Entraîner le modèle ############################</strong>
</ul>

In [22]:
from pytorch_lightning import Trainer

# Ajouter les callbacks pour Early Stopping et sauvegarde du modèle
early_stopping = EarlyStopping(
    monitor="val_loss",  # Surveiller la métrique val_loss
    patience=5,          # Arrêter après 5 époques sans amélioration
    verbose=True,
    mode="min"           # Minimiser la perte
)

checkpoint_callback = ModelCheckpoint(
    monitor="val_loss",  # Sauvegarder le meilleur modèle basé sur val_loss
    dirpath="checkpoints",  # Dossier pour les checkpoints
    filename="best-checkpoint",  # Nom du fichier de checkpoint
    save_top_k=1,               # Sauvegarder uniquement le meilleur modèle
    mode="min"                  # Minimiser la perte
)


# Configurer le Trainer
trainer = Trainer(
    max_epochs=2,                # Nombre d'époques maximum
    accelerator="cpu",           # Utilisation du CPU
    gradient_clip_val=0.1,       # Clip pour éviter les gradients explosifs
    callbacks=[early_stopping, checkpoint_callback],  # Ajouter les callbacks
)

# Entraîner le modèle
trainer.fit(model=model, train_dataloaders=train_dataloader, val_dataloaders=val_dataloader)

# Vérifier si l'entraînement a été interrompu et sauvegarder l'état
trainer.save_checkpoint("last-phase-checkpoint.ckpt")

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
C:\Users\usermine\Amine-Jupiter\SESSION6\tft\env_tft\Lib\site-packages\pytorch_lightning\trainer\connectors\logger_connector\logger_connector.py:67: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `pytorch_lightning` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default

  | Name      | Type                      | Params
--------------------------------------------------------
0 | tft_model | TemporalFusionTransformer | 19.1 K
--------------------------------------------------------
19.1 K    Trainable params
0         Non-trainable params
19.1 K    Total params


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_loss improved. New best score: 0.115


Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=2` reached.


<ul style="text-align: center;font-family: times, serif; font-size:14pt; color:Red;">
<strong>######################### Reprendre l'entraînement ############################</strong>
</ul>

In [None]:
from pytorch_forecasting import TemporalFusionTransformer
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.callbacks import ModelCheckpoint

# Charger le modèle depuis le dernier checkpoint
checkpoint_path = "last-phase-checkpoint.ckpt"
tft = TemporalFusionTransformer.load_from_checkpoint(checkpoint_path)

# Configurer Early Stopping et Checkpoint Callback
early_stopping = EarlyStopping(
    monitor="val_loss",
    patience=5,
    verbose=True,
    mode="min"
)

checkpoint_callback = ModelCheckpoint(
    monitor="val_loss",
    dirpath="checkpoints",
    filename="best-checkpoint",
    save_top_k=1,
    mode="min"
)

# Configurer le Trainer pour reprendre l'entraînement
trainer = Trainer(
    max_epochs=30,               # Le nombre total d'époques à effectuer
    accelerator="cpu",
    gradient_clip_val=0.1,
    callbacks=[early_stopping, checkpoint_callback],
    resume_from_checkpoint=checkpoint_path  # Reprendre depuis le dernier checkpoint
)

# Continuer l'entraînement
trainer.fit(model=tft, train_dataloaders=train_dataloader, val_dataloaders=val_dataloader)



<ul style="text-align: center;font-family: times, serif; font-size:14pt; color:Red;">
<strong>######################### Faire des prédictions ############################</strong>
</ul>

In [None]:
# Faire des prédictions
raw_predictions, x = tft.predict(val_dataloader, mode="raw", return_x=True)

# Afficher les prédictions brutes
print(raw_predictions)

C:\Users\usermine\Amine-Jupiter\SESSION6\TFT\env_tft\Lib\site-packages\lightning\pytorch\trainer\connectors\logger_connector\logger_connector.py:76: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `lightning.pytorch` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default
C:\Users\usermine\Amine-Jupiter\SESSION6\TFT\env_tft\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:420: Consider setting `persistent_workers=True` in 'predict_dataloader' to speed up the dataloader worker initialization.


In [None]:
# Obtenir les prédictions
predictions = tft.predict(val_dataloader)

# Afficher les dimensions des prédictions
print(f"Shape of predictions: {predictions.shape}")


In [None]:
# Afficher quelques exemples de prédictions
print("Example predictions:", predictions[:5])

<ul style="text-align: center;font-family: times, serif; font-size:14pt; color:Red;">
<strong>######################### Évaluation ############################</strong>
</ul>

In [25]:
# Les métriques de validation sont enregistrées par le Trainer
print(f"Meilleure perte de validation (val_loss): {trainer.callback_metrics['val_loss']}")


Meilleure perte de validation (val_loss): 0.5704852342605591


In [28]:
from pytorch_forecasting.metrics import SMAPE, MAE, RMSE
import torch

# Faire des prédictions
actuals = []
for batch in val_dataloader:
    x, y = batch
    if isinstance(y, tuple):  # Si `y` est un tuple, prendre la première valeur
        y = y[0]
    actuals.append(y)

# Concaténer les valeurs réelles en un seul tenseur
actuals = torch.cat(actuals)

# Obtenir les prédictions du modèle
predictions = tft.predict(val_dataloader, mode="raw")
predictions = predictions["prediction"]

# Calculer les métriques
smape = SMAPE()(predictions, actuals).item()
mae = MAE()(predictions, actuals).item()
rmse = RMSE()(predictions, actuals).item()

# Afficher les résultats
print(f"SMAPE: {smape:.4f}")
print(f"MAE: {mae:.4f}")
print(f"RMSE: {rmse:.4f}")



C:\Users\usermine\Amine-Jupiter\SESSION6\tft\env_tft\Lib\site-packages\lightning\pytorch\trainer\connectors\logger_connector\logger_connector.py:76: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `lightning.pytorch` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default
C:\Users\usermine\Amine-Jupiter\SESSION6\tft\env_tft\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.


AssertionError: Prediction should only have one extra dimension

In [27]:
import torch
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix

# Supposons que `tft` est votre modèle déjà chargé
# Préparez votre DataLoader pour la validation
val_dataloader = validation.to_dataloader(train=False, batch_size=64)

# Obtenir les prédictions
all_preds = []
all_targets = []

tft.eval()
with torch.no_grad():
    for batch in val_dataloader:
        x, y = batch

        # Si y est un tuple, extraire la cible réelle
        if isinstance(y, tuple):
            y = y[0]

        # Obtenez les prédictions
        y_hat = tft(x)["prediction"]

        # Convertir les prédictions en classe la plus probable
        preds = torch.argmax(y_hat, dim=-1).cpu().numpy()
        targets = y.cpu().numpy()

        all_preds.append(preds)
        all_targets.append(targets)

# Convertir les listes en tableaux numpy
all_preds = np.concatenate(all_preds)
all_targets = np.concatenate(all_targets)

# Calcul des métriques par classe
print("Rapport de classification par classe :")
print(classification_report(all_targets, all_preds))

# Matrice de confusion
print("Matrice de confusion :")
print(confusion_matrix(all_targets, all_preds))


Rapport de classification par classe :


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00  737458.0
           1       0.00      0.00      0.00    1621.0
           2       0.00      0.00      0.00     180.0
           5       0.00      0.00      0.00       0.0
           6       0.00      0.00      0.00       0.0

    accuracy                           0.00  739259.0
   macro avg       0.00      0.00      0.00  739259.0
weighted avg       0.00      0.00      0.00  739259.0

Matrice de confusion :
[[     0      0      0 684156  53302]
 [     0      0      0   1621      0]
 [     0      0      0    180      0]
 [     0      0      0      0      0]
 [     0      0      0      0      0]]


In [29]:
print("Catégories définies :", validation.categoricals)
print("Reels définis :", validation.reals)


Catégories définies : ['COMP', 'DV_eletric', 'Towers']
Reels définis : ['encoder_length', 'time_idx', 'TP2', 'DV_pressure', 'Oil_temperature', 'Motor_current', 'Reservoirs']


In [30]:
# Vérifier les classes uniques dans la colonne cible
print("Classes uniques dans la colonne 'panne' :")
print(validation.data["panne"].unique())


Classes uniques dans la colonne 'panne' :


KeyError: 'panne'

<ul style="text-align: center;font-family: times, serif; font-size:14pt; color:Red;">
<strong>######################### Sauvegarder le modèle ############################</strong>
</ul>

In [23]:
# Sauvegarder le modèle
model_checkpoint = "tft_model_1_2025_01_02_08_20_epoch2.ckpt"
trainer.save_checkpoint(model_checkpoint)
print(f"Modèle sauvegardé sous : {model_checkpoint}")


Modèle sauvegardé sous : tft_model_1_2025_01_02_08_20_epoch2.ckpt


In [24]:
import torch

# Sauvegarder uniquement les poids du modèle
torch.save(tft.state_dict(), "tft_model_weights_2025_01_02_08_20_epoch2.pth")
print("Poids du modèle sauvegardés avec succès !")


Poids du modèle sauvegardés avec succès !


In [None]:
import pickle
from pytorch_forecasting import TemporalFusionTransformer

# Charger ou initialiser votre modèle TFT
tft = TemporalFusionTransformer(...)  # Assurez-vous que le modèle est déjà entraîné

# Sauvegarder le modèle en fichier `.pkl`
with open("tft_model.pkl", "wb") as f:
    pickle.dump(tft, f)

print("Modèle sauvegardé en 'tft_model.pkl'")


In [None]:
from pytorch_forecasting import TemporalFusionTransformer

# Charger le modèle depuis le checkpoint
tft = TemporalFusionTransformer.load_from_checkpoint("tft_model_final.ckpt")
print("Modèle chargé avec succès !")
