In [None]:
!pip install scipy

In [None]:
import pickle
import numpy as np
import networkx as nx
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset

In [None]:
# Fonction pour charger un fichier .pkl
def load_pkl_file(file_path):
    with open(file_path, 'rb') as f:
        data = pickle.load(f)
    return data

# Exemple de chemin de fichier .pkl
file_path = "dataset_trainA.pkl"  # Remplacez par le chemin de votre fichier .pkl

data = load_pkl_file(file_path)


In [3]:
data.iloc[0]["flux_dégradé"]

Unnamed: 0,Abbesses,Aimé Césaire,Alexandre Dumas,Alma - Marceau,Alésia,Anatole France,Anvers,Argentine,Arts et Métiers,Assemblée nationale,...,Villejuif - Paul Vaillant-Couturier,Villiers,Volontaires,Voltaire,Wagram,École Militaire,École vétérinaire de Maisons-Alfort,Église d'Auteuil,Église de Pantin,Étienne Marcel
Abbesses,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Aimé Césaire,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Alexandre Dumas,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Alma - Marceau,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Alésia,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
École Militaire,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
École vétérinaire de Maisons-Alfort,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Église d'Auteuil,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Église de Pantin,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [4]:
print(data.iloc[0]["flux_dégradé"])  # Affiche la première paire de matrices

                                     Abbesses  Aimé Césaire  Alexandre Dumas  \
Abbesses                                  0.0           0.0              0.0   
Aimé Césaire                              0.0           0.0              0.0   
Alexandre Dumas                           0.0           0.0              0.0   
Alma - Marceau                            0.0           0.0              0.0   
Alésia                                    0.0           0.0              0.0   
...                                       ...           ...              ...   
École Militaire                           0.0           0.0              0.0   
École vétérinaire de Maisons-Alfort       0.0           0.0              0.0   
Église d'Auteuil                          0.0           0.0              0.0   
Église de Pantin                          0.0           0.0              0.0   
Étienne Marcel                            0.0           0.0              0.0   

                                     Al

In [None]:
# Fonction pour normaliser la matrice d'adjacence
def normalize_adj(adj):
    adj = adj + torch.eye(adj.size(0))  # Ajout des boucles propres (self-loops)
    degree = torch.sum(adj, dim=1)
    d_inv_sqrt = torch.pow(degree, -0.5).flatten()
    d_inv_sqrt[torch.isinf(d_inv_sqrt)] = 0.0
    d_mat_inv_sqrt = torch.diag(d_inv_sqrt)
    return torch.mm(torch.mm(d_mat_inv_sqrt, adj), d_mat_inv_sqrt)

def normalize_features(features):
    mean = torch.mean(features, dim=0)
    std = torch.std(features, dim=0)
    std[std == 0] = 1  # Évite la division par 0
    normalized_features = (features - mean) / std
    return normalized_features, mean, std

def denormalize_output(output, mean, std):
    return (output * std) + mean

In [None]:
class CustomLoss(nn.Module):
    def __init__(self):
        super(CustomLoss, self).__init__()
    
    def forward(self, predictions, targets):
        # Masquer les valeurs où target == 0
        mask = (targets != 0).float()  # Masque binaire : 1 si target != 0, sinon 0
        
        # Différence uniquement là où target != 0
        diff = predictions - targets
        
        # Appliquer le masque
        masked_diff = diff * mask
        
        # Calculer la somme des différences au carré (MSE) seulement pour les éléments valides
        loss = torch.sum(masked_diff ** 2) / torch.sum(mask)  # Moyenne normalisée
        return loss


In [None]:
# Définition de la couche GCN
class GCNLayer(nn.Module):
    def __init__(self, in_features, out_features):
        super(GCNLayer, self).__init__()
        self.linear = nn.Linear(in_features, out_features)

    def forward(self, x, adj):
        # Application de la normalisation et propagation
        out = torch.matmul(adj, x)  # Multiplication par la matrice de connectivité
        out = self.linear(out)  # Transformation linéaire
        return F.relu(out)

# Définition du modèle GCN complet
class GCN(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(GCN, self).__init__()
        self.gcn1 = GCNLayer(input_dim, hidden_dim)  # Première couche
        self.gcn2 = GCNLayer(hidden_dim, output_dim)  # Deuxième couche

    def forward(self, x, adj):
        h = self.gcn1(x, adj)  # Propagation dans la première couche
        h = self.gcn2(h, adj)  # Propagation dans la deuxième couche
        return h  # Sortie du modè


In [None]:
class GATLayer(nn.Module):
    def __init__(self, in_features, out_features):
        super(GATLayer, self).__init__()
        self.W = nn.Linear(in_features, out_features, bias=False)
        self.a = nn.Linear(2 * out_features, 1, bias=False)

    def forward(self, x, adj):
        h = self.W(x)
        N = h.size(0)
        a_input = torch.cat([h.repeat(1, N).view(N * N, -1), h.repeat(N, 1)], dim=1)
        e = F.leaky_relu(self.a(a_input).view(N, N))
        attention = F.softmax(e.masked_fill(adj == 0, -1e9), dim=1)
        h_prime = torch.matmul(attention, h)
        return F.relu(h_prime)

class GAT(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(GAT, self).__init__()
        self.gat1 = GATLayer(input_dim, hidden_dim)
        self.gat2 = GATLayer(hidden_dim, output_dim)

    def forward(self, x, adj):
        h = self.gat1(x, adj)
        h = self.gat2(h, adj)
        return h


In [None]:
# Normalisation des matrices d'adjacence
normalized_adj_matrices = [normalize_adj(adj) for adj in adj_matrices]

# Division des données en ensembles d'entraînement et de validation
train_ratio = 0.8
num_train = int(len(normalized_adj_matrices) * train_ratio)

train_adj = normalized_adj_matrices[:num_train]
train_flux = flux_matrices[:num_train]  # Flux réels non normalisés
val_adj = normalized_adj_matrices[num_train:]
val_flux = flux_matrices[num_train:]  # Flux réels non normalisés

In [None]:
# Définition de l'appareil (CPU ou GPU)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyperparamètres
input_dim = adj_matrices[0].size(0)  # Nombre de nœuds
hidden_dim = 64
output_dim = flux_matrices[0].size(0)
learning_rate = 0.001
epochs = 1000
batch_size = 64

# Initialisation du modèle, de la fonction de perte et de l'optimiseur
model_gcn = GCN(input_dim, hidden_dim, output_dim).to(device)  # Transférer le modèle sur GPU
criterion = CustomLoss().to(device)  # Transférer la fonction de perte sur GPU
optimizer = optim.Adam(model_gcn.parameters(), lr=learning_rate)

print(next(model_gcn.parameters()).device)  # Doit afficher 'cuda:0' si le GPU est utilisé

# Transfert des données d'entraînement et de validation sur GPU
train_adj = [adj.to(device) for adj in train_adj]
train_flux = [flux.to(device) for flux in train_flux]
val_adj = [adj.to(device) for adj in val_adj]
val_flux = [flux.to(device) for flux in val_flux]

# Création des DataLoader pour les ensembles d'entraînement et de validation
train_dataset = TensorDataset(torch.stack(train_adj), torch.stack(train_flux))
val_dataset = TensorDataset(torch.stack(val_adj), torch.stack(val_flux))

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Boucle d'entraînement
for epoch in range(epochs):
    model_gcn.train()  # Mettre le modèle en mode entraînement
    epoch_loss = 0

    for batch_idx, (adj_batch, flux_batch) in enumerate(train_loader):
        # Envoyer les lots sur le GPU
        adj_batch = adj_batch.to(device)
        flux_batch = flux_batch.to(device)

        # Propagation avant
        output = model_gcn(adj_batch, adj_batch)  # Utilisation des matrices d'adjacence
        loss = criterion(output, flux_batch)  # Calcul de la perte
        epoch_loss += loss.item()

        # Rétropropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Validation périodique
    if epoch % 20 == 0:
        model_gcn.eval()  # Mettre le modèle en mode évaluation
        val_loss = 0
        with torch.no_grad():  # Pas de calcul des gradients pendant la validation
            for adj_batch, flux_batch in val_loader:
                adj_batch = adj_batch.to(device)
                flux_batch = flux_batch.to(device)

                output = model_gcn(adj_batch, adj_batch)
                loss = criterion(output, flux_batch)
                val_loss += loss.item()

        print(f"Epoch {epoch} | Train Loss: {epoch_loss / len(train_loader)} | Val Loss: {val_loss / len(val_loader)}")

  adj = torch.tensor(adj, dtype=torch.float32)
  flux = torch.tensor(flux, dtype=torch.float32)


AssertionError: NaN in model output!

In [None]:
# Fonction pour prédire la matrice de flux
def predict_flux(model, adj_matrix, device):
    """
    Prédit la matrice de flux pour une matrice d'adjacence donnée.

    Args:
        model: Le modèle GCN entraîné.
        adj_matrix: La matrice d'adjacence du graphe (Tensor).
        device: L'appareil à utiliser (CPU ou GPU).
    Returns:
        Une matrice de flux prédite.
    """
    model.eval()  # Mettre le modèle en mode évaluation

    # Normalisation de la matrice d'adjacence
    normalized_adj = normalize_adj(adj_matrix).to(device)  # Transfert sur GPU

    # Créer un tenseur de caractéristiques initiales (par exemple, une matrice identité si aucun feature n’est donné)
    x = torch.eye(normalized_adj.size(0)).to(device)  # Matrice identité sur GPU

    with torch.no_grad():  # Pas de calcul de gradients
        # Prédiction
        output = model(x, normalized_adj)  # Passer les deux sur GPU
        return output.cpu()  # Retour sur CPU pour analyse

# Index du graphe que nous voulons analyser
graph_index = 250  # Indices en Python commencent à 0, donc 251e élément correspond à index 250

# Récupérer la matrice d'adjacence et de flux
adj_matrix = adj_matrices[graph_index]
flux_matrix = flux_matrices[graph_index]

# Prédiction du flux
predicted_flux = predict_flux(model_gcn, adj_matrix, device)

# Afficher les résultats
print("Matrice de flux réelle :")
print(flux_matrix)

print("Matrice de flux prédite :")
print(predicted_flux)

Epoch 0 | Train Loss: nan | Val Loss: nan
Epoch 20 | Train Loss: nan | Val Loss: nan
Epoch 40 | Train Loss: nan | Val Loss: nan


KeyboardInterrupt: 

In [None]:
station_names = data.iloc[graph_index]["flux_dégradé"].index

In [None]:
# Conversion en numpy pour faciliter l'accès par indices
flux_matrix_np = flux_matrix.numpy()
predicted_flux_np = predicted_flux.numpy()

erreur = 0
n=0
# Comparaison des valeurs là où flux_matrix != 0
print("Comparaison des flux réels et prédits :\n")
for i in range(flux_matrix_np.shape[0]):
    for j in range(flux_matrix_np.shape[1]):
        if flux_matrix_np[i, j] != 0:  # Vérifier uniquement les flux non nuls
            real_value = flux_matrix_np[i, j]
            predicted_value = predicted_flux_np[i, j]
            erreur += abs(predicted_value-real_value)
            n += 1
            print(f"De {station_names[i]} à {station_names[j]} :")
            print(f"  Flux réel    : {real_value}")
            print(f"  Flux prédit  : {predicted_value}\n")
print('Erreur:',erreur/n)

307