In [79]:
import torch
import torch.nn as nn
from torch_geometric.nn import VGAE, NNConv
import torch.nn.functional as F
from torch_geometric.loader import DataLoader
import os
from pathlib import Path

In [80]:
def load_graphs(graph_dir):
    # Vérification du dossier
    graph_dir = Path(graph_dir)
    if not graph_dir.exists():
        raise FileNotFoundError(
            f"Le dossier '{graph_dir}' n'existe pas."
        )
    if not graph_dir.is_dir():
        raise NotADirectoryError(
            f"'{graph_dir}' n'est pas un dossier valide."
        )
    graphs = []
    # Charger tous les fichiers .pt
    for file_path in sorted(graph_dir.glob("*.pt")):
        try:
            g = torch.load(file_path)
            graphs.append(g)
            print(f"Graphe chargé : {file_path}")
        except Exception as e:
            print(f"Erreur lors du chargement de {file_path} : {e}")
    return graphs

# Charger les graphes
train_graphs_dir = "graphs/train"
test_graphs_dir = "graphs/test"

train_graphs = load_graphs(train_graphs_dir)
test_graphs = load_graphs(test_graphs_dir)

Graphe chargé : graphs/train/graph_batch_1.pt
Graphe chargé : graphs/train/graph_batch_10.pt
Graphe chargé : graphs/train/graph_batch_11.pt
Graphe chargé : graphs/train/graph_batch_12.pt
Graphe chargé : graphs/train/graph_batch_2.pt
Graphe chargé : graphs/train/graph_batch_3.pt
Graphe chargé : graphs/train/graph_batch_4.pt
Graphe chargé : graphs/train/graph_batch_5.pt
Graphe chargé : graphs/train/graph_batch_6.pt
Graphe chargé : graphs/train/graph_batch_7.pt
Graphe chargé : graphs/train/graph_batch_8.pt
Graphe chargé : graphs/train/graph_batch_9.pt
Graphe chargé : graphs/test/graph_batch_1.pt
Graphe chargé : graphs/test/graph_batch_2.pt
Graphe chargé : graphs/test/graph_batch_3.pt
Graphe chargé : graphs/test/graph_batch_4.pt
Graphe chargé : graphs/test/graph_batch_5.pt
Graphe chargé : graphs/test/graph_batch_6.pt
Graphe chargé : graphs/test/graph_batch_7.pt


  g = torch.load(file_path)


## Split temporel (train / val)

In [81]:
num_graphs = len(train_graphs)

train_ratio = 0.8

train_end = int(train_ratio * num_graphs)

val_graphs   = train_graphs[train_end:]
train_graphs = train_graphs[:train_end]


# Créer un DataLoader pour l'entraînement
# batch_size = nombre de graphes dans chaque batch
batch_size = 1

train_loader = DataLoader(
    train_graphs,
    batch_size=batch_size,
    shuffle=False
)

val_loader = DataLoader(
    val_graphs,
    batch_size=batch_size,
    shuffle=False
)

test_loader = DataLoader(
    test_graphs,
    batch_size=batch_size,
    shuffle=False
)

## Normalisation des features

In [82]:
# calcul mean/std sur tous les noeuds du train
xs = torch.cat([g.x for g in train_graphs], dim=0)
mean = xs.mean(dim=0)
std = xs.std(dim=0) + 1e-6

def normalize_graph(g):
    g.x = (g.x - mean) / std
    return g

train_graphs = [normalize_graph(g) for g in train_graphs]
val_graphs   = [normalize_graph(g) for g in val_graphs]

## Définir l’encodeur (avec NNConv)

In [83]:
class NNConvEncoder(nn.Module):
    def __init__(self, in_channels, out_channels, edge_dim):
        super(NNConvEncoder, self).__init__()

        self.out_channels = out_channels
        # MLP pour transformer edge_attr en matrice de poids pour NNConv
        self.edge_mlp = nn.Sequential(
            nn.Linear(edge_dim, in_channels * 2 * out_channels),
            nn.ReLU()
        )

        # première couche NNConv
        self.conv1 = NNConv(
            in_channels=in_channels,
            out_channels=2 * out_channels,
            nn=self.edge_mlp,
            aggr='mean'
        )

        # couches mu et logvar
        self.conv_mu = NNConv(
            in_channels=2 * out_channels,
            out_channels=out_channels,
            nn=nn.Sequential(
                nn.Linear(edge_dim, 2 * out_channels * out_channels),
                nn.ReLU()
            ),
            aggr='mean'
        )

        self.conv_logvar = NNConv(
            in_channels=2 * out_channels,
            out_channels=out_channels,
            nn=nn.Sequential(
                nn.Linear(edge_dim, 2 * out_channels * out_channels),
                nn.ReLU()
            ),
            aggr='mean'
        )

    def forward(self, x, edge_index, edge_attr):
        # Couche 1
        x = F.relu(self.conv1(x, edge_index, edge_attr))
        # Mu et LogVar pour VGAE
        mu = self.conv_mu(x, edge_index, edge_attr)
        logvar = self.conv_logvar(x, edge_index, edge_attr)
        return mu, logvar


## Créer le modèle VGAE

In [84]:
# Dimensions extraites depuis UN graphe (tous ont la même structure)
in_channels = train_graphs[0].x.shape[1]        # features par nœud
out_channels = 32                               # dimension latente
edge_dim = train_graphs[0].edge_attr.shape[1]   # features par arête

encoder = NNConvEncoder(in_channels, out_channels, edge_dim)
model = VGAE(encoder)


## Définir l’optimiseur

In [85]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


## Détecter le device (GPU si dispo)

In [86]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")
device = torch.device("cpu")
# Déplacer le modèle
model = model.to(device)

Using device: cuda


## Early Stopping

In [87]:
patience = 10  # nombre d'époques sans amélioration
best_loss = float('inf')
counter = 0

## Entraînement du modèle

In [None]:
num_epochs = 200

for epoch in range(1, num_epochs + 1):
    model.train()
    train_loss = 0.0
    
    # ===== ENTRAÎNEMENT SUR LE PASSÉ =====
    for t, data in enumerate(train_loader):
        assert data.edge_index.dtype == torch.long
        assert data.edge_index.min() >= 0
        assert data.edge_index.max() < data.num_nodes
        # Chaque data = 1 graphe temporel
        data = data.to(device)

        # Sécurité NNConv
        assert data.edge_attr is not None
        assert data.edge_attr.dim() == 2
        assert data.edge_attr.size(0) == data.edge_index.size(1)
        data.edge_attr = data.edge_attr.float()

        optimizer.zero_grad()
        # Encodage VGAE (NNConv utilise edge_attr)
        z = model.encode(
            data.x,
            data.edge_index,
            data.edge_attr
        )
        # Loss VGAE = reconstruction + KL
        recon_loss = model.recon_loss(z, data.edge_index) / data.num_nodes  # normalisation
        kl_loss = model.kl_loss() / data.num_nodes  # normalisation
        loss = recon_loss + kl_loss
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    # ===== VALIDATION SUR LE FUTUR (SANS BACKPROP) =====
    model.eval()
    val_loss = 0.0

    with torch.no_grad():
        for data in val_loader:
            data = data.to(device)
            data.edge_attr = data.edge_attr.float()

            z = model.encode(
                data.x,
                data.edge_index,
                data.edge_attr
            )

            recon_loss = model.recon_loss(z, data.edge_index) / data.num_nodes  # normalisation
            kl_loss = model.kl_loss() / data.num_nodes

            val_loss += (recon_loss + kl_loss).item()

    print(
        f"Epoch {epoch:03d} | "
        f"Train Loss: {train_loss:.4f} | "
        f"Val Loss: {val_loss:.4f}"
    )

    # ===== EARLY STOPPING TEMPOREL =====
    if val_loss < best_loss:
        best_loss = val_loss
        counter = 0
        torch.save(model.state_dict(), "best_vgae_nnconv.pt")
    else:
        counter += 1
        if counter >= patience:
            print(f"Early stopping triggered at epoch {epoch}")
            break


Epoch 001 | Train Loss: 94408.2759 | Val Loss: 37110.8477


## Évaluer sur le graphe test

In [None]:
# Charger le meilleur modèle
model.load_state_dict(torch.load("best_vgae_nnconv.pt"))
model.eval()

test_loss = 0
with torch.no_grad():
    for data in test_loader:
        data = data.to(device)

        z = model.encode(
            data.x,
            data.edge_index,
            data.edge_attr
        )

        recon_loss = model.recon_loss(z, data.edge_index) / data.num_nodes
        kl_loss = model.kl_loss() / data.num_nodes

        test_loss += (recon_loss + kl_loss).item()

print(f"Final Test Loss: {test_loss:.4f}")



  model.load_state_dict(torch.load("best_vgae_nnconv.pt"))


RuntimeError: Expected size for first two dimensions of batch2 tensor to be: [113290, 21] but got: [113290, 6].