In [47]:
from tools import *
import os
import torch
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader

import torch.nn.functional as F
from torch_geometric.nn import SAGEConv
from tqdm import tqdm  

In [48]:
graph_json_dir = "graphes_JSON_Complet"  
graph_json_files = [graph_json_dir+"/"+f for f in os.listdir(graph_json_dir) if f.endswith('.json')]

all_graphs = []

for path in graph_json_files:
    node_features_df=extract_node_features_from_json_file(path)
    edges_df=extract_mapped_edges_from_json(path)
    target_df=extract_optimal_repartition_from_json(path)
    node_features_tensor, edge_index_tensor, y_target_tensor = prepare_data_for_GNN(node_features_df, edges_df, target_df)
    data = Data(x=node_features_tensor, edge_index=edge_index_tensor, y=y_target_tensor)

    all_graphs.append(data)


print("finished data preparting & loading")

finished data preparting & loading


In [49]:
N=4
filtered_graphs = []

for graph in all_graphs:
    num_classes = len(torch.unique(graph.y))  
    
    if graph.x[0][8] == N:
        # selected_indices = [0, 1, 2, 3,4]
        # graph.x = graph.x[:, selected_indices]
        filtered_graphs.append(graph)
        
print(f"Selected {len(filtered_graphs)}/{len(all_graphs)} graphs with ",N, " classes")

Selected 46/4998 graphs with  4  classes


In [55]:
max_nodes = max(graph.x.shape[0] for graph in filtered_graphs)
max_nodes
filtered_graphs[41].edge_index

tensor([[  0,   0,   0,   0,   1,   1,   1,   2,   2,   2,   3,   4,   4,   5,
           6,   6,   6,   6,   6,   7,   7,   7,   7,   7,   7,   9,   9,  10,
          11,  11,  11,  11,  11,  12,  13,  13,  13,  13,  13,  13,  13,  13,
          14,  15,  16,  16,  16,  16,  17,  17,  19,  19,  19,  19,  20,  20,
          21,  21,  21,  21,  22,  22,  22,  22,  22,  23,  24,  24,  25,  26,
          27,  27,  27,  28,  28,  29,  29,  29,  30,  31,  31,  31,  31,  32,
          32,  32,  32,  33,  33,  33,  35,  35,  36,  36,  36,  37,  37,  37,
          37,  38,  38,  39,  40,  40,  40,  40,  40,  40,  40,  40,  41,  41,
          41,  41,  42,  42,  43,  43,  43,  44,  45,  45,  45,  46,  46,  46,
          47,  48,  49,  49,  49,  49,  49,  50,  51,  51,  51,  52,  52,  52,
          52,  53,  53,  54,  54,  55,  56,  56,  57,  57,  58,  59,  59,  60,
          60,  60,  60,  61,  61,  61,  61,  62,  63,  63,  65,  65,  66,  66,
          66,  67,  67,  68,  68,  68,  68,  69,  69

In [51]:
from torch.utils.data import random_split

total_graphs = len(filtered_graphs)
train_size = int(0.8 * total_graphs)
val_size = int(0.1 * total_graphs)
test_size = total_graphs - train_size - val_size

train_data, val_data, test_data = random_split(filtered_graphs, [train_size, val_size, test_size])

train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
val_loader = DataLoader(val_data, batch_size=8)
test_loader = DataLoader(test_data, batch_size=16,shuffle=True)

In [None]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import SAGEConv

class GraphSAGE(torch.nn.Module):
    def __init__(self, num_nodes, embedding_dim=64, hidden_dim=64, hidden_dim_2=64, hidden_dim_3=64, output_layer=4):
        super().__init__()
        self.embedding = torch.nn.Embedding(num_nodes, embedding_dim)

        self.conv1 = SAGEConv(embedding_dim, hidden_dim)
        self.conv2 = SAGEConv(hidden_dim, hidden_dim_2)
        self.conv3 = SAGEConv(hidden_dim_2, hidden_dim_3)
        self.conv4 = SAGEConv(hidden_dim_3, hidden_dim_3)
        self.conv5 = SAGEConv(hidden_dim_3, hidden_dim_3)
        
        self.lin = torch.nn.Linear(hidden_dim_3, output_layer)

    def forward(self, data):
        x = self.embedding(torch.arange(data.num_nodes, device=data.edge_index.device))
        edge_index = data.edge_index

        x = F.relu(self.conv1(x, edge_index))
        x = F.dropout(x, p=0.22, training=self.training)
        x = F.relu(self.conv2(x, edge_index))
        x = F.dropout(x, p=0.22, training=self.training)
        x = F.relu(self.conv3(x, edge_index))
        x = F.dropout(x, p=0.22, training=self.training)
        x = F.relu(self.conv4(x, edge_index))
        x = F.dropout(x, p=0.22, training=self.training)
        x = F.relu(self.conv5(x, edge_index))
        x = self.lin(x)
        return x


In [53]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = GraphSAGE(num_nodes=max_nodes, output_layer=N).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.005, weight_decay=5e-4)

data = data.to(device)

In [54]:
for epoch in range(200):
    model.train()
    total_loss = 0
    total_correct = 0
    total_samples = 0

    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()
        
        out = model(data)  # model now uses learnable embeddings internally
        targets = data.y.long()

        loss = F.cross_entropy(out, targets)
        loss.backward()
        optimizer.step()

        _, pred = out.max(dim=-1)
        correct = (pred == targets).sum().item()
        total_loss += loss.item()
        total_correct += correct
        total_samples += targets.size(0)

    if (epoch % 10 == 0) or (epoch == 199):
        epoch_loss = total_loss / len(train_data)
        epoch_acc = total_correct / total_samples
        print(f'Epoch {epoch+1:3d} | Loss: {epoch_loss:.4f} | Acc: {epoch_acc:.4f}')


IndexError: Found indices in 'edge_index' that are larger than 130 (got 749). Please ensure that all indices in 'edge_index' point to valid indices in the interval [0, 131) in your node feature matrix and try again.