In [79]:
from tools import *
import os
import torch
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader

import torch.nn.functional as F
from torch_geometric.nn import SAGEConv
from tqdm import tqdm  

In [80]:
graph_json_dir = "graphes_JSON_Complet"  
graph_json_files = [graph_json_dir+"/"+f for f in os.listdir(graph_json_dir) if f.endswith('.json')]

all_graphs = []

for path in graph_json_files:
    node_features_df=extract_node_features_from_json_file(path)
    edges_df=extract_mapped_edges_from_json(path)
    target_df=extract_optimal_repartition_from_json(path)
    node_features_tensor, edge_index_tensor, y_target_tensor = prepare_data_for_GNN(node_features_df, edges_df, target_df)
    data = Data(x=node_features_tensor, edge_index=edge_index_tensor, y=y_target_tensor)

    all_graphs.append(data)


print("finished data preparting & loading")

finished data preparting & loading


In [81]:
N=100
filtered_graphs = []
graphe=all_graphs
for graph in graphe:
    num_classes = len(torch.unique(graph.y))  
    
    if graph.x[0][7] == N:
        # selected_indices = [0, 1, 2, 6]
        # graph.x = graph.x[:, selected_indices]
        filtered_graphs.append(graph)
        
print(f"Selected {len(filtered_graphs)}/{len(all_graphs)} graphs with ",N, " classes")

Selected 47/4998 graphs with  100  classes


In [82]:
filtered_graphs[0].x.shape

torch.Size([4, 8])

In [83]:
from torch.utils.data import random_split

total_graphs = len(filtered_graphs)
train_size = int(0.8 * total_graphs)
val_size = int(0.1 * total_graphs)
test_size = total_graphs - train_size - val_size

train_data, val_data, test_data = random_split(filtered_graphs, [train_size, val_size, test_size])

train_loader = DataLoader(train_data, batch_size=4, shuffle=True)
val_loader = DataLoader(val_data, batch_size=8)
test_loader = DataLoader(test_data, batch_size=4,shuffle=True)

In [None]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import SAGEConv

class GraphSAGE(torch.nn.Module):
    def __init__(self, num_features, hidden_dim=64,hidden_dim_2=64,hidden_dim_3=64,output_layer=N):
        super().__init__()
        self.conv1 = SAGEConv(num_features, hidden_dim)
        self.conv2 = SAGEConv(hidden_dim, hidden_dim_2)
        self.conv3 = SAGEConv(hidden_dim_2, hidden_dim_3)

         
        self.lin = torch.nn.Linear(hidden_dim_3, output_layer)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = torch.relu(self.conv1(x, edge_index))
        # print("After conv1:", x.shape)
        x = torch.dropout(x, p=0.187, train=self.training)
        x = torch.relu(self.conv2(x, edge_index))
        # print("After conv2:", x.shape)
        x = torch.dropout(x, p=0.187, train=self.training)
        x = torch.relu(self.conv3(x, edge_index))
        # print("After conv3:", x.shape)
        x = self.lin(x)
        # print("After linear:", x.shape)
        return x



In [85]:
device = torch.device('cpu')
model = GraphSAGE(num_features=8).to(device)  
optimizer = torch.optim.Adam(model.parameters(), lr=0.0002)


In [None]:
for epoch in range(100 ):
    model.train()
    total_loss = 0
    total_correct = 0
    total_samples = 0

    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()
        out = model(data)
        targets = data.y.long()
        loss = F.cross_entropy(out, targets)
        loss.backward()
        optimizer.step()
        _, pred = out.max(dim=-1)
        correct = (pred == targets).sum().item()
        total_loss += loss.item()
        total_correct += correct
        total_samples += targets.size(0)

    if (epoch % 10 == 0) or (epoch == 99):
        epoch_loss = total_loss / len(train_data)
        epoch_acc = total_correct / total_samples
        print(f'Epoch {epoch+1:3d} | Loss: {epoch_loss:.4f} | Acc: {epoch_acc:.4f}')


Epoch   1 | Loss: 3.5342 | Acc: 0.0031


In [87]:
# Test Phase
def evaluate(model, loader):
    model.eval()
    total_correct = 0
    total_samples = 0
    
    with torch.no_grad():
        for data in loader:
            data = data.to(device)
            out = model(data)
            pred = out.argmax(dim=1)
            total_correct += (pred == data.y).sum().item()
            total_samples += data.y.size(0)
    
    accuracy = total_correct / total_samples
    return accuracy

# Evaluate on all splits
train_acc = evaluate(model, train_loader)
# val_acc = evaluate(model, val_loader)
test_acc = evaluate(model, test_loader)

print('\nFinal Results:')
print(f'Train Accuracy: {train_acc:.4f}')
# print(f'Val Accuracy: {val_acc:.4f}')
print(f'Test Accuracy: {test_acc:.4f}')



Final Results:
Train Accuracy: 0.0016
Test Accuracy: 0.0000


In [88]:
import optuna
import torch
import torch.nn.functional as F
from torch_geometric.nn import SAGEConv

# Define your model class
class GraphSAGE(torch.nn.Module):
    def __init__(self, num_features, hidden_dim, hidden_dim_2, hidden_dim_3, output_layer, dropout_rate):
        super().__init__()
        self.conv1 = SAGEConv(num_features, hidden_dim)
        self.conv2 = SAGEConv(hidden_dim, hidden_dim_2)
        self.conv3 = SAGEConv(hidden_dim_2, hidden_dim_3)
        self.lin = torch.nn.Linear(hidden_dim_3, output_layer)
        self.dropout_rate = dropout_rate

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = torch.relu(self.conv1(x, edge_index))
        x = F.dropout(x, p=self.dropout_rate, training=self.training)
        x = torch.relu(self.conv2(x, edge_index))
        x = F.dropout(x, p=self.dropout_rate, training=self.training)
        x = torch.relu(self.conv3(x, edge_index))
        x = self.lin(x)
        return x


In [89]:
def objective(trial):
    # Suggest hyperparameters
    hidden_dim = trial.suggest_categorical("hidden_dim", [16,8,32, 64, 128,256,512,1024])
    hidden_dim_2 = trial.suggest_categorical("hidden_dim_2", [16,8,32, 64, 128,256,512,1024])
    hidden_dim_3 = trial.suggest_categorical("hidden_dim_3", [16,8,32, 64, 128,256,512,1024])
    dropout_rate = trial.suggest_float("dropout", 0.1, 0.5)
    lr = trial.suggest_loguniform("lr", 1e-4, 1e-2)

    device = torch.device("cpu")
    model = GraphSAGE(num_features=8, hidden_dim=hidden_dim, hidden_dim_2=hidden_dim_2,
                      hidden_dim_3=hidden_dim_3, output_layer=N, dropout_rate=dropout_rate).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    for epoch in range(30):  # keep it short for Optuna
        model.train()
        for data in train_data:
            data = data.to(device)
            optimizer.zero_grad()
            out = model(data)
            loss = F.cross_entropy(out, data.y.long())
            loss.backward()
            optimizer.step()

    # Evaluation
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data in val_data:  # You need to define val_data separately
            data = data.to(device)
            out = model(data)
            pred = out.argmax(dim=1)
            correct += (pred == data.y).sum().item()
            total += data.y.size(0)

    accuracy = correct / total
    return accuracy  # Optuna tries to maximize this


In [90]:
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=20)

print("Best trial:")
print(study.best_trial)


[I 2025-05-09 21:35:28,903] A new study created in memory with name: no-name-a461319f-ecdb-4b01-8b31-870ce7913608
  lr = trial.suggest_loguniform("lr", 1e-4, 1e-2)
[I 2025-05-09 21:35:35,835] Trial 0 finished with value: 0.2631578947368421 and parameters: {'hidden_dim': 128, 'hidden_dim_2': 512, 'hidden_dim_3': 32, 'dropout': 0.11083227566728825, 'lr': 0.0014419126823819045}. Best is trial 0 with value: 0.2631578947368421.
[I 2025-05-09 21:35:43,272] Trial 1 finished with value: 0.21052631578947367 and parameters: {'hidden_dim': 128, 'hidden_dim_2': 512, 'hidden_dim_3': 32, 'dropout': 0.3468501898329367, 'lr': 0.005122203232452111}. Best is trial 0 with value: 0.2631578947368421.
[I 2025-05-09 21:35:53,025] Trial 2 finished with value: 0.3684210526315789 and parameters: {'hidden_dim': 1024, 'hidden_dim_2': 128, 'hidden_dim_3': 128, 'dropout': 0.24999509883109075, 'lr': 0.0003283161727937086}. Best is trial 2 with value: 0.3684210526315789.
[I 2025-05-09 21:35:58,391] Trial 3 finished w

Best trial:
FrozenTrial(number=2, state=TrialState.COMPLETE, values=[0.3684210526315789], datetime_start=datetime.datetime(2025, 5, 9, 21, 35, 43, 274631), datetime_complete=datetime.datetime(2025, 5, 9, 21, 35, 53, 25051), params={'hidden_dim': 1024, 'hidden_dim_2': 128, 'hidden_dim_3': 128, 'dropout': 0.24999509883109075, 'lr': 0.0003283161727937086}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'hidden_dim': CategoricalDistribution(choices=(16, 8, 32, 64, 128, 256, 512, 1024)), 'hidden_dim_2': CategoricalDistribution(choices=(16, 8, 32, 64, 128, 256, 512, 1024)), 'hidden_dim_3': CategoricalDistribution(choices=(16, 8, 32, 64, 128, 256, 512, 1024)), 'dropout': FloatDistribution(high=0.5, log=False, low=0.1, step=None), 'lr': FloatDistribution(high=0.01, log=True, low=0.0001, step=None)}, trial_id=2, value=None)


FrozenTrial(number=14, state=TrialState.COMPLETE, values=[0.6075268817204301], datetime_start=datetime.datetime(2025, 5, 9, 18, 24, 9, 723515), datetime_complete=datetime.datetime(2025, 5, 9, 18, 24, 15, 886578), params={'hidden_dim': 128, 'hidden_dim_2': 64, 'hidden_dim_3': 64, 'dropout': 0.23191629633471522, 'lr': 0.0007840405583017338}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'hidden_dim': CategoricalDistribution(choices=(32, 64, 128)), 'hidden_dim_2': CategoricalDistribution(choices=(32, 64, 128)), 'hidden_dim_3': CategoricalDistribution(choices=(32, 64, 128)), 'dropout': FloatDistribution(high=0.5, log=False, low=0.1, step=None), 'lr': FloatDistribution(high=0.01, log=True, low=0.0001, step=None)}, trial_id=14, value=None)