In [11]:
from tools import *
import os
import torch
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader
import torch
import torch.nn.functional as F
from torch_geometric.nn import GATConv
from torch_geometric.loader import DataLoader
from sklearn.metrics import accuracy_score
import torch.nn.functional as F
from tqdm import tqdm  

In [12]:
graph_json_dir = "graphes_JSON_Complet"

# Sort the listing: os.listdir returns entries in arbitrary order, which would make
# the order of `all_graphs` (and therefore any later split) differ between runs.
graph_json_files = sorted(
    f"{graph_json_dir}/{name}"
    for name in os.listdir(graph_json_dir)
    if name.endswith('.json')
)

all_graphs = []

# Build one PyG `Data` object per JSON file.
# The extract_* / prepare_data_for_GNN helpers are presumably provided by
# `from tools import *` — their exact output schema is not visible here.
for path in graph_json_files:
    node_features_df = extract_node_features_from_json_file(path)
    edges_df = extract_mapped_edges_from_json(path)
    target_df = extract_optimal_repartition_from_json(path)

    node_features_tensor, edge_index_tensor, y_target_tensor = prepare_data_for_GNN(
        node_features_df, edges_df, target_df
    )
    all_graphs.append(
        Data(x=node_features_tensor, edge_index=edge_index_tensor, y=y_target_tensor)
    )


print("finished data preparting & loading")

finished data preparting & loading


In [34]:
N = 2  # value the selection column must equal; also passed as the model's output size below

# Keep only the graphs whose first node has feature column 7 equal to N.
# NOTE(review): the message below reports "classes", but the selection is made on
# feature column 7 of node 0, not on the labels in graph.y — presumably that column
# stores the graph's number of classes/partitions; confirm against `tools`.
# `.item()` turns the 0-dim tensor into a Python number so the comparison yields a
# plain bool instead of relying on tensor truthiness.
filtered_graphs = [graph for graph in all_graphs if graph.x[0][7].item() == N]

print(f"Selected {len(filtered_graphs)}/{len(all_graphs)} graphs with ",N, " classes")

Selected 56/4998 graphs with  2  classes


In [35]:
from torch.utils.data import random_split

# Fixed seed so the train/val/test membership is identical on every run;
# without it the reported accuracies are not comparable between executions.
SPLIT_SEED = 42

total_graphs = len(filtered_graphs)
train_size = int(0.8 * total_graphs)
val_size = int(0.1 * total_graphs)
# The test split takes the remainder so the three sizes always sum to total_graphs.
test_size = total_graphs - train_size - val_size

train_data, val_data, test_data = random_split(
    filtered_graphs,
    [train_size, val_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Only the training loader shuffles; validation and test keep a fixed order.
train_loader = DataLoader(train_data, batch_size=8, shuffle=True)
val_loader = DataLoader(val_data, batch_size=8)
test_loader = DataLoader(test_data, batch_size=8)

In [36]:
# Sanity check: show the first row of the node-feature matrix `x` of the first training graph.
train_data[0].x[0]

tensor([1.0000, 4.0000, 2.0000, 2.0800, 3.2900, 0.4125, 0.4146, 2.0000])

In [None]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GATConv
from torch_geometric.data import DataLoader

# Define the GAT model
class GAT(torch.nn.Module):
    """Two-layer graph attention network made of two stacked ``GATConv`` layers.

    Args:
        in_channels: Width of the input node features.
        hidden_channels: Per-head output width of the first attention layer.
        out_channels: Output width of the second layer (one score per class).
        heads: Number of attention heads in the first layer. The second layer
            always uses a single head and takes ``hidden_channels * heads`` inputs.
        dropout: Probability used both for the feature dropout applied in
            ``forward`` and for the ``dropout`` argument of each ``GATConv``.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, heads=1, dropout=0.6):
        super().__init__()
        self.dropout = dropout
        # Layers are created in the same order as before so parameter
        # initialisation consumes the RNG identically.
        self.gat1 = GATConv(in_channels, hidden_channels, heads=heads, dropout=dropout)
        self.gat2 = GATConv(
            hidden_channels * heads, out_channels, heads=1, concat=True, dropout=dropout
        )

    def forward(self, data):
        """Return the second layer's raw output for ``data``.

        ``data`` must expose ``x`` and ``edge_index``. No softmax is applied here.
        """
        features = F.dropout(data.x, p=self.dropout, training=self.training)
        hidden = F.elu(self.gat1(features, data.edge_index))
        hidden = F.dropout(hidden, p=self.dropout, training=self.training)
        return self.gat2(hidden, data.edge_index)


def train(model, loader, optimizer, device):
    """Run one optimisation pass over ``loader`` and report loss and accuracy.

    Args:
        model: Module called as ``model(batch)`` that returns per-row class scores.
        loader: Sized iterable of batches exposing ``.to(device)`` and ``.y``.
        optimizer: Optimizer bound to ``model``'s parameters.
        device: Device each batch is moved to before the forward pass.

    Returns:
        ``(mean_batch_loss, accuracy)``: the cross-entropy loss averaged over the
        number of batches, and the fraction of rows of ``y`` predicted correctly.
        Accuracy is measured on the outputs produced while the model is in
        training mode, before each parameter update.
    """
    model.train()
    running_loss, hits, seen = 0, 0, 0

    for batch in loader:
        batch = batch.to(device)

        optimizer.zero_grad()
        logits = model(batch)
        batch_loss = F.cross_entropy(logits, batch.y)
        batch_loss.backward()
        optimizer.step()

        predictions = logits.argmax(dim=1)
        running_loss += batch_loss.item()
        hits += (predictions == batch.y).sum().item()
        seen += batch.y.size(0)

    mean_loss = running_loss / len(loader)
    accuracy = hits / seen
    return mean_loss, accuracy

# Evaluation function
def evaluate(model, loader, device):
    """Compute classification accuracy of ``model`` over ``loader`` without gradients.

    Args:
        model: Module called as ``model(batch)`` that returns per-row class scores.
        loader: Iterable of batches exposing ``.to(device)`` and ``.y``.
        device: Device each batch is moved to before the forward pass.

    Returns:
        Fraction of rows of ``y`` whose arg-max prediction equals the label, or
        ``float('nan')`` when the loader yields no rows. An empty loader happens
        when the validation split rounds down to zero graphs, and previously
        raised ``ZeroDivisionError``.
    """
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for data in loader:
            data = data.to(device)
            pred = model(data).argmax(dim=1)
            correct += (pred == data.y).sum().item()
            total += data.y.size(0)

    if total == 0:
        # Accuracy is undefined on an empty set; NaN keeps callers' float formatting working.
        return float("nan")
    return correct / total


In [38]:
#  Example usage
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Read the input width from the data instead of hard-coding 8, so the model keeps
# matching the graphs if the node-feature columns are changed upstream.
num_node_features = train_data[0].x.size(1)
model = GAT(in_channels=num_node_features, hidden_channels=64, out_channels=N, heads=8).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.005, weight_decay=5e-4)

# Uses train_loader / val_loader built in the split cell.
for epoch in range(1, 101):
    loss, acc = train(model, train_loader, optimizer, device)
    val_acc = evaluate(model, val_loader, device)

    # Log the first epoch and then every tenth one.
    if epoch % 10 == 0 or epoch == 1:
        print(f"Epoch {epoch:03d} | Loss: {loss:.4f} | Train Acc: {acc:.4f} | Val Acc: {val_acc:.4f}")


Epoch 001 | Loss: 1.5696 | Train Acc: 0.4861 | Val Acc: 0.6667
Epoch 010 | Loss: 0.8851 | Train Acc: 0.5139 | Val Acc: 0.6667
Epoch 020 | Loss: 0.8775 | Train Acc: 0.5206 | Val Acc: 0.3333
Epoch 030 | Loss: 0.8555 | Train Acc: 0.5177 | Val Acc: 0.6667
Epoch 040 | Loss: 0.7379 | Train Acc: 0.5120 | Val Acc: 0.6667
Epoch 050 | Loss: 0.8096 | Train Acc: 0.5139 | Val Acc: 0.6667
Epoch 060 | Loss: 0.7761 | Train Acc: 0.5475 | Val Acc: 0.6667
Epoch 070 | Loss: 0.7775 | Train Acc: 0.5216 | Val Acc: 0.6667
Epoch 080 | Loss: 0.7850 | Train Acc: 0.5264 | Val Acc: 0.6667
Epoch 090 | Loss: 0.8473 | Train Acc: 0.5187 | Val Acc: 0.6667
Epoch 100 | Loss: 0.9089 | Train Acc: 0.5235 | Val Acc: 0.6667
