In [2]:
import os
import networkx as nx
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.datasets import Planetoid
from torch_geometric.utils import from_networkx
from torch_geometric.utils import subgraph
from torch_geometric.data import Data

  from .autonotebook import tqdm as notebook_tqdm


In [3]:

dataset = Planetoid(root='/tmp/Cora', name='Cora')
data = dataset[0]


folder_path = 'client_subgraphs'
sub_data_list = []

for filename in os.listdir(folder_path):
    if filename.endswith('.gml'):
      
        file_path = os.path.join(folder_path, filename)
        g = nx.read_gml(file_path)

        subgraph_nodes = list(g.nodes)
        subgraph_nodes = [int(node) for node in subgraph_nodes]  # Convert to integer if they are not

        sub_edge_index, _ = subgraph(subgraph_nodes, data.edge_index, relabel_nodes=True)

        sub_data = Data(x=data.x[subgraph_nodes], edge_index=sub_edge_index, y=data.y[subgraph_nodes])
        sub_data_list.append(sub_data)

# for filename in os.listdir(folder_path):
#     if filename.endswith('.gml'):
#         # Read GML file using networkx
#         file_path = os.path.join(folder_path, filename)
#         g = nx.read_gml(file_path)

#         # Convert networkx graph to PyTorch Geometric data
#         sub_data = from_networkx(g)
#         # Ensure node features and labels are set (this will depend on how data is stored in the GML file)

#         # Example: Set dummy features and labels if not present
#         if sub_data.x is None:
#             num_nodes = sub_data.num_nodes
#             sub_data.x = torch.randn((num_nodes, data.num_node_features))  # Replace with actual node features
#         if sub_data.y is None:
#             sub_data.y = torch.randint(0, dataset.num_classes, (sub_data.num_nodes,))  # Replace with actual labels
        
#         sub_data_list.append(sub_data)

In [4]:
print(sub_data_list[0])

Data(x=[17, 1433], edge_index=[2, 58], y=[17])


In [5]:
# #plot subgraph 0 to make sure it looks right
# import matplotlib.pyplot as plt
# import networkx as nx
# from torch_geometric.utils import to_networkx

# G = to_networkx(sub_data_list[82], to_undirected=False)
# plt.figure(figsize=(20,20))
# nx.draw(G, with_labels=True, node_size=15, node_color='g', edge_color='b')
# plt.show()


In [6]:

class GCN(torch.nn.Module):
    def __init__(self, num_features, num_classes):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(num_features, 16)
        self.conv2 = GCNConv(16, num_classes)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index

        # First convolutional layer
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)

        # Second convolutional layer
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)


In [8]:
#split into train and validation
from torch_geometric.data import DataLoader
from sklearn.model_selection import train_test_split

train_data, val_data = train_test_split(sub_data_list, test_size=0.2, random_state=42)

In [13]:
for i in range(0, 100):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = GCN(sub_data.num_node_features, dataset.num_classes).to(device)
    sub_data_train = train_data[i].to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

    model.train()
    for epoch in range(200):
        optimizer.zero_grad()
        out = model(sub_data_train)
        loss = F.nll_loss(out, sub_data_train.y)
        loss.backward()
        optimizer.step()
    # print out metrics
    #calculate final training accuracy
    true_labels = train_data[i].y
    _, pred = out.max(1)
    train_correct = pred.eq(true_labels).sum().item()
    print("final training loss: ", loss.item(), " for subgraph ", i)
    print("final training accuracy: ", train_correct / len(true_labels), " for subgraph ", i)
    print()
    #calculate validation accuracy and loss
    model.eval()
    sub_data_val = val_data[i].to(device)
    out = model(sub_data_val)
    loss = F.nll_loss(out, sub_data_val.y)
    true_labels = val_data[i].y
    _, pred = out.max(1)
    val_correct = pred.eq(true_labels).sum().item()
    
    print("final validation loss: ", loss.item(), " for subgraph ", i)
    print("final validation accuracy: ", val_correct / len(true_labels), " for subgraph ", i)
    print("------------------------------------------------------------")

final training loss:  0.36027586460113525  for subgraph  0
final training accuracy:  0.8888888888888888  for subgraph  0

final validation loss:  2.7909417152404785  for subgraph  0
final validation accuracy:  0.0  for subgraph  0
------------------------------------------------------------
final training loss:  0.24926544725894928  for subgraph  1
final training accuracy:  0.8571428571428571  for subgraph  1

final validation loss:  1.5061702728271484  for subgraph  1
final validation accuracy:  0.3076923076923077  for subgraph  1
------------------------------------------------------------
final training loss:  0.005544556304812431  for subgraph  2
final training accuracy:  1.0  for subgraph  2

final validation loss:  4.897409915924072  for subgraph  2
final validation accuracy:  0.0  for subgraph  2
------------------------------------------------------------
final training loss:  2.382984894211404e-05  for subgraph  3
final training accuracy:  1.0  for subgraph  3

final validatio

IndexError: list index out of range