This notebook shows how a GCN/GraphSAGE model is trained to compute Node betweenness centrality on different graphs

In [15]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = GCNConv(dataset.num_features, 16)
        self.conv2 = GCNConv(16, dataset.num_classes)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)

        # output as multiclass target
        #return F.log_softmax(x, dim=1)
        
        # output as regression target
        return x
    
    


In [20]:
import networkx as nx
import time
import torch
from torch_geometric.data import DataLoader
import importlib
import torch
from torch_geometric.data import DataLoader
import networkx as nx
from torch_geometric.data import Data
import pickle

def loadDataset(collection, name=None):
    try:
        # import datasets
        themodule = importlib.import_module("torch_geometric.datasets")
        # get the function corresponding to collection
        method_to_call = getattr(themodule, collection)
        if name:
            return method_to_call(root='./data/'+str(collection), name=name)
        else:
            return method_to_call(root='./data/'+str(collection)) 
    except:
        # custom module
        method_to_call = globals()[collection]
       
        if name:
            return method_to_call(root='./data/'+str(collection), name=name)
        else:
            return method_to_call(root='./data/'+str(collection)) 
        




def transformMask(mask):
    train_mask = []
    i = 0
    for pick in mask:
        if pick[0]==1:
            train_mask.append(i)
        i+=1
    return train_mask


def shuffleTrainTestMasks(data, trainpct = 0.7):
    ysize = list(data.y.size())[0]
    data.train_mask = torch.zeros(ysize,1, dtype=torch.long)
    data.train_mask[int(ysize*trainpct):] = 1
    data.train_mask = data.train_mask[torch.randperm(ysize)]
    data.test_mask = torch.ones(ysize,1, dtype=torch.long) - data.train_mask
    
    data.train_mask = transformMask(data.train_mask)
    data.test_mask = transformMask(data.test_mask)
  

def shuffleTrainTestValMasks(data, trainpct = 0.7, valpct = 0.2):

    ysize = list(data.y.size())[0]
    #print("total ", ysize)
    #print(" train ",int(ysize*trainpct)-int(ysize*trainpct*valpct))
    #print(" val ",int(ysize*trainpct*valpct))
    #print(" test ",int(ysize*(1- trainpct) ))
    data.train_mask = torch.zeros(ysize,1, dtype=torch.long)
    data.train_mask[:int(ysize*trainpct)] = 1
    data.train_mask = data.train_mask[torch.randperm(ysize)]
    #print(" train sum ",data.train_mask.sum())
    data.test_mask = torch.ones(ysize,1, dtype=torch.long) - data.train_mask
    #print(" test sum ",data.test_mask.sum())
    
    # transform to list of indexes
    data.train_mask = transformMask(data.train_mask)
    data.test_mask = transformMask(data.test_mask)
    
    data.val_mask = data.train_mask[:int(ysize*trainpct*valpct)]
    data.train_mask = data.train_mask[int(ysize*trainpct*valpct):]

    
    #print(data.train_mask)
    #print(data.val_mask)
    #print(data.test_mask)
    
    

def trainTestEval(dataset, iterations=1, batch_size=32):
    loader = DataLoader(dataset,  shuffle=False)
    i = 0
    print(loader)
    print(dir(loader))
    
    G = dataset.data
    print(G)
    start = time.time()


    # 1.  prepare model
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    #print("using ",device)
    model = Net().to(device)
    data = G.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
    model.train()

    # 2.  create a train_mask, and a test_mask (val_mask for further experiments)
    #shuffleTrainTestMasks(data)
    #shuffleTrainTestValMasks(data)
    shuffleTrainTestMasks(data)

    # 3. train some epochs
    for epoch in range(200):
        optimizer.zero_grad()
        out = model(data)
        loss = F.mse_loss(out[data.train_mask], data.y[data.train_mask])
        loss.backward()
        optimizer.step()
        if epoch % 25 == 0 :
            print("epoch-loss: ",epoch, loss)

    # 4. Model evaluation
    model.eval()
    #  classification in a multiclass setting
    #_, pred = model(data).max(dim=1)
    #correct = pred[data.test_mask].eq(data.y[data.test_mask]).sum().item()
    #acc = correct / data.test_mask.sum().item()
    #print('Accuracy: {:.4f}'.format(acc))


    # regression 
    pred = model(data)
    print("target: ",data.y[data.test_mask])
    print("prediction: ",pred[data.test_mask])
    #print(pred[data.test_mask].type())
    #print(data.y[data.test_mask].type())
    # prepare the normalized mean root squared error
    t = data.y[data.test_mask]
    y = pred[data.test_mask]
    nrmse = torch.sum((t - y) ** 2)/len(data.test_mask)
    nrmse = nrmse.sqrt()
    print("RMSE: ",nrmse)

    #m = torch.mean(t)
    #print("mean",m)
    #tmax = torch.max(t)
    #tmin = torch.min(t)
    #sd = tmax-tmin
    #print("sd",sd)
    #nrmse = (nrmse - m)/sd
    #print("NRMSE:",nrmse)


    endtime = time.time()
    print("Total train-test time: "+str(endtime-start))

    #i+=1
    #if i==1:
    #    break

In [21]:
class MyOwnDataset2():
    def __init__(self,  root, name, transform=None, pre_transform=None):
        f = open(name, 'rb')
        self.data = pickle.load(f) 
        #print(self.data.num_features)
        self.num_features = self.data.num_features
        self.num_classes = 1
        f.close()
        
        

## Testing the Net for node Betweenness

In [24]:
# load the dataset examples---------------------------------------

#PPI
#dataset = loadDataset('PPI')
#QM7b
#dataset = loadDataset('QM7b')
#MUTAG
#dataset = loadDataset(collection='Entities',name='MUTAG')
#ENZYMES FROM TUDataset
#dataset = loadDataset(collection='TUDataset',name='ENZYMES')
# Cora
#dataset = loadDataset(collection='Planetoid',name='Cora')

import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = GCNConv(dataset.num_features, 16)
        self.conv2 = GCNConv(16, dataset.num_classes)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)

        # output as multiclass target
        #return F.log_softmax(x, dim=1)
        
        # output as regression target
        return x
    
    

dataset = loadDataset(collection='MyOwnDataset2', name='er_1000_0_45_nb.pickle')
#print(dataset.data.y)

trainTestEval(dataset, iterations=2, batch_size=500)

<torch_geometric.data.dataloader.DataLoader object at 0x7fb41e4c0fd0>
['_DataLoader__initialized', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__len__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'batch_sampler', 'batch_size', 'collate_fn', 'dataset', 'drop_last', 'num_workers', 'pin_memory', 'sampler', 'timeout', 'worker_init_fn']
Data(edge_index=[2, 224581], x=[1000, 1], y=[1000])
epoch-loss:  0 tensor(2.2785, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  25 tensor(0.0096, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  50 tensor(0.0097, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  75 tensor(0.0081, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  100 tensor(0.0120, device='cuda:

In [34]:
import torch
import torch.nn.functional as F
import torch.nn as nn
from torch_geometric.nn import GCNConv

class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = GCNConv(dataset.num_features, 300 )
        #self.conv2 = GCNConv(16, dataset.num_classes)
        self.fc1 = nn.Linear(300, 100)
        self.fc2 = nn.Linear(100, dataset.num_features)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        # 2 fc layers
        x = F.relu(self.fc1(x))
        x = self.fc2(x)

        # output as regression target
        return x
    
    

dataset = loadDataset(collection='MyOwnDataset2', name='er_1000_0_45_nb.pickle')
#print(dataset.data.y)

trainTestEval(dataset, iterations=2, batch_size=500)
# result has improved! RMSE= 0.0125

<torch_geometric.data.dataloader.DataLoader object at 0x7fb41e498710>
['_DataLoader__initialized', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__len__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'batch_sampler', 'batch_size', 'collate_fn', 'dataset', 'drop_last', 'num_workers', 'pin_memory', 'sampler', 'timeout', 'worker_init_fn']
Data(edge_index=[2, 224581], x=[1000, 1], y=[1000])
epoch-loss:  0 tensor(0.0101, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  25 tensor(0.0002, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  50 tensor(4.6451e-05, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  75 tensor(5.2031e-06, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  100 tensor(2.3837e-06, d

In [35]:
import torch
import torch.nn.functional as F
import torch.nn as nn
from torch_geometric.nn import GCNConv

class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = GCNConv(dataset.num_features, 150 )
        #self.conv2 = GCNConv(16, dataset.num_classes)
        self.fc1 = nn.Linear(150, 50)
        self.fc2 = nn.Linear(50, dataset.num_features)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        # 2 fc layers
        x = F.relu(self.fc1(x))
        x = self.fc2(x)

        # output as regression target
        return x
    
dataset = loadDataset(collection='MyOwnDataset2', name='er_1000_0_45_nb.pickle')
#print(dataset.data.y)

trainTestEval(dataset, iterations=2, batch_size=500)
# result has improved! RMSE= 0.0011

<torch_geometric.data.dataloader.DataLoader object at 0x7fb41e498b00>
['_DataLoader__initialized', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__len__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'batch_sampler', 'batch_size', 'collate_fn', 'dataset', 'drop_last', 'num_workers', 'pin_memory', 'sampler', 'timeout', 'worker_init_fn']
Data(edge_index=[2, 224581], x=[1000, 1], y=[1000])
epoch-loss:  0 tensor(0.0014, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  25 tensor(1.4312e-06, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  50 tensor(1.0847e-07, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  75 tensor(2.9290e-08, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  100 tensor(1.1245e-0

In [37]:
import torch
import torch.nn.functional as F
import torch.nn as nn
from torch_geometric.nn import GCNConv

class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = GCNConv(dataset.num_features, 300 )
        #self.conv2 = GCNConv(16, dataset.num_classes)
        self.fc1 = nn.Linear(300, 100)
        self.fc2 = nn.Linear(100, dataset.num_features)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        # 2 fc layers
        x = F.relu(self.fc1(x))
        x = self.fc2(x)

        # output as regression target
        return x
    
    

dataset = loadDataset(collection='MyOwnDataset2', name='er_1000_0_15_nb.pickle')
#print(dataset.data.y)

trainTestEval(dataset, iterations=2, batch_size=500)
# result has improved! RMSE= 

<torch_geometric.data.dataloader.DataLoader object at 0x7fb41e45f470>
['_DataLoader__initialized', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__len__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'batch_sampler', 'batch_size', 'collate_fn', 'dataset', 'drop_last', 'num_workers', 'pin_memory', 'sampler', 'timeout', 'worker_init_fn']
Data(edge_index=[2, 74784], x=[1000, 1], y=[1000])
epoch-loss:  0 tensor(0.0007, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  25 tensor(6.7004e-05, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  50 tensor(4.7210e-06, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  75 tensor(1.0790e-06, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  100 tensor(6.9518e-07

In [36]:
import torch
import torch.nn.functional as F
import torch.nn as nn
from torch_geometric.nn import GCNConv

class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = GCNConv(dataset.num_features, 150 )
        #self.conv2 = GCNConv(16, dataset.num_classes)
        self.fc1 = nn.Linear(150, 50)
        self.fc2 = nn.Linear(50, dataset.num_features)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        # 2 fc layers
        x = F.relu(self.fc1(x))
        x = self.fc2(x)

        # output as regression target
        return x
    
dataset = loadDataset(collection='MyOwnDataset2', name='er_1000_0_15_nb.pickle')
#print(dataset.data.y)

trainTestEval(dataset, iterations=2, batch_size=500)
# result has improved! RMSE= 0.0124

<torch_geometric.data.dataloader.DataLoader object at 0x7fb41e498b00>
['_DataLoader__initialized', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__len__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'batch_sampler', 'batch_size', 'collate_fn', 'dataset', 'drop_last', 'num_workers', 'pin_memory', 'sampler', 'timeout', 'worker_init_fn']
Data(edge_index=[2, 74784], x=[1000, 1], y=[1000])
epoch-loss:  0 tensor(0.0016, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  25 tensor(0.0001, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  50 tensor(4.0889e-06, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  75 tensor(2.9083e-06, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  100 tensor(1.7660e-06, de