This notebook shows how a GCN/GraphSAGE model is trained to compute Node betweenness centrality on different graphs

In [1]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = GCNConv(dataset.num_features, 16)
        self.conv2 = GCNConv(16, dataset.num_classes)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)

        # output as multiclass target
        #return F.log_softmax(x, dim=1)
        
        # output as regression target
        return x
    
    


In [40]:
import networkx as nx
import time
import torch
from torch_geometric.data import DataLoader
import importlib
import torch
from torch_geometric.data import DataLoader
import networkx as nx
from torch_geometric.data import Data
import pickle

def loadDataset(collection, name=None):
    try:
        # import datasets
        themodule = importlib.import_module("torch_geometric.datasets")
        # get the function corresponding to collection
        method_to_call = getattr(themodule, collection)
        if name:
            return method_to_call(root='./data/'+str(collection), name=name)
        else:
            return method_to_call(root='./data/'+str(collection)) 
    except:
        # custom module
        method_to_call = globals()[collection]
       
        if name:
            return method_to_call(root='./data/'+str(collection), name=name)
        else:
            return method_to_call(root='./data/'+str(collection)) 
        


def shuffleTrainTestMasks(data, trainpct = 0.7):
    ysize = list(data.y.size())[0]
    data.train_mask = torch.zeros(ysize,1, dtype=torch.long)
    data.train_mask[int(ysize*trainpct):] = 1
    data.train_mask = data.train_mask[torch.randperm(ysize)]
    data.test_mask = torch.ones(ysize,1, dtype=torch.long) - data.train_mask
  

def transformMask(mask):
    train_mask = []
    i = 0
    for pick in mask:
        if pick[0]==1:
            train_mask.append(i)
        i+=1
    return train_mask
    
def shuffleTrainTestValMasks(data, trainpct = 0.7, valpct = 0.2):

    ysize = list(data.y.size())[0]
    #print("total ", ysize)
    #print(" train ",int(ysize*trainpct)-int(ysize*trainpct*valpct))
    #print(" val ",int(ysize*trainpct*valpct))
    #print(" test ",int(ysize*(1- trainpct) ))
    data.train_mask = torch.zeros(ysize,1, dtype=torch.long)
    data.train_mask[:int(ysize*trainpct)] = 1
    data.train_mask = data.train_mask[torch.randperm(ysize)]
    #print(" train sum ",data.train_mask.sum())
    data.test_mask = torch.ones(ysize,1, dtype=torch.long) - data.train_mask
    #print(" test sum ",data.test_mask.sum())
    
    # quick and dirt
    # set first ysize*trainpct*valpct to 0, for those that are 1
    data.val_mask = torch.zeros(ysize,1, dtype=torch.long)
    #print(" val sum ",data.val_mask.sum())
    data.val_mask[:int(ysize*trainpct*valpct)] = 1
    #print(" val sum ",data.val_mask.sum())
    data.val_mask = data.val_mask[torch.randperm(ysize)]
    #print(" val sum ",data.val_mask.sum())
    data.val_mask = data.val_mask - data.test_mask
    #print(" val sum ",data.val_mask.sum())
    data.val_mask[data.val_mask <= 0 ]= 0
    #print(" val sum ",data.val_mask.sum())

    while data.val_mask.sum() < int(ysize*trainpct*valpct):
        data.val_mask = torch.zeros(ysize,1, dtype=torch.long)
        #print(" val sum ",data.val_mask.sum())
        data.val_mask[:int(ysize*trainpct*valpct)] = 1
        #print(" val sum ",data.val_mask.sum())
        data.val_mask = data.val_mask[torch.randperm(ysize)]
        #print(" val sum ",data.val_mask.sum())
        data.val_mask = data.val_mask - data.test_mask
        #print(" val sum ",data.val_mask.sum())
        data.val_mask[data.val_mask <= 0 ]= 0
        #print(" val sum ",data.val_mask.sum())
    
        
    #print("final val sum ",data.val_mask.sum())
    data.train_mask = data.train_mask - data.val_mask
    #print("final train sum ",data.train_mask.sum())
    
    
    print(data.train_mask.sum())
    #print(data.val_mask)
    print(data.val_mask.sum())
    #print(data.test_mask)  
    print(data.test_mask.sum())
    
    
    # transform to list of indexes
    data.train_mask = transformMask(data.train_mask)
    data.val_mask = transformMask(data.val_mask)
    data.test_mask = transformMask(data.test_mask)
    
    print(data.train_mask)
    print(data.val_mask)
    print(data.test_mask)
    
    

def trainTestEval(dataset, iterations=1, batch_size=32):
    loader = DataLoader(dataset,  shuffle=False)
    i = 0
    print(loader)
    print(dir(loader))
    
    G = dataset.data
    print(G)
    start = time.time()


    # 1.  prepare model
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    #print("using ",device)
    model = Net().to(device)
    data = G.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
    model.train()

    # 2.  create a train_mask, and a test_mask (val_mask for further experiments)
    #shuffleTrainTestMasks(data)
    shuffleTrainTestValMasks(data)

    # 3. train some epochs
    for epoch in range(200):
        optimizer.zero_grad()
        out = model(data)
        loss = F.mse_loss(out[data.train_mask], data.y[data.train_mask])
        loss.backward()
        optimizer.step()
        if epoch % 25 == 0 :
            print("epoch-loss: ",epoch, loss)

    # 4. Model evaluation
    model.eval()
    #  classification in a multiclass setting
    #_, pred = model(data).max(dim=1)
    #correct = pred[data.test_mask].eq(data.y[data.test_mask]).sum().item()
    #acc = correct / data.test_mask.sum().item()
    #print('Accuracy: {:.4f}'.format(acc))


    # regression 
    pred = model(data)
    print("target: ",data.y[data.test_mask])
    print("prediction: ",pred[data.test_mask])
    #print(pred[data.test_mask].type())
    #print(data.y[data.test_mask].type())
    # prepare the normalized mean root squared error
    t = data.y[data.test_mask]
    y = pred[data.test_mask]
    nrmse = torch.sum((t - y) ** 2)/len(data.test_mask)
    nrmse = nrmse.sqrt()
    print("RMSE: ",nrmse)

    #m = torch.mean(t)
    #print("mean",m)
    #tmax = torch.max(t)
    #tmin = torch.min(t)
    #sd = tmax-tmin
    #print("sd",sd)
    #nrmse = (nrmse - m)/sd
    #print("NRMSE:",nrmse)


    endtime = time.time()
    print("Total train-test time: "+str(endtime-start))

    #i+=1
    #if i==1:
    #    break

In [41]:
class MyOwnDataset2():
    def __init__(self,  root, name, transform=None, pre_transform=None):
        f = open(name, 'rb')
        self.data = pickle.load(f) 
        #print(self.data.num_features)
        self.num_features = self.data.num_features
        self.num_classes = 1
        f.close()
        
        

In [42]:
# load the dataset examples---------------------------------------

#PPI
#dataset = loadDataset('PPI')
#QM7b
#dataset = loadDataset('QM7b')
#MUTAG
#dataset = loadDataset(collection='Entities',name='MUTAG')
#ENZYMES FROM TUDataset
#dataset = loadDataset(collection='TUDataset',name='ENZYMES')
# Cora
#dataset = loadDataset(collection='Planetoid',name='Cora')

dataset = loadDataset(collection='MyOwnDataset2', name='er_100_0_15_nb.pickle')
print(dataset.data.y)

trainTestEval(dataset, iterations=2, batch_size=500)

tensor([0.0160, 0.0056, 0.0083, 0.0097, 0.0020, 0.0191, 0.0096, 0.0042, 0.0111,
        0.0057, 0.0136, 0.0135, 0.0079, 0.0100, 0.0040, 0.0146, 0.0095, 0.0062,
        0.0047, 0.0131, 0.0062, 0.0040, 0.0202, 0.0061, 0.0082, 0.0046, 0.0091,
        0.0079, 0.0078, 0.0083, 0.0074, 0.0140, 0.0079, 0.0097, 0.0174, 0.0144,
        0.0087, 0.0117, 0.0085, 0.0070, 0.0216, 0.0101, 0.0106, 0.0102, 0.0132,
        0.0101, 0.0161, 0.0089, 0.0082, 0.0051, 0.0059, 0.0105, 0.0046, 0.0058,
        0.0083, 0.0060, 0.0039, 0.0080, 0.0052, 0.0033, 0.0092, 0.0194, 0.0152,
        0.0053, 0.0037, 0.0108, 0.0213, 0.0118, 0.0039, 0.0106, 0.0032, 0.0078,
        0.0043, 0.0107, 0.0056, 0.0101, 0.0061, 0.0081, 0.0138, 0.0102, 0.0124,
        0.0056, 0.0137, 0.0100, 0.0137, 0.0050, 0.0055, 0.0047, 0.0052, 0.0156,
        0.0089, 0.0204, 0.0266, 0.0038, 0.0094, 0.0036, 0.0224, 0.0104, 0.0042,
        0.0176])
<torch_geometric.data.dataloader.DataLoader object at 0x7f96caf5d6a0>
['_DataLoader__initialized', '__c