# Datasets preparation (GraphSAGE TF version)

This notebook:

- reads graphs from a PyTorch Geometric dataset into NetworkX graphs, then computes the node betweenness centrality (and the edge betweenness centrality);
- then transforms the NetworkX graph into a JSON graph object;
- it also creates `id_map`, which assigns every node identifier to a consecutive integer;
- it also creates `class_map`, which stores the betweenness centrality of each node;
- finally, for the edge version, it converts the graph into one in which every original edge becomes a node, and saves the edge betweenness in `class_map`.

## 1. Code base

In [8]:
import importlib
import torch
import torch_geometric
from torch_geometric.data import DataLoader
import networkx as nx
from torch_geometric.data import Data
import time
import pickle
from torch_geometric.data import InMemoryDataset
import numpy as np
import traceback

      
    
def writeAdjacencyMatrixToDisk(G, filename='temp_adjacency_matrix.txt'):
    """
        Write the structure of a PyTorch Geometric graph to disk as an
        adjacency list (edge and node features are ignored).

        Line format: source target target2 target3 ...

        A new line is started every time the source id in G.edge_index
        changes, so consecutive edges sharing a source end up on one line;
        a source that appears in several separate runs gets several lines.

        Args:
            G: object exposing `edge_index`, a 2 x num_edges tensor whose
               first row holds source ids and second row target ids.
            filename: path of the text file to create (overwritten if it
               already exists).

        Compared with the previous version, the line of the last source is
        now written too (it used to stay in the buffer), and the file is
        closed before returning.
    """
    sources = G.edge_index[0].tolist()
    targets = G.edge_index[1].tolist()

    rows = []            # one list of tokens per output line
    last_source = None   # source id of the line currently being filled
    for source, target in zip(sources, targets):
        if not rows or int(source) != last_source:
            # the source changed: open a new "source target ..." line
            last_source = int(source)
            rows.append([str(source)])
        rows[-1].append(str(target))

    # the context manager guarantees the data is flushed and the handle
    # released even if writing fails
    with open(filename, 'w') as f:
        f.write('\n'.join(' '.join(row) for row in rows))
    
    
def nx_createNxGraphInMem(G):
    """
        Build a networkx MultiGraph in memory from a PyTorch Geometric graph.

        Every column of G.edge_index contributes one edge between the
        string forms of its two node ids; edge and node features are
        ignored. Because the result is a MultiGraph, a pair listed in
        several columns yields several parallel edges.
    """
    multigraph = nx.MultiGraph()
    heads = G.edge_index[0]
    tails = G.edge_index[1]

    for column in range(G.edge_index.size()[1]):
        multigraph.add_edge(str(heads[column].item()),
                            str(tails[column].item()))
    return multigraph
    



def pyTorchGeometricDatasetToNx(G,suffix=0):
    """
        Convert a PyTorch Geometric graph into a networkx graph through a
        temporary adjacency-list file.

        The graph is written to 'temp_aj_m<suffix>.txt' in the working
        directory with writeAdjacencyMatrixToDisk and read back with
        nx.read_adjlist; the file is left on disk.

        Alternatives considered:
            - to disk, to nx, then dict of betweenness
            - transform in memory (see nx_createNxGraphInMem)
            - directly pickle a G object with the betweenness
    """
    adjacency_file = 'temp_aj_m' + str(suffix) + '.txt'

    # step 1: dump the edge list as an adjacency list
    writeAdjacencyMatrixToDisk(G, filename=adjacency_file)

    # step 2: let networkx parse it back into a graph object
    return nx.read_adjlist(adjacency_file)




    
def loadDataset(collection, name=None, split=None, karate=None):
    """
        Instantiate a dataset class from torch_geometric.datasets by name.

        The class called `collection` is looked up in
        torch_geometric.datasets and first constructed with
        root='./data/<collection>' (plus `name` or `split` when given).
        If that raises, the traceback is printed and construction is
        retried without `root`.

        Args:
            collection: name of the class to fetch from
                torch_geometric.datasets.
            name: forwarded as name= when truthy (checked before split).
            split: forwarded as split= when truthy and name is not.
            karate: when truthy (and name/split are not), the class is
                first called with no arguments at all.

        Returns:
            Whatever the dataset class constructor returns.

        Raises:
            AttributeError: if torch_geometric.datasets has no attribute
                named `collection`.
            Exception: whatever the retry without `root` raises.

        NOTE(review): all `name` values of one collection share the same
        root directory; the PROTEINS and ENZYMES runs recorded further
        down list identical batches, which may be a cache collision --
        confirm against the installed torch_geometric version.
    """
    datasets_module = importlib.import_module("torch_geometric.datasets")
    dataset_cls = getattr(datasets_module, collection)
    root = './data/' + str(collection)
    try:
        if name:
            return dataset_cls(root=root, name=name)
        elif split:
            return dataset_cls(root=root, split=split)
        elif karate:
            return dataset_cls()
        else:
            return dataset_cls(root=root)
    except Exception:
        # print_exc() reports the active exception; the previous
        # print_tb() call lacked its required traceback argument and
        # raised TypeError here, so the retry below was never reached.
        traceback.print_exc()
        if name:
            return dataset_cls(name=name)
        elif split:
            return dataset_cls(split=split)
        else:
            return dataset_cls()
        
    
def createDataset(x, edge_index):
    """Bundle node features `x` and `edge_index` into a Data object."""
    graph_data = Data(x=x, edge_index=edge_index)
    return graph_data
    

def createDatasetFromNX(g, xlen, undirected=True):
    """
        Build a Data object (via createDataset) from a networkx graph.

        Args:
            g: networkx graph whose node ids can be converted with int().
            xlen: number of nodes; one feature row is created per node.
            undirected: when True every edge (u, v) is also stored as
                (v, u).

        Returns:
            The object returned by createDataset, with `x` a float tensor
            of shape (xlen, 1) filled with 1.0 and `edge_index` a long
            tensor with one column per stored edge.
    """
    sources = []
    targets = []
    # g.edges() is called (not read as an attribute) so that this works
    # both where `edges` is a method and where it is a callable view.
    for e in g.edges():
        # node id must be an int
        u, v = int(e[0]), int(e[1])
        sources.append(u)
        targets.append(v)
        if undirected:
            sources.append(v)
            targets.append(u)

    edge_index = torch.tensor([sources, targets], dtype=torch.long)

    # a single constant feature (1.0) for each node
    x = torch.ones((xlen, 1), dtype=torch.float)

    return createDataset(x, edge_index)
                         
def createDatasetFromNXwithTarget(g,y,xlen, undirected=True):
    """
        Same as createDatasetFromNX, with the target values `y` attached.

        `y` is converted with torch.FloatTensor and stored on the
        returned object as its `y` attribute.
    """
    graph_data = createDatasetFromNX(g, xlen, undirected)
    graph_data.y = torch.FloatTensor(y)
    return graph_data
                   

In [21]:
import json
from networkx.readwrite import json_graph
import random


def  edgeToNodeTransform(g,verif=False):
    """
    Turn the edges of g into the nodes of a new graph.

    transform:
        1- every edge of g becomes a node of the new graph, numbered
           0, 1, 2, ... in the order g.edges() yields them
        2- two new nodes are linked when their original edges share an
           endpoint
        3- each new node keeps its original edge in the 'old_edge'
           attribute, so callers can build an id_map / class_map
           (e.g. with the edge betweenness) keyed by the new node id

    Args:
        g: networkx graph to transform (left untouched).
        verif: unused; kept so existing calls stay valid.

    Returns:
        A new nx.Graph as described above.

    Only calls available in both old and new networkx releases are used
    (g.edges(), add_node with keyword attributes), instead of
    edges_iter() and the `node` attribute dictionary.
    """
    old_edges = list(g.edges())

    g2 = nx.Graph()
    for i, e in enumerate(old_edges):
        g2.add_node(i, old_edge=e)

    # g2 is undirected, so each unordered pair is checked once; adding
    # (n2, n) after (n, n2) would not change the graph.
    for n, e in enumerate(old_edges):
        for n2 in range(n + 1, len(old_edges)):
            e2 = old_edges[n2]
            if e2[0] == e[0] or e2[1] == e[0] \
               or e2[0] == e[1] or e2[1] == e[1]:
                g2.add_edge(n, n2)

    return g2

def createGraphSAGEDataset(g,dname,betweenness, undirected=True, graph_type=None):
    """
    Compute betweenness labels for g and write the GraphSAGE input files.

    Files written (dname is used as a path prefix):
        dname-G.json          node-link JSON of the graph
        dname-id_map.json     node id (as string) -> integer index
        dname-class_map.json  node id (as string) -> label
        dname-feats.npy       feature array (see below)

    Args:
        g: networkx graph. In 'node' and 'classes' mode its nodes get
            'val', 'test' and 'train' attributes in place; in edge mode
            those attributes go to the transformed graph instead.
        dname: path prefix of the output files.
        betweenness: selects the label:
            'node'     node betweenness centrality (a float per node)
            'classes'  node betweenness centrality binned into a
                       one-hot list of length 10 (see below)
            otherwise  edge betweenness centrality; the graph is first
                       passed through edgeToNodeTransform so that every
                       original edge is a node carrying its value, and
                       the node degree in the transformed graph is saved
                       as the single feature
        undirected: unused; kept so existing calls stay valid.
        graph_type: None to draw 'val'/'test'/'train' at random for every
            node, or one of 'val', 'train', 'test' to put every node in
            that split.

    Raises:
        ValueError: if graph_type is neither None nor one of the three
            split names (previously this surfaced as an unbound-variable
            error in the middle of the run).

    In 'node' and 'classes' mode no features are computed, so the saved
    feats array is empty.

    'classes' binning: with s = value * 1000, the class is the largest k
    in 1..9 such that s > k, or 0 when s <= 1. The previous version never
    set class 0 and left the vector all zeros in that case.
    """
    if graph_type is not None and graph_type not in ('val', 'train', 'test'):
        raise ValueError(
            "graph_type must be None, 'val', 'train' or 'test', got %r"
            % (graph_type,))

    feats = []
    class_map = {}
    id_map = {}

    if betweenness == 'node':
        nx_betweenness = nx.betweenness_centrality(g)
        for i, (k, v) in enumerate(nx_betweenness.items()):
            id_map[str(k)] = i
            class_map[str(k)] = v
    elif betweenness == 'classes':
        nx_betweenness = nx.betweenness_centrality(g)

        # discretize in 10 classes and assign
        for i, (k, v) in enumerate(nx_betweenness.items()):
            scaled = v * 1000
            bin_index = 0
            for threshold in range(9, 0, -1):
                # strict comparison, as in the original elif chain
                if scaled > threshold:
                    bin_index = threshold
                    break
            theclass = [0] * 10
            theclass[bin_index] = 1
            id_map[str(k)] = i
            class_map[str(k)] = theclass
    else:
        nx_edge_betweenness = nx.edge_betweenness_centrality(g)

        # edge betweenness belongs to edges, so the graph is transformed
        # and the value is stored as a label of the new "edge nodes"
        g = edgeToNodeTransform(g)

        # add degree as a feature
        degrees = g.degree()

        # nodes(data=True) yields (id, attribute dict) pairs, which avoids
        # the version-specific `g.node[...]` lookup
        for n, attrs in g.nodes(data=True):
            id_map[str(n)] = n
            class_map[str(n)] = nx_edge_betweenness[attrs['old_edge']]
            feats.append(degrees[n])

        feats = np.array(feats).reshape((-1,1))

    # add 'val', 'test' and 'train' flags to all nodes; exactly one is 1
    for nd, node in g.nodes(data=True):
        node['val']=0
        node['test']=0
        node['train']=0
        if graph_type is None:
            # random uniform with 3 values
            rand_elem = random.choice(['val','test','train'])
        else:
            rand_elem = graph_type
        node[rand_elem]=1

    # translate into a Networkx graph json object
    jg = json_graph.node_link_data(g)

    # and write to disk as a json object
    with open(dname+"-G.json", "w+") as write_file:
        json.dump(jg, write_file)

    # id_map
    with open(dname+"-id_map.json", "w+") as write_file:
        json.dump(id_map, write_file)

    # class_map
    with open(dname+"-class_map.json", "w+") as write_file:
        json.dump(class_map, write_file)

    # features feats array
    np.save(dname+'-feats.npy', feats)
    
    
    

# plan
# 1. get graph from PyTorch Geom
# 2. transform to nx (pyTorchGeometricDatasetToNx)
# 3. call createGraphSAGEDataset(ba,'ba_1000_5','edge') 
#      or createGraphSAGEDataset(ba,'ba_1000_5','node')


def processDatasetsSingleTF(dname, single_dataset):
    """
    Export the single graph stored in `single_dataset.data`.

    The graph is converted to networkx and written twice under
    './precomputed-graphSAGE-TF/': once with edge betweenness labels
    ('<dname>_1_eb') and once with node betweenness labels
    ('<dname>_1_nd').

    The counter `i` is now initialised before use; it used to be read
    before any assignment, which raised UnboundLocalError. Its values
    follow processDatasetsTF: 0 for the temporary adjacency file, 1 in
    the output names.
    """
    G = single_dataset.data
    print(G)
    i = 0
    g = pyTorchGeometricDatasetToNx(G,i)
    i+=1
    print(i)
    createGraphSAGEDataset(g,'./precomputed-graphSAGE-TF/'+dname+'_'+str(i)+'_eb','edge', undirected=False)
    createGraphSAGEDataset(g,'./precomputed-graphSAGE-TF/'+dname+'_'+str(i)+'_nd','node')

        
def processDatasetsTF(dname, dataset):
    """
    Export every batch of `dataset` as a pair of GraphSAGE datasets.

    The dataset is iterated through a DataLoader without shuffling (no
    batch_size is passed, so the loader's default applies). Each batch is
    printed, converted to networkx and written under
    './precomputed-graphSAGE-TF/' as '<dname>_<k>_eb' (edge betweenness)
    and '<dname>_<k>_nd' (node betweenness), where k counts batches from
    1; the temporary adjacency file uses the zero-based position.
    """
    batches = DataLoader(dataset, shuffle=False)
    for position, batch in enumerate(batches):
        print(batch)
        nx_graph = pyTorchGeometricDatasetToNx(batch, position)
        ordinal = position + 1
        print(ordinal)
        target = './precomputed-graphSAGE-TF/' + dname + '_' + str(ordinal)
        createGraphSAGEDataset(nx_graph, target + '_eb', 'edge', undirected=False)
        createGraphSAGEDataset(nx_graph, target + '_nd', 'node')
    
    

## 2. GraphSAGE TF Random Graph datasets

### Tests

In [34]:
# test graph transform
# Each case prints the edge list of a small hand-built graph, then the edge
# list of the graph returned by edgeToNodeTransform.
# NOTE(review): add_path and edges_iter look like NetworkX 1.x methods --
# confirm the installed version still provides them.

# case 1: a single path 0-1-2-3
print("\ntest1")
G = nx.Graph()
G.add_path([0,1,2,3])
print(list([e for e in G.edges_iter()]))
G = edgeToNodeTransform(G)
print(list([e for e in G.edges_iter()]))

# case 2: paths 0-1-2-3 and 4-5-6, joined by the edges (0,4) and (3,6)
print("\ntest2")
G = nx.Graph()
G.add_path([0,1,2,3])

G.add_path([4,5,6])
G.add_edge(0,4)
G.add_edge(3,6)
print(list([e for e in G.edges_iter()]))
G = edgeToNodeTransform(G)
print(list([e for e in G.edges_iter()]))


# case 3: three paths (0-1-3, 4-5-6, 7-8-9) whose ends are tied to nodes 0
# and 3; the transformed nodes are printed with their 'old_edge' attribute
print("\ntest3")
G = nx.Graph()
G.add_path([0,1,3])

G.add_path([4,5,6])
G.add_edge(0,4)
G.add_edge(3,6)

G.add_path([7,8,9])
G.add_edge(0,7)
G.add_edge(3,9)

print(list([e for e in G.edges_iter()]))
G = edgeToNodeTransform(G)
print("nodes",G.nodes(data=True))
print(list([e for e in G.edges_iter()]))


test1
[(0, 1), (1, 2), (2, 3)]
[(0, 1), (1, 2)]

test2
[(0, 1), (0, 4), (1, 2), (2, 3), (3, 6), (4, 5), (5, 6)]
[(0, 1), (0, 2), (1, 5), (2, 3), (3, 4), (4, 6), (5, 6)]

test3
[(0, 1), (0, 4), (0, 7), (1, 3), (3, 6), (3, 9), (4, 5), (5, 6), (7, 8), (8, 9)]
nodes [(0, {'old_edge': (0, 1)}), (1, {'old_edge': (0, 4)}), (2, {'old_edge': (0, 7)}), (3, {'old_edge': (1, 3)}), (4, {'old_edge': (3, 6)}), (5, {'old_edge': (3, 9)}), (6, {'old_edge': (4, 5)}), (7, {'old_edge': (5, 6)}), (8, {'old_edge': (7, 8)}), (9, {'old_edge': (8, 9)})]
[(0, 1), (0, 2), (0, 3), (1, 2), (1, 6), (2, 8), (3, 4), (3, 5), (4, 5), (4, 7), (5, 9), (6, 7), (8, 9)]


In [20]:
# Trial run: generate one random graph per generator, then export only the
# Erdos-Renyi one with edge betweenness labels.
er = nx.erdos_renyi_graph(100, 0.15)
# ws, ba and red are generated but not used further in this cell
ws = nx.watts_strogatz_graph(30, 3, 0.1)
ba = nx.barabasi_albert_graph(100, 5)
red = nx.random_lobster(100, 0.9, 0.9)

createGraphSAGEDataset(er,'./precomputed-graphSAGE-TF/er_100_0_15_eb','edge', undirected=False)



[23 28 31 23 32 27 29 28 24 28 33 24 30 28 22 20 27 25 23 20 19 27 30 23
 25 29 29 23 31 25 32 27 23 28 28 26 26 27 20 25 24 22 22 31 28 26 24 24
 24 22 27 29 19 15 28 29 32 36 37 35 32 30 29 30 26 33 29 38 27 33 37 35
 31 28 26 28 34 29 33 35 25 25 30 36 34 25 31 26 28 26 31 28 30 26 27 29
 27 32 29 28 31 31 30 35 26 34 30 31 28 24 27 31 28 28 32 29 34 26 26 26
 29 29 23 30 20 25 30 22 22 25 36 33 31 31 28 33 27 29 33 29 29 36 27 31
 29 28 23 34 37 30 28 29 36 34 31 24 22 26 30 30 28 34 26 34 36 30 23 30
 28 35 29 33 35 29 31 36 34 35 33 34 29 27 24 23 28 34 29 25 21 32 29 24
 25 22 22 26 24 24 19 25 26 21 29 28 31 30 32 29 31 28 30 26 31 35 36 30
 24 25 26 25 31 28 30 29 29 23 32 24 27 31 33 35 31 37 35 38 33 35 34 35
 29 35 37 20 24 30 26 23 24 25 27 26 31 30 23 31 29 27 28 23 32 28 23 30
 23 25 26 30 29 25 22 18 27 22 16 21 17 35 35 37 34 36 33 28 34 36 38 30
 35 30 34 37 41 38 31 37 28 33 33 30 26 30 26 28 32 36 38 35 32 38 36 33
 34 27 34 31 35 32 33 35 33 36 33 35 35 33 31 24 27

In [21]:
# Trial run of the 'classes' mode (node betweenness discretized into a
# 10-entry class vector) on a fresh Erdos-Renyi graph.
er = nx.erdos_renyi_graph(100, 0.15)
createGraphSAGEDataset(er,'./precomputed-graphSAGE-TF/er_100_0_15_nb_discrete','classes', undirected=False)


### Actual Random graph datasets

In [4]:
# Node-betweenness ('node') datasets for random graphs. Each output prefix
# spells out the generator and its arguments; since the prefix has no
# directory part, the files land in the current working directory.

# Erdos-Renyi graphs
er = nx.erdos_renyi_graph(100, 0.15)
createGraphSAGEDataset(er,'er_100_0_15_nb','node')
er = nx.erdos_renyi_graph(100, 0.45)
createGraphSAGEDataset(er,'er_100_0_45_nb','node')
er = nx.erdos_renyi_graph(1000, 0.15)
createGraphSAGEDataset(er,'er_1000_0_15_nb','node')
er = nx.erdos_renyi_graph(1000, 0.45)
createGraphSAGEDataset(er,'er_1000_0_45_nb','node')


# Watts-Strogatz graphs
ws = nx.watts_strogatz_graph(30, 3, 0.1)
createGraphSAGEDataset(ws,'ws_30_3_0_1_nb','node')
ws = nx.watts_strogatz_graph(100, 3, 0.1)
createGraphSAGEDataset(ws,'ws_100_3_0_1_nb','node')
ws = nx.watts_strogatz_graph(1000, 3, 0.1)
createGraphSAGEDataset(ws,'ws_1000_3_0_1_nb','node')


ws = nx.watts_strogatz_graph(1000, 10, 0.1)
createGraphSAGEDataset(ws,'ws_1000_10_0_1_nb','node')


# Barabasi-Albert graphs
ba = nx.barabasi_albert_graph(100, 5)
createGraphSAGEDataset(ba,'ba_100_5_nb','node')
ba = nx.barabasi_albert_graph(1000, 5)
createGraphSAGEDataset(ba,'ba_1000_5_nb','node')



# 4000-node variants, currently disabled
#er = nx.erdos_renyi_graph(4000, 0.15)
#createGraphSAGEDataset(er,'er_4000_0_15_nb','node')
#er = nx.erdos_renyi_graph(4000, 0.35)
#createGraphSAGEDataset(er,'er_4000_0_35_nb','node')
#ws = nx.watts_strogatz_graph(4000, 3, 0.1)
#createGraphSAGEDataset(ws,'ws_4000_3_0_1_nb','node')
#ws = nx.watts_strogatz_graph(4000, 20, 0.1)
#createGraphSAGEDataset(ws,'ws_4000_20_0_1_nb','node')
#ba = nx.barabasi_albert_graph(4000, 5)
#createGraphSAGEDataset(ba,'ba_4000_5_nb','node')


In [None]:
# repeat with edge betweenness
# Same generators and arguments as the node-betweenness cell, but new
# random graphs are drawn and the outputs use the '_eb' suffix.

# Erdos-Renyi graphs
er = nx.erdos_renyi_graph(100, 0.15)
createGraphSAGEDataset(er,'er_100_0_15_eb','edge', undirected=False)
er = nx.erdos_renyi_graph(100, 0.45)
createGraphSAGEDataset(er,'er_100_0_45_eb','edge', undirected=False)
er = nx.erdos_renyi_graph(1000, 0.15)
createGraphSAGEDataset(er,'er_1000_0_15_eb','edge', undirected=False)
er = nx.erdos_renyi_graph(1000, 0.45)
createGraphSAGEDataset(er,'er_1000_0_45_eb','edge', undirected=False)


# Watts-Strogatz graphs
ws = nx.watts_strogatz_graph(30, 3, 0.1)
createGraphSAGEDataset(ws,'ws_30_3_0_1_eb','edge', undirected=False)
ws = nx.watts_strogatz_graph(100, 3, 0.1)
createGraphSAGEDataset(ws,'ws_100_3_0_1_eb','edge', undirected=False)
ws = nx.watts_strogatz_graph(1000, 3, 0.1)
createGraphSAGEDataset(ws,'ws_1000_3_0_1_eb','edge', undirected=False)


ws = nx.watts_strogatz_graph(1000, 10, 0.1)
createGraphSAGEDataset(ws,'ws_1000_10_0_1_eb','edge', undirected=False)


# Barabasi-Albert graphs
ba = nx.barabasi_albert_graph(100, 5)
createGraphSAGEDataset(ba,'ba_100_5_eb','edge', undirected=False)
ba = nx.barabasi_albert_graph(1000, 5)
createGraphSAGEDataset(ba,'ba_1000_5_eb','edge', undirected=False)


# 4000-node variants, currently disabled.
# NOTE(review): these lines call createRandomGraphDataset, a name that is
# not defined in the visible part of this notebook -- presumably an older
# name of createGraphSAGEDataset; fix before re-enabling.
#er = nx.erdos_renyi_graph(4000, 0.15)
#createRandomGraphDataset(er,'er_4000_0_15_eb','edge')
#er = nx.erdos_renyi_graph(4000, 0.35)
#createRandomGraphDataset(er,'er_4000_0_35_eb','edge')
#ws = nx.watts_strogatz_graph(4000, 3, 0.1)
#createRandomGraphDataset(ws,'ws_4000_3_0_1_eb','edge')
#ws = nx.watts_strogatz_graph(4000, 20, 0.1)
#createRandomGraphDataset(ws,'ws_4000_20_0_1_eb','edge')
#ba = nx.barabasi_albert_graph(4000, 5)
#createRandomGraphDataset(ba,'ba_4000_5_eb','edge')

In [13]:
# graphs for the inductive setting
# 20 rounds; every round draws a fresh random graph per configuration and
# exports it with edge betweenness labels under an 'ind_' prefix that ends
# with the round number i. Files go to the current working directory.

for i in range(20):
    # Erdos-Renyi graphs
    er = nx.erdos_renyi_graph(100, 0.15)
    createGraphSAGEDataset(er,'ind_er_100_0_15_eb_i_'+str(i),'edge', undirected=False)
    er = nx.erdos_renyi_graph(100, 0.45)
    createGraphSAGEDataset(er,'ind_er_100_0_45_eb_i_'+str(i),'edge', undirected=False)
    #er = nx.erdos_renyi_graph(1000, 0.35)
    #createGraphSAGEDataset(er,'ind_er_1000_0_35_eb_i_'+str(i),'edge', undirected=False)

    # Watts-Strogatz graphs
    ws = nx.watts_strogatz_graph(30, 3, 0.1)
    createGraphSAGEDataset(ws,'ind_ws_30_3_0_1_eb_i_'+str(i),'edge', undirected=False)
    ws = nx.watts_strogatz_graph(100, 3, 0.1)
    createGraphSAGEDataset(ws,'ind_ws_100_3_0_1_eb_i_'+str(i),'edge', undirected=False)
    #ws = nx.watts_strogatz_graph(1000, 3, 0.1)
    #createGraphSAGEDataset(ws,'ws_1000_3_0_1_eb','edge', undirected=False)
    #ws = nx.watts_strogatz_graph(1000, 10, 0.1)
    #createGraphSAGEDataset(ws,'ws_1000_10_0_1_eb_i_'+str(i),'edge', undirected=False)

    # Barabasi-Albert graphs
    ba = nx.barabasi_albert_graph(100, 5)
    createGraphSAGEDataset(ba,'ind_ba_100_5_eb_i_'+str(i),'edge', undirected=False)
    #ba = nx.barabasi_albert_graph(1000, 5)
    #createGraphSAGEDataset(ba,'ba_1000_5_eb','edge', undirected=False)


## 3. GraphSAGE TF Benchmark datasets

In [22]:


#KarateClub
# (disabled)
#print("\nKarateClub")
#dname='KarateClub'
#dataset = torch_geometric.datasets.KarateClub()
#processDatasetsTF(dname, dataset)



#PROTEINS FROM TUDataset
# Load the collection through loadDataset, then export every graph with
# processDatasetsTF under the prefix 'TUDataset_PROTEINS'.
print("\nTUDataset PROTEINS")
dname='TUDataset'
name='PROTEINS'
dataset = loadDataset(dname,name)
processDatasetsTF(dname+'_'+name, dataset)



      
        



TUDataset PROTEINS
Downloading https://ls11-www.cs.uni-dortmund.de/people/morris/graphkerneldatasets/PROTEINS.zip
Extracting data/TUDataset/PROTEINS.zip
Batch(batch=[37], edge_index=[2, 168], x=[37, 3], y=[1])
1
Batch(batch=[23], edge_index=[2, 102], x=[23, 3], y=[1])
2
Batch(batch=[25], edge_index=[2, 92], x=[25, 3], y=[1])
3
Batch(batch=[24], edge_index=[2, 90], x=[24, 3], y=[1])
4
Batch(batch=[23], edge_index=[2, 90], x=[23, 3], y=[1])
5
Batch(batch=[24], edge_index=[2, 92], x=[24, 3], y=[1])
6
Batch(batch=[26], edge_index=[2, 118], x=[26, 3], y=[1])
7
Batch(batch=[88], edge_index=[2, 266], x=[88, 3], y=[1])
8
Batch(batch=[23], edge_index=[2, 78], x=[23, 3], y=[1])
9
Batch(batch=[32], edge_index=[2, 106], x=[32, 3], y=[1])
10
Batch(batch=[4], edge_index=[2, 12], x=[4, 3], y=[1])
11
Batch(batch=[14], edge_index=[2, 56], x=[14, 3], y=[1])
12
Batch(batch=[42], edge_index=[2, 150], x=[42, 3], y=[1])
13
Batch(batch=[41], edge_index=[2, 146], x=[41, 3], y=[1])
14
Batch(batch=[36], edge_i

Batch(batch=[39], edge_index=[2, 164], x=[39, 3], y=[1])
149
Batch(batch=[29], edge_index=[2, 82], x=[29, 3], y=[1])
150
Batch(batch=[22], edge_index=[2, 80], x=[22, 3], y=[1])
151
Batch(batch=[11], edge_index=[2, 44], x=[11, 3], y=[1])
152
Batch(batch=[8], edge_index=[2, 32], x=[8, 3], y=[1])
153
Batch(batch=[13], edge_index=[2, 44], x=[13, 3], y=[1])
154
Batch(batch=[18], edge_index=[2, 66], x=[18, 3], y=[1])
155
Batch(batch=[12], edge_index=[2, 44], x=[12, 3], y=[1])
156
Batch(batch=[8], edge_index=[2, 30], x=[8, 3], y=[1])
157
Batch(batch=[40], edge_index=[2, 126], x=[40, 3], y=[1])
158
Batch(batch=[12], edge_index=[2, 50], x=[12, 3], y=[1])
159
Batch(batch=[22], edge_index=[2, 96], x=[22, 3], y=[1])
160
Batch(batch=[22], edge_index=[2, 84], x=[22, 3], y=[1])
161
Batch(batch=[18], edge_index=[2, 64], x=[18, 3], y=[1])
162
Batch(batch=[12], edge_index=[2, 44], x=[12, 3], y=[1])
163
Batch(batch=[17], edge_index=[2, 72], x=[17, 3], y=[1])
164
Batch(batch=[14], edge_index=[2, 52], x=[1

Batch(batch=[62], edge_index=[2, 208], x=[62, 3], y=[1])
291
Batch(batch=[60], edge_index=[2, 200], x=[60, 3], y=[1])
292
Batch(batch=[96], edge_index=[2, 218], x=[96, 3], y=[1])
293
Batch(batch=[54], edge_index=[2, 198], x=[54, 3], y=[1])
294
Batch(batch=[124], edge_index=[2, 278], x=[124, 3], y=[1])
295
Batch(batch=[126], edge_index=[2, 282], x=[126, 3], y=[1])
296
Batch(batch=[122], edge_index=[2, 298], x=[122, 3], y=[1])
297
Batch(batch=[24], edge_index=[2, 102], x=[24, 3], y=[1])
298
Batch(batch=[41], edge_index=[2, 148], x=[41, 3], y=[1])
299
Batch(batch=[49], edge_index=[2, 186], x=[49, 3], y=[1])
300
Batch(batch=[44], edge_index=[2, 178], x=[44, 3], y=[1])
301
Batch(batch=[42], edge_index=[2, 168], x=[42, 3], y=[1])
302
Batch(batch=[41], edge_index=[2, 166], x=[41, 3], y=[1])
303
Batch(batch=[42], edge_index=[2, 164], x=[42, 3], y=[1])
304
Batch(batch=[46], edge_index=[2, 186], x=[46, 3], y=[1])
305
Batch(batch=[55], edge_index=[2, 196], x=[55, 3], y=[1])
306
Batch(batch=[50], 

Batch(batch=[28], edge_index=[2, 108], x=[28, 3], y=[1])
429
Batch(batch=[37], edge_index=[2, 114], x=[37, 3], y=[1])
430
Batch(batch=[18], edge_index=[2, 74], x=[18, 3], y=[1])
431
Batch(batch=[19], edge_index=[2, 78], x=[19, 3], y=[1])
432
Batch(batch=[22], edge_index=[2, 92], x=[22, 3], y=[1])
433
Batch(batch=[43], edge_index=[2, 140], x=[43, 3], y=[1])
434
Batch(batch=[24], edge_index=[2, 92], x=[24, 3], y=[1])
435
Batch(batch=[15], edge_index=[2, 66], x=[15, 3], y=[1])
436
Batch(batch=[14], edge_index=[2, 46], x=[14, 3], y=[1])
437
Batch(batch=[48], edge_index=[2, 198], x=[48, 3], y=[1])
438
Batch(batch=[46], edge_index=[2, 198], x=[46, 3], y=[1])
439
Batch(batch=[26], edge_index=[2, 100], x=[26, 3], y=[1])
440
Batch(batch=[25], edge_index=[2, 112], x=[25, 3], y=[1])
441
Batch(batch=[44], edge_index=[2, 190], x=[44, 3], y=[1])
442
Batch(batch=[39], edge_index=[2, 152], x=[39, 3], y=[1])
443
Batch(batch=[39], edge_index=[2, 158], x=[39, 3], y=[1])
444
Batch(batch=[27], edge_index=[

Batch(batch=[44], edge_index=[2, 170], x=[44, 3], y=[1])
569
Batch(batch=[45], edge_index=[2, 156], x=[45, 3], y=[1])
570
Batch(batch=[46], edge_index=[2, 164], x=[46, 3], y=[1])
571
Batch(batch=[47], edge_index=[2, 158], x=[47, 3], y=[1])
572
Batch(batch=[47], edge_index=[2, 164], x=[47, 3], y=[1])
573
Batch(batch=[42], edge_index=[2, 180], x=[42, 3], y=[1])
574
Batch(batch=[51], edge_index=[2, 220], x=[51, 3], y=[1])
575
Batch(batch=[27], edge_index=[2, 102], x=[27, 3], y=[1])
576
Batch(batch=[27], edge_index=[2, 104], x=[27, 3], y=[1])
577
Batch(batch=[60], edge_index=[2, 206], x=[60, 3], y=[1])
578
Batch(batch=[28], edge_index=[2, 106], x=[28, 3], y=[1])
579
Batch(batch=[38], edge_index=[2, 150], x=[38, 3], y=[1])
580
Batch(batch=[23], edge_index=[2, 106], x=[23, 3], y=[1])
581
Batch(batch=[18], edge_index=[2, 74], x=[18, 3], y=[1])
582
Batch(batch=[45], edge_index=[2, 194], x=[45, 3], y=[1])
583
Batch(batch=[21], edge_index=[2, 90], x=[21, 3], y=[1])
584
Batch(batch=[35], edge_ind

In [24]:
#ENZYMES FROM TUDataset
# Same steps as for PROTEINS; outputs use the prefix 'TUDataset_ENZYMES'.
# NOTE(review): the batches recorded below are identical to those recorded
# for PROTEINS -- check that both names do not end up sharing cached data
# under the common './data/TUDataset' root.
print("\nTUDataset EnZYMES")
dname='TUDataset'
name='ENZYMES'
dataset = loadDataset(dname,name)
processDatasetsTF(dname+'_'+name, dataset)


TUDataset EnZYMES
Downloading https://ls11-www.cs.uni-dortmund.de/people/morris/graphkerneldatasets/ENZYMES.zip
Extracting data/TUDataset/ENZYMES.zip
Batch(batch=[37], edge_index=[2, 168], x=[37, 3], y=[1])
1
Batch(batch=[23], edge_index=[2, 102], x=[23, 3], y=[1])
2
Batch(batch=[25], edge_index=[2, 92], x=[25, 3], y=[1])
3
Batch(batch=[24], edge_index=[2, 90], x=[24, 3], y=[1])
4
Batch(batch=[23], edge_index=[2, 90], x=[23, 3], y=[1])
5
Batch(batch=[24], edge_index=[2, 92], x=[24, 3], y=[1])
6
Batch(batch=[26], edge_index=[2, 118], x=[26, 3], y=[1])
7
Batch(batch=[88], edge_index=[2, 266], x=[88, 3], y=[1])
8
Batch(batch=[23], edge_index=[2, 78], x=[23, 3], y=[1])
9
Batch(batch=[32], edge_index=[2, 106], x=[32, 3], y=[1])
10
Batch(batch=[4], edge_index=[2, 12], x=[4, 3], y=[1])
11
Batch(batch=[14], edge_index=[2, 56], x=[14, 3], y=[1])
12
Batch(batch=[42], edge_index=[2, 150], x=[42, 3], y=[1])
13
Batch(batch=[41], edge_index=[2, 146], x=[41, 3], y=[1])
14
Batch(batch=[36], edge_inde

Batch(batch=[40], edge_index=[2, 168], x=[40, 3], y=[1])
147
Batch(batch=[39], edge_index=[2, 160], x=[39, 3], y=[1])
148
Batch(batch=[39], edge_index=[2, 164], x=[39, 3], y=[1])
149
Batch(batch=[29], edge_index=[2, 82], x=[29, 3], y=[1])
150
Batch(batch=[22], edge_index=[2, 80], x=[22, 3], y=[1])
151
Batch(batch=[11], edge_index=[2, 44], x=[11, 3], y=[1])
152
Batch(batch=[8], edge_index=[2, 32], x=[8, 3], y=[1])
153
Batch(batch=[13], edge_index=[2, 44], x=[13, 3], y=[1])
154
Batch(batch=[18], edge_index=[2, 66], x=[18, 3], y=[1])
155
Batch(batch=[12], edge_index=[2, 44], x=[12, 3], y=[1])
156
Batch(batch=[8], edge_index=[2, 30], x=[8, 3], y=[1])
157
Batch(batch=[40], edge_index=[2, 126], x=[40, 3], y=[1])
158
Batch(batch=[12], edge_index=[2, 50], x=[12, 3], y=[1])
159
Batch(batch=[22], edge_index=[2, 96], x=[22, 3], y=[1])
160
Batch(batch=[22], edge_index=[2, 84], x=[22, 3], y=[1])
161
Batch(batch=[18], edge_index=[2, 64], x=[18, 3], y=[1])
162
Batch(batch=[12], edge_index=[2, 44], x=

Batch(batch=[39], edge_index=[2, 160], x=[39, 3], y=[1])
289
Batch(batch=[38], edge_index=[2, 148], x=[38, 3], y=[1])
290
Batch(batch=[62], edge_index=[2, 208], x=[62, 3], y=[1])
291
Batch(batch=[60], edge_index=[2, 200], x=[60, 3], y=[1])
292
Batch(batch=[96], edge_index=[2, 218], x=[96, 3], y=[1])
293
Batch(batch=[54], edge_index=[2, 198], x=[54, 3], y=[1])
294
Batch(batch=[124], edge_index=[2, 278], x=[124, 3], y=[1])
295
Batch(batch=[126], edge_index=[2, 282], x=[126, 3], y=[1])
296
Batch(batch=[122], edge_index=[2, 298], x=[122, 3], y=[1])
297
Batch(batch=[24], edge_index=[2, 102], x=[24, 3], y=[1])
298
Batch(batch=[41], edge_index=[2, 148], x=[41, 3], y=[1])
299
Batch(batch=[49], edge_index=[2, 186], x=[49, 3], y=[1])
300
Batch(batch=[44], edge_index=[2, 178], x=[44, 3], y=[1])
301
Batch(batch=[42], edge_index=[2, 168], x=[42, 3], y=[1])
302
Batch(batch=[41], edge_index=[2, 166], x=[41, 3], y=[1])
303
Batch(batch=[42], edge_index=[2, 164], x=[42, 3], y=[1])
304
Batch(batch=[46], 

429
Batch(batch=[37], edge_index=[2, 114], x=[37, 3], y=[1])
430
Batch(batch=[18], edge_index=[2, 74], x=[18, 3], y=[1])
431
Batch(batch=[19], edge_index=[2, 78], x=[19, 3], y=[1])
432
Batch(batch=[22], edge_index=[2, 92], x=[22, 3], y=[1])
433
Batch(batch=[43], edge_index=[2, 140], x=[43, 3], y=[1])
434
Batch(batch=[24], edge_index=[2, 92], x=[24, 3], y=[1])
435
Batch(batch=[15], edge_index=[2, 66], x=[15, 3], y=[1])
436
Batch(batch=[14], edge_index=[2, 46], x=[14, 3], y=[1])
437
Batch(batch=[48], edge_index=[2, 198], x=[48, 3], y=[1])
438
Batch(batch=[46], edge_index=[2, 198], x=[46, 3], y=[1])
439
Batch(batch=[26], edge_index=[2, 100], x=[26, 3], y=[1])
440
Batch(batch=[25], edge_index=[2, 112], x=[25, 3], y=[1])
441
Batch(batch=[44], edge_index=[2, 190], x=[44, 3], y=[1])
442
Batch(batch=[39], edge_index=[2, 152], x=[39, 3], y=[1])
443
Batch(batch=[39], edge_index=[2, 158], x=[39, 3], y=[1])
444
Batch(batch=[27], edge_index=[2, 102], x=[27, 3], y=[1])
445
Batch(batch=[26], edge_ind

Batch(batch=[47], edge_index=[2, 164], x=[47, 3], y=[1])
573
Batch(batch=[42], edge_index=[2, 180], x=[42, 3], y=[1])
574
Batch(batch=[51], edge_index=[2, 220], x=[51, 3], y=[1])
575
Batch(batch=[27], edge_index=[2, 102], x=[27, 3], y=[1])
576
Batch(batch=[27], edge_index=[2, 104], x=[27, 3], y=[1])
577
Batch(batch=[60], edge_index=[2, 206], x=[60, 3], y=[1])
578
Batch(batch=[28], edge_index=[2, 106], x=[28, 3], y=[1])
579
Batch(batch=[38], edge_index=[2, 150], x=[38, 3], y=[1])
580
Batch(batch=[23], edge_index=[2, 106], x=[23, 3], y=[1])
581
Batch(batch=[18], edge_index=[2, 74], x=[18, 3], y=[1])
582
Batch(batch=[45], edge_index=[2, 194], x=[45, 3], y=[1])
583
Batch(batch=[21], edge_index=[2, 90], x=[21, 3], y=[1])
584
Batch(batch=[35], edge_index=[2, 142], x=[35, 3], y=[1])
585
Batch(batch=[24], edge_index=[2, 102], x=[24, 3], y=[1])
586
Batch(batch=[21], edge_index=[2, 84], x=[21, 3], y=[1])
587
Batch(batch=[21], edge_index=[2, 84], x=[21, 3], y=[1])
588
Batch(batch=[31], edge_index

In [None]:


#---------------------------------------------------------------------


#MUTAG
# (disabled)
#print("\n MUTAG")
#dname='Entities'
#name='MUTAG'
#dataset = loadDataset(dname,name)
#processDatasetsTF(dname+'_'+name,dataset)



#PPI
# (disabled)
#print("\nPPI PPI")
#dname='PPI'
#dataset = loadDataset(dname)
#processDatasetsTF(dname,dataset)






#PROTEINS FROM TUDataset
print("\nTUDataset PROTEINS")
dname='TUDataset'
name='PROTEINS'
dataset = loadDataset(dname,name)
processDatasetsTF(dname+'_'+name, dataset)

#QM7b
# dname is set explicitly: it used to keep the value 'TUDataset' from the
# section above, so the QM7b graphs were written under the TUDataset prefix.
# The banner now starts with "\n" like the others ("\Q" is not a valid
# escape sequence).
print("\nQM7B QM7B")
dname='QM7b'
dataset = loadDataset(dname)
processDatasetsTF(dname,dataset)

# Planetoid Cora
print("\n Planetoid Cora")
dname='Planetoid'
name='Cora'
dataset = loadDataset(dname,name)
processDatasetsTF(dname+'_'+name,dataset)



