# Description
This notebook will experiment with:
- train & compare differnt GNN models for graph classificaiton in common benchmarks (PPI, Proteins, ENZYMES,..)
- compare results to publication results

Most of the experiments will be done in PyTorch/PyTorch Geometric, but some models are implemented in Tensor Flow.

In [1]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.nn import MessagePassing
#from torch_geometric.nn.conv.gated_graph_conv import GatedGraphConv
from torch_geometric.nn.glob.glob import global_mean_pool, global_add_pool
import torch.nn as nn
from torch.nn import Sequential as Seq, Linear as Lin, ReLU
from torch_scatter import scatter_mean
from torch_geometric.nn import MetaLayer

from TFM_graph_classification import *

# 1. Models

In [2]:

class Net1(torch.nn.Module):
    def __init__(self, d1=50,d2=20,num_classes=6, num_layers=2, aggr_type='mean'):
        super(Net1, self).__init__()
        self.ggnn = GatedGraphConv(out_channels=d1, num_layers=num_layers,aggr=aggr_type, bias=True)
        self.fc1 = nn.Linear(d1, d2)
        self.fc2 = nn.Linear(d2, num_classes)
        self.global_pool = global_mean_pool
        
        

    def forward(self, data):
        x, edge_index, batch_vector = data.x, data.edge_index, data.batch

        x = self.ggnn(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training) # until here the output is for each node
        
        x = self.global_pool(x, batch_vector) # this makes the output to be graph level?
        #x = self.fc1(x)
        x = F.relu(self.fc1(x))
        #x = F.relu(self.fc2(x))
        x = self.fc2(x)
        #x = self.pool1(x, batch )
        x = F.log_softmax(x, dim=1)
        #x = torch.argmax(x, dim=1)  # we output softmax to use the nll_loss
        
        return x
    
class Net2(torch.nn.Module):
    def __init__(self, d1=50,d2=20,num_classes=6, num_layers=2, aggr_type='mean'):
        super(Net2, self).__init__()
        self.ggnn = GatedGraphConv(out_channels=d1, num_layers=num_layers,aggr=aggr_type, bias=True)
        self.fc1 = nn.Linear(d1, num_classes)
        self.global_pool = global_mean_pool
        
    def forward(self, data):
        x, edge_index, batch_vector = data.x, data.edge_index, data.batch

        x = self.ggnn(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training) # until here the output is for each node
        
        x = self.global_pool(x, batch_vector) # this makes the output to be graph level?
        
        x = self.fc1(x)
        x = F.log_softmax(x, dim=1)
        return x
    
    
class Net3(torch.nn.Module):
    def __init__(self, d1=50,d2=20, d3=10,num_classes=6, num_layers=2, aggr_type='mean'):
        super(Net3, self).__init__()
        self.ggnn = GatedGraphConv(out_channels=d1, num_layers=num_layers,aggr=aggr_type, bias=True)
        self.fc1 = nn.Linear(d1, d2)
        self.dense1_bn = nn.BatchNorm1d(d2)
        self.fc2 = nn.Linear(d2, d3)
        self.dense2_bn = nn.BatchNorm1d(d3)
        self.fc3 = nn.Linear(d3, num_classes)
        self.global_pool = global_mean_pool
        
    def forward(self, data):
        x, edge_index, batch_vector = data.x, data.edge_index, data.batch

        x = self.ggnn(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training) # until here the output is for each node
        
        x = self.global_pool(x, batch_vector) # this makes the output to be graph level?
        x = F.relu(self.dense1_bn(self.fc1(x)))
        x = F.relu(self.dense2_bn(self.fc2(x)))
        x = self.fc3(x)
        x = F.log_softmax(x, dim=1)
        return x
    
class Net4(torch.nn.Module):
    def __init__(self, d1=50,d2=20,num_classes=6, num_layers=2, aggr_type='mean'):
        super(Net4, self).__init__()
        self.ggnn = GatedGraphConv(out_channels=d1, num_layers=num_layers,aggr=aggr_type, bias=True)
        self.fc1 = nn.Linear(d1, d2)
        self.dense1_bn = nn.BatchNorm1d(d2)
        self.fc2 = nn.Linear(d2, num_classes)
        self.global_pool = global_mean_pool
        
        

    def forward(self, data):
        x, edge_index, batch_vector = data.x, data.edge_index, data.batch

        x = self.ggnn(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training) # until here the output is for each node
        
        x = self.global_pool(x, batch_vector) # this makes the output to be graph level?
        #x = self.fc1(x)
        x = F.relu(self.dense1_bn(self.fc1(x)))
        #x = F.relu(self.fc2(x))
        x = self.fc2(x)
        #x = self.pool1(x, batch )
        x = F.log_softmax(x, dim=1)
        #x = torch.argmax(x, dim=1)  # we output softmax to use the nll_loss
        
        return x
    
class META1(torch.nn.Module):
    def __init__(self, d1=3, d2=50, d3=15, d4 =15,d5=10,num_classes=6):
        super(META1, self).__init__()

        self.edge_mlp = Seq(Lin(d1*3, d2), ReLU(), Lin(d2, d3))
        self.node_mlp = Seq(Lin(d1*6, d2), ReLU(), Lin(d2, d3))
        self.global_mlp = Seq(Lin(d3+1, d2), ReLU(), Lin(d2, d3))
        
        self.fc1 = nn.Linear(d4, d5)
        self.dense1_bn = nn.BatchNorm1d(d5)
        self.fc2 = nn.Linear(d5, num_classes)
        self.dense2_bn = nn.BatchNorm1d(num_classes)
        self.global_pool = global_mean_pool

        def edge_model(source, target, edge_attr, u):
            # source, target: [E, F_x], where E is the number of edges.
            # edge_attr: [E, F_e]
            # u: [B, F_u], where B is the number of graphs.
            #print("edge_model")
            #print(source.size())
            #print(target.size())
            #print(edge_attr.size())
            out = torch.cat([source, target, edge_attr], dim=1)
            return self.edge_mlp(out)

        def node_model(x, edge_index, edge_attr, u):
            # x: [N, F_x], where N is the number of nodes.
            # edge_index: [2, E] with max entry N - 1.
            # edge_attr: [E, F_e]
            # u: [B, F_u]
            row, col = edge_index
            
            #print("node_model")
            #print(row.size())
            #print(col.size())
            #print(x[col].size())
            #print(edge_attr.size())
            
            out = torch.cat([x[col], edge_attr], dim=1)
            out = self.node_mlp(out)
            return scatter_mean(out, row, dim=0, dim_size=x.size(0))

        def global_model(x, edge_index, edge_attr, u, batch):
            # x: [N, F_x], where N is the number of nodes.
            # edge_index: [2, E] with max entry N - 1.
            # edge_attr: [E, F_e]
            # u: [B, F_u]
            # batch: [N] with max entry B - 1.
            
            #print("global_Model")
            #print("u.size():")
            #print(u.size())
            #print("scatter_mean(x,batch,..):")
            #smean = scatter_mean(x, batch, dim=0)
            #print(smean.size())
            
            out = torch.cat([u, scatter_mean(x, batch, dim=0)], dim=1)
            
            #print("out.size():")
            #print(out.size())
            return self.global_mlp(out)

        self.op = MetaLayer(edge_model, node_model, global_model)

    def forward(self, data):
        
        x, edge_index, edge_attr, u, batch = data.x, data.edge_index, data.edge_attr, data.u, data.batch        
        
        # output of meta is x,edge_attr, u
        x2, edge_attr2, u2 =  self.op(x, edge_index, edge_attr, u, batch)
        
        # idea1 is to cat x2, edge_attr2 and u2?
        # idea2 is to update edge_attr and u...
        data.x = x2
        data.edge_attr = edge_attr2
        data.u = u2

        # version using only u
        x = F.relu(self.dense1_bn(self.fc1(u2)))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        x = F.log_softmax(x, dim=1)
        return x
        
    
    
class META2(torch.nn.Module):
    def __init__(self, d1=3, d2=50, d3=15, d4 =15,d5=10,num_classes=6):
        super(META2, self).__init__()

        self.edge_mlp = Seq(Lin(d1*3, d2), ReLU(), Lin(d2, d3))
        self.node_mlp = Seq(Lin(d1*6, d2), ReLU(), Lin(d2, d3))
        self.global_mlp = Seq(Lin(d3+1, d2), ReLU(), Lin(d2, d3))
        
        self.fc1 = nn.Linear(d4, d5)
        self.dense1_bn = nn.BatchNorm1d(d5)
        self.fc2 = nn.Linear(d5, num_classes)
        self.dense2_bn = nn.BatchNorm1d(num_classes)
        self.global_pool = global_mean_pool

        def edge_model(source, target, edge_attr, u):
            # source, target: [E, F_x], where E is the number of edges.
            # edge_attr: [E, F_e]
            # u: [B, F_u], where B is the number of graphs.
            #print("edge_model")
            #print(source.size())
            #print(target.size())
            #print(edge_attr.size())
            out = torch.cat([source, target, edge_attr], dim=1)
            return self.edge_mlp(out)

        def node_model(x, edge_index, edge_attr, u):
            # x: [N, F_x], where N is the number of nodes.
            # edge_index: [2, E] with max entry N - 1.
            # edge_attr: [E, F_e]
            # u: [B, F_u]

            
            row, col = edge_index
            
            #print("node_model")
            #print(row.size())
            #print(col.size())
            #print(x[col].size())
            #print(edge_attr.size())
            
            out = torch.cat([x[col], edge_attr], dim=1)
            out = self.node_mlp(out)
            return scatter_mean(out, row, dim=0, dim_size=x.size(0))

        def global_model(x, edge_index, edge_attr, u, batch):
            # x: [N, F_x], where N is the number of nodes.
            # edge_index: [2, E] with max entry N - 1.
            # edge_attr: [E, F_e]
            # u: [B, F_u]
            # batch: [N] with max entry B - 1.
            
            #print("global_Model")
            #print("u.size():")
            #print(u.size())
            #print("scatter_mean(x,batch,..):")
            #smean = scatter_mean(x, batch, dim=0)
            #print(smean.size())
            
            out = torch.cat([u, scatter_mean(x, batch, dim=0)], dim=1)
            
            #print("out.size():")
            #print(out.size())
            return self.global_mlp(out)

        self.op = MetaLayer(edge_model, node_model, global_model)

    def forward(self, data):
        
        x, edge_index, edge_attr, u, batch = data.x, data.edge_index, data.edge_attr, data.u, data.batch        
        
        # output of meta is x,edge_attr, u
        x2, edge_attr2, u2 =  self.op(x, edge_index, edge_attr, u, batch)
        
        # idea1 is to cat x2, edge_attr2 and u2?
        # idea2 is to update edge_attr and u...
        data.x = x2
        data.edge_attr = edge_attr2
        data.u = u2
        
        # version using only x 
        x = self.global_pool(x2,batch) # separate by graph level
        x = F.relu(self.dense1_bn(self.fc1(x)))
        x = self.fc2(x)
        x = F.log_softmax(x, dim=1)
        return x
    

    
class META3(torch.nn.Module):
    def __init__(self, d1=3, d2=50, d3=15, d4 =15,d5=10,num_classes=6):
        super(META3, self).__init__()

        self.edge_mlp = Seq(Lin(d1*3, d2), ReLU(), Lin(d2, d3))
        self.node_mlp = Seq(Lin(d1*6, d2), ReLU(), Lin(d2, d3))
        self.global_mlp = Seq(Lin(d3+1, d2), ReLU(), Lin(d2, d3))
        
        self.fc1 = nn.Linear(d4, d5)
        self.dense1_bn = nn.BatchNorm1d(d5)
        self.fc2 = nn.Linear(d5, num_classes)
        self.dense2_bn = nn.BatchNorm1d(num_classes)
        self.global_pool = global_mean_pool

        def edge_model(source, target, edge_attr, u):
            # source, target: [E, F_x], where E is the number of edges.
            # edge_attr: [E, F_e]
            # u: [B, F_u], where B is the number of graphs.
            #print("edge_model")
            #print(source.size())
            #print(target.size())
            #print(edge_attr.size())
            out = torch.cat([source, target, edge_attr], dim=1)
            return self.edge_mlp(out)

        def node_model(x, edge_index, edge_attr, u):
            # x: [N, F_x], where N is the number of nodes.
            # edge_index: [2, E] with max entry N - 1.
            # edge_attr: [E, F_e]
            # u: [B, F_u]

            
            row, col = edge_index
            
            #print("node_model")
            #print(row.size())
            #print(col.size())
            #print(x[col].size())
            #print(edge_attr.size())
            
            out = torch.cat([x[col], edge_attr], dim=1)
            out = self.node_mlp(out)
            return scatter_mean(out, row, dim=0, dim_size=x.size(0))

        def global_model(x, edge_index, edge_attr, u, batch):
            # x: [N, F_x], where N is the number of nodes.
            # edge_index: [2, E] with max entry N - 1.
            # edge_attr: [E, F_e]
            # u: [B, F_u]
            # batch: [N] with max entry B - 1.
            
            #print("global_Model")
            #print("u.size():")
            #print(u.size())
            #print("scatter_mean(x,batch,..):")
            #smean = scatter_mean(x, batch, dim=0)
            #print(smean.size())
            
            out = torch.cat([u, scatter_mean(x, batch, dim=0)], dim=1)
            
            #print("out.size():")
            #print(out.size())
            return self.global_mlp(out)

        self.op = MetaLayer(edge_model, node_model, global_model)

    def forward(self, data):
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        x, edge_index, edge_attr, u, batch = data.x, data.edge_index, data.edge_attr, data.u, data.batch        
        
        # output of meta is x,edge_attr, u
        x2, edge_attr2, u2 =  self.op(x, edge_index, edge_attr, u, batch)
        
        # idea1 is to cat x2, edge_attr2 and u2?
        # idea2 is to update edge_attr and u...
        data.x = x2
        data.edge_attr = edge_attr2
        data.u = u2


        # version using x and  u
        x = F.relu(torch.cat([x2,u2], dim=0))
        x = F.dropout(x, training=self.training) # until here the output is for each node
        ubatch = list(set([ elem.item() for elem in batch]))
        #print(ubatch)
        x = self.global_pool(x, torch.cat([batch,torch.LongTensor( ubatch).to(device) ],dim=0)) # this makes the output to be graph level?
        x = F.relu(self.dense1_bn(self.fc1(x)))
        x = self.fc2(x)
        x = F.log_softmax(x, dim=1)
        return x
    
class META4(torch.nn.Module):
    def __init__(self, d1=3, d2=50, d3=15, d4 =15,d5=10,num_classes=6):
        super(META4, self).__init__()

        self.edge_mlp = Seq(Lin(d1*3, d2), ReLU(), Lin(d2, d3))
        self.node_mlp = Seq(Lin(d1*6, d2), ReLU(), Lin(d2, d3))
        self.global_mlp = Seq(Lin(16, d2), ReLU(), Lin(d2, d3))
        
        self.fc1 = nn.Linear(d4, d5)
        self.dense1_bn = nn.BatchNorm1d(d5)
        self.fc2 = nn.Linear(d5, num_classes)
        self.dense2_bn = nn.BatchNorm1d(num_classes)
        self.global_pool = global_mean_pool

        def edge_model(source, target, edge_attr, u):
            # source, target: [E, F_x], where E is the number of edges.
            # edge_attr: [E, F_e]
            # u: [B, F_u], where B is the number of graphs.
            #print("edge_model")
            #print(source.size())
            #print(target.size())
            #print(edge_attr.size())
            out = torch.cat([source, target, edge_attr], dim=1)
            return self.edge_mlp(out)

        def node_model(x, edge_index, edge_attr, u):
            # x: [N, F_x], where N is the number of nodes.
            # edge_index: [2, E] with max entry N - 1.
            # edge_attr: [E, F_e]
            # u: [B, F_u]

            
            row, col = edge_index
            
            #print("node_model")
            #print(row.size())
            #print(col.size())
            #print(x[col].size())
            #print(edge_attr.size())
            
            out = torch.cat([x[col], edge_attr], dim=1)
            out = self.node_mlp(out)
            return scatter_mean(out, row, dim=0, dim_size=x.size(0))

        def global_model(x, edge_index, edge_attr, u, batch):
            # x: [N, F_x], where N is the number of nodes.
            # edge_index: [2, E] with max entry N - 1.
            # edge_attr: [E, F_e]
            # u: [B, F_u]
            # batch: [N] with max entry B - 1.
            
            #print("global_Model")
            #print("u.size():")
            #print(u.size())
            #print("scatter_mean(x,batch,..):")
            #smean = scatter_mean(x, batch, dim=0)
            #print(smean.size())
            
            out = torch.cat([u, scatter_mean(x, batch, dim=0)], dim=1)
            
            #print("out.size():")
            #print(out.size())
            return self.global_mlp(out)

        self.op = MetaLayer(edge_model, node_model, global_model)

    def forward(self, data):
        
        x, edge_index, edge_attr, u, batch = data.x, data.edge_index, data.edge_attr, data.u, data.batch        
        
        # output of meta is x,edge_attr, u
        x2, edge_attr2, u2 =  self.op(x, edge_index, edge_attr, u, batch)
        
        # idea1 is to cat x2, edge_attr2 and u2?
        # idea2 is to update edge_attr and u...
        data.x = x2
        data.edge_attr = edge_attr2
        data.u = u2

    
        # version using x and  u and edge_attr
        x = F.relu(torch.cat([x2,u2], dim=0))
        #x = x2
        x = F.dropout(x, training=self.training) # until here the output is for each node
        
        x = self.global_pool(x, torch.cat([batch, ],dim=0)) # this makes the output to be graph level?
        x = F.relu(self.dense1_bn(self.fc1(x)))
        x = self.fc2(x)
        x = F.log_softmax(x, dim=1)
        return x
    
    
class META5(torch.nn.Module):
    """
        Not using edge attribute
    """
    def __init__(self, d1=3, d2=50, d3=15):
        super(META5, self).__init__()

        self.edge_mlp = Seq(Lin(d1*2, d2), ReLU(), Lin(d2, d3))
        self.node_mlp = Seq(Lin(d1, d2), ReLU(), Lin(d2, d3))
        self.global_mlp = Seq(Lin(2, d2), ReLU(), Lin(d2, d3))

        def edge_model(source, target, edge_attr, u):
            # source, target: [E, F_x], where E is the number of edges.
            # edge_attr: [E, F_e]
            # u: [B, F_u], where B is the number of graphs.
            out = torch.cat([source, target], dim=1)
            #print("edge_model")
            #print(out.size())
            return self.edge_mlp(out)

        def node_model(x, edge_index, edge_attr, u):
            # x: [N, F_x], where N is the number of nodes.
            # edge_index: [2, E] with max entry N - 1.
            # edge_attr: [E, F_e]
            # u: [B, F_u]
            row, col = edge_index
            out = torch.cat([x[col]], dim=1)
            out = self.node_mlp(out)
            return scatter_mean(out, row, dim=0, dim_size=x.size(0))

        def global_model(x, edge_index, edge_attr, u, batch):
            # x: [N, F_x], where N is the number of nodes.
            # edge_index: [2, E] with max entry N - 1.
            # edge_attr: [E, F_e]
            # u: [B, F_u]
            # batch: [N] with max entry B - 1.
            out = torch.cat([u, scatter_mean(x, batch, dim=0)], dim=1)
            
            return self.global_mlp(out)

        self.op = MetaLayer(edge_model, node_model, global_model)

    def forward(self, x, edge_index, edge_attr, u, batch):
        #print("Forward: ")
        #print(x.size())
        return self.op(x, edge_index, edge_attr, u, batch)

In [4]:
# models for hyperparameter search
model_list =[
    {'epochs': 200,
    'model': Net1,
    'kwargs':{'d1': 50,'d2': 20,'num_layers':2, 'aggr_type':'mean'}, 
    'learning_rate': 0.01, 'weight_decay':5e-4, 'batch_size': 32},
    {'epochs': 100,
    'model': Net1,
    'kwargs':{'d1': 50,'d2': 20,'num_layers':2, 'aggr_type':'mean'}, 
    'learning_rate': 0.01, 'weight_decay':5e-4, 'batch_size': 32},
    {'epochs': 200,
    'model': Net1,
    'kwargs':{'d1': 50,'d2': 20,'num_layers':2, 'aggr_type':'add'}, 
    'learning_rate': 0.01, 'weight_decay':5e-4, 'batch_size': 32},
    {'epochs': 100,
    'model': Net1,
    'kwargs':{'d1': 50,'d2': 20,'num_layers':2, 'aggr_type':'add'}, 
    'learning_rate': 0.01, 'weight_decay':5e-4, 'batch_size': 32},
    
    {'epochs': 200,
    'model': Net1,
    'kwargs':{'d1': 100,'d2': 20,'num_layers':2, 'aggr_type':'mean'}, 
    'learning_rate': 0.01, 'weight_decay':5e-4, 'batch_size': 32},
    {'epochs': 100,
    'model': Net1,
    'kwargs':{'d1': 100,'d2': 20,'num_layers':2, 'aggr_type':'mean'}, 
    'learning_rate': 0.01, 'weight_decay':5e-4, 'batch_size': 32},
    {'epochs': 200,
    'model': Net1,
    'kwargs':{'d1': 100,'d2': 50,'num_layers':2, 'aggr_type':'mean'}, 
    'learning_rate': 0.01, 'weight_decay':5e-4, 'batch_size': 32},
    {'epochs': 100,
    'model': Net1,
    'kwargs':{'d1': 100,'d2': 50,'num_layers':2, 'aggr_type':'mean'}, 
    'learning_rate': 0.01, 'weight_decay':5e-4, 'batch_size': 32},
]

model_list2 = []
for modelclass in [Net1, Net2, Net3,Net4]:
    for d1 in [25,50,100,200]:
        for d2 in [20,50]:
            for aggr_type in ['mean','add']:
                for epochs in [10,20,300]:
                    model_list2.append(
                        {
                        'model': modelclass,
                        'epochs': epochs,
                        'kwargs':{'d1': d1,'d2': d2,'num_layers':2, 
                                  'aggr_type':aggr_type}, 
                        'learning_rate': 0.01, 'weight_decay':5e-4, 
                        'batch_size': 32},
                    )
                    
                    
model_list3 =[
    {'epochs': 200,
    'model': META1,
    'kwargs':{'d1': 3,'d2': 20, 'd3': 15, 'd4': 15, 'd5':10}, 
    'learning_rate': 0.01, 'weight_decay':5e-4, 'batch_size': 32},
    {'epochs': 200,
    'model': META2,
    'kwargs':{'d1': 3,'d2': 20, 'd3': 15, 'd4': 15, 'd5':10}, 
    'learning_rate': 0.01, 'weight_decay':5e-4, 'batch_size': 32},
    {'epochs': 200,
    'model': META3,
    'kwargs':{'d1': 3,'d2': 20, 'd3': 15, 'd4': 15, 'd5':10}, 
    'learning_rate': 0.01, 'weight_decay':5e-4, 'batch_size': 32},
    #{'epochs': 200,
    #'model': META4,
    #'kwargs':{'d1': 3,'d2': 20, 'd3': 15}, 
    #'learning_rate': 0.01, 'weight_decay':5e-4, 'batch_size': 32},
]
   

model_list4 = []
for modelclass in [META3, META2, META1]:
    for d1 in [3]:
        for d2 in [10,20,50,100]:
            for d3 in [15]:
                for d4 in [15,30,60]:
                    for d5 in [10,20]:
                        for epochs in [100,200,400,800]:
                            model_list4.append(
                                {'epochs': epochs,
                                'model': modelclass,
                                'kwargs':{'d1': d1,'d2': d2, 'd3': d3, 'd4': d4, 'd5':d5}, 
                                'learning_rate': 0.01, 'weight_decay':5e-4, 'batch_size': 32},
                            )

model_list = model_list2
model_list = model_list[:2]
model_list = model_list3[2:3]
model_list = model_list4

In [5]:
dataset = TUDataset(root='/tmp/ENZYMES', name='ENZYMES')
dataset = dataset.shuffle()
k = 3
n = len(dataset)
print(" n:",n," k folds=",k)
train_dataset, test_dataset = balancedDatasetSplit_slice(dataset, prop=0.8)
print("Datasets balancing: ")
printDatasetBalance(dataset )
printDatasetBalance(train_dataset )
printDatasetBalance(test_dataset )
print()

 n: 600  k folds= 3
Datasets balancing: 
{0: 100, 1: 100, 2: 100, 3: 100, 4: 100, 5: 100}
{0: 80, 1: 80, 2: 80, 3: 80, 4: 80, 5: 80}
{0: 20, 1: 20, 2: 20, 3: 20, 4: 20, 5: 20}



In [6]:
# get  edge_index, edge_attr and global attributes..
print(dir(dataset[1]))
print(dataset[1].num_features)
print(dataset[1].num_edges)
print()


loader = DataLoader(dataset, batch_size=32, shuffle=True)
for data in loader:
    print(dir(data))
    
    print(data.x)
    print(data.edge_attr)
    print(data.edge_index)
    print(data.batch)
    print(data.y)
    
    break
    

['__call__', '__class__', '__contains__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__len__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setitem__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'apply', 'cat_dim', 'clone', 'contains_isolated_nodes', 'contains_self_loops', 'contiguous', 'edge_attr', 'edge_index', 'from_dict', 'is_coalesced', 'is_directed', 'is_undirected', 'keys', 'num_edges', 'num_features', 'num_nodes', 'pos', 'to', 'x', 'y']
3
208

['__call__', '__class__', '__contains__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__len__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__red

In [7]:


modelsdict = modelSelection(model_list,k, train_dataset)
reportModelSelectionResult(modelsdict)

 trained model:  META3 {'d1': 3, 'd2': 10, 'd3': 15, 'd4': 15, 'd5': 10}  epochs: 100  val loss= 0.05306003739436468  val accuracy= 0.32142857142857145  val microF1= 0.3162393162393162  val macroF1= 0.30535526618296427
 trained model:  META3 {'d1': 3, 'd2': 10, 'd3': 15, 'd4': 15, 'd5': 10}  epochs: 200  val loss= 0.05067132040858269  val accuracy= 0.42857142857142855  val microF1= 0.3696581196581197  val macroF1= 0.36753955061359095
 trained model:  META3 {'d1': 3, 'd2': 10, 'd3': 15, 'd4': 15, 'd5': 10}  epochs: 400  val loss= 0.05175297955671946  val accuracy= 0.36904761904761907  val microF1= 0.37393162393162394  val macroF1= 0.3664226123924525
 trained model:  META3 {'d1': 3, 'd2': 10, 'd3': 15, 'd4': 15, 'd5': 10}  epochs: 800  val loss= 0.05074156199892362  val accuracy= 0.369047619047619  val microF1= 0.3974358974358974  val macroF1= 0.4019578914589401
 trained model:  META3 {'d1': 3, 'd2': 10, 'd3': 15, 'd4': 15, 'd5': 20}  epochs: 100  val loss= 0.05050691465536753  val accur

Traceback (most recent call last):
  File "/media/disk/home/pau/Projectes/GNN-MThesis/src/graph_classification/TFM_graph_classification.py", line 783, in modelSelection
    train_model(model, loader, optimizer, train_loss_history)
  File "/media/disk/home/pau/Projectes/GNN-MThesis/src/graph_classification/TFM_graph_classification.py", line 464, in train_model
    return train_model_META(model, loader, optimizer, train_loss_history)
  File "/media/disk/home/pau/Projectes/GNN-MThesis/src/graph_classification/TFM_graph_classification.py", line 448, in train_model_META
    out = model(data)
  File "/home/pau/.pyenv/versions/3.6.7/envs/gnn-pytorch/lib/python3.6/site-packages/torch/nn/modules/module.py", line 489, in __call__
    result = self.forward(*input, **kwargs)
  File "<ipython-input-2-3b200bb6aac2>", line 363, in forward
    ubatch = list(set([ elem.item() for elem in batch]))
  File "<ipython-input-2-3b200bb6aac2>", line 363, in <listcomp>
    ubatch = list(set([ elem.item() for el

Problem training model META3
 trained model:  META3 {'d1': 3, 'd2': 10, 'd3': 15, 'd4': 30, 'd5': 10}  epochs: 800  val loss= 0.0  val accuracy= 0.0  val microF1= 0.0  val macroF1= 0.0
Problem training model META3
 trained model:  META3 {'d1': 3, 'd2': 10, 'd3': 15, 'd4': 30, 'd5': 20}  epochs: 100  val loss= 0.0  val accuracy= 0.0  val microF1= 0.0  val macroF1= 0.0
Problem training model META3
 trained model:  META3 {'d1': 3, 'd2': 10, 'd3': 15, 'd4': 30, 'd5': 20}  epochs: 200  val loss= 0.0  val accuracy= 0.0  val microF1= 0.0  val macroF1= 0.0
Problem training model META3
 trained model:  META3 {'d1': 3, 'd2': 10, 'd3': 15, 'd4': 30, 'd5': 20}  epochs: 400  val loss= 0.0  val accuracy= 0.0  val microF1= 0.0  val macroF1= 0.0


Traceback (most recent call last):
  File "/media/disk/home/pau/Projectes/GNN-MThesis/src/graph_classification/TFM_graph_classification.py", line 783, in modelSelection
    train_model(model, loader, optimizer, train_loss_history)
  File "/media/disk/home/pau/Projectes/GNN-MThesis/src/graph_classification/TFM_graph_classification.py", line 464, in train_model
    return train_model_META(model, loader, optimizer, train_loss_history)
  File "/media/disk/home/pau/Projectes/GNN-MThesis/src/graph_classification/TFM_graph_classification.py", line 448, in train_model_META
    out = model(data)
  File "/home/pau/.pyenv/versions/3.6.7/envs/gnn-pytorch/lib/python3.6/site-packages/torch/nn/modules/module.py", line 489, in __call__
    result = self.forward(*input, **kwargs)
  File "<ipython-input-2-3b200bb6aac2>", line 366, in forward
    x = F.relu(self.dense1_bn(self.fc1(x)))
  File "/home/pau/.pyenv/versions/3.6.7/envs/gnn-pytorch/lib/python3.6/site-packages/torch/nn/modules/module.py", line 4

Problem training model META3
 trained model:  META3 {'d1': 3, 'd2': 10, 'd3': 15, 'd4': 30, 'd5': 20}  epochs: 800  val loss= 0.0  val accuracy= 0.0  val microF1= 0.0  val macroF1= 0.0
Problem training model META3
 trained model:  META3 {'d1': 3, 'd2': 10, 'd3': 15, 'd4': 60, 'd5': 10}  epochs: 100  val loss= 0.0  val accuracy= 0.0  val microF1= 0.0  val macroF1= 0.0
Problem training model META3
 trained model:  META3 {'d1': 3, 'd2': 10, 'd3': 15, 'd4': 60, 'd5': 10}  epochs: 200  val loss= 0.0  val accuracy= 0.0  val microF1= 0.0  val macroF1= 0.0
Problem training model META3
 trained model:  META3 {'d1': 3, 'd2': 10, 'd3': 15, 'd4': 60, 'd5': 10}  epochs: 400  val loss= 0.0  val accuracy= 0.0  val microF1= 0.0  val macroF1= 0.0


Traceback (most recent call last):
  File "/media/disk/home/pau/Projectes/GNN-MThesis/src/graph_classification/TFM_graph_classification.py", line 783, in modelSelection
    train_model(model, loader, optimizer, train_loss_history)
  File "/media/disk/home/pau/Projectes/GNN-MThesis/src/graph_classification/TFM_graph_classification.py", line 464, in train_model
    return train_model_META(model, loader, optimizer, train_loss_history)
  File "/media/disk/home/pau/Projectes/GNN-MThesis/src/graph_classification/TFM_graph_classification.py", line 448, in train_model_META
    out = model(data)
  File "/home/pau/.pyenv/versions/3.6.7/envs/gnn-pytorch/lib/python3.6/site-packages/torch/nn/modules/module.py", line 489, in __call__
    result = self.forward(*input, **kwargs)
  File "<ipython-input-2-3b200bb6aac2>", line 366, in forward
    x = F.relu(self.dense1_bn(self.fc1(x)))
  File "/home/pau/.pyenv/versions/3.6.7/envs/gnn-pytorch/lib/python3.6/site-packages/torch/nn/modules/module.py", line 4

Problem training model META3
 trained model:  META3 {'d1': 3, 'd2': 10, 'd3': 15, 'd4': 60, 'd5': 10}  epochs: 800  val loss= 0.0  val accuracy= 0.0  val microF1= 0.0  val macroF1= 0.0
Problem training model META3
 trained model:  META3 {'d1': 3, 'd2': 10, 'd3': 15, 'd4': 60, 'd5': 20}  epochs: 100  val loss= 0.0  val accuracy= 0.0  val microF1= 0.0  val macroF1= 0.0
Problem training model META3
 trained model:  META3 {'d1': 3, 'd2': 10, 'd3': 15, 'd4': 60, 'd5': 20}  epochs: 200  val loss= 0.0  val accuracy= 0.0  val microF1= 0.0  val macroF1= 0.0
Problem training model META3
 trained model:  META3 {'d1': 3, 'd2': 10, 'd3': 15, 'd4': 60, 'd5': 20}  epochs: 400  val loss= 0.0  val accuracy= 0.0  val microF1= 0.0  val macroF1= 0.0


Traceback (most recent call last):
  File "/media/disk/home/pau/Projectes/GNN-MThesis/src/graph_classification/TFM_graph_classification.py", line 783, in modelSelection
    train_model(model, loader, optimizer, train_loss_history)
  File "/media/disk/home/pau/Projectes/GNN-MThesis/src/graph_classification/TFM_graph_classification.py", line 464, in train_model
    return train_model_META(model, loader, optimizer, train_loss_history)
  File "/media/disk/home/pau/Projectes/GNN-MThesis/src/graph_classification/TFM_graph_classification.py", line 448, in train_model_META
    out = model(data)
  File "/home/pau/.pyenv/versions/3.6.7/envs/gnn-pytorch/lib/python3.6/site-packages/torch/nn/modules/module.py", line 489, in __call__
    result = self.forward(*input, **kwargs)
  File "<ipython-input-2-3b200bb6aac2>", line 366, in forward
    x = F.relu(self.dense1_bn(self.fc1(x)))
  File "/home/pau/.pyenv/versions/3.6.7/envs/gnn-pytorch/lib/python3.6/site-packages/torch/nn/modules/module.py", line 4

Problem training model META3
 trained model:  META3 {'d1': 3, 'd2': 10, 'd3': 15, 'd4': 60, 'd5': 20}  epochs: 800  val loss= 0.0  val accuracy= 0.0  val microF1= 0.0  val macroF1= 0.0


Traceback (most recent call last):
  File "/media/disk/home/pau/Projectes/GNN-MThesis/src/graph_classification/TFM_graph_classification.py", line 783, in modelSelection
    train_model(model, loader, optimizer, train_loss_history)
  File "/media/disk/home/pau/Projectes/GNN-MThesis/src/graph_classification/TFM_graph_classification.py", line 464, in train_model
    return train_model_META(model, loader, optimizer, train_loss_history)
  File "/media/disk/home/pau/Projectes/GNN-MThesis/src/graph_classification/TFM_graph_classification.py", line 448, in train_model_META
    out = model(data)
  File "/home/pau/.pyenv/versions/3.6.7/envs/gnn-pytorch/lib/python3.6/site-packages/torch/nn/modules/module.py", line 489, in __call__
    result = self.forward(*input, **kwargs)
  File "<ipython-input-2-3b200bb6aac2>", line 366, in forward
    x = F.relu(self.dense1_bn(self.fc1(x)))
  File "/home/pau/.pyenv/versions/3.6.7/envs/gnn-pytorch/lib/python3.6/site-packages/torch/nn/modules/module.py", line 4

 trained model:  META3 {'d1': 3, 'd2': 20, 'd3': 15, 'd4': 15, 'd5': 10}  epochs: 100  val loss= 0.052314007033904396  val accuracy= 0.2976190476190476  val microF1= 0.3482905982905982  val macroF1= 0.34780534951141506
Problem training model META3
 trained model:  META3 {'d1': 3, 'd2': 20, 'd3': 15, 'd4': 15, 'd5': 10}  epochs: 200  val loss= 0.10477696359157562  val accuracy= 0.75  val microF1= 0.7371794871794872  val macroF1= 0.7552799025917887


Traceback (most recent call last):
  File "/media/disk/home/pau/Projectes/GNN-MThesis/src/graph_classification/TFM_graph_classification.py", line 784, in modelSelection
    val_loss_model(model, loader_val, optimizer, val_history)
  File "/media/disk/home/pau/Projectes/GNN-MThesis/src/graph_classification/TFM_graph_classification.py", line 573, in val_loss_model
    return val_loss_model_META(model, loader, optimizer, val_history)
  File "/media/disk/home/pau/Projectes/GNN-MThesis/src/graph_classification/TFM_graph_classification.py", line 566, in val_loss_model_META
    measures = F1Score(total_acc, total_gt)
  File "/media/disk/home/pau/Projectes/GNN-MThesis/src/graph_classification/TFM_graph_classification.py", line 193, in F1Score
    if pred[j] not in preddict[i] and pred[j] not in targetdict[i]:
KeyboardInterrupt


Problem training model META3
 trained model:  META3 {'d1': 3, 'd2': 20, 'd3': 15, 'd4': 15, 'd5': 10}  epochs: 400  val loss= 0.0  val accuracy= 0.0  val microF1= 0.0  val macroF1= 0.0


Traceback (most recent call last):
  File "/media/disk/home/pau/Projectes/GNN-MThesis/src/graph_classification/TFM_graph_classification.py", line 783, in modelSelection
    train_model(model, loader, optimizer, train_loss_history)
  File "/media/disk/home/pau/Projectes/GNN-MThesis/src/graph_classification/TFM_graph_classification.py", line 464, in train_model
    return train_model_META(model, loader, optimizer, train_loss_history)
  File "/media/disk/home/pau/Projectes/GNN-MThesis/src/graph_classification/TFM_graph_classification.py", line 448, in train_model_META
    out = model(data)
  File "/home/pau/.pyenv/versions/3.6.7/envs/gnn-pytorch/lib/python3.6/site-packages/torch/nn/modules/module.py", line 489, in __call__
    result = self.forward(*input, **kwargs)
  File "<ipython-input-2-3b200bb6aac2>", line 363, in forward
    ubatch = list(set([ elem.item() for elem in batch]))
  File "<ipython-input-2-3b200bb6aac2>", line 363, in <listcomp>
    ubatch = list(set([ elem.item() for el

Problem training model META3
 trained model:  META3 {'d1': 3, 'd2': 20, 'd3': 15, 'd4': 15, 'd5': 10}  epochs: 800  val loss= 0.0  val accuracy= 0.0  val microF1= 0.0  val macroF1= 0.0
Problem training model META3
 trained model:  META3 {'d1': 3, 'd2': 20, 'd3': 15, 'd4': 15, 'd5': 20}  epochs: 100  val loss= 0.0  val accuracy= 0.0  val microF1= 0.0  val macroF1= 0.0


Traceback (most recent call last):
  File "/media/disk/home/pau/Projectes/GNN-MThesis/src/graph_classification/TFM_graph_classification.py", line 783, in modelSelection
    train_model(model, loader, optimizer, train_loss_history)
  File "/media/disk/home/pau/Projectes/GNN-MThesis/src/graph_classification/TFM_graph_classification.py", line 464, in train_model
    return train_model_META(model, loader, optimizer, train_loss_history)
  File "/media/disk/home/pau/Projectes/GNN-MThesis/src/graph_classification/TFM_graph_classification.py", line 453, in train_model_META
    loss.backward()
  File "/home/pau/.pyenv/versions/3.6.7/envs/gnn-pytorch/lib/python3.6/site-packages/torch/tensor.py", line 102, in backward
    torch.autograd.backward(self, gradient, retain_graph, create_graph)
  File "/home/pau/.pyenv/versions/3.6.7/envs/gnn-pytorch/lib/python3.6/site-packages/torch/autograd/__init__.py", line 90, in backward
    allow_unreachable=True)  # allow_unreachable flag
KeyboardInterrupt
Trac

Problem training model META3
 trained model:  META3 {'d1': 3, 'd2': 20, 'd3': 15, 'd4': 15, 'd5': 20}  epochs: 200  val loss= 0.0  val accuracy= 0.0  val microF1= 0.0  val macroF1= 0.0
Problem training model META3
 trained model:  META3 {'d1': 3, 'd2': 20, 'd3': 15, 'd4': 15, 'd5': 20}  epochs: 400  val loss= 0.0  val accuracy= 0.0  val microF1= 0.0  val macroF1= 0.0


Traceback (most recent call last):
  File "/media/disk/home/pau/Projectes/GNN-MThesis/src/graph_classification/TFM_graph_classification.py", line 783, in modelSelection
    train_model(model, loader, optimizer, train_loss_history)
  File "/media/disk/home/pau/Projectes/GNN-MThesis/src/graph_classification/TFM_graph_classification.py", line 464, in train_model
    return train_model_META(model, loader, optimizer, train_loss_history)
  File "/media/disk/home/pau/Projectes/GNN-MThesis/src/graph_classification/TFM_graph_classification.py", line 448, in train_model_META
    out = model(data)
  File "/home/pau/.pyenv/versions/3.6.7/envs/gnn-pytorch/lib/python3.6/site-packages/torch/nn/modules/module.py", line 489, in __call__
    result = self.forward(*input, **kwargs)
  File "<ipython-input-2-3b200bb6aac2>", line 351, in forward
    x2, edge_attr2, u2 =  self.op(x, edge_index, edge_attr, u, batch)
  File "/home/pau/.pyenv/versions/3.6.7/envs/gnn-pytorch/lib/python3.6/site-packages/torch/nn/

Problem training model META3
 trained model:  META3 {'d1': 3, 'd2': 20, 'd3': 15, 'd4': 15, 'd5': 20}  epochs: 800  val loss= 0.0  val accuracy= 0.0  val microF1= 0.0  val macroF1= 0.0
Problem training model META3


Traceback (most recent call last):
  File "/media/disk/home/pau/Projectes/GNN-MThesis/src/graph_classification/TFM_graph_classification.py", line 783, in modelSelection
    train_model(model, loader, optimizer, train_loss_history)
  File "/media/disk/home/pau/Projectes/GNN-MThesis/src/graph_classification/TFM_graph_classification.py", line 464, in train_model
    return train_model_META(model, loader, optimizer, train_loss_history)
  File "/media/disk/home/pau/Projectes/GNN-MThesis/src/graph_classification/TFM_graph_classification.py", line 454, in train_model_META
    optimizer.step()
  File "/home/pau/.pyenv/versions/3.6.7/envs/gnn-pytorch/lib/python3.6/site-packages/torch/optim/adam.py", line 103, in step
    bias_correction1 = 1 - beta1 ** state['step']
KeyboardInterrupt

KeyboardInterrupt



In [None]:
bmodel = final_model_train(modelsdict['best_models']['loss'], train_dataset)
testresult = testModel(bmodel, test_dataset)
modelsdict['testing'][bmodel.__class__.__name__+'loss']=testresult

In [None]:
bmodel = final_model_train(modelsdict['best_models']['accuracy'], train_dataset)
testresult = testModel(bmodel, test_dataset)
modelsdict['testing'][bmodel.__class__.__name__+'accuracy']=testresult

In [None]:
bmodel = final_model_train(modelsdict['best_models']['microF1'], train_dataset)
testresult = testModel(bmodel, test_dataset)
modelsdict['testing'][bmodel.__class__.__name__+'microF1']=testresult

In [None]:
bmodel = final_model_train(modelsdict['best_models']['macroF1'], train_dataset)
testresult = testModel(bmodel, test_dataset)
modelsdict['testing'][bmodel.__class__.__name__+'macroF1']=testresult

In [None]:
reportAllTest(modelsdict)
saveResults(modelsdict)
# review microF1 & macroF1
# review Random Baseline

### Results
1. encapsulate all training, model selection,.. everything
2. present results with Pandas tables, and histograms
3. save models and results to disk, and load them later for testing
4. transform into a python module or package

### Pending:

- prepare another notebook using the python module (prepare local and on collab)
- test other GNN layers: GAT, GCN, GraphSAGE, Metalayer
- do a good HP search
- look for published architectures?
- compare with published benchmarks
