# Description
This notebook will experiment with:
- training and comparing different GNN models for graph classification on common benchmarks (PPI, PROTEINS, ENZYMES, ...)
- compare results to publication results

Most of the experiments will be done in PyTorch/PyTorch Geometric, but some models are implemented in TensorFlow.

In [1]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.nn import MessagePassing
#from torch_geometric.nn.conv.gated_graph_conv import GatedGraphConv
from torch_geometric.nn.glob.glob import global_mean_pool, global_add_pool
import torch.nn as nn
from torch.nn import Sequential as Seq, Linear as Lin, ReLU
from torch_scatter import scatter_mean
from torch_geometric.nn import MetaLayer
from torch_geometric.datasets import TUDataset, QM9, QM7b, PPI, Planetoid, KarateClub


from TFM_graph_classification import *

# 1. Models

In [2]:

class GGNN1(torch.nn.Module):
    """Gated graph convolution followed by a two-layer dense classifier.

    Pipeline: ``GatedGraphConv`` -> ReLU -> dropout (node level) ->
    ``global_mean_pool`` (graph level) -> ``fc1`` + ReLU -> ``fc2`` ->
    log-softmax.

    Args:
        d1: ``out_channels`` of the gated graph convolution.
        d2: Width of the hidden dense layer.
        num_classes: Number of output classes.
        num_layers: ``num_layers`` forwarded to ``GatedGraphConv``.
        aggr_type: Forwarded to ``GatedGraphConv`` as ``aggr``.

    NOTE(review): ``GatedGraphConv`` is not imported in the visible cell (the
    torch_geometric import is commented out); presumably it is provided by
    ``from TFM_graph_classification import *`` -- verify.
    """

    def __init__(self, d1=50, d2=20, num_classes=6, num_layers=2, aggr_type='mean'):
        super().__init__()
        conv_options = {
            'out_channels': d1,
            'num_layers': num_layers,
            'aggr': aggr_type,
            'bias': True,
        }
        self.ggnn = GatedGraphConv(**conv_options)
        self.fc1 = nn.Linear(d1, d2)
        self.fc2 = nn.Linear(d2, num_classes)
        self.global_pool = global_mean_pool

    def forward(self, data):
        """Return log-probabilities, one row per graph in ``data.batch``."""
        node_x = data.x
        edges = data.edge_index
        graph_of_node = data.batch

        # Node-level stage: every row still belongs to a single node.
        node_x = F.relu(self.ggnn(node_x, edges))
        node_x = F.dropout(node_x, training=self.training)

        # Collapse node rows into one row per graph.
        graph_x = self.global_pool(node_x, graph_of_node)

        hidden = F.relu(self.fc1(graph_x))
        logits = self.fc2(hidden)

        # Log-softmax output so the result can be fed to nll_loss.
        return F.log_softmax(logits, dim=1)
    
class GGNN2(torch.nn.Module):
    """Gated graph convolution with a single linear classification layer.

    Pipeline: ``GatedGraphConv`` -> ReLU -> dropout (node level) ->
    ``global_mean_pool`` (graph level) -> ``fc1`` -> log-softmax.

    Args:
        d1: ``out_channels`` of the gated graph convolution.
        d2: Accepted so the constructor matches the other GGNN variants;
            it is not used by this model.
        num_classes: Number of output classes.
        num_layers: ``num_layers`` forwarded to ``GatedGraphConv``.
        aggr_type: Forwarded to ``GatedGraphConv`` as ``aggr``.

    NOTE(review): ``GatedGraphConv`` is not imported in the visible cell;
    presumably it comes from ``from TFM_graph_classification import *`` --
    verify.
    """

    def __init__(self, d1=50, d2=20, num_classes=6, num_layers=2, aggr_type='mean'):
        super().__init__()
        conv_options = {
            'out_channels': d1,
            'num_layers': num_layers,
            'aggr': aggr_type,
            'bias': True,
        }
        self.ggnn = GatedGraphConv(**conv_options)
        self.fc1 = nn.Linear(d1, num_classes)
        self.global_pool = global_mean_pool

    def forward(self, data):
        """Return log-probabilities, one row per graph in ``data.batch``."""
        node_x = data.x
        edges = data.edge_index
        graph_of_node = data.batch

        # Node-level stage: every row still belongs to a single node.
        node_x = F.relu(self.ggnn(node_x, edges))
        node_x = F.dropout(node_x, training=self.training)

        # Collapse node rows into one row per graph, then classify directly.
        logits = self.fc1(self.global_pool(node_x, graph_of_node))
        return F.log_softmax(logits, dim=1)
    
    
class GGNN3(torch.nn.Module):
    """Gated graph convolution with a three-layer, batch-normalised dense head.

    Pipeline: ``GatedGraphConv`` -> ReLU -> dropout (node level) ->
    ``global_mean_pool`` (graph level) -> ``fc1`` + BN + ReLU ->
    ``fc2`` + BN + ReLU -> ``fc3`` -> log-softmax.

    Args:
        d1: ``out_channels`` of the gated graph convolution.
        d2: Width of the first hidden dense layer.
        d3: Width of the second hidden dense layer.
        num_classes: Number of output classes.
        num_layers: ``num_layers`` forwarded to ``GatedGraphConv``.
        aggr_type: Forwarded to ``GatedGraphConv`` as ``aggr``.

    NOTE(review): ``GatedGraphConv`` is not imported in the visible cell;
    presumably it comes from ``from TFM_graph_classification import *`` --
    verify.
    """

    def __init__(self, d1=50, d2=20, d3=10, num_classes=6, num_layers=2, aggr_type='mean'):
        super().__init__()
        conv_options = {
            'out_channels': d1,
            'num_layers': num_layers,
            'aggr': aggr_type,
            'bias': True,
        }
        self.ggnn = GatedGraphConv(**conv_options)
        self.fc1 = nn.Linear(d1, d2)
        self.dense1_bn = nn.BatchNorm1d(d2)
        self.fc2 = nn.Linear(d2, d3)
        self.dense2_bn = nn.BatchNorm1d(d3)
        self.fc3 = nn.Linear(d3, num_classes)
        self.global_pool = global_mean_pool

    def forward(self, data):
        """Return log-probabilities, one row per graph in ``data.batch``."""
        node_x = data.x
        edges = data.edge_index
        graph_of_node = data.batch

        # Node-level stage: every row still belongs to a single node.
        node_x = F.relu(self.ggnn(node_x, edges))
        node_x = F.dropout(node_x, training=self.training)

        # Collapse node rows into one row per graph.
        graph_x = self.global_pool(node_x, graph_of_node)

        # Two dense blocks, each: linear -> batch norm -> ReLU.
        hidden = F.relu(self.dense1_bn(self.fc1(graph_x)))
        hidden = F.relu(self.dense2_bn(self.fc2(hidden)))

        return F.log_softmax(self.fc3(hidden), dim=1)
    
class GGNN4(torch.nn.Module):
    """Gated graph convolution with a two-layer head and one batch norm.

    Pipeline: ``GatedGraphConv`` -> ReLU -> dropout (node level) ->
    ``global_mean_pool`` (graph level) -> ``fc1`` + BN + ReLU -> ``fc2`` ->
    log-softmax.

    Args:
        d1: ``out_channels`` of the gated graph convolution.
        d2: Width of the hidden dense layer.
        num_classes: Number of output classes.
        num_layers: ``num_layers`` forwarded to ``GatedGraphConv``.
        aggr_type: Forwarded to ``GatedGraphConv`` as ``aggr``.

    NOTE(review): ``GatedGraphConv`` is not imported in the visible cell;
    presumably it comes from ``from TFM_graph_classification import *`` --
    verify.
    """

    def __init__(self, d1=50, d2=20, num_classes=6, num_layers=2, aggr_type='mean'):
        super().__init__()
        conv_options = {
            'out_channels': d1,
            'num_layers': num_layers,
            'aggr': aggr_type,
            'bias': True,
        }
        self.ggnn = GatedGraphConv(**conv_options)
        self.fc1 = nn.Linear(d1, d2)
        self.dense1_bn = nn.BatchNorm1d(d2)
        self.fc2 = nn.Linear(d2, num_classes)
        self.global_pool = global_mean_pool

    def forward(self, data):
        """Return log-probabilities, one row per graph in ``data.batch``."""
        node_x = data.x
        edges = data.edge_index
        graph_of_node = data.batch

        # Node-level stage: every row still belongs to a single node.
        node_x = F.relu(self.ggnn(node_x, edges))
        node_x = F.dropout(node_x, training=self.training)

        # Collapse node rows into one row per graph.
        graph_x = self.global_pool(node_x, graph_of_node)

        hidden = F.relu(self.dense1_bn(self.fc1(graph_x)))
        logits = self.fc2(hidden)

        # Log-softmax output so the result can be fed to nll_loss.
        return F.log_softmax(logits, dim=1)
    
class META1(torch.nn.Module):
    """One ``MetaLayer`` block classified from the updated global features ``u``.

    The three MLPs are wired into ``MetaLayer`` through closures; the
    classification head is ``fc1`` -> BN -> ReLU -> dropout -> ``fc2`` ->
    log-softmax, applied to the global output of the meta layer.

    Args:
        d1: Base size used for the edge/node MLP input widths.
        d2: Hidden width of the three MLPs.
        d3: Output width of the three MLPs.
        d4: Input width of ``fc1``; it receives the ``global_mlp`` output, so
            it has to equal ``d3``.
        d5: Hidden width of the classification head.
        num_classes: Number of output classes.

    Input widths are fixed by the ``torch.cat`` calls in the closures:
    ``edge_mlp`` takes ``[source, target, edge_attr]`` (sized ``d1 * 3``),
    ``node_mlp`` takes ``[x[col], edge_attr]`` (sized ``d1 * 6``) and
    ``global_mlp`` takes ``[u, per-graph mean of x]`` (sized ``d3 + 1``).
    NOTE(review): since ``node_mlp`` sees the edge MLP output (width ``d3``),
    ``d1 * 6`` presumably only fits combinations such as d1=3, d3=15, and
    ``d3 + 1`` assumes a single global feature -- confirm before changing sizes.
    """

    def __init__(self, d1=3, d2=50, d3=15, d4=15, d5=10, num_classes=6):
        super(META1, self).__init__()

        self.edge_mlp = Seq(Lin(d1 * 3, d2), ReLU(), Lin(d2, d3))
        self.node_mlp = Seq(Lin(d1 * 6, d2), ReLU(), Lin(d2, d3))
        self.global_mlp = Seq(Lin(d3 + 1, d2), ReLU(), Lin(d2, d3))

        self.fc1 = nn.Linear(d4, d5)
        self.dense1_bn = nn.BatchNorm1d(d5)
        self.fc2 = nn.Linear(d5, num_classes)
        # Not used by forward(); kept so the module's parameter names do not change.
        self.dense2_bn = nn.BatchNorm1d(num_classes)
        self.global_pool = global_mean_pool

        # ``batch`` is optional in the edge/node closures so they work both with
        # MetaLayer versions that call them with four arguments and with newer
        # ones that pass the batch vector as a fifth argument.
        def edge_model(source, target, edge_attr, u, batch=None):
            # source, target: [E, F_x], where E is the number of edges.
            # edge_attr: [E, F_e]
            # u: [B, F_u], where B is the number of graphs.
            out = torch.cat([source, target, edge_attr], dim=1)
            return self.edge_mlp(out)

        def node_model(x, edge_index, edge_attr, u, batch=None):
            # x: [N, F_x], where N is the number of nodes.
            # edge_index: [2, E] with max entry N - 1.
            # edge_attr: [E, F_e]
            # u: [B, F_u]
            row, col = edge_index
            out = torch.cat([x[col], edge_attr], dim=1)
            out = self.node_mlp(out)
            # Average the per-edge messages onto the node indexed by ``row``.
            return scatter_mean(out, row, dim=0, dim_size=x.size(0))

        def global_model(x, edge_index, edge_attr, u, batch):
            # x: [N, F_x], where N is the number of nodes.
            # edge_index: [2, E] with max entry N - 1.
            # edge_attr: [E, F_e]
            # u: [B, F_u]
            # batch: [N] with max entry B - 1.
            out = torch.cat([u, scatter_mean(x, batch, dim=0)], dim=1)
            return self.global_mlp(out)

        self.op = MetaLayer(edge_model, node_model, global_model)

    def forward(self, data):
        """Run the meta layer and classify each graph from its global vector.

        Side effect: ``data.x``, ``data.edge_attr`` and ``data.u`` are
        overwritten with the meta layer outputs.

        Returns:
            Log-probabilities with one row per row of the updated ``u``.
        """
        x, edge_index, edge_attr, u, batch = data.x, data.edge_index, data.edge_attr, data.u, data.batch

        # The meta layer returns updated (x, edge_attr, u).
        x2, edge_attr2, u2 = self.op(x, edge_index, edge_attr, u, batch)

        data.x = x2
        data.edge_attr = edge_attr2
        data.u = u2

        # This variant classifies from the global features only.
        x = F.relu(self.dense1_bn(self.fc1(u2)))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)
        
    
    
class META2(torch.nn.Module):
    """One ``MetaLayer`` block classified from the mean-pooled node features.

    The three MLPs are wired into ``MetaLayer`` through closures; the
    classification head is ``global_mean_pool`` -> ``fc1`` -> BN -> ReLU ->
    ``fc2`` -> log-softmax, applied to the node output of the meta layer.

    Args:
        d1: Base size used for the edge/node MLP input widths.
        d2: Hidden width of the three MLPs.
        d3: Output width of the three MLPs.
        d4: Input width of ``fc1``; it receives pooled ``node_mlp`` output, so
            it has to equal ``d3``.
        d5: Hidden width of the classification head.
        num_classes: Number of output classes.

    Input widths are fixed by the ``torch.cat`` calls in the closures:
    ``edge_mlp`` takes ``[source, target, edge_attr]`` (sized ``d1 * 3``),
    ``node_mlp`` takes ``[x[col], edge_attr]`` (sized ``d1 * 6``) and
    ``global_mlp`` takes ``[u, per-graph mean of x]`` (sized ``d3 + 1``).
    NOTE(review): since ``node_mlp`` sees the edge MLP output (width ``d3``),
    ``d1 * 6`` presumably only fits combinations such as d1=3, d3=15, and
    ``d3 + 1`` assumes a single global feature -- confirm before changing sizes.
    """

    def __init__(self, d1=3, d2=50, d3=15, d4=15, d5=10, num_classes=6):
        super(META2, self).__init__()

        self.edge_mlp = Seq(Lin(d1 * 3, d2), ReLU(), Lin(d2, d3))
        self.node_mlp = Seq(Lin(d1 * 6, d2), ReLU(), Lin(d2, d3))
        self.global_mlp = Seq(Lin(d3 + 1, d2), ReLU(), Lin(d2, d3))

        self.fc1 = nn.Linear(d4, d5)
        self.dense1_bn = nn.BatchNorm1d(d5)
        self.fc2 = nn.Linear(d5, num_classes)
        # Not used by forward(); kept so the module's parameter names do not change.
        self.dense2_bn = nn.BatchNorm1d(num_classes)
        self.global_pool = global_mean_pool

        # ``batch`` is optional in the edge/node closures so they work both with
        # MetaLayer versions that call them with four arguments and with newer
        # ones that pass the batch vector as a fifth argument.
        def edge_model(source, target, edge_attr, u, batch=None):
            # source, target: [E, F_x], where E is the number of edges.
            # edge_attr: [E, F_e]
            # u: [B, F_u], where B is the number of graphs.
            out = torch.cat([source, target, edge_attr], dim=1)
            return self.edge_mlp(out)

        def node_model(x, edge_index, edge_attr, u, batch=None):
            # x: [N, F_x], where N is the number of nodes.
            # edge_index: [2, E] with max entry N - 1.
            # edge_attr: [E, F_e]
            # u: [B, F_u]
            row, col = edge_index
            out = torch.cat([x[col], edge_attr], dim=1)
            out = self.node_mlp(out)
            # Average the per-edge messages onto the node indexed by ``row``.
            return scatter_mean(out, row, dim=0, dim_size=x.size(0))

        def global_model(x, edge_index, edge_attr, u, batch):
            # x: [N, F_x], where N is the number of nodes.
            # edge_index: [2, E] with max entry N - 1.
            # edge_attr: [E, F_e]
            # u: [B, F_u]
            # batch: [N] with max entry B - 1.
            out = torch.cat([u, scatter_mean(x, batch, dim=0)], dim=1)
            return self.global_mlp(out)

        self.op = MetaLayer(edge_model, node_model, global_model)

    def forward(self, data):
        """Run the meta layer and classify each graph from its pooled nodes.

        Side effect: ``data.x``, ``data.edge_attr`` and ``data.u`` are
        overwritten with the meta layer outputs.

        Returns:
            Log-probabilities with one row per graph in ``data.batch``.
        """
        x, edge_index, edge_attr, u, batch = data.x, data.edge_index, data.edge_attr, data.u, data.batch

        # The meta layer returns updated (x, edge_attr, u).
        x2, edge_attr2, u2 = self.op(x, edge_index, edge_attr, u, batch)

        data.x = x2
        data.edge_attr = edge_attr2
        data.u = u2

        # This variant classifies from the node features only, pooled per graph.
        x = self.global_pool(x2, batch)
        x = F.relu(self.dense1_bn(self.fc1(x)))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)
    

    
class META3(torch.nn.Module):
    """One ``MetaLayer`` block classified from node and global features together.

    The updated node rows and the updated global rows are stacked, pooled
    per graph with ``global_mean_pool`` and passed through ``fc1`` -> BN ->
    ReLU -> ``fc2`` -> log-softmax.

    Args:
        d1: Base size used for the edge/node MLP input widths.
        d2: Hidden width of the three MLPs.
        d3: Output width of the three MLPs.
        d4: Input width of ``fc1``; it receives pooled rows of width ``d3``,
            so it has to equal ``d3``.
        d5: Hidden width of the classification head.
        num_classes: Number of output classes.

    Input widths are fixed by the ``torch.cat`` calls in the closures:
    ``edge_mlp`` takes ``[source, target, edge_attr]`` (sized ``d1 * 3``),
    ``node_mlp`` takes ``[x[col], edge_attr]`` (sized ``d1 * 6``) and
    ``global_mlp`` takes ``[u, per-graph mean of x]`` (sized ``d3 + 1``).
    NOTE(review): since ``node_mlp`` sees the edge MLP output (width ``d3``),
    ``d1 * 6`` presumably only fits combinations such as d1=3, d3=15, and
    ``d3 + 1`` assumes a single global feature -- confirm before changing sizes.
    """

    def __init__(self, d1=3, d2=50, d3=15, d4=15, d5=10, num_classes=6):
        super(META3, self).__init__()

        self.edge_mlp = Seq(Lin(d1 * 3, d2), ReLU(), Lin(d2, d3))
        self.node_mlp = Seq(Lin(d1 * 6, d2), ReLU(), Lin(d2, d3))
        self.global_mlp = Seq(Lin(d3 + 1, d2), ReLU(), Lin(d2, d3))

        self.fc1 = nn.Linear(d4, d5)
        self.dense1_bn = nn.BatchNorm1d(d5)
        self.fc2 = nn.Linear(d5, num_classes)
        # Not used by forward(); kept so the module's parameter names do not change.
        self.dense2_bn = nn.BatchNorm1d(num_classes)
        self.global_pool = global_mean_pool

        # ``batch`` is optional in the edge/node closures so they work both with
        # MetaLayer versions that call them with four arguments and with newer
        # ones that pass the batch vector as a fifth argument.
        def edge_model(source, target, edge_attr, u, batch=None):
            # source, target: [E, F_x], where E is the number of edges.
            # edge_attr: [E, F_e]
            # u: [B, F_u], where B is the number of graphs.
            out = torch.cat([source, target, edge_attr], dim=1)
            return self.edge_mlp(out)

        def node_model(x, edge_index, edge_attr, u, batch=None):
            # x: [N, F_x], where N is the number of nodes.
            # edge_index: [2, E] with max entry N - 1.
            # edge_attr: [E, F_e]
            # u: [B, F_u]
            row, col = edge_index
            out = torch.cat([x[col], edge_attr], dim=1)
            out = self.node_mlp(out)
            # Average the per-edge messages onto the node indexed by ``row``.
            return scatter_mean(out, row, dim=0, dim_size=x.size(0))

        def global_model(x, edge_index, edge_attr, u, batch):
            # x: [N, F_x], where N is the number of nodes.
            # edge_index: [2, E] with max entry N - 1.
            # edge_attr: [E, F_e]
            # u: [B, F_u]
            # batch: [N] with max entry B - 1.
            out = torch.cat([u, scatter_mean(x, batch, dim=0)], dim=1)
            return self.global_mlp(out)

        self.op = MetaLayer(edge_model, node_model, global_model)

    def forward(self, data):
        """Run the meta layer and classify from stacked node + global rows.

        Side effect: ``data.x``, ``data.edge_attr`` and ``data.u`` are
        overwritten with the meta layer outputs.

        Returns:
            Log-probabilities with one row per graph.
        """
        x, edge_index, edge_attr, u, batch = data.x, data.edge_index, data.edge_attr, data.u, data.batch

        # The meta layer returns updated (x, edge_attr, u).
        x2, edge_attr2, u2 = self.op(x, edge_index, edge_attr, u, batch)

        data.x = x2
        data.edge_attr = edge_attr2
        data.u = u2

        # This variant uses both x and u: the global rows are appended below the
        # node rows and take part in the per-graph mean.
        x = F.relu(torch.cat([x2, u2], dim=0))
        x = F.dropout(x, training=self.training)

        # Row i of u2 belongs to graph i. Build those ids directly on the same
        # device as ``batch``: this avoids a device mismatch when CUDA is
        # available but the data lives on the CPU, the per-element Python loop,
        # and any reliance on the iteration order of a set.
        graph_ids = torch.arange(u2.size(0), device=batch.device, dtype=batch.dtype)
        x = self.global_pool(x, torch.cat([batch, graph_ids], dim=0))

        x = F.relu(self.dense1_bn(self.fc1(x)))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)
    
class META4(torch.nn.Module):
    """One ``MetaLayer`` block classified from node and global features together.

    The updated node rows and the updated global rows are stacked, pooled
    per graph with ``global_mean_pool`` and passed through ``fc1`` -> BN ->
    ReLU -> ``fc2`` -> log-softmax.

    Args:
        d1: Base size used for the edge/node MLP input widths.
        d2: Hidden width of the three MLPs.
        d3: Output width of the three MLPs.
        d4: Input width of ``fc1``; it receives pooled rows of width ``d3``,
            so it has to equal ``d3``.
        d5: Hidden width of the classification head.
        num_classes: Number of output classes.

    Input widths are fixed by the ``torch.cat`` calls in the closures:
    ``edge_mlp`` takes ``[source, target, edge_attr]`` (sized ``d1 * 3``),
    ``node_mlp`` takes ``[x[col], edge_attr]`` (sized ``d1 * 6``) and
    ``global_mlp`` takes ``[u, per-graph mean of x]`` (sized ``d3 + 1``).
    NOTE(review): since ``node_mlp`` sees the edge MLP output (width ``d3``),
    ``d1 * 6`` presumably only fits combinations such as d1=3, d3=15, and
    ``d3 + 1`` assumes a single global feature -- confirm before changing sizes.

    TODO(review): the original note on this variant mentions edge_attr as a
    third input of the head, but only x and u are combined in forward().
    """

    def __init__(self, d1=3, d2=50, d3=15, d4=15, d5=10, num_classes=6):
        super(META4, self).__init__()

        self.edge_mlp = Seq(Lin(d1 * 3, d2), ReLU(), Lin(d2, d3))
        self.node_mlp = Seq(Lin(d1 * 6, d2), ReLU(), Lin(d2, d3))
        # Was a literal 16; d3 + 1 is the same value at the default d3=15 and
        # matches how the sibling META models size this layer.
        self.global_mlp = Seq(Lin(d3 + 1, d2), ReLU(), Lin(d2, d3))

        self.fc1 = nn.Linear(d4, d5)
        self.dense1_bn = nn.BatchNorm1d(d5)
        self.fc2 = nn.Linear(d5, num_classes)
        # Not used by forward(); kept so the module's parameter names do not change.
        self.dense2_bn = nn.BatchNorm1d(num_classes)
        self.global_pool = global_mean_pool

        # ``batch`` is optional in the edge/node closures so they work both with
        # MetaLayer versions that call them with four arguments and with newer
        # ones that pass the batch vector as a fifth argument.
        def edge_model(source, target, edge_attr, u, batch=None):
            # source, target: [E, F_x], where E is the number of edges.
            # edge_attr: [E, F_e]
            # u: [B, F_u], where B is the number of graphs.
            out = torch.cat([source, target, edge_attr], dim=1)
            return self.edge_mlp(out)

        def node_model(x, edge_index, edge_attr, u, batch=None):
            # x: [N, F_x], where N is the number of nodes.
            # edge_index: [2, E] with max entry N - 1.
            # edge_attr: [E, F_e]
            # u: [B, F_u]
            row, col = edge_index
            out = torch.cat([x[col], edge_attr], dim=1)
            out = self.node_mlp(out)
            # Average the per-edge messages onto the node indexed by ``row``.
            return scatter_mean(out, row, dim=0, dim_size=x.size(0))

        def global_model(x, edge_index, edge_attr, u, batch):
            # x: [N, F_x], where N is the number of nodes.
            # edge_index: [2, E] with max entry N - 1.
            # edge_attr: [E, F_e]
            # u: [B, F_u]
            # batch: [N] with max entry B - 1.
            out = torch.cat([u, scatter_mean(x, batch, dim=0)], dim=1)
            return self.global_mlp(out)

        self.op = MetaLayer(edge_model, node_model, global_model)

    def forward(self, data):
        """Run the meta layer and classify from stacked node + global rows.

        Side effect: ``data.x``, ``data.edge_attr`` and ``data.u`` are
        overwritten with the meta layer outputs.

        Returns:
            Log-probabilities with one row per graph.
        """
        x, edge_index, edge_attr, u, batch = data.x, data.edge_index, data.edge_attr, data.u, data.batch

        # The meta layer returns updated (x, edge_attr, u).
        x2, edge_attr2, u2 = self.op(x, edge_index, edge_attr, u, batch)

        data.x = x2
        data.edge_attr = edge_attr2
        data.u = u2

        # The global rows are appended below the node rows and take part in the
        # per-graph mean.
        x = F.relu(torch.cat([x2, u2], dim=0))
        x = F.dropout(x, training=self.training)

        # The stacked tensor has one extra row per graph, so the pooling index
        # needs one extra entry per graph as well (row i of u2 -> graph i).
        # Pooling with the bare ``batch`` vector leaves the index shorter than
        # the feature tensor.
        graph_ids = torch.arange(u2.size(0), device=batch.device, dtype=batch.dtype)
        x = self.global_pool(x, torch.cat([batch, graph_ids], dim=0))

        x = F.relu(self.dense1_bn(self.fc1(x)))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)
    
    
class META5(torch.nn.Module):
    """Bare ``MetaLayer`` block whose MLPs ignore the incoming edge attributes.

    Unlike the other META models there is no classification head: forward()
    returns the meta layer output unchanged.

    Args:
        d1: Node feature width; ``edge_mlp`` takes ``[source, target]``
            (``d1 * 2``) and ``node_mlp`` takes ``x[col]`` (``d1``).
        d2: Hidden width of the three MLPs.
        d3: Output width of the three MLPs.

    ``global_mlp`` takes ``[u, per-graph mean of x]``; at that point ``x`` is
    the ``node_mlp`` output (width ``d3``), so its input is sized ``d3 + 1``
    like in the sibling META models. NOTE(review): the ``+ 1`` assumes a
    single global feature per graph -- confirm against the data.
    """

    def __init__(self, d1=3, d2=50, d3=15):
        super(META5, self).__init__()

        self.edge_mlp = Seq(Lin(d1 * 2, d2), ReLU(), Lin(d2, d3))
        self.node_mlp = Seq(Lin(d1, d2), ReLU(), Lin(d2, d3))
        # Was a literal 2, which only matches the concatenation below for d3=1.
        self.global_mlp = Seq(Lin(d3 + 1, d2), ReLU(), Lin(d2, d3))

        # ``batch`` is optional in the edge/node closures so they work both with
        # MetaLayer versions that call them with four arguments and with newer
        # ones that pass the batch vector as a fifth argument.
        def edge_model(source, target, edge_attr, u, batch=None):
            # source, target: [E, F_x], where E is the number of edges.
            # edge_attr: [E, F_e] (ignored)
            # u: [B, F_u], where B is the number of graphs.
            out = torch.cat([source, target], dim=1)
            return self.edge_mlp(out)

        def node_model(x, edge_index, edge_attr, u, batch=None):
            # x: [N, F_x], where N is the number of nodes.
            # edge_index: [2, E] with max entry N - 1.
            # edge_attr: [E, F_e] (ignored)
            # u: [B, F_u]
            row, col = edge_index
            out = torch.cat([x[col]], dim=1)
            out = self.node_mlp(out)
            # Average the per-edge messages onto the node indexed by ``row``.
            return scatter_mean(out, row, dim=0, dim_size=x.size(0))

        def global_model(x, edge_index, edge_attr, u, batch):
            # x: [N, F_x], where N is the number of nodes.
            # edge_index: [2, E] with max entry N - 1.
            # edge_attr: [E, F_e]
            # u: [B, F_u]
            # batch: [N] with max entry B - 1.
            out = torch.cat([u, scatter_mean(x, batch, dim=0)], dim=1)
            return self.global_mlp(out)

        self.op = MetaLayer(edge_model, node_model, global_model)

    def forward(self, x, edge_index, edge_attr, u, batch):
        """Apply the meta layer and return whatever it returns."""
        return self.op(x, edge_index, edge_attr, u, batch)

In [3]:

def _ggnn_grid(modelclass):
    """Return the hyper-parameter grid for one GGNN model class.

    One config dict is produced per combination, in the same order as nested
    loops over d1, d2, aggr_type, epochs, num_layers, lr and wd (outermost
    first; ``itertools.product`` varies the last factor fastest).
    """
    from itertools import product

    combos = product(
        [25, 50],         # d1
        [20, 50],         # d2
        ['mean', 'add'],  # aggr_type
        [300, 600],       # epochs
        [2, 4, 8],        # num_layers
        [0.01],           # lr
        [5e-4],           # wd
    )
    return [
        {
            'model': modelclass,
            'epochs': epochs,
            'kwargs': {'d1': d1, 'd2': d2, 'num_layers': num_layers,
                       'aggr_type': aggr_type},
            'learning_rate': lr, 'weight_decay': wd,
            'batch_size': 32,
        }
        for d1, d2, aggr_type, epochs, num_layers, lr, wd in combos
    ]


def _meta_grid(modelclass):
    """Return the hyper-parameter grid for one META model class.

    One config dict is produced per combination, in the same order as nested
    loops over d1, d2, d3, d4, d5 and epochs (outermost first).
    """
    from itertools import product

    combos = product(
        [3],           # d1
        [10, 20, 50],  # d2
        [15],          # d3
        [15],          # d4
        [10, 20],      # d5
        [300, 600],    # epochs
    )
    return [
        {
            'epochs': epochs,
            'model': modelclass,
            'kwargs': {'d1': d1, 'd2': d2, 'd3': d3, 'd4': d4, 'd5': d5},
            'learning_rate': 0.01, 'weight_decay': 5e-4, 'batch_size': 64,
        }
        for d1, d2, d3, d4, d5, epochs in combos
    ]


# Candidate configurations, one list per model class.
model_list21 = _ggnn_grid(GGNN1)
model_list22 = _ggnn_grid(GGNN2)
model_list23 = _ggnn_grid(GGNN3)
model_list24 = _ggnn_grid(GGNN4)
model_list25 = _meta_grid(META3)
model_list26 = _meta_grid(META2)
                              
                               



In [4]:
# Load the PROTEINS benchmark through TUDataset (stored under /tmp/PROTEINS)
# and shuffle it before splitting.
thename='PROTEINS'
dataset = TUDataset(root='/tmp/'+thename, name=thename)
dataset = dataset.shuffle()
# k is passed to modelSelection in the cells below (labelled "k folds" in the printout).
k = 3
n = len(dataset)
print(" n:",n," k folds=",k)
# Train/test split done by a helper from TFM_graph_classification.
# NOTE(review): presumably prop=0.8 is the training fraction and the split keeps
# the class proportions (the recorded counts 890/223 of 1113 fit that) -- verify.
train_dataset, test_dataset = balancedDatasetSplit_slice(dataset, prop=0.8)
print("Datasets balancing: ")
# Class counts of the full set, the training split and the test split, in that order.
printDatasetBalance(dataset )
printDatasetBalance(train_dataset )
printDatasetBalance(test_dataset )
print()




 n: 1113  k folds= 3
Datasets balancing: 
{0: 663, 1: 450}
{0: 530, 1: 360}
{0: 133, 1: 90}

 trained model:  GGNN1 {'d1': 25, 'd2': 20, 'num_layers': 2, 'aggr_type': 'mean'}  epochs: 300  val loss= 0.021165318166216213  val accuracy= 0.6666666666666666  val microF1= 0.7117117117117117  val macroF1= 0.7016542824156032
 trained model:  GGNN1 {'d1': 25, 'd2': 20, 'num_layers': 4, 'aggr_type': 'mean'}  epochs: 300  val loss= 0.02079047014315923  val accuracy= 0.6666666666666666  val microF1= 0.6936936936936937  val macroF1= 0.6771271184683938
 trained model:  GGNN1 {'d1': 25, 'd2': 20, 'num_layers': 8, 'aggr_type': 'mean'}  epochs: 300  val loss= 0.022811940560738247  val accuracy= 0.5833333333333334  val microF1= 0.596846846846847  val macroF1= 0.45573775631242713
 trained model:  GGNN1 {'d1': 25, 'd2': 20, 'num_layers': 2, 'aggr_type': 'mean'}  epochs: 600  val loss= 0.021463000526030857  val accuracy= 0.875  val microF1= 0.7353603603603603  val macroF1= 0.720512132252419
 trained model

 trained model:  GGNN1 {'d1': 50, 'd2': 50, 'num_layers': 4, 'aggr_type': 'mean'}  epochs: 300  val loss= 0.021363036707043648  val accuracy= 0.625  val microF1= 0.6531531531531533  val macroF1= 0.5735633628278833
 trained model:  GGNN1 {'d1': 50, 'd2': 50, 'num_layers': 8, 'aggr_type': 'mean'}  epochs: 300  val loss= 0.022740958879391353  val accuracy= 0.5833333333333334  val microF1= 0.5945945945945946  val macroF1= 0.37288135593220345
 trained model:  GGNN1 {'d1': 50, 'd2': 50, 'num_layers': 2, 'aggr_type': 'mean'}  epochs: 600  val loss= 0.019932031631469727  val accuracy= 0.5416666666666666  val microF1= 0.7218468468468467  val macroF1= 0.7065767776543134
 trained model:  GGNN1 {'d1': 50, 'd2': 50, 'num_layers': 4, 'aggr_type': 'mean'}  epochs: 600  val loss= 0.019462216024597485  val accuracy= 0.7916666666666666  val microF1= 0.7229729729729729  val macroF1= 0.7068958342007553
 trained model:  GGNN1 {'d1': 50, 'd2': 50, 'num_layers': 8, 'aggr_type': 'mean'}  epochs: 600  val loss

In [17]:

# Model selection over the GGNN1 grid on the training split, then report it.
# NOTE(review): this is the only call that passes balanced=False; the GGNN2-4
# cells rely on the helper's default -- confirm the difference is intended.
modelsdict1 = modelSelection(model_list21,k, train_dataset, balanced=False)
reportModelSelectionResult(modelsdict1)
#!cp -r models /content/drive/My\ Drive/TFM/graph_classification/
#!cp -r results /content/drive/My\ Drive/TFM/graph_classification/



 trained model:  GGNN1 {'d1': 25, 'd2': 20, 'num_layers': 2, 'aggr_type': 'mean'}  epochs: 300  val loss= 0.019131358712911606  val accuracy= 0.7916666666666666  val microF1= 0.7432432432432433  val macroF1= 0.732588097620924
 trained model:  GGNN1 {'d1': 25, 'd2': 20, 'num_layers': 4, 'aggr_type': 'mean'}  epochs: 300  val loss= 0.020590096712112427  val accuracy= 0.6666666666666666  val microF1= 0.6914414414414414  val macroF1= 0.6816959011782355
 trained model:  GGNN1 {'d1': 25, 'd2': 20, 'num_layers': 8, 'aggr_type': 'mean'}  epochs: 300  val loss= 0.022912309194604557  val accuracy= 0.5833333333333334  val microF1= 0.5945945945945946  val macroF1= 0.37288135593220345
 trained model:  GGNN1 {'d1': 25, 'd2': 20, 'num_layers': 2, 'aggr_type': 'mean'}  epochs: 600  val loss= 0.02257305011153221  val accuracy= 0.6666666666666666  val microF1= 0.7286036036036037  val macroF1= 0.7160370694372511
 trained model:  GGNN1 {'d1': 25, 'd2': 20, 'num_layers': 4, 'aggr_type': 'mean'}  epochs: 60

 trained model:  GGNN1 {'d1': 50, 'd2': 50, 'num_layers': 4, 'aggr_type': 'mean'}  epochs: 300  val loss= 0.021439513191580772  val accuracy= 0.5833333333333334  val microF1= 0.6677927927927927  val macroF1= 0.6536367795328625
 trained model:  GGNN1 {'d1': 50, 'd2': 50, 'num_layers': 8, 'aggr_type': 'mean'}  epochs: 300  val loss= 0.022908736641208332  val accuracy= 0.625  val microF1= 0.5945945945945946  val macroF1= 0.37288135593220345
 trained model:  GGNN1 {'d1': 50, 'd2': 50, 'num_layers': 2, 'aggr_type': 'mean'}  epochs: 600  val loss= 0.0213920921087265  val accuracy= 0.625  val microF1= 0.713963963963964  val macroF1= 0.7015546165356698
 trained model:  GGNN1 {'d1': 50, 'd2': 50, 'num_layers': 4, 'aggr_type': 'mean'}  epochs: 600  val loss= 0.02169240452349186  val accuracy= 0.75  val microF1= 0.6385135135135135  val macroF1= 0.5440757897245487
 trained model:  GGNN1 {'d1': 50, 'd2': 50, 'num_layers': 8, 'aggr_type': 'mean'}  epochs: 600  val loss= 0.02283031182984511  val accu

In [5]:

# Model selection over the GGNN2 grid on the training split, then report it.
modelsdict2 = modelSelection(model_list22,k, train_dataset)
reportModelSelectionResult(modelsdict2)
#!cp -r models /content/drive/My\ Drive/TFM/graph_classification/
#!cp -r results /content/drive/My\ Drive/TFM/graph_classification/




 trained model:  GGNN2 {'d1': 25, 'd2': 20, 'num_layers': 2, 'aggr_type': 'mean'}  epochs: 300  val loss= 0.022138381376862526  val accuracy= 0.6333333333333333  val microF1= 0.6181157572667005  val macroF1= 0.6233140209962138
 trained model:  GGNN2 {'d1': 25, 'd2': 20, 'num_layers': 4, 'aggr_type': 'mean'}  epochs: 300  val loss= 0.021998091290394466  val accuracy= 0.6083333333333333  val microF1= 0.6660717321094679  val macroF1= 0.6715405623687148
 trained model:  GGNN2 {'d1': 25, 'd2': 20, 'num_layers': 8, 'aggr_type': 'mean'}  epochs: 300  val loss= 0.02313585641483466  val accuracy= 0.7000000000000001  val microF1= 0.49815145334013256  val macroF1= 0.4910184736913468
 trained model:  GGNN2 {'d1': 25, 'd2': 20, 'num_layers': 2, 'aggr_type': 'mean'}  epochs: 600  val loss= 0.02165457730491956  val accuracy= 0.6833333333333332  val microF1= 0.6957547169811321  val macroF1= 0.6880135096360268
 trained model:  GGNN2 {'d1': 25, 'd2': 20, 'num_layers': 4, 'aggr_type': 'mean'}  epochs: 60

 trained model:  GGNN2 {'d1': 50, 'd2': 50, 'num_layers': 4, 'aggr_type': 'mean'}  epochs: 300  val loss= 0.02265792340040207  val accuracy= 0.47500000000000003  val microF1= 0.6005864354920959  val macroF1= 0.6171070683188044
 trained model:  GGNN2 {'d1': 50, 'd2': 50, 'num_layers': 8, 'aggr_type': 'mean'}  epochs: 300  val loss= 0.023780447741349537  val accuracy= 0.525  val microF1= 0.5467661057283699  val macroF1= 0.5061740105479332
 trained model:  GGNN2 {'d1': 50, 'd2': 50, 'num_layers': 2, 'aggr_type': 'mean'}  epochs: 600  val loss= 0.02050648753841718  val accuracy= 0.4916666666666667  val microF1= 0.6802226755056943  val macroF1= 0.6799192349644736
 trained model:  GGNN2 {'d1': 50, 'd2': 50, 'num_layers': 4, 'aggr_type': 'mean'}  epochs: 600  val loss= 0.02158302813768387  val accuracy= 0.8416666666666667  val microF1= 0.6263810980792113  val macroF1= 0.5550372103023963
 trained model:  GGNN2 {'d1': 50, 'd2': 50, 'num_layers': 8, 'aggr_type': 'mean'}  epochs: 600  val loss= 0

In [6]:
# Model selection over the GGNN3 grid on the training split, then report it.
modelsdict3 = modelSelection(model_list23,k, train_dataset)
reportModelSelectionResult(modelsdict3)
#!cp -r models /content/drive/My\ Drive/TFM/graph_classification/
#!cp -r results /content/drive/My\ Drive/TFM/graph_classification/



 trained model:  GGNN3 {'d1': 25, 'd2': 20, 'num_layers': 2, 'aggr_type': 'mean'}  epochs: 300  val loss= 0.0198644424478213  val accuracy= 0.6583333333333333  val microF1= 0.7092469828318885  val macroF1= 0.6924892286615698
 trained model:  GGNN3 {'d1': 25, 'd2': 20, 'num_layers': 4, 'aggr_type': 'mean'}  epochs: 300  val loss= 0.02572790967921416  val accuracy= 0.525  val microF1= 0.5649116097229304  val macroF1= 0.39902788327984506
 trained model:  GGNN3 {'d1': 25, 'd2': 20, 'num_layers': 8, 'aggr_type': 'mean'}  epochs: 300  val loss= 0.02749832657476266  val accuracy= 0.48333333333333334  val microF1= 0.5743668196498385  val macroF1= 0.5989459729451432
 trained model:  GGNN3 {'d1': 25, 'd2': 20, 'num_layers': 2, 'aggr_type': 'mean'}  epochs: 600  val loss= 0.020576929052670796  val accuracy= 0.7666666666666666  val microF1= 0.6818162502124766  val macroF1= 0.6529564832346332
 trained model:  GGNN3 {'d1': 25, 'd2': 20, 'num_layers': 4, 'aggr_type': 'mean'}  epochs: 600  val loss= 0

 trained model:  GGNN3 {'d1': 50, 'd2': 50, 'num_layers': 4, 'aggr_type': 'mean'}  epochs: 300  val loss= 0.023100221529603004  val accuracy= 0.6749999999999999  val microF1= 0.6606323304436512  val macroF1= 0.5756081921265678
 trained model:  GGNN3 {'d1': 50, 'd2': 50, 'num_layers': 8, 'aggr_type': 'mean'}  epochs: 300  val loss= 0.02776806304852168  val accuracy= 0.525  val microF1= 0.46931837497875234  val macroF1= 0.5059990575344765
 trained model:  GGNN3 {'d1': 50, 'd2': 50, 'num_layers': 2, 'aggr_type': 'mean'}  epochs: 600  val loss= 0.02217839037378629  val accuracy= 0.7000000000000001  val microF1= 0.6635645079041306  val macroF1= 0.6599952036204638
 trained model:  GGNN3 {'d1': 50, 'd2': 50, 'num_layers': 4, 'aggr_type': 'mean'}  epochs: 600  val loss= 0.026496889690558117  val accuracy= 0.39166666666666666  val microF1= 0.43553459119496857  val macroF1= 0.37924683467699055
 trained model:  GGNN3 {'d1': 50, 'd2': 50, 'num_layers': 8, 'aggr_type': 'mean'}  epochs: 600  val los

In [7]:

# Run model selection (k folds) over the `model_list24` grid on the training
# split, keep the outcome in `modelsdict4` and print its summary.
modelsdict4 = modelSelection(
    model_list24,
    k,
    train_dataset,
)
reportModelSelectionResult(modelsdict4)
# Optional (Colab only): back up trained models and results to Google Drive.
#!cp -r models /content/drive/My\ Drive/TFM/graph_classification/
#!cp -r results /content/drive/My\ Drive/TFM/graph_classification/



 trained model:  GGNN4 {'d1': 25, 'd2': 20, 'num_layers': 2, 'aggr_type': 'mean'}  epochs: 300  val loss= 0.019712908814350765  val accuracy= 0.6916666666666668  val microF1= 0.6950535441101479  val macroF1= 0.6916366924776857
 trained model:  GGNN4 {'d1': 25, 'd2': 20, 'num_layers': 4, 'aggr_type': 'mean'}  epochs: 300  val loss= 0.024213384216030438  val accuracy= 0.5499999999999999  val microF1= 0.6038373278939316  val macroF1= 0.6136665825498611
 trained model:  GGNN4 {'d1': 25, 'd2': 20, 'num_layers': 8, 'aggr_type': 'mean'}  epochs: 300  val loss= 0.029032019898295403  val accuracy= 0.39999999999999997  val microF1= 0.5660377358490566  val macroF1= 0.3592592592592592
 trained model:  GGNN4 {'d1': 25, 'd2': 20, 'num_layers': 2, 'aggr_type': 'mean'}  epochs: 600  val loss= 0.020998502150177956  val accuracy= 0.7333333333333334  val microF1= 0.7188934217236104  val macroF1= 0.7182806385900807
 trained model:  GGNN4 {'d1': 25, 'd2': 20, 'num_layers': 4, 'aggr_type': 'mean'}  epochs: 

 trained model:  GGNN4 {'d1': 50, 'd2': 50, 'num_layers': 4, 'aggr_type': 'mean'}  epochs: 300  val loss= 0.02611234039068222  val accuracy= 0.5583333333333333  val microF1= 0.6133350331463538  val macroF1= 0.5297177149009978
 trained model:  GGNN4 {'d1': 50, 'd2': 50, 'num_layers': 8, 'aggr_type': 'mean'}  epochs: 300  val loss= 0.02923895666996638  val accuracy= 0.47500000000000003  val microF1= 0.43396226415094336  val macroF1= 0.36899599395037846
 trained model:  GGNN4 {'d1': 50, 'd2': 50, 'num_layers': 2, 'aggr_type': 'mean'}  epochs: 600  val loss= 0.020430763562520344  val accuracy= 0.7416666666666667  val microF1= 0.7076534081251062  val macroF1= 0.6840628155466053
 trained model:  GGNN4 {'d1': 50, 'd2': 50, 'num_layers': 4, 'aggr_type': 'mean'}  epochs: 600  val loss= 0.025545764714479446  val accuracy= 0.5750000000000001  val microF1= 0.5964218935917049  val macroF1= 0.5242147965936481
 trained model:  GGNN4 {'d1': 50, 'd2': 50, 'num_layers': 8, 'aggr_type': 'mean'}  epochs: 

In [8]:

# Run model selection (k folds) over the `model_list25` grid on the training
# split, keep the outcome in `modelsdict5` and print its summary.
modelsdict5 = modelSelection(
    model_list25,
    k,
    train_dataset,
)
reportModelSelectionResult(modelsdict5)
# Optional (Colab only): back up trained models and results to Google Drive.
#!cp -r models /content/drive/My\ Drive/TFM/graph_classification/
#!cp -r results /content/drive/My\ Drive/TFM/graph_classification/



 trained model:  META3 {'d1': 3, 'd2': 10, 'd3': 15, 'd4': 15, 'd5': 10}  epochs: 300  val loss= 0.009408975951373577  val accuracy= 0.725  val microF1= 0.74275454699983  val macroF1= 0.7338612667380917
Problem training model META3
 trained model:  META3 {'d1': 3, 'd2': 10, 'd3': 15, 'd4': 15, 'd5': 10}  epochs: 600  val loss= 0.0  val accuracy= 0.0  val microF1= 0.0  val macroF1= 0.0


Traceback (most recent call last):
  File "/media/disk/home/pau/Projectes/GNN-MThesis/src/graph_classification/TFM_graph_classification.py", line 917, in modelSelection
    val_loss_model(model, loader_val, optimizer, val_history)
  File "/media/disk/home/pau/Projectes/GNN-MThesis/src/graph_classification/TFM_graph_classification.py", line 699, in val_loss_model
    return val_loss_model_META(model, loader, optimizer, val_history)
  File "/media/disk/home/pau/Projectes/GNN-MThesis/src/graph_classification/TFM_graph_classification.py", line 692, in val_loss_model_META
    measures = F1Score(total_acc, total_gt)
  File "/media/disk/home/pau/Projectes/GNN-MThesis/src/graph_classification/TFM_graph_classification.py", line 171, in F1Score
    preddict[pred[i]].append(i)
KeyError: 4


 trained model:  META3 {'d1': 3, 'd2': 10, 'd3': 15, 'd4': 15, 'd5': 20}  epochs: 300  val loss= 0.009422309075792631  val accuracy= 0.8250000000000001  val microF1= 0.7314932857385688  val macroF1= 0.7272838415919992
 trained model:  META3 {'d1': 3, 'd2': 10, 'd3': 15, 'd4': 15, 'd5': 20}  epochs: 600  val loss= 0.009759203530848026  val accuracy= 0.75  val microF1= 0.7200195478497365  val macroF1= 0.7172332220506608
 trained model:  META3 {'d1': 3, 'd2': 20, 'd3': 15, 'd4': 15, 'd5': 10}  epochs: 300  val loss= 0.009310266934335232  val accuracy= 0.7583333333333333  val microF1= 0.7472590515043347  val macroF1= 0.7371908787805023
 trained model:  META3 {'d1': 3, 'd2': 20, 'd3': 15, 'd4': 15, 'd5': 10}  epochs: 600  val loss= 0.009358911464611689  val accuracy= 0.7416666666666666  val microF1= 0.73419173890872  val macroF1= 0.7275254721826167
 trained model:  META3 {'d1': 3, 'd2': 20, 'd3': 15, 'd4': 15, 'd5': 20}  epochs: 300  val loss= 0.009664067377646765  val accuracy= 0.708333333

In [9]:

# Run model selection (k folds) over the `model_list26` grid on the training
# split, keep the outcome in `modelsdict6` and print its summary.
modelsdict6 = modelSelection(
    model_list26,
    k,
    train_dataset,
)
reportModelSelectionResult(modelsdict6)
# Optional (Colab only): back up trained models and results to Google Drive.
#!cp -r models /content/drive/My\ Drive/TFM/graph_classification/
#!cp -r results /content/drive/My\ Drive/TFM/graph_classification/


 trained model:  META2 {'d1': 3, 'd2': 10, 'd3': 15, 'd4': 15, 'd5': 10}  epochs: 300  val loss= 0.010608522221446037  val accuracy= 0.6666666666666666  val microF1= 0.6880843107258201  val macroF1= 0.6748070697624392
 trained model:  META2 {'d1': 3, 'd2': 10, 'd3': 15, 'd4': 15, 'd5': 10}  epochs: 600  val loss= 0.011150006204843521  val accuracy= 0.6416666666666667  val microF1= 0.6513895971443141  val macroF1= 0.654121828433381
 trained model:  META2 {'d1': 3, 'd2': 10, 'd3': 15, 'd4': 15, 'd5': 20}  epochs: 300  val loss= 0.010687325149774551  val accuracy= 0.6333333333333333  val microF1= 0.6644356620771715  val macroF1= 0.6501722470779756
 trained model:  META2 {'d1': 3, 'd2': 10, 'd3': 15, 'd4': 15, 'd5': 20}  epochs: 600  val loss= 0.014482046787937483  val accuracy= 0.6166666666666666  val microF1= 0.566781404045555  val macroF1= 0.5480852286292531
 trained model:  META2 {'d1': 3, 'd2': 20, 'd3': 15, 'd4': 15, 'd5': 10}  epochs: 300  val loss= 0.011060274516542753  val accurac

Traceback (most recent call last):
  File "/media/disk/home/pau/Projectes/GNN-MThesis/src/graph_classification/TFM_graph_classification.py", line 917, in modelSelection
    val_loss_model(model, loader_val, optimizer, val_history)
  File "/media/disk/home/pau/Projectes/GNN-MThesis/src/graph_classification/TFM_graph_classification.py", line 699, in val_loss_model
    return val_loss_model_META(model, loader, optimizer, val_history)
  File "/media/disk/home/pau/Projectes/GNN-MThesis/src/graph_classification/TFM_graph_classification.py", line 692, in val_loss_model_META
    measures = F1Score(total_acc, total_gt)
  File "/media/disk/home/pau/Projectes/GNN-MThesis/src/graph_classification/TFM_graph_classification.py", line 171, in F1Score
    preddict[pred[i]].append(i)
KeyError: 3


 trained model:  META2 {'d1': 3, 'd2': 20, 'd3': 15, 'd4': 15, 'd5': 20}  epochs: 300  val loss= 0.012194599645833174  val accuracy= 0.6666666666666666  val microF1= 0.5873066462689104  val macroF1= 0.608856547588013
 trained model:  META2 {'d1': 3, 'd2': 20, 'd3': 15, 'd4': 15, 'd5': 20}  epochs: 600  val loss= 0.011239480848113695  val accuracy= 0.6749999999999999  val microF1= 0.6644569097399285  val macroF1= 0.6572826730337141
 trained model:  META2 {'d1': 3, 'd2': 50, 'd3': 15, 'd4': 15, 'd5': 10}  epochs: 300  val loss= 0.011374074034392834  val accuracy= 0.6666666666666666  val microF1= 0.6545554988951215  val macroF1= 0.6547204077052945
 trained model:  META2 {'d1': 3, 'd2': 50, 'd3': 15, 'd4': 15, 'd5': 10}  epochs: 600  val loss= 0.01329769721875588  val accuracy= 0.6166666666666667  val microF1= 0.640595784463709  val macroF1= 0.6542349664716814
 trained model:  META2 {'d1': 3, 'd2': 50, 'd3': 15, 'd4': 15, 'd5': 20}  epochs: 300  val loss= 0.01067391379425923  val accuracy=

In [22]:

# Gather the 'best_models' entry of each model-selection run into one list,
# in run order, and publish that same list object on `modelsdict` too.
resultsdict3 = {
    'best_models_list': [
        run['best_models']
        for run in (modelsdict1, modelsdict2, modelsdict4, modelsdict5, modelsdict6)
    ],
}
# modelsdict3 is currently excluded from the comparison:
#resultsdict3['best_models_list'].append(modelsdict3['best_models'])
modelsdict['best_models_list'] = resultsdict3['best_models_list']

In [2]:
# Debug helper: list every candidate with its cross-validation accuracy.
#print([(mod['model'].__name__, round(float(mod['cv_val_accuracy']),2) ) for m in modelsdict['best_models_list']
#      for measure,mod in m.items()])

# gather top 5 models by accuracy
#
# Each entry of modelsdict['best_models_list'] is a dict whose values are
# model descriptions carrying a 'cv_val_accuracy' field; the keys are not
# needed here, so only the values are flattened into one candidate list.
candidates = [
    mod
    for best in modelsdict['best_models_list']
    for mod in best.values()
]

# Rank on the full-precision accuracy. Rounding before sorting would turn
# models that differ only in the third decimal into ties, so the top-5 cut
# could keep the wrong ones. list.sort is stable, so real ties keep their
# original relative order.
candidates.sort(key=lambda mod: float(mod['cv_val_accuracy']), reverse=True)

# `selection` keeps its (rounded accuracy, model description) tuple layout;
# the rounding is applied only to the stored value.
selection = [
    (round(float(mod['cv_val_accuracy']), 2), mod)
    for mod in candidates[:5]
]

NameError: name 'modelsdict' is not defined

## Resuming notebook 

In [4]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.nn import MessagePassing
#from torch_geometric.nn.conv.gated_graph_conv import GatedGraphConv
from torch_geometric.nn.glob.glob import global_mean_pool, global_add_pool
import torch.nn as nn
from torch.nn import Sequential as Seq, Linear as Lin, ReLU
from torch_scatter import scatter_mean
from torch_geometric.nn import MetaLayer
from torch_geometric.datasets import TUDataset, QM9, QM7b, PPI, Planetoid, KarateClub


from TFM_graph_classification import *

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


# load dataset
# The TU benchmark named by `thename` is stored under /tmp/<name> and
# shuffled before it is split.
thename = 'PROTEINS'
dataset = TUDataset(root='/tmp/' + thename, name=thename).shuffle()
k = 3
n = len(dataset)
print(" n:",n," k folds=",k)

# 80% of the graphs go to training, the remainder to testing.
train_dataset, test_dataset = balancedDatasetSplit_slice(dataset, prop=0.8)

# Report the class balance of the full dataset and of both splits.
print("Datasets balancing: ")
for split in (dataset, train_dataset, test_dataset):
    printDatasetBalance(split)
print()


# load selection from saved models
#
# `modelsdict` is the shared results container used by the following cells:
#   'models' / 'best_models_list' -> descriptions loaded from disk
#   'testing'                     -> filled in by the final test loop
#   'best_models'                 -> starts empty here
# All four keys are created once up front, so they exist even when
# `filenames` is empty and are not rebuilt on every loop iteration.
modelsdict = {
    'models': [],
    'best_models_list': [],
    'testing': {},
    'best_models': {},
}

# NOTE(review): the first two paths are identical, so that model is loaded
# (and later trained and tested) twice. Confirm whether a different
# checkpoint was intended for one of them.
filenames =[
    './models/proteins/GGNN1__d1-25_d2-20_num_layers-2_aggr_type-mean_300_0.01_0.0005_32_date2019-04-02_15-39-45',
    './models/proteins/GGNN1__d1-25_d2-20_num_layers-2_aggr_type-mean_300_0.01_0.0005_32_date2019-04-02_15-39-45',
    './models/proteins/GGNN1__d1-25_d2-20_num_layers-2_aggr_type-mean_600_0.01_0.0005_32_date2019-04-02_04-01-15',
    './models/proteins/GGNN1__d1-25_d2-50_num_layers-2_aggr_type-mean_600_0.01_0.0005_32_date2019-04-02_04-01-15',
    './models/proteins/GGNN1__d1-50_d2-20_num_layers-2_aggr_type-add_300_0.01_0.0005_32_date2019-04-02_15-39-45',
    './models/proteins/GGNN1__d1-50_d2-20_num_layers-2_aggr_type-mean_300_0.01_0.0005_32_date2019-04-02_04-01-15',
    './models/proteins/GGNN1__d1-50_d2-20_num_layers-4_aggr_type-mean_600_0.01_0.0005_32_date2019-04-02_15-39-45',
    './models/proteins/GGNN2__d1-25_d2-20_num_layers-2_aggr_type-add_300_0.01_0.0005_32_date2019-04-02_06-02-40',
    './models/proteins/GGNN2__d1-50_d2-50_num_layers-2_aggr_type-add_600_0.01_0.0005_32_date2019-04-02_06-02-40',
    './models/proteins/GGNN2__d1-50_d2-50_num_layers-2_aggr_type-mean_600_0.01_0.0005_32_date2019-04-02_06-02-40',
    './models/proteins/GGNN2__d1-50_d2-50_num_layers-4_aggr_type-mean_600_0.01_0.0005_32_date2019-04-02_06-02-40',
    './models/proteins/GGNN3__d1-25_d2-20_num_layers-2_aggr_type-add_300_0.01_0.0005_32_date2019-04-02_08-27-20',
    './models/proteins/GGNN3__d1-25_d2-50_num_layers-2_aggr_type-add_300_0.01_0.0005_32_date2019-04-02_08-27-20',
    './models/proteins/GGNN4__d1-25_d2-20_num_layers-2_aggr_type-mean_600_0.01_0.0005_32_date2019-04-02_10-43-22',
    './models/proteins/GGNN4__d1-50_d2-20_num_layers-2_aggr_type-add_600_0.01_0.0005_32_date2019-04-02_10-43-22',
    './models/proteins/GGNN4__d1-50_d2-20_num_layers-2_aggr_type-mean_300_0.01_0.0005_32_date2019-04-02_10-43-22',
    './models/proteins/GGNN4__d1-50_d2-50_num_layers-2_aggr_type-mean_300_0.01_0.0005_32_date2019-04-02_10-43-22',
    './models/proteins/META2__d1-3_d2-10_d3-15_d4-15_d5-10_300_0.01_0.0005_64_date2019-04-02_12-52-47',
    './models/proteins/META2__d1-3_d2-20_d3-15_d4-15_d5-10_600_0.01_0.0005_64_date2019-04-02_12-52-47',
    './models/proteins/META2__d1-3_d2-50_d3-15_d4-15_d5-20_600_0.01_0.0005_64_date2019-04-02_12-52-47',
    './models/proteins/META3__d1-3_d2-10_d3-15_d4-15_d5-10_600_0.01_0.0005_64_date2019-04-02_12-33-04',
    './models/proteins/META3__d1-3_d2-10_d3-15_d4-15_d5-20_300_0.01_0.0005_64_date2019-04-02_12-33-04',
    './models/proteins/META3__d1-3_d2-20_d3-15_d4-15_d5-10_300_0.01_0.0005_64_date2019-04-02_12-33-04',
]

# Every loaded description is registered in both lists, which stay separate
# list objects as before.
for path in filenames:
    modeldict = loadModelFromFile(path)
    modelsdict['models'].append(modeldict)
    modelsdict['best_models_list'].append(modeldict)

# No cross-validation score is available for models loaded from disk, so
# each one is paired with a placeholder accuracy of 0.0 to keep the
# (accuracy, model description) tuple layout that the test loop indexes.
selection = [(0.0, m) for m in modelsdict['best_models_list']]




 n: 1113  k folds= 3
Datasets balancing: 
{0: 663, 1: 450}
{0: 530, 1: 360}
{0: 133, 1: 90}



In [5]:
# Echo the selected compute device so the notebook displays it.
device

device(type='cuda')

## Final results report

In [6]:

# For every selected entry: run the final training on the training split,
# evaluate the resulting model on the test split, and file the outcome under
# "<model class name>_<position in selection>".
for position, entry in enumerate(selection):
    bmodel = final_model_train(entry[1], train_dataset)
    testresult = testModel(bmodel, test_dataset)
    result_key = '{}_{}'.format(bmodel.__class__.__name__, position)
    modelsdict['testing'][result_key] = testresult


# Summarise all test results, then save them.
reportAllTest(modelsdict)
saveResults(modelsdict)
# Optional (Colab only): back up trained models and results to Google Drive.
#!cp -r models /content/drive/My\ Drive/TFM/graph_classification/
#!cp -r results /content/drive/My\ Drive/TFM/graph_classification/

len(test_dataset):  223
num graphs:  223
tensor([0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1,
        1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1,
        0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0,
        1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1,
        0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0,
        0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
        1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0,
        0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0,
        0, 1, 0, 0, 0, 0, 0], device='cuda:0')
tensor([1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1,
        0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1,
        1, 0, 0, 1, 1, 0, 0, 0, 

len(test_dataset):  223
num graphs:  223
tensor([1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1,
        0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
        1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
        0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0,
        0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0,
        1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1,
        0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
        0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0,
        0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0,
        0, 1, 0, 1, 1, 0, 0], device='cuda:0')
tensor([1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0,
        1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0,
        1, 0, 0, 0, 1, 0, 0, 0, 

len(test_dataset):  223
num graphs:  223
tensor([0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0,
        1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0,
        1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
        0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0,
        1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0,
        1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1,
        0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
        1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1,
        1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
        0, 1, 1, 1, 1, 0, 0], device='cuda:0')
tensor([0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0,
        1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0,
        0, 0, 1, 0, 0, 0, 0, 0, 

len(test_dataset):  223
num graphs:  223
tensor([0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0,
        0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1,
        0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1,
        0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0,
        1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1,
        0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0,
        1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
        1, 0, 0, 0, 0, 0, 0], device='cuda:0')
tensor([1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0,
        0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0,
        1, 0, 1, 1, 0, 0, 0, 1, 

len(test_dataset):  223
num graphs:  223
tensor([1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
        0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0,
        0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,
        0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0,
        0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0,
        0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1,
        1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1,
        0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0,
        0, 1, 1, 0, 1, 0, 0], device='cuda:0')
tensor([1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
        0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0,
        0, 1, 0, 0, 1, 0, 0, 0, 

Unnamed: 0,accuracy,macroF1,microF1,name
0,0.7399,0.7254,0.7399,GGNN1_0
1,0.722,0.7102,0.722,GGNN1_1
2,0.7534,0.7425,0.7534,GGNN1_2
3,0.7444,0.7293,0.7444,GGNN1_3
4,0.7399,0.7246,0.7399,GGNN1_4
5,0.7265,0.712,0.7265,GGNN1_5
6,0.5964,0.3736,0.5964,GGNN1_6
7,0.7623,0.7496,0.7623,GGNN2_7
8,0.7399,0.7249,0.7399,GGNN2_8
9,0.7399,0.7255,0.7399,GGNN2_9


## Proteins dataset benchmark

The paper "Distinguishing Enzyme Structures from Non-enzymes Without Alignments" [6] develops two algorithms that distinguish enzymes from non-enzymes with **77%** and **80%** accuracy.


The paper "Protein function prediction via graph kernels" [4] reports **72.33%** to **84.08%** accuracy.

### References
- [4] K. M. Borgwardt, C. S. Ong, S. Schoenauer, S. V. N. Vishwanathan, A. J. Smola, and H. P. Kriegel. Protein function prediction via graph kernels. Bioinformatics, 21(Suppl 1):i47–i56, Jun 2005.
- [6] P. D. Dobson and A. J. Doig. Distinguishing enzyme structures from non-enzymes without alignments. J. Mol. Biol., 330(4):771–783, Jul 2003.



### Results
1. encapsulate all training, model selection,.. everything
2. present results with Pandas tables, and histograms
3. save models and results to disk, and load them later for testing
4. transform into a python module or package
5. prepare another notebook using the python module (prepare local and on collab)
6. test other GNN layers: GAT, GCN, GraphSAGE, Metalayer
7. do a good HP search
8. repeat the HP search for PROTEINS, IMDB

### Pending:

- ko-repeat for PPI, REDDIT 
- report results (top 5 best accuracy/F1 models)
- compare with published benchmarks
- baselines (random and MLP with node features)
- look for published architectures?

