# Train a Graph Neural Network

Let's now apply everything you learned about GNNs. We are starting with some imports. `torch` and `numpy` are a must. 

`torchnet` is an extension of torch, but here we only include it to handle the graph data. Don't be too concerned about it; you won't need it for anything else.

`networkx` is an amazing library for the creation, manipulation and plotting of graphs. We briefly encountered it when we talked about eigenvalues and eigenvectors.

In [None]:
import numpy as np
import torch
import torch.nn.functional as F
from torch import nn
import torchnet as tnt
import networkx as nx

## Load data and graph utils

Here we load the MUTAG dataset as `networkx` graphs and transform it into a PyTorch dataset. Each node in the dataset carries a label from 0 to 6, which will be used as a one-hot-encoded feature vector. Of the 188 graphs, we will use 150 for training and the rest for validation. We have two classes. Don't focus too much on the data loading part. The important stuff is what comes next. Besides, the goal is to demonstrate that graph neural networks are a great fit for such data.

In [None]:
import os
import sys
# Put the MUTAG folder (one level above the working directory) on the
# module search path.
cwd = os.getcwd()
sys.path.append(cwd + '/../MUTAG')


# --- MUTAG dataset: extra graph utils and data loading helpers ---

def indices_to_one_hot(number, nb_classes, label_dummy=-1):
    """Return the one-hot encoding of ``number`` over ``nb_classes`` classes.

    The encoding is the row of the ``nb_classes`` x ``nb_classes`` identity
    matrix selected by ``number``. When ``number`` equals ``label_dummy`` an
    all-zero vector of length ``nb_classes`` is returned instead.
    """
    is_dummy = number == label_dummy
    if is_dummy:
        return np.zeros(nb_classes)
    identity = np.eye(nb_classes)
    return identity[number]

def get_graph_signal(nx_graph):
    """Stack the per-node feature vectors of a graph into a single array.

    Args:
        nx_graph: Graph whose ``nodes`` mapping associates every node with an
            attribute dict that stores the node's feature vector under the
            key ``'attr_dict'``.

    Returns:
        ``np.ndarray`` with one entry (row) per node, in the iteration order
        of ``nx_graph.nodes``.

    Raises:
        KeyError: If a node has no ``'attr_dict'`` attribute.
    """
    # Only the attribute dicts are needed; the node ids themselves are unused.
    return np.array([attrs['attr_dict'] for attrs in nx_graph.nodes.values()])


def _read_data_lines(file_path):
    """Yield the non-blank lines of ``file_path`` with surrounding whitespace stripped."""
    with open(file_path, "r") as f:
        for raw_line in f:
            line = raw_line.strip()
            if line:
                yield line


def load_data(path, ds_name, use_node_labels=True, max_node_label=10):
    """Load a graph-classification dataset stored as ``<ds_name>_*.txt`` files.

    The following files are read from ``path``:

    * ``<ds_name>_graph_indicator.txt``: line ``i`` holds the id of the graph
      that node ``i`` belongs to (node ids are 1-based line numbers).
    * ``<ds_name>_A.txt``: one comma-separated ``src, dst`` node-id pair per
      line, describing an edge.
    * ``<ds_name>_node_labels.txt``: line ``i`` holds the integer label of
      node ``i`` (only read when ``use_node_labels`` is true).
    * ``<ds_name>_graph_labels.txt``: one integer label per graph.

    Lines are stripped of surrounding whitespace before parsing, so a final
    line without a trailing newline is read correctly; blank lines are
    skipped.

    Args:
        path: Directory containing the dataset files.
        ds_name: Dataset name used as the file-name prefix.
        use_node_labels: When true, store a one-hot node label on every node
            under the ``'attr_dict'`` attribute.
        max_node_label: Length of the one-hot node-label vectors.

    Returns:
        A list of ``(graph, label)`` tuples, where ``graph`` is an
        ``nx.Graph`` and ``label`` the integer read from the graph-labels
        file.
    """
    node2graph = {}
    Gs = []

    path_graph_indicator = os.path.join(path, f"{ds_name}_graph_indicator.txt")
    path_adj = os.path.join(path, f"{ds_name}_A.txt")
    path_node_lab = os.path.join(path, f"{ds_name}_node_labels.txt")
    path_labels = os.path.join(path, f"{ds_name}_graph_labels.txt")

    # A new graph is started whenever the graph id differs from the number of
    # graphs created so far (assumes ids start at 1 and nodes are grouped by
    # graph in increasing id order -- TODO confirm for other datasets).
    for c, line in enumerate(_read_data_lines(path_graph_indicator), start=1):
        node2graph[c] = int(line)
        if not node2graph[c] == len(Gs):
            Gs.append(nx.Graph())
        Gs[-1].add_node(c)

    # Each edge is added to the graph that owns its source node; graph ids
    # are shifted by one to index into the 0-based list.
    for line in _read_data_lines(path_adj):
        edge = line.split(",")
        src, dst = int(edge[0]), int(edge[1])
        Gs[node2graph[src] - 1].add_edge(src, dst)

    if use_node_labels:
        for c, line in enumerate(_read_data_lines(path_node_lab), start=1):
            node_label = indices_to_one_hot(int(line), max_node_label)
            # Re-adding an existing node only updates its attributes.
            Gs[node2graph[c] - 1].add_node(c, attr_dict=node_label)

    labels = [int(line) for line in _read_data_lines(path_labels)]

    return list(zip(Gs, labels))

def create_loaders(dataset, batch_size, split_id, offset=-1):
    """Split ``dataset`` at ``split_id`` and wrap both halves in data loaders.

    The first ``split_id`` items form the training part and the remaining
    items the validation part. ``offset`` is forwarded as the label offset
    and ``batch_size`` as the batch size of both loaders.

    Returns:
        A ``(train_loader, val_loader)`` tuple.
    """
    train_part, val_part = dataset[:split_id], dataset[split_id:]
    train_loader = to_pytorch_dataset(train_part, offset, batch_size)
    val_loader = to_pytorch_dataset(val_part, offset, batch_size)
    return train_loader, val_loader

def to_pytorch_dataset(dataset, label_offset=0, batch_size=1):
    """Convert ``(graph, label)`` pairs into a shuffled ``DataLoader``.

    Every graph becomes a ``(features, adjacency, label)`` sample, where
    ``features`` is the float tensor of node feature vectors returned by
    ``get_graph_signal`` and ``adjacency`` is the dense adjacency matrix of
    the graph as a float tensor.

    Args:
        dataset: Iterable of ``(graph, label)`` pairs.
        label_offset: Value added to every label after ``-1`` has been
            remapped to ``0``.
        batch_size: Batch size of the returned loader.

    Returns:
        A ``torch.utils.data.DataLoader`` with ``shuffle=True`` over the
        converted samples.
    """
    samples = []
    for graph, label in dataset:
        features = torch.from_numpy(get_graph_signal(graph)).float()
        # `nx.to_numpy_matrix` was removed in NetworkX 3.0; `to_numpy_array`
        # yields the same dense adjacency as a plain ndarray.
        adjacency = torch.from_numpy(nx.to_numpy_array(graph)).float()

        # fix labels to zero-indexing
        if label == -1:
            label = 0

        label += label_offset

        samples.append((features, adjacency, label))

    dataset_tnt = tnt.dataset.ListDataset(samples)
    return torch.utils.data.DataLoader(dataset_tnt, shuffle=True, batch_size=batch_size)



# Parse the MUTAG files found in ../MUTAG; node labels are one-hot encoded
# into vectors of length 7 (max_node_label).
dataset = load_data(path='../MUTAG', ds_name='MUTAG',
                  use_node_labels=True, max_node_label=7)
# The first 150 graphs go to the training loader, the rest to the validation
# loader. NOTE: despite their names, both variables hold data loaders.
train_dataset, val_dataset = create_loaders(dataset, batch_size=1, split_id=150, offset=0)
print('Data are ready')

## GCN Layer

Now it's your turn. Below, there is a Pytorch module that represents a GCN layer. Your goal is to build that layer from scratch. For your own help, we provide you with two utility functions that create the normalized laplacian of the graph.

Remember that a GCN layer is nothing more than a matrix multiplication between the normalized graph Laplacian, the input graph signal and a learnable weight matrix. You will find the solution in the last cell of this notebook. But give it a shot. It is just a few lines of code.

In [None]:
def device_as(x, y):
    """Return ``x`` placed on the same device as ``y``."""
    target_device = y.device
    return x.to(target_device)

# tensor operations now support batched inputs
def calc_degree_matrix_norm(a):
    """Build the diagonal matrix D^(-1/2) for adjacency matrix ``a``.

    The entries of ``a`` are summed over the last dimension to obtain the
    node degrees; each degree is raised to the power -0.5 and the results
    are placed on the diagonal of a new matrix (batch dimensions are kept).
    """
    degrees = a.sum(dim=-1)
    inv_sqrt_degrees = torch.pow(degrees, -0.5)
    return torch.diag_embed(inv_sqrt_degrees)

def create_graph_lapl_norm(a):
    """Return the symmetrically normalized adjacency with self-loops.

    Computes ``D^(-1/2) (A + I) D^(-1/2)``, where ``I`` is the identity and
    ``D`` is the degree matrix of ``A + I``.

    Args:
        a: Adjacency matrix whose last two dimensions are square; leading
            batch dimensions are allowed. The tensor is NOT modified.

    Returns:
        A new tensor holding the normalized matrix.
    """
    size = a.shape[-1]
    # Out-of-place addition on purpose: `a += ...` would write the self-loops
    # into the caller's tensor, so calling this function repeatedly on the
    # same adjacency (e.g. once per stacked layer) would add the identity
    # again on every call.
    a_hat = a + device_as(torch.eye(size), a)
    D_norm = calc_degree_matrix_norm(a_hat)
    # matmul equals bmm for 3-D inputs and also accepts unbatched matrices.
    L_norm = torch.matmul(torch.matmul(D_norm, a_hat), D_norm)
    return L_norm


class GCN_Layer(nn.Module):
    """
    A simple GCN layer (exercise skeleton).

    The constructor and ``forward`` are intentionally left empty for the
    reader to fill in; as written, ``forward`` returns ``None``.
    """
    
    ### 1. BUILD YOUR GCN LAYER HERE
    def __init__(self, in_features, out_features, bias=True):
        # Only the nn.Module base initialisation happens here; no parameters
        # are created yet.
        super().__init__()

        

    def forward(self, X, A):
        """
        A: adjacency matrix
        X: graph signal
        """
       
        


## Graph Neural Network

Now let's stack 3 `GCN_Layer` in order to construct a full Graph Neural Network. The GNN is followed by a `Linear` layer that will output the final classification between the 2 categories.

In [None]:
import torch
import torch.nn as nn

class GNN(nn.Module):
    """Graph classifier made of three ``GCN_Layer`` modules and a linear head.

    The first two graph convolutions are followed by a ReLU. Dropout is
    applied to the output of the third one, the result is averaged over
    dimension 1 (the node embeddings) and fed to the final linear layer.
    """

    def __init__(self, in_features=7, hidden_dim=64, classes=2, dropout=0.5):
        super().__init__()

        # Graph convolutions: in_features -> hidden_dim -> hidden_dim -> hidden_dim
        self.conv1 = GCN_Layer(in_features, hidden_dim)
        self.conv2 = GCN_Layer(hidden_dim, hidden_dim)
        self.conv3 = GCN_Layer(hidden_dim, hidden_dim)
        # Classification head
        self.fc = nn.Linear(hidden_dim, classes)
        # Dropout probability used in forward()
        self.dropout = dropout

    def forward(self, x, A):
        """Return the class scores for graph signal ``x`` and adjacency ``A``."""
        hidden = F.relu(self.conv1(x, A))
        hidden = F.relu(self.conv2(hidden, A))
        hidden = self.conv3(hidden, A)
        hidden = F.dropout(hidden, p=self.dropout, training=self.training)
        # aggregate node embeddings
        graph_embedding = hidden.mean(dim=1)
        # final classification layer
        return self.fc(graph_embedding)


## Training loop

Finally, we have our training loop. As you can see, there is nothing out of the ordinary here. Standard training code as usual.

In [None]:
# Loss used for the graph classification; it is applied to the model outputs
# and the integer graph labels.
criterion = torch.nn.CrossEntropyLoss()
# Everything in this notebook runs on the CPU.
device = 'cpu'

print(f'Training on {device}')
# 7 input features per node, 128 hidden units (overriding the class default
# of 64) and 2 output classes.
model = GNN(in_features = 7,
                hidden_dim = 128,
                classes = 2).to(device)

# Plain SGD over all model parameters with a learning rate of 0.01.
optimizer= torch.optim.SGD(model.parameters(), lr=0.01)

def train(train_loader):
    """Run one pass over ``train_loader``, updating the global ``model``.

    For every batch the gradients are reset, the batch is moved to
    ``device``, the loss between the model output and the labels is
    computed with ``criterion`` and one ``optimizer`` step is taken.
    """
    model.train()

    for X, A, labels in train_loader:
        optimizer.zero_grad()
        X = X.to(device)
        A = A.to(device)
        labels = labels.to(device)
        # Forward pass and graph classification loss.
        loss = criterion(model(X, A), labels)
        # Backpropagate, then update the model's parameters.
        loss.backward()
        optimizer.step()

def test(loader):
    """Return the accuracy of the global ``model`` on ``loader``.

    Args:
        loader: Data loader yielding ``(X, A, labels)`` batches.

    Returns:
        Number of correctly classified samples divided by
        ``len(loader.dataset)``.
    """
    model.eval()
    correct = 0
    # Evaluation only: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        for X, A, labels in loader:
            # Same device handling as in train(), so evaluation also works
            # when `device` is not the CPU.
            X, A, labels = X.to(device), A.to(device), labels.to(device)
            # Forward pass.
            out = model(X, A)
            # Take the index of the class with the highest score.
            pred = out.argmax(dim=1)
            # Compare with ground-truth labels.
            correct += int((pred == labels).sum())
    return correct / len(loader.dataset)

# Train for 240 epochs, tracking the best validation accuracy seen so far
# and reporting the accuracies every 10th epoch.
best_val = -1
for epoch in range(1, 241):
    train(train_dataset)
    train_acc, val_acc = test(train_dataset), test(val_dataset)

    # Remember the best validation score and the epoch it was reached in.
    if val_acc > best_val:
        best_val, epoch_best = val_acc, epoch

    if not epoch % 10:
        print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f} || Best Val Score: {best_val:.4f} (Epoch {epoch_best:03d}) ')

### GCN Layer solution


In [None]:
#1. BUILD YOUR GCN LAYER
class GCN_Layer(nn.Module):
    """
    Graph convolution layer: a linear map of the node features followed by
    a multiplication with the normalized graph matrix built from the
    adjacency matrix.
    """

    def __init__(self, in_features, out_features, bias=True):
        super().__init__()
        # Learnable projection applied to the graph signal.
        self.linear = nn.Linear(in_features, out_features, bias=bias)

    def forward(self, X, A):
        """
        X: graph signal
        A: adjacency matrix

        Returns the batched product of ``create_graph_lapl_norm(A)`` with
        the linearly projected graph signal.
        """
        return torch.bmm(create_graph_lapl_norm(A), self.linear(X))