<center>
    <h2>ALTeGraD 2023<br>Lab Session 6: Deep Learning for Graphs (2/2)</h2> 21 / 11 / 2023<br>Lecture: Prof. Michalis Vazirgiannis <br>
Lab: Dr. Giannis Nikolentzos & Dr. Johannes Lutzeyer <br> <br>
<b>Student name:</b> Simon Queric
</center>

In [1]:
import time
import networkx as nx
import numpy as np
import scipy.sparse as sp
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
from random import randint

In [2]:
def create_dataset():
    """Build a small synthetic graph-classification dataset.

    For every node count from 10 to 20 (inclusive), one Erdos-Renyi graph is
    sampled with edge probability 0.2 and then one with edge probability 0.4,
    giving 22 graphs in total.

    Returns:
        tuple: ``(graphs, labels)`` where ``graphs`` is a list of networkx
        graphs and ``labels`` is a list of floats: ``1.0`` for graphs sampled
        with p = 0.2 and ``0.0`` for graphs sampled with p = 0.4.
    """
    sparse_p, dense_p = 0.2, 0.4

    # Enumerate (size, probability) pairs in the order the graphs are sampled:
    # sizes ascending, and for each size p = 0.2 before p = 0.4.
    settings = [(size, prob)
                for size in range(10, 21)
                for prob in (sparse_p, dense_p)]

    graphs = [nx.erdos_renyi_graph(size, prob) for size, prob in settings]
    labels = [1.0 if prob == sparse_p else 0.0 for _, prob in settings]
    return graphs, labels


def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """Convert a SciPy sparse matrix into a float32 torch sparse COO tensor.

    Args:
        sparse_mx: a SciPy sparse matrix (any format exposing ``tocoo()``).

    Returns:
        A ``torch`` sparse COO tensor with the same shape as ``sparse_mx``,
        int64 indices and float32 values.
    """
    coo = sparse_mx.tocoo().astype(np.float32)

    # Row 0 holds the row coordinates, row 1 the column coordinates.
    coordinates = np.vstack((coo.row, coo.col)).astype(np.int64)

    return torch.sparse_coo_tensor(
        torch.from_numpy(coordinates),
        torch.from_numpy(coo.data),
        torch.Size(coo.shape),
    )

In [3]:
class GNN(nn.Module):
    """Graph classifier: two aggregation rounds, a sum readout and an MLP head.

    Each aggregation round multiplies the node features by ``adj`` and passes
    the result through a linear layer and a ReLU. Node representations are
    then summed per graph and fed through two further linear layers.
    """

    def __init__(self, input_dim, hidden_dim_1, hidden_dim_2, hidden_dim_3, n_class):
        super().__init__()
        # Node-level layers (applied after each neighbourhood aggregation).
        self.fc1 = nn.Linear(input_dim, hidden_dim_1)
        self.fc2 = nn.Linear(hidden_dim_1, hidden_dim_2)
        # Graph-level layers (applied after the readout).
        self.fc3 = nn.Linear(hidden_dim_2, hidden_dim_3)
        self.fc4 = nn.Linear(hidden_dim_3, n_class)
        self.relu = nn.ReLU()

    def forward(self, x_in, adj, idx):
        """Return per-graph log-probabilities.

        Args:
            x_in: node feature matrix, one row per node of the batch.
            adj: adjacency matrix of the whole batch (multiplied with ``x_in``).
            idx: 1-D integer tensor giving, for each node, the index of the
                graph it belongs to.

        Returns:
            Tensor with ``max(idx) + 1`` rows and ``n_class`` columns holding
            log-softmax scores.
        """

        ############## Task 2

        # Two rounds of neighbourhood aggregation.
        hidden = self.relu(self.fc1(adj @ x_in))
        hidden = self.relu(self.fc2(adj @ hidden))

        # Sum readout: add every node's representation to its graph's row.
        n_graphs = int(idx.max()) + 1
        width = hidden.size(1)
        scatter_index = idx.unsqueeze(1).repeat(1, width)
        pooled = torch.zeros(n_graphs, width, device=x_in.device)
        pooled.scatter_add_(0, scatter_index, hidden)

        logits = self.fc4(self.relu(self.fc3(pooled)))
        return F.log_softmax(logits, dim=1)

In [5]:
# Initializes device
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Hyperparameters
epochs = 200
batch_size = 8
n_hidden_1 = 16
n_hidden_2 = 32
n_hidden_3 = 32
learning_rate = 0.01

# Generates synthetic dataset
Gs, y = create_dataset()
n_class = np.unique(y).size

# Splits the dataset into a training and a test set
G_train, G_test, y_train, y_test = train_test_split(Gs, y, test_size=0.1)

N_train = len(G_train)
N_test = len(G_test)

# Initializes model and optimizer
model = GNN(1, n_hidden_1, n_hidden_2, n_hidden_3, n_class).to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
loss_function = nn.CrossEntropyLoss()


def make_batch(graphs, labels):
    """Turn a list of graphs and their labels into one mini-batch on ``device``.

    The graphs are merged into a single block-diagonal adjacency matrix so the
    whole batch can be processed as one large disconnected graph.

    Args:
        graphs: list of networkx graphs forming the batch.
        labels: list of class labels, one per graph.

    Returns:
        tuple ``(features, adj, idx, targets)``:
            features: all-ones node features, one row per node in the batch.
            adj: sparse block-diagonal adjacency matrix of the batch.
            idx: for each node, the position of its graph within the batch.
            targets: the labels as an int64 tensor.
    """
    adj_list = [nx.adjacency_matrix(G) for G in graphs]

    # Graph membership of every node, in block-diagonal order.
    node_to_graph = list()
    for j, adj in enumerate(adj_list):
        node_to_graph += [j] * adj.shape[0]

    # Everything is moved to the model's device; otherwise the forward pass
    # fails as soon as the model lives on the GPU.
    adj = sparse_mx_to_torch_sparse_tensor(sp.block_diag(adj_list)).to(device)
    idx = torch.from_numpy(np.array(node_to_graph)).long().to(device)
    features = torch.ones(adj.shape[0], 1, device=device)
    targets = torch.from_numpy(np.array(labels)).long().to(device)
    return features, adj, idx, targets


# Trains the model
for epoch in range(epochs):
    t = time.time()
    model.train()

    train_loss = 0
    correct = 0
    count = 0
    for i in range(0, N_train, batch_size):

        ############## Task 3

        # Slicing clips at the end of the list, so the last batch may be smaller.
        features_batch, adj_batch, idx_batch, y_batch = make_batch(
            G_train[i:i + batch_size], y_train[i:i + batch_size])

        optimizer.zero_grad()
        output = model(features_batch, adj_batch, idx_batch)
        loss = loss_function(output, y_batch)
        # Weight the batch-mean loss by the batch size to get a per-sample average later.
        train_loss += loss.item() * output.size(0)
        count += output.size(0)
        preds = output.max(1)[1].type_as(y_batch)
        correct += preds.eq(y_batch).sum().item()
        loss.backward()
        optimizer.step()

    if epoch % 10 == 0:
        print('Epoch: {:04d}'.format(epoch+1),
              'loss_train: {:.4f}'.format(train_loss / count),
              'acc_train: {:.4f}'.format(correct / count),
              'time: {:.4f}s'.format(time.time() - t))

print('Optimization finished!')

# Evaluates the model
model.eval()
# Restart the timer so the reported time covers the evaluation itself rather
# than the time elapsed since the start of the last training epoch.
t = time.time()
test_loss = 0
correct = 0
count = 0
# No gradients are needed for evaluation.
with torch.no_grad():
    for i in range(0, N_test, batch_size):

        ############## Task 3

        # Batches are taken from the test split, so they are bounded by its
        # length (the slice clips at N_test), not by N_train.
        features_batch, adj_batch, idx_batch, y_batch = make_batch(
            G_test[i:i + batch_size], y_test[i:i + batch_size])

        output = model(features_batch, adj_batch, idx_batch)
        loss = loss_function(output, y_batch)
        test_loss += loss.item() * output.size(0)
        count += output.size(0)
        preds = output.max(1)[1].type_as(y_batch)
        correct += preds.eq(y_batch).sum().item()

print('loss_test: {:.4f}'.format(test_loss / count),
      'acc_test: {:.4f}'.format(correct / count),
      'time: {:.4f}s'.format(time.time() - t))

Epoch: 0001 loss_train: 3.4650 acc_train: 0.5263 time: 0.0117s
Epoch: 0011 loss_train: 0.3042 acc_train: 0.8421 time: 0.0081s
Epoch: 0021 loss_train: 0.2739 acc_train: 0.8421 time: 0.0118s
Epoch: 0031 loss_train: 0.2495 acc_train: 0.8421 time: 0.0085s
Epoch: 0041 loss_train: 0.2270 acc_train: 0.8421 time: 0.0069s
Epoch: 0051 loss_train: 0.2135 acc_train: 0.8421 time: 0.0073s
Epoch: 0061 loss_train: 0.1877 acc_train: 0.8421 time: 0.0067s
Epoch: 0071 loss_train: 0.1511 acc_train: 0.8947 time: 0.0119s
Epoch: 0081 loss_train: 0.1326 acc_train: 0.8947 time: 0.0138s
Epoch: 0091 loss_train: 0.1200 acc_train: 0.8947 time: 0.0082s
Epoch: 0101 loss_train: 0.1068 acc_train: 0.8947 time: 0.0068s
Epoch: 0111 loss_train: 0.1014 acc_train: 0.9474 time: 0.0068s
Epoch: 0121 loss_train: 0.0966 acc_train: 0.9474 time: 0.0076s
Epoch: 0131 loss_train: 0.0853 acc_train: 1.0000 time: 0.0083s
Epoch: 0141 loss_train: 0.0850 acc_train: 1.0000 time: 0.0092s
Epoch: 0151 loss_train: 0.0689 acc_train: 1.0000 time: 

In [44]:
class MessagePassing(nn.Module):
    """Message-passing layer combining a node's features with its neighbours'.

    The output is ``fc1(x) + m`` where ``m = fc2(adj @ x)`` for
    ``neighbor_aggr == 'sum'``; for ``neighbor_aggr == 'mean'`` ``m`` is
    additionally divided row-wise by the row sums of ``adj`` (node degrees
    for a 0/1 adjacency matrix).

    Args:
        input_dim: size of the input node features.
        output_dim: size of the output node features.
        neighbor_aggr: ``'sum'`` or ``'mean'``.
    """

    def __init__(self, input_dim, output_dim, neighbor_aggr):
        super(MessagePassing, self).__init__()
        self.neighbor_aggr = neighbor_aggr
        self.fc1 = nn.Linear(input_dim, output_dim)  # transforms the node itself
        self.fc2 = nn.Linear(input_dim, output_dim)  # transforms the aggregated neighbours

    def forward(self, x, adj):
        """Apply one round of message passing.

        Args:
            x: node feature matrix, one row per node.
            adj: adjacency matrix (multiplied with ``x``).

        Returns:
            Updated node feature matrix with ``output_dim`` columns.

        Raises:
            ValueError: if ``neighbor_aggr`` is neither ``'sum'`` nor ``'mean'``.
        """

        ############## Task 6

        x_node = self.fc1(x)
        m = self.fc2(adj @ x)

        if self.neighbor_aggr == 'sum':
            return x_node + m

        if self.neighbor_aggr == 'mean':
            deg = adj @ torch.ones(x.size(0), 1, device=x.device)
            # Isolated nodes have degree 0; dividing by it would yield
            # inf/NaN, so those rows are divided by 1 instead. Rows with a
            # non-zero degree are unaffected.
            deg = deg.masked_fill(deg == 0, 1.0)
            return x_node + torch.div(m, deg)

        raise ValueError(
            "neighbor_aggr must be 'sum' or 'mean', got {!r}".format(self.neighbor_aggr))


class GNN(nn.Module):
    """Graph-level model: two message-passing layers, a readout and a linear layer.

    Args:
        input_dim: size of the input node features.
        hidden_dim: size of the hidden node representations.
        output_dim: size of the per-graph output vector.
        neighbor_aggr: neighbour aggregation passed to ``MessagePassing``.
        readout: ``'sum'`` or ``'mean'`` pooling of node representations.
        dropout: dropout probability applied after the first layer.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, neighbor_aggr, readout, dropout):
        super(GNN, self).__init__()
        self.readout = readout
        self.mp1 = MessagePassing(input_dim, hidden_dim, neighbor_aggr)
        self.mp2 = MessagePassing(hidden_dim, hidden_dim, neighbor_aggr)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.dropout = nn.Dropout(dropout)
        self.relu = nn.ReLU()

    def forward(self, x, adj, idx):
        """Compute one output vector per graph of the batch.

        Args:
            x: node feature matrix, one row per node.
            adj: adjacency matrix of the whole batch.
            idx: 1-D integer tensor giving, for each node, the index of the
                graph it belongs to.

        Returns:
            Tensor with ``max(idx) + 1`` rows and ``output_dim`` columns.

        Raises:
            ValueError: if ``readout`` is neither ``'sum'`` nor ``'mean'``.
        """
        # Fail with a clear message instead of an unbound-variable error below.
        if self.readout not in ('sum', 'mean'):
            raise ValueError(
                "readout must be 'sum' or 'mean', got {!r}".format(self.readout))

        ############## Task 7
        z1 = self.relu(self.mp1(x, adj))
        x = self.dropout(z1)
        x = self.relu(self.mp2(x, adj))

        # Per-graph sum of node representations (shared by both readouts).
        n_graphs = int(torch.max(idx)) + 1
        index = idx.unsqueeze(1).repeat(1, x.size(1))
        out = torch.zeros(n_graphs, x.size(1), device=x.device)
        out = out.scatter_add_(0, index, x)

        if self.readout == 'mean':
            # Number of nodes per graph, broadcast over the feature dimension.
            count = torch.zeros(n_graphs, x.size(1), device=x.device)
            count = count.scatter_add_(0, index, torch.ones_like(x))
            # A graph id without any node has count 0 and sum 0; divide by 1
            # there so the result is 0 rather than NaN.
            count = count.masked_fill(count == 0, 1.0)
            out = torch.div(out, count)

        ############## Task 7

        out = self.fc(out)

        return out

In [53]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Hyperparameters
hidden_dim = 32
output_dim = 4
dropout = 0.0
neighbor_aggr = 'sum'
readout = 'mean'


def batch_graphs(graphs):
    """Merge a list of graphs into one batch of tensors on ``device``.

    Args:
        graphs: list of networkx graphs.

    Returns:
        tuple ``(features, adj, idx)``:
            features: all-ones node features, one row per node in the batch.
            adj: sparse block-diagonal adjacency matrix of the batch.
            idx: for each node, the position of its graph in ``graphs``.
    """
    adj_list = [nx.adjacency_matrix(G) for G in graphs]

    # Graph membership of every node, in block-diagonal order.
    node_to_graph = list()
    for j, adj in enumerate(adj_list):
        node_to_graph += [j] * adj.shape[0]

    # The inputs must live on the same device as the model.
    adj = sparse_mx_to_torch_sparse_tensor(sp.block_diag(adj_list)).to(device)
    idx = torch.from_numpy(np.array(node_to_graph)).long().to(device)
    features = torch.ones(adj.shape[0], 1, device=device)
    return features, adj, idx


############## Task 4

Gs = [nx.cycle_graph(n) for n in range(10, 20)]


############## Task 5

features_batch, adj_batch, idx_batch = batch_graphs(Gs)


############## Task 8

model = GNN(1, hidden_dim, output_dim, neighbor_aggr, readout, dropout).to(device)

out = model(features_batch, adj_batch, idx_batch)

print("Vector embeddings of the cycle graphs :")
print(out)

############## Task 9

# G1: two disjoint triangles; G2: a single cycle on 6 nodes.
G1 = nx.Graph()
G1.add_edges_from([(1, 2), (2, 3), (1, 3), (4, 5), (5, 6), (6, 4)])
G2 = nx.cycle_graph(6)


############## Task 10

features_batch, adj_batch, idx_batch = batch_graphs([G1, G2])


############## Task 11

# Only the readout changes; the other hyperparameters keep the values set
# at the top of this cell.
hidden_dim = 32
output_dim = 4
dropout = 0.0
neighbor_aggr = 'sum'
readout = 'sum'

model = GNN(1, hidden_dim, output_dim, neighbor_aggr, readout, dropout).to(device)

out = model(features_batch, adj_batch, idx_batch)

print("Vector embeddings of G1 and G2 :")
print(out)

# Same comparison with three disjoint triangles against a single 9-cycle,
# reusing the model built above.
G1 = nx.Graph()
G1.add_edges_from([(1, 2), (2, 3), (1, 3), (4, 5), (5, 6), (6, 4), (7, 8), (8, 9), (9, 7)])
G2 = nx.cycle_graph(9)

features_batch, adj_batch, idx_batch = batch_graphs([G1, G2])

out = model(features_batch, adj_batch, idx_batch)

print("Vector embeddings of G1 and G2 :")
print(out)

Vector embeddings of the cycle graphs :
tensor([[ 0.6056,  0.1566,  0.0486, -0.2543],
        [ 0.6056,  0.1566,  0.0486, -0.2543],
        [ 0.6056,  0.1566,  0.0486, -0.2543],
        [ 0.6056,  0.1566,  0.0486, -0.2543],
        [ 0.6056,  0.1566,  0.0486, -0.2543],
        [ 0.6056,  0.1566,  0.0486, -0.2543],
        [ 0.6056,  0.1566,  0.0486, -0.2543],
        [ 0.6056,  0.1566,  0.0486, -0.2543],
        [ 0.6056,  0.1566,  0.0486, -0.2543],
        [ 0.6056,  0.1566,  0.0486, -0.2543]], grad_fn=<AddmmBackward0>)
Vector embeddings of G1 and G2 :
tensor([[ 1.5040,  0.6549, -5.3989,  1.4941],
        [ 1.5040,  0.6549, -5.3989,  1.4941]], grad_fn=<AddmmBackward0>)
Vector embeddings of G1 and G2 :
tensor([[ 2.3042,  1.0128, -8.0439,  2.2944],
        [ 2.3042,  1.0128, -8.0439,  2.2944]], grad_fn=<AddmmBackward0>)
