In [None]:
# Mount Google Drive inside the Colab runtime and change the working directory
# to the notebooks folder.
# NOTE(review): presumably the relative paths used later (the 'dataset' folder
# and the 'Utils' package) live under this folder -- confirm.
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/MyDrive/Colab Notebooks

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/Colab Notebooks


In [None]:
!pip  install hdf5storage



This notebook is a brief example of how to use a graph neural network (GNN) for the authorship attribution problem.

In [None]:
# First, we import everything we need to import

# Standard libraries
import os
import numpy as np
import torch; torch.set_default_dtype(torch.float64)
import torch.nn as nn

# Own libraries
import Utils.graphML as gml # This one has the GNN layers
import Utils.dataTools as dataTools # This one has the dataset

# Data

Next, we have to load the data

In [None]:
# Basic parameters of the experiment

# Author to work with. Pick one of the quoted names below:
#   jacob 'abbott'          robert louis 'stevenson'    louisa may 'alcott'
#   horatio 'alger'         james 'allen'               jane 'austen'
#   emily 'bronte'          james 'cooper'              charles 'dickens'
#   hamlin 'garland'        nathaniel 'hawthorne'       henry 'james'
#   herman 'melville'       thomas nelson 'page'        henry 'thoreau'
#   mark 'twain'            arthur conan 'doyle'        washington 'irving'
#   edgar allan 'poe'       sarah orne 'jewett'         edith 'wharton'
authorName = 'austen'

# Data split:
#   ratioTrain -> ratio of signals that will be used for training
#   ratioValid -> ratio of signals in the training set that will go for validation
ratioTrain, ratioValid = 0.95, 0.08

# Where the data is located (relative path)
dataPath = os.path.join('dataset', 'authorshipData.mat')

In [None]:
# Load the data
# Builds the dataset object for the selected author; the graph and the
# train/valid/test samples are retrieved from it afterwards through
# data.getGraph() and data.getSamples().

data = dataTools.Authorship(authorName,
                            ratioTrain,
                            ratioValid,
                            dataPath,
                            # Leave all this as-is, they are details on how the graph is built
                            'rows', # Normalize the adjacency matrix by 'rows' or 'columns'
                            False, # when False, if the graph has isolated nodes, discard them
                            True, # Create an undirected graph (symmetrize directed edges)
                            True) # Ensure the resulting graph is connected

In [None]:
# Get important information from the loaded data

S = data.getGraph() # Get the adjacency matrix
N = S.shape[0] # Get the number of nodes
xTrain, yTrain = data.getSamples('train') # Get the training samples
xValid, yValid = data.getSamples('valid') # Get the validation samples
xTest, yTest = data.getSamples('test') # Get the test samples
nTrain = xTrain.shape[0] # Number of training samples
nValid = xValid.shape[0] # Number of validation samples
nTest = xTest.shape[0] # Number of testing samples

# Print some info
print("Number of nodes: %d" % N)
print("Number of training signals: %d" % nTrain)
print("Number of validation signals: %d" % nValid)
print("Number of testing signals: %d" % nTest)

# Carry out several important adaptations

# Normalize the adjacency matrix by its largest eigenvalue magnitude.
# np.linalg.eigvals can return complex (or negative) eigenvalues for a general
# matrix; taking the magnitude keeps the normalization constant real and
# positive, so S stays a real-valued matrix.
S = S/np.max(np.abs(np.linalg.eigvals(S)))

# Add the extra "edge_feature" dimension to the matrix
S = np.expand_dims(S, axis = 0) # 1 x N x N

# Note that the xTrain, xValid and xTest are of shape B x N,
# but we want them to be B x F x N with F = 1 (there's only one input feature)
xTrain = np.expand_dims(xTrain, axis = 1)
xValid = np.expand_dims(xValid, axis = 1)
xTest = np.expand_dims(xTest, axis = 1)

Number of nodes: 189
Number of training signals: 1346
Number of validation signals: 118
Number of testing signals: 78


In [None]:
# Sanity checks: S should have unit spectral norm after the normalization, and
# the graph/signals should carry the extra dimension added above.
print("Spectral norm of S = %.4f" % np.linalg.norm(S[0], ord = 2))
# One "Shape of <name>:  <shape>" line per array (generator variables do not leak)
print(*("Shape of %s:  %s" % (arrayName, arrayValue.shape)
        for arrayName, arrayValue in (("S", S),
                                      ("xTrain", xTrain),
                                      ("xValid", xValid),
                                      ("xTest", xTest))),
      sep = "\n")

Spectral norm of S = 1.0000
Shape of S:  (1, 189, 189)
Shape of xTrain:  (1346, 1, 189)
Shape of xValid:  (118, 1, 189)
Shape of xTest:  (78, 1, 189)


# Architecture

Let's create the architecture

In [None]:
# Two graph convolutional layers followed by a linear readout
class GNN2Ly(nn.Module):
    """2-layer GNN with a linear readout layer.

    Arguments:
        F1: number of features (hidden units) at the output of the first layer
        F2: number of features (hidden units) at the output of the second layer
        K1: number of K-hops to consider in the first layer
        K2: number of K-hops to consider in the second layer
        S: graph matrix description; S.shape[1] is taken as the number of nodes
    """

    def __init__(self, F1, F2, K1, K2, S):
        super().__init__() # Initialize the parent

        # Number of nodes, needed to size the readout layer
        nNodes = S.shape[1]

        # Layer 1: graph convolution (a single input feature) + nonlinearity
        self.graphConvLy1 = gml.GraphConv(1, F1, K1)
        self.graphConvLy1.set_graph(S) # Graph to operate on
        self.activationFunction1 = nn.ReLU()

        # Layer 2: graph convolution (F1 features come from layer 1) + nonlinearity
        self.graphConvLy2 = gml.GraphConv(F1, F2, K2)
        self.graphConvLy2.set_graph(S) # Graph to operate on
        self.activationFunction2 = nn.ReLU()

        # Readout: after the second layer each of the nNodes nodes holds F2
        # features; these are flattened into one vector per sample and mapped
        # to 2 outputs (one per class).
        self.readoutLayer = nn.Linear(F2 * nNodes, 2)

    def forward(self, x):
        """Map a batch of graph signals to class scores.

        x has shape B x Fin x N (Fin = 1 here); the output is what the readout
        layer produces from the flattened B x (F2*N) features.
        """
        # First layer: B x F1 x N
        hidden = self.activationFunction1(self.graphConvLy1(x))

        # Second layer: B x F2 x N
        hidden = self.activationFunction2(self.graphConvLy2(hidden))

        # Flatten features and nodes together before the readout
        nSamples, nFeatures, nNodes = hidden.shape[0], hidden.shape[1], hidden.shape[2]
        flattened = hidden.reshape(nSamples, nFeatures * nNodes)

        return self.readoutLayer(flattened)

In [None]:
# Architecture hyperparameters
F1, F2 = 32, 64 # Number of features at the output of the first / second layer
K1, K2 = 3, 2 # Gather information up to the 3-hop / 2-hop neighborhood

# The architecture expects the graph as a torch.tensor
S = torch.tensor(S)

# Build the model (the graph is passed in as the last argument)
GNN = GNN2Ly(F1, F2, K1, K2, S)

# Training

We now train the GNN with mini-batches, using the ADAM optimizer and a cross-entropy loss.

In [None]:
# Training hyperparameters
nEpochs, batchSize = 25, 20 # Number of epochs, batch size
learningRate = 0.005 # Learning rate for an ADAM optimizer

# Loss function to use, and ADAM optimizer over all the GNN parameters
lossFunction = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params = GNN.parameters(), lr = learningRate)

In [None]:
# Mini-batch training loop.
# The number of batches does not change between epochs, so compute it once
# (ceiling division: the last batch may be smaller than batchSize).
nBatches = (nTrain + batchSize - 1) // batchSize

# For each epoch
for e in range(nEpochs):
    # Draw a fresh random ordering of the training samples for this epoch
    idxRandom = np.random.permutation(nTrain)

    # For each batch
    for b in range(nBatches):
        # Pick the samples of this batch through the shuffled indices.
        # (Slicing xTrain/yTrain directly would always visit the samples in
        # their stored order, i.e. no shuffling at all.)
        # Slices past the end are clipped, so the last batch is simply shorter.
        idxBatch = idxRandom[b*batchSize : (b+1)*batchSize]
        xBatch = xTrain[idxBatch, :, :]
        yBatch = yTrain[idxBatch]
        # Convert it to tensor
        xBatch = torch.tensor(xBatch)
        yBatch = torch.tensor(yBatch)

        # Reset gradients
        GNN.zero_grad()

        # Compute the output
        yHat = GNN(xBatch)

        # Compute the loss
        lossValue = lossFunction(yHat, yBatch)

        # Compute the gradient
        lossValue.backward()

        # Update the parameters
        optimizer.step()

        # Print the info (every 5th step, staggered across epochs by e)
        if np.mod(e+b,5) == 0:
            print("E: %3d, B: %3d, loss = %.4f" % (e, b, lossValue.item()))

E:   0, B:   0, loss = 0.8261
E:   0, B:   5, loss = 0.0000
E:   0, B:  10, loss = 0.0000
E:   0, B:  15, loss = 0.0000
E:   0, B:  20, loss = 0.0000
E:   0, B:  25, loss = 0.0000
E:   0, B:  30, loss = 0.0000
E:   0, B:  35, loss = 313.5316
E:   0, B:  40, loss = 0.0000
E:   0, B:  45, loss = 0.0000
E:   0, B:  50, loss = 0.0000
E:   0, B:  55, loss = 0.0000
E:   0, B:  60, loss = 0.0000
E:   0, B:  65, loss = 0.0000
E:   1, B:   4, loss = 148.7365
E:   1, B:   9, loss = 0.1100
E:   1, B:  14, loss = 0.0000
E:   1, B:  19, loss = 0.0000
E:   1, B:  24, loss = 0.0000
E:   1, B:  29, loss = 0.0000
E:   1, B:  34, loss = 81.3120
E:   1, B:  39, loss = 9.7011
E:   1, B:  44, loss = 0.0000
E:   1, B:  49, loss = 0.0000
E:   1, B:  54, loss = 0.0000
E:   1, B:  59, loss = 0.0000
E:   1, B:  64, loss = 0.0000
E:   2, B:   3, loss = 48.6574
E:   2, B:   8, loss = 0.0081
E:   2, B:  13, loss = 0.0000
E:   2, B:  18, loss = 0.0000
E:   2, B:  23, loss = 0.0000
E:   2, B:  28, loss = 0.0000
E:  

# Evaluation

Finally, we evaluate the trained model on the test set.

In [None]:
# The model takes torch tensors, so convert the testing samples first
xTest = torch.tensor(xTest)

# Forward pass on the whole test set without tracking gradients, and bring
# the resulting class scores back as a numpy array
with torch.no_grad():
    yHat = GNN(xTest).detach().cpu().numpy()

In [None]:
# Calculate the error
# yHat holds one score per class (B x nClasses) right after the forward pass;
# collapse it to the predicted label. The guard makes the cell safe to re-run,
# since yHat is already a vector of labels after the first execution.
if np.ndim(yHat) == 2:
    yHat = np.argmax(yHat, axis = 1) # Take the class with the maximum score
# Fraction of misclassified samples. Counting mismatches (instead of summing
# label differences) stays correct for labels other than 0/1.
errorRate = np.mean(yHat != yTest)
print("Classification error: %.3f%%" % (errorRate*100))

Classification error: 24.359%
