### Importing Dataset

In [None]:
import torch
import rdkit
from torch_geometric.datasets import MoleculeNet

# Load ESOL from MoleculeNet, with the current directory as the dataset root
data = MoleculeNet(name="ESOL", root=".")
# Bare expression: the notebook renders the dataset summary as cell output
data

### Investigating the dataset

In [None]:
# Investigating the dataset
print("Dataset type: ", type(data))
print("Dataset features: ", data.num_features)
print("Dataset target: ", data.num_classes)
# `data.len` is a bound method, so printing it shows the method repr rather
# than a number; len(data) yields the actual number of graphs.
print("Dataset length: ", len(data))
# Fetch the first graph once and reuse it for the remaining lines
sample = data[0]
print("Dataset sample: ", sample)
print("Sample  nodes: ", sample.num_nodes)
print("Sample  edges: ", sample.num_edges)

# edge_index = graph connections
# smiles = molecule with its atoms
# x = node features (32 nodes have each 9 features)
# y = labels (dimension)

### My GAT Layer

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import add_self_loops

import torch
from torch.nn import Linear
import torch.nn.functional as F
import torch.nn as nn
from torch_geometric.nn import GCNConv, TopKPooling, global_mean_pool , GATConv
from torch_geometric.nn import global_mean_pool as gap, global_max_pool as gmp
# Number of channels in every hidden node embedding. The GCN model defined
# later in this file uses it as the output size of its first layer, as both
# sizes of its GCNConv layers, and (doubled) as the input size of its output layer.
embedding_size = 64

def custom_exp(edge_index, attention_scores):
    """Softmax-normalise attention scores over each source node's outgoing edges.

    For every edge ``(src, dst)`` listed in ``edge_index`` the result holds
    ``exp(s[src, dst]) / sum_k exp(s[src, k])``, where ``k`` runs over the
    destinations of all edges that start at ``src`` (an edge listed twice
    contributes twice to the sum). Entries without an edge stay zero.

    The computation is vectorised (no Python loop over edges) and shifts each
    row by its largest edge score before exponentiating, so large scores do
    not overflow to ``inf`` / ``nan``. The shift cancels in the ratio, so the
    result is mathematically unchanged.

    Args:
        edge_index: Integer tensor of shape ``(2, E)``; row 0 holds source
            node indices and row 1 destination node indices.
        attention_scores: Float tensor of raw scores indexed as ``[src, dst]``.

    Returns:
        Tensor shaped like ``attention_scores`` containing the normalised
        coefficients at edge positions and zeros everywhere else.
    """
    src, dst = edge_index[0], edge_index[1]
    attention_coefficients = torch.zeros_like(attention_scores)
    if src.numel() == 0:
        # No edges: nothing to normalise
        return attention_coefficients

    num_nodes = attention_scores.size(0)
    edge_scores = attention_scores[src, dst]

    # Largest edge score per source row; non-edges are excluded via -inf.
    # Detached because the shift is a constant that cancels in the softmax.
    edge_mask = torch.zeros_like(attention_scores, dtype=torch.bool)
    edge_mask[src, dst] = True
    row_max = (
        attention_scores.detach()
        .masked_fill(~edge_mask, float("-inf"))
        .max(dim=1)
        .values
    )

    exp_scores = torch.exp(edge_scores - row_max[src])
    # Per-source denominator: sum of exponentials over that source's edges
    denominators = exp_scores.new_zeros(num_nodes).index_add(0, src, exp_scores)

    attention_coefficients[src, dst] = exp_scores / denominators[src]
    return attention_coefficients


class PJzGAT(MessagePassing):
    """Single-head graph attention layer followed by a two-layer MLP.

    Node features are projected with ``W``; a raw score
    ``LeakyReLU(a^T [h_i || h_j])`` is computed for node pairs, normalised
    over the edges of the graph (self-loops included) by ``custom_exp``, and
    used to weight the messages that are sum-aggregated by ``propagate``.

    Args:
        in_channels: Size of each input node feature vector.
        out_channels: Size of each output node feature vector.
    """

    def __init__(self, in_channels, out_channels):
        super(PJzGAT, self).__init__(aggr='add')

        self.in_channels = in_channels
        self.out_channels = out_channels

        # Linear projection applied to every node
        self.W = nn.Parameter(torch.zeros(size=(in_channels, out_channels)))
        nn.init.xavier_uniform_(self.W.data, gain=1.414)

        # Attention vector; first half scores h_i, second half scores h_j
        self.a = nn.Parameter(torch.zeros(size=(2*out_channels, 1)))
        nn.init.xavier_uniform_(self.a.data, gain=1.414)

        self.leakyrelu = nn.LeakyReLU(0.2)

        # Node-wise MLP applied after aggregation
        self.mlp = nn.Sequential(
            nn.Linear(out_channels, out_channels),
            nn.ReLU(),
            nn.Linear(out_channels, out_channels),
            nn.ReLU()
        )

    def forward(self, x, edge_index):
        """Compute attention-weighted node embeddings.

        Args:
            x: Node feature tensor of shape ``(N, in_channels)``.
            edge_index: Integer tensor of shape ``(2, E)`` with graph edges.

        Returns:
            Tensor of shape ``(N, out_channels)``.
        """
        num_nodes = x.size(0)
        h = torch.matmul(x, self.W)  # Apply linear transformation

        edge_index, _ = add_self_loops(edge_index, num_nodes=num_nodes)
        src, dst = edge_index

        # a^T [h_i || h_j] splits into (h_i . a_first) + (h_j . a_second), so
        # the pairwise score matrix is a broadcast sum of two (N, 1) vectors.
        # This avoids building an (N, N, 2 * out_channels) tensor and keeps
        # `e` two-dimensional even when N == 1 (a bare .squeeze() would not).
        score_i = torch.matmul(h, self.a[:self.out_channels])   # (N, 1)
        score_j = torch.matmul(h, self.a[self.out_channels:])   # (N, 1)
        # LeakyReLU is applied exactly once; applying it a second time would
        # shrink the effective negative slope from 0.2 to 0.04.
        e = self.leakyrelu(score_i + score_j.t())               # (N, N)

        # custom_exp only reads and writes the [src, dst] entries of edges,
        # so no explicit masking of non-edge entries is required.
        attention = custom_exp(edge_index, e)

        # Gather one coefficient per edge, in edge_index order, so that every
        # weight lines up with the x_j row that propagate() builds for it.
        # Filtering the flattened matrix on `!= 0` yields row-major order,
        # which does not match edge_index once self-loops are appended, and
        # it drops any coefficient that underflows to exactly zero.
        edge_attention = attention[src, dst]

        # Perform message passing with attention
        out = self.propagate(edge_index, x=h, attention=edge_attention)

        # Apply MLP after aggregation
        out = self.mlp(out)

        return out

    def message(self, x_j, attention):
        """Scale each edge's source features by that edge's coefficient.

        Args:
            x_j: Per-edge node features of shape ``(E, out_channels)``.
            attention: One attention coefficient per edge (``E`` values).

        Returns:
            Tensor of shape ``(E, out_channels)`` with the weighted messages.
        """
        return attention.view(-1, 1) * x_j

## Implementing the Graph Neural Network

Building a Graph Neural Network works the same way as building a Convolutional Neural Network: we simply add some layers.

The GCN simply extends torch.nn.Module.
GCNConv expects:
- in_channels = Size of each input sample.
- out_channels = Size of each output sample.

We apply the custom attention layer followed by three GCN convolutional layers, which means each node gathers information from up to 4 neighbor hops. After that we apply global pooling (max and mean, concatenated) to combine the information of the individual nodes, as we want to perform graph-level prediction.

In [None]:
class GCN(torch.nn.Module):
    """Graph-level regressor: one PJzGAT layer, three GCNConv layers, pooling.

    Layer sizes come from the module-level ``data`` (input feature count) and
    ``embedding_size`` (hidden width). Node embeddings are pooled per graph
    with global max and global mean pooling, concatenated, and mapped to a
    single output value by a linear layer.
    """

    def __init__(self):
        # Init parent
        super(GCN, self).__init__()
        # Fixed seed so the weight initialisation below is reproducible
        torch.manual_seed(42)

        # Message-passing layers
        self.initial_conv = PJzGAT(data.num_features, embedding_size)
        self.conv1 = GCNConv(embedding_size, embedding_size)
        self.conv2 = GCNConv(embedding_size, embedding_size)
        self.conv3 = GCNConv(embedding_size, embedding_size)

        # Output layer; input is doubled because max- and mean-pooled
        # embeddings are concatenated
        self.out = Linear(embedding_size*2, 1)

    def forward(self, x, edge_index, batch_index):
        """Predict one value per graph.

        Args:
            x: Node feature tensor.
            edge_index: Edge index tensor of the (batched) graph.
            batch_index: Tensor assigning every node to its graph in the batch.

        Returns:
            Tuple ``(out, hidden)``: the per-graph prediction and the pooled
            graph embedding it was computed from.
        """
        # No throw-away nn.Linear is created here: building a fresh, untrained
        # layer on every call whose output is immediately overwritten does no
        # useful work and fails with a device mismatch once x is on the GPU.
        hidden = torch.tanh(self.initial_conv(x, edge_index))

        # Remaining convolution layers, each followed by tanh
        for conv in (self.conv1, self.conv2, self.conv3):
            hidden = torch.tanh(conv(hidden, edge_index))

        # Global Pooling (stack different aggregations)
        hidden = torch.cat([gmp(hidden, batch_index),
                            gap(hidden, batch_index)], dim=1)

        # Apply the final linear output layer
        out = self.out(hidden)

        return out, hidden

# Build the network, show its layer structure, and count its weights
model = GCN()
print(model)
num_parameters = sum(param.numel() for param in model.parameters())
print("Number of parameters: ", num_parameters)

## Training the GNN

In [None]:
# DataLoader lives in torch_geometric.loader; the torch_geometric.data alias
# is deprecated.
from torch_geometric.loader import DataLoader
import warnings
warnings.filterwarnings("ignore")

# Mean squared error (no square root is taken, so this is MSE, not RMSE)
loss_fn = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Use GPU for training
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Wrap data in a data loader
data_size = len(data)
NUM_GRAPHS_PER_BATCH = 512   #initially 64
# Single split point so the train and test slices can never drift apart:
# first 80% of the graphs for training, remaining 20% for testing
train_size = int(data_size * 0.8)
loader = DataLoader(data[:train_size],
                    batch_size=NUM_GRAPHS_PER_BATCH, shuffle=True)
test_loader = DataLoader(data[train_size:],
                         batch_size=NUM_GRAPHS_PER_BATCH, shuffle=True)

def train(data):
    """Run one optimisation pass over the module-level ``loader``.

    Uses the module-level ``model``, ``optimizer``, ``loss_fn`` and ``device``.

    Args:
        data: Not used by this function; batches are drawn from the
            module-level ``loader``. Kept so existing callers keep working.

    Returns:
        Tuple ``(loss, embedding)`` for the last batch processed. Both tensors
        are detached so that storing them (for example in a list of per-epoch
        losses) does not keep the autograd graph alive.

    Raises:
        ValueError: If ``loader`` yields no batches.
    """
    last_loss = None
    last_embedding = None
    # Enumerate over the data
    for batch in loader:
        # Move the batch to the training device
        batch = batch.to(device)
        # Reset gradients
        optimizer.zero_grad()
        # Passing the node features and the connection info
        pred, embedding = model(batch.x.float(), batch.edge_index, batch.batch)
        # Calculating the loss and gradients
        loss = loss_fn(pred, batch.y)
        loss.backward()
        # Update using the gradients
        optimizer.step()
        last_loss = loss.detach()
        last_embedding = embedding.detach()
    if last_loss is None:
        raise ValueError("training loader yielded no batches")
    return last_loss, last_embedding

print("Starting training...")
# One entry per epoch: the loss returned by train() for that epoch
losses = []
for epoch in range(100):
    loss, h = train(data)
    losses.append(loss)
    # A modulus of 1 never leaves a remainder, so every epoch is reported
    if not epoch % 1:
        print(f"Epoch {epoch} | Train Loss {loss}")

### Visualizing the Training loss

In [None]:
# Visualize learning (training loss)
import seaborn as sns
# Convert each recorded scalar loss tensor into a plain Python float
losses_float = [loss.item() for loss in losses]
loss_indices = list(range(len(losses_float)))
plt = sns.lineplot(losses_float)
# Bare expression: the notebook renders the plot as cell output
plt

### Getting a test prediction

In [None]:
import pandas as pd

# Analyze the results for one batch
test_batch = next(iter(test_loader))
# Inference only, so gradient tracking is switched off
with torch.no_grad():
    test_batch.to(device)
    pred, embed = model(test_batch.x.float(), test_batch.edge_index, test_batch.batch)
    df = pd.DataFrame({
        "y_real": test_batch.y.tolist(),
        "y_pred": pred.tolist(),
    })
# Each cell holds a one-element list; unwrap it to the bare value
for column in ("y_real", "y_pred"):
    df[column] = df[column].apply(lambda row: row[0])
df

In [None]:
# Predicted vs. real values on identical axis ranges
plt = sns.scatterplot(data=df, x="y_real", y="y_pred")
plt.set(xlim=(-10, 10), ylim=(-10, 10))
# Dashed y = x reference line: points on it are perfect predictions
plt.plot([-10, 10], [-10, 10], color='red', linestyle='--')
plt