In [1]:
# Add this in a Google Colab cell to install the correct version of Pytorch Geometric.
import torch

def format_pytorch_version(version):
  return version.split('+')[0]

TORCH_version = torch.__version__
TORCH = format_pytorch_version(TORCH_version)

def format_cuda_version(version):
  return 'cu' + version.replace('.', '')

CUDA_version = torch.version.cuda
CUDA = format_cuda_version(CUDA_version)

!pip install torch-scatter     -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
!pip install torch-sparse      -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
!pip install torch-cluster     -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
!pip install torch-spline-conv -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
!pip install torch-geometric

Looking in links: https://pytorch-geometric.com/whl/torch-2.2.1+cu121.html
Collecting torch-scatter
  Downloading https://data.pyg.org/whl/torch-2.2.0%2Bcu121/torch_scatter-2.1.2%2Bpt22cu121-cp310-cp310-linux_x86_64.whl (10.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.9/10.9 MB[0m [31m51.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch-scatter
Successfully installed torch-scatter-2.1.2+pt22cu121
Looking in links: https://pytorch-geometric.com/whl/torch-2.2.1+cu121.html
Collecting torch-sparse
  Downloading https://data.pyg.org/whl/torch-2.2.0%2Bcu121/torch_sparse-0.6.18%2Bpt22cu121-cp310-cp310-linux_x86_64.whl (5.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.0/5.0 MB[0m [31m25.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torch-sparse
Successfully installed torch-sparse-0.6.18+pt22cu121
Looking in links: https://pytorch-geometric.com/whl/torch-2.2.1+cu121.html
Collecting torch-cluster
 

In [2]:
import torch
from torch import nn
from torch_geometric.nn import MessagePassing

class GATLayer(MessagePassing):
  def __init__(self, in_feats, out_feats, heads=1, concat=True):
    super(GATLayer, self).__init__(aggr='add')
    self.in_feats = in_feats
    self.out_feats = out_feats
    self.heads = heads
    self.concat = concat

    # Linear transformations for message passing (one per head)
    self.w_from = nn.ModuleList([nn.Linear(in_feats, out_feats) for _ in range(heads)])
    self.w_to = nn.Linear(in_feats, out_feats)

    # LeakyReLU activation
    self.relu = nn.LeakyReLU(0.2)

    # Attention weight parameter (one per head)
    self.att = nn.ParameterList([nn.Parameter(torch.zeros(out_feats, 1)) for _ in range(heads)])

  def propagate(self, edge_index, x):
    # Message passing (one head at a time)
    z = []
    for i in range(self.heads):
      w_from, w_to, att = self.w_from[i], self.w_to, self.att[i]
      # Source node representations
      from_feats = w_from(x)
      # Target node representations
      to_feats = w_to(x[edge_index[1]])
      # Message calculation (a single attention value per edge)
      a = self.relu(torch.matmul(from_feats, to_feats.t()) * att)
      # Attention weights (softmax normalization)
      a = torch.softmax(a, dim=1)
      # Message aggregation (weighted sum)
      z.append(torch.matmul(a, from_feats))
    # Concatenate messages from all heads if specified
    return torch.cat(z, dim=1) if self.concat else torch.sum(torch.stack(z), dim=0)

  def forward(self, x, edge_index):
    x = self.propagate(edge_index, x)
    x = self.relu(torch.mm(x, self.w_to.weight))
    return x

In [3]:
import torch
import pandas as pd
from torch_geometric.datasets import QM9

# Load QM9 data from CSV file (replace 'qm9.csv' with your actual file path)
# Load the QM9 dataset
dataset = QM9(root='/tmp/QM9')

data = dataset

# Select relevant features for node representations (replace with your desired features)
node_features = data[['atomic_num', 'mass', 'x', 'y', 'z']]

# One-hot encode categorical features (if applicable)
# ... (implementation depends on your specific categorical features)

# Convert node features to PyTorch tensor
x = torch.tensor(node_features.values, dtype=torch.float)

# Construct edge index (assuming an edge between each atom in a molecule)
num_nodes = x.shape[0]
edges = []
for molecule_id in range(len(data)):
  molecule_data = data.iloc[molecule_id]
  # Assuming connectivity information is available in a separate column/list
  connections = molecule_data['connectivity']  # Replace with your connectivity data
  for i, neighbor in enumerate(connections):
    edges.append((molecule_id, neighbor))

# Convert edge list to PyTorch tensor (row-major order)
edge_index = torch.tensor(edges, dtype=torch.long).t()

Downloading https://data.pyg.org/datasets/qm9_v3.zip
Extracting /tmp/QM9/raw/qm9_v3.zip
Processing...
Using a pre-processed version of the dataset. Please install 'rdkit' to alternatively process the raw data.
Done!


TypeError: range indices must be integers or slices, not str

In [None]:
# Assuming you have loaded your QM9 data (x, edge_index)
gat_layer = GATLayer(in_feats=x.shape[1], out_feats=16, heads=4)
x = gat_layer(x, edge_index)
# Now 'x' contains updated node representations after one GAT layer