# README:
To run this code, you need the following installed: \
Python \
PyTorch \
PyTorch Geometric (PyG) \
scikit-learn \
Matplotlib \
tqdm 

You can install the required packages using pip. For PyTorch and PyTorch Geometric, follow the instructions provided in their respective installation guides, as their installation commands can vary depending on your system's CUDA version.


In [None]:
import shutil
from typing import Any, Dict, Optional
import torch
import random
import torch.nn.functional as F
from torch.nn import Linear, Sequential, ReLU, GRU, Embedding, LSTM, Dropout, BatchNorm1d, BatchNorm2d, ModuleList, Module
from torch_geometric.nn import GCNConv, GINConv, GATv2Conv, GINEConv, GPSConv,global_mean_pool, TopKPooling
from torch_geometric.datasets import Planetoid, TUDataset, LRGBDataset 
from torch_geometric.loader import DataLoader
from torch_geometric.data import InMemoryDataset
from sklearn.model_selection import train_test_split
from torch_geometric.data import (
    Data,
    InMemoryDataset,
    download_url,
    extract_zip,
)
import matplotlib.pyplot as plt
# from torch_geometric.io import fs

import os
import os.path as osp
import pickle
from typing import Callable, List, Optional
from tqdm import tqdm
from torch.optim.lr_scheduler import ReduceLROnPlateau

import torch_geometric.transforms as T

In [None]:
# Seed every RNG the notebook relies on. Python's `random` drives the manual
# list shuffles; torch's generator drives weight initialisation, dropout and
# DataLoader shuffling, so it must be seeded as well for repeatable runs.
random.seed(42)
torch.manual_seed(42)

## Model implementation:

In [None]:
class GCN(torch.nn.Module):
    """Two-layer graph convolutional network for node classification.

    Args:
        num_node_features: Width of each input node feature vector.
        num_classes: Number of output classes (width of the second layer).
    """

    def __init__(self, num_node_features, num_classes):
        super().__init__()
        self.conv1 = GCNConv(num_node_features, 16)
        self.conv2 = GCNConv(16, num_classes)

    def forward(self, data):
        """Return per-node log-probabilities computed from ``data.x`` and ``data.edge_index``."""
        edges = data.edge_index

        hidden = F.relu(self.conv1(data.x, edges))
        # Dropout is only active while the module is in training mode.
        hidden = F.dropout(hidden, training=self.training)
        logits = self.conv2(hidden, edges)

        return F.log_softmax(logits, dim=1)

class GCN_graph(torch.nn.Module):
    """Four-layer GCN followed by mean pooling for graph classification.

    Args:
        num_node_features: Width of each input node feature vector.
        num_classes: Number of graph-level output classes.
    """

    def __init__(self, num_node_features, num_classes):
        super().__init__()
        self.conv1 = GCNConv(num_node_features, 64)
        self.conv2 = GCNConv(64, 64)
        self.conv3 = GCNConv(64, 64)
        self.conv4 = GCNConv(64, 64)
        self.fc = torch.nn.Linear(64, num_classes)

    def forward(self, data):
        """Return per-graph log-probabilities for a (batched) ``data`` object."""
        # Cast to float so integer-typed node features can be fed to the
        # convolution weights.
        x = data.x.float()
        edge_index = data.edge_index

        # Every convolution is followed by ReLU + dropout. The original code
        # skipped the ReLU after conv2, which made conv2 and conv3 compose
        # linearly at inference time.
        for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
            x = F.relu(conv(x, edge_index))
            x = F.dropout(x, p=0.5, training=self.training)

        # Collapse node embeddings into one vector per graph.
        x = global_mean_pool(x, data.batch)
        x = self.fc(x)
        return F.log_softmax(x, dim=1)

# class GCN_graph(torch.nn.Module):
#     def __init__(self, num_node_features, num_classes):
#         super().__init__()
#         self.conv1 = GCNConv(num_node_features, 64)
#         self.conv2 = GCNConv(64, 64)
#         self.conv3 = GCNConv(64, 64)  
#         self.conv4 = GCNConv(64, 64)
#         self.fc = torch.nn.Linear(64, num_classes)  

#     def forward(self, data):
#         x, edge_index, batch = data.x, data.edge_index, data.batch

#         x = F.relu(self.conv1(x, edge_index))
#         x = F.dropout(x, p=0.5, training=self.training)
#         x = self.conv2(x, edge_index)
#         x = F.dropout(x, p=0.5, training=self.training)
#         x = F.relu(self.conv3(x, edge_index))
#         x = F.dropout(x, p=0.5, training=self.training)
#         x = F.relu(self.conv4(x, edge_index))
#         x = F.dropout(x, p=0.5, training=self.training)

#         # Apply global mean pooling to aggregate node features into graph-level features
#         x = global_mean_pool(x, batch)

#         x = self.fc(x)

#         return F.log_softmax(x, dim=1)




class GATv2(torch.nn.Module):
    """Two-layer GATv2 network for node classification.

    Args:
        num_node_features: Width of each input node feature vector.
        num_classes: Number of output classes.
    """

    def __init__(self, num_node_features, num_classes):
        super().__init__()
        # First layer: 8 heads of width 8, concatenated to 64 channels.
        # Author's note: 8 heads were chosen because more heads were seen to
        # overfit on the Pubmed dataset.
        self.conv1 = GATv2Conv(num_node_features, 8, heads=8, dropout=0.6)
        # Second layer: a single, non-concatenated head that emits class scores.
        self.conv2 = GATv2Conv(8 * 8, num_classes, heads=1, concat=False, dropout=0.6)

    def forward(self, data):
        """Return per-node log-probabilities computed from ``data.x`` and ``data.edge_index``."""
        edges = data.edge_index

        h = F.dropout(data.x, p=0.6, training=self.training)
        h = F.elu(self.conv1(h, edges))
        h = F.dropout(h, p=0.6, training=self.training)
        scores = self.conv2(h, edges)

        return F.log_softmax(scores, dim=1)


class GATv2_Graph(torch.nn.Module):
    """Three-layer GATv2 network with mean pooling for graph classification.

    Args:
        num_node_features: Width of each input node feature vector.
        num_classes: Number of graph-level output classes.
    """

    def __init__(self, num_node_features, num_classes):
        super().__init__()
        # Two 8-head layers (8 * 32 = 256 concatenated channels each) ...
        self.conv1 = GATv2Conv(num_node_features, 32, heads=8, concat=True, dropout=0.0)
        self.conv2 = GATv2Conv(32 * 8, 32, heads=8, concat=True, dropout=0.0)
        # ... followed by a single-head layer that narrows back to 32 channels.
        self.conv3 = GATv2Conv(32 * 8, 32, heads=1, concat=True, dropout=0.0)
        self.fc1 = torch.nn.Linear(32, num_classes)

    def forward(self, data):
        """Return per-graph log-probabilities for a (batched) ``data`` object."""
        edges, graph_ids = data.edge_index, data.batch
        h = data.x.float()

        for attention_layer in (self.conv1, self.conv2, self.conv3):
            h = F.elu(attention_layer(h, edges))

        # Average node embeddings within each graph of the batch.
        pooled = global_mean_pool(h, graph_ids)

        return F.log_softmax(self.fc1(pooled), dim=1)



class GIN_Node(torch.nn.Module):
    """Two-layer GIN network with an MLP head for node classification.

    Args:
        num_node_features: Width of each input node feature vector.
        num_classes: Number of output classes.
    """

    def __init__(self, num_node_features, num_classes):
        super().__init__()

        # First GIN layer: its update function is a two-layer MLP.
        nn1 = Sequential(
            Linear(num_node_features, 64),
            ReLU(),
            Linear(64, 64),
        )
        self.conv1 = GINConv(nn1)
        self.bn1 = BatchNorm1d(64)

        # Second GIN layer, same shape as the first.
        nn2 = Sequential(
            Linear(64, 64),
            ReLU(),
            Linear(64, 64),
        )
        self.conv2 = GINConv(nn2)
        self.bn2 = BatchNorm1d(64)

        # Per-node classification head.
        self.fc1 = Linear(64, 64)
        self.fc2 = Linear(64, num_classes)

    def forward(self, data):
        """Return per-node log-probabilities computed from ``data.x`` and ``data.edge_index``."""
        edge_index = data.edge_index
        x = data.x.float()

        # conv -> batch norm -> ReLU -> dropout, matching GIN_Graph. The
        # original forward pass created bn1/bn2 but never applied them.
        x = F.relu(self.bn1(self.conv1(x, edge_index)))
        x = F.dropout(x, p=0.5, training=self.training)

        x = F.relu(self.bn2(self.conv2(x, edge_index)))
        x = F.dropout(x, p=0.5, training=self.training)

        x = F.relu(self.fc1(x))
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.fc2(x)

        return F.log_softmax(x, dim=1)
        

class GIN_Graph(torch.nn.Module):
    """Two-layer GIN network with mean pooling for graph classification.

    Args:
        num_node_features: Width of each input node feature vector.
        num_classes: Number of graph-level output classes.
    """

    def __init__(self, num_node_features, num_classes):
        super().__init__()

        # First GIN layer: its update function is a two-layer MLP.
        nn1 = Sequential(
            Linear(num_node_features, 64),
            ReLU(),
            Linear(64, 64),
        )
        self.conv1 = GINConv(nn1)
        self.bn1 = BatchNorm1d(64)

        # Second GIN layer, same shape as the first.
        nn2 = Sequential(
            Linear(64, 64),
            ReLU(),
            Linear(64, 64),
        )
        self.conv2 = GINConv(nn2)
        self.bn2 = BatchNorm1d(64)

        # Graph-level classification head.
        self.fc1 = Linear(64, 64)
        self.fc2 = Linear(64, num_classes)

    def forward(self, data):
        """Return per-graph log-probabilities for a (batched) ``data`` object."""
        edge_index, batch = data.edge_index, data.batch
        # Cast to float like the other graph models do: integer-typed node
        # features would otherwise raise a dtype mismatch in the first Linear.
        x = data.x.float()

        x = self.conv1(x, edge_index)
        x = self.bn1(x)
        x = F.relu(x)
        x = F.dropout(x, p=0.5, training=self.training)

        x = self.conv2(x, edge_index)
        x = self.bn2(x)
        x = F.relu(x)
        x = F.dropout(x, p=0.5, training=self.training)

        # Average node embeddings within each graph of the batch.
        x = global_mean_pool(x, batch)

        x = self.fc1(x)
        x = F.relu(x)
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.fc2(x)

        return F.log_softmax(x, dim=1)


class GPS_Node(torch.nn.Module):
    """Single-layer GPS (GCN message passing + global attention) node classifier.

    Args:
        num_node_features: Width of each input node feature vector.
        num_classes: Number of output classes.
        dropout_rate: Dropout probability used both between layers and inside
            the GPS layer. Defaults to 0.5.
    """

    def __init__(self, num_node_features, num_classes, dropout_rate=0.5):
        super().__init__()
        # Project raw features to the 64 channels the GPS layer works on.
        self.preprocess = torch.nn.Linear(num_node_features, 64)
        # The original hard-coded dropout=0.5 here, silently ignoring
        # `dropout_rate`; the default keeps the previous behaviour.
        self.conv = GPSConv(64, GCNConv(64, 64), heads=4, dropout=dropout_rate)
        self.dropout = torch.nn.Dropout(dropout_rate)
        self.fc = torch.nn.Linear(64, num_classes)

    def forward(self, data):
        """Return per-node log-probabilities computed from ``data.x`` and ``data.edge_index``."""
        edge_index = data.edge_index
        # Cast for consistency with the other models (a no-op for float input).
        x = data.x.float()

        x = self.dropout(F.relu(self.preprocess(x)))

        # No batch vector is passed: `data` is used as one single graph here.
        x = self.conv(x, edge_index)
        x = self.dropout(F.relu(x))

        x = self.fc(x)
        return F.log_softmax(x, dim=1)

class GPS_Graph(torch.nn.Module):
    """Single-layer GPS network with mean pooling for graph classification.

    Args:
        num_node_features: Width of each input node feature vector.
        num_classes: Number of graph-level output classes.
        dropout_rate: Dropout probability used both between layers and inside
            the GPS layer. Defaults to 0.5.
    """

    def __init__(self, num_node_features, num_classes, dropout_rate=0.5):
        super().__init__()
        # Project raw features to the 64 channels the GPS layer works on.
        self.preprocess = torch.nn.Linear(num_node_features, 64)
        # The original hard-coded dropout=0.5 here, silently ignoring
        # `dropout_rate`; the default keeps the previous behaviour.
        self.conv = GPSConv(64, GCNConv(64, 64), heads=4, dropout=dropout_rate)
        self.dropout = torch.nn.Dropout(dropout_rate)
        self.fc1 = torch.nn.Linear(64, 64)
        self.fc2 = torch.nn.Linear(64, num_classes)

    def forward(self, data):
        """Return per-graph log-probabilities for a (batched) ``data`` object."""
        edge_index, batch = data.edge_index, data.batch
        x = data.x.float()

        x = self.dropout(F.relu(self.preprocess(x)))

        # Pass the batch vector so the global attention inside GPSConv is
        # computed per graph. Without it all nodes of the mini-batch are
        # treated as one graph and information leaks between samples.
        x = self.conv(x, edge_index, batch=batch)
        x = self.dropout(F.relu(x))

        # Average node embeddings within each graph of the batch.
        x = global_mean_pool(x, batch)

        x = self.dropout(F.relu(self.fc1(x)))

        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

In [None]:
def evaluate(loader, model=None, device=None):
    """Return the classification accuracy of a model over all batches of ``loader``.

    Args:
        loader: Iterable of batches. Each batch must support ``.to(device)``
            and expose its targets as ``.y`` (class indices, or a 2-D
            score/one-hot matrix that is reduced with ``argmax``).
        model: Model to evaluate. Defaults to the module-level ``model`` so
            existing ``evaluate(loader)`` calls keep working.
        device: Device batches are moved to. Defaults to the module-level
            ``device``.

    Returns:
        Fraction of correctly classified samples, or ``0.0`` when the loader
        yields no samples.
    """
    # Fall back to the notebook globals for backward compatibility.
    if model is None:
        model = globals()["model"]
    if device is None:
        device = globals()["device"]

    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch in loader:
            # Use the returned object rather than relying on in-place moves.
            batch = batch.to(device)
            predicted = model(batch).argmax(dim=1)

            labels = batch.y.argmax(dim=1) if batch.y.ndim > 1 else batch.y

            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    # Guard against an empty loader instead of raising ZeroDivisionError.
    return correct / total if total else 0.0

### GCN:

#### CORA:

In [None]:
# Load the Cora Planetoid dataset, stored under /tmp/Cora.
dataset = Planetoid(
    root='/tmp/Cora',
    name='Cora',
)

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GCN(dataset.num_features, dataset.num_classes).to(device)
data = dataset[0].to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

# Full-batch training: the whole graph is fed each step and the loss is
# restricted to the nodes selected by train_mask.
model.train()
for epoch in range(200):
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()

# Evaluation needs no gradients; no_grad avoids building the autograd graph.
model.eval()
with torch.no_grad():
    pred = model(data).argmax(dim=1)  # Predictions for all nodes

# Accuracy on the training nodes
train_correct = (pred[data.train_mask] == data.y[data.train_mask]).sum().item()
train_acc = train_correct / data.train_mask.sum().item()

# Accuracy on the test nodes
test_correct = (pred[data.test_mask] == data.y[data.test_mask]).sum().item()
test_acc = test_correct / data.test_mask.sum().item()

cora_GCN = {
    'train_accuracy': train_acc,
    'test_accuracy': test_acc
}
cora_GCN

#### IMDB:

In [None]:
imdb_dataset = TUDataset(root='/tmp/IMDB', name='IMDB-BINARY')

# Graphs that come without node features get a constant all-ones feature
# vector of width 3 so that feature-based models can be applied.
modified_graphs = []
for data in imdb_dataset:
    if data.x is None:
        num_nodes = data.num_nodes
        one_features = torch.ones((num_nodes, 3))
        data.x = one_features
    modified_graphs.append(data)

# Shuffle with a dedicated, fixed-seed RNG. The global `random` state differs
# from cell to cell, so using it would give every model a different
# train/test split and make their accuracies incomparable.
random.Random(42).shuffle(modified_graphs)

# 80/20 train/test split
split_idx = int(len(modified_graphs) * 0.8)
train_dataset = modified_graphs[:split_idx]
test_dataset = modified_graphs[split_idx:]

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

model = GCN_graph(num_node_features=3, num_classes=2)
model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

# Mini-batch training for a fixed number of epochs
for epoch in range(400):
    model.train()
    for batch in train_loader:
        batch = batch.to(device)
        optimizer.zero_grad()
        out = model(batch)
        loss = F.nll_loss(out, batch.y)
        loss.backward()
        optimizer.step()

model.eval()

train_accuracy = evaluate(train_loader)
test_accuracy = evaluate(test_loader)

accuracies = {
    'train_accuracy': train_accuracy,
    'test_accuracy': test_accuracy
}

print(f"Accuracies: {accuracies}")
imdb_acc_GCN = accuracies

#### ENZYME:

In [None]:
# Load the ENZYMES TU dataset, stored under /tmp/ENZYMES.
enzymes_dataset = TUDataset(
    root='/tmp/ENZYMES',
    name='ENZYMES',
)

In [None]:
# 80/20 split with a fixed random_state so the split is repeatable.
train_dataset, test_dataset = train_test_split(
    enzymes_dataset,
    test_size=0.2,
    random_state=42,
)

train_loader = DataLoader(train_dataset, shuffle=True, batch_size=32)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=32)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# GCN graph classifier: 3 input features per node, 6 output classes.
model = GCN_graph(num_node_features=3, num_classes=6).to(device)

optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.01,
    weight_decay=5e-4,
)

# Mini-batch training for a fixed number of epochs.
for epoch in range(400):
    model.train()
    for batch in train_loader:
        batch.to(device)
        optimizer.zero_grad()
        out = model(batch)
        loss = F.nll_loss(input=out, target=batch.y)
        loss.backward()
        optimizer.step()

model.eval()

# Accuracy on both splits via the shared evaluate() helper.
train_accuracy = evaluate(train_loader)
test_accuracy = evaluate(test_loader)

accuracies = dict(train_accuracy=train_accuracy, test_accuracy=test_accuracy)

print(f"Accuracies: {accuracies}")
enzyme_acc_GCN = accuracies

#### LRGB

In [None]:
# Train and test splits of the LRGB Peptides-func dataset.
pep_train = LRGBDataset(
    root='path/to/data',
    name='Peptides-func',
    split='train',
)
pep_test = LRGBDataset(
    root='path/to/data',
    name='Peptides-func',
    split='test',
)

In [None]:
# Mini-batch loaders for Peptides-func; only the training split is shuffled.
train_loader = DataLoader(pep_train, shuffle=True, batch_size=32)
test_loader = DataLoader(pep_test, shuffle=False, batch_size=32)

In [None]:

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# GCN graph classifier: 9 input features per node, 10 output classes.
model = GCN_graph(9, 10).to(device)

optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.01,
    weight_decay=5e-4,
)

# Created but not used by the loop below, which calls F.nll_loss directly.
criterion = torch.nn.CrossEntropyLoss()

# Mini-batch training; the last batch loss is reported every 50 epochs.
for epoch in range(400):
    model.train()
    for batch in train_loader:
        batch.to(device)
        optimizer.zero_grad()
        out = model(batch)
        # 2-D targets are reduced to a single class index per graph.
        target = batch.y.argmax(dim=1) if batch.y.ndim > 1 else batch.y

        loss = F.nll_loss(out, target)

        loss.backward()
        optimizer.step()
    if epoch % 50 == 0:
        print(f"Epoch {epoch} | Loss: {loss.item()}")

model.eval()

# Accuracy on both splits via the shared evaluate() helper.
train_accuracy = evaluate(train_loader)
test_accuracy = evaluate(test_loader)

accuracies = dict(train_accuracy=train_accuracy, test_accuracy=test_accuracy)

print(f"Accuracies: {accuracies}")
pascal_acc_GCN = accuracies

### GIN

### CORA

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GIN_Node(dataset.num_features, dataset.num_classes).to(device)
data = dataset[0].to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

# Full-batch training: the whole graph is fed each step and the loss is
# restricted to the nodes selected by train_mask.
model.train()
for epoch in range(500):
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()

# Evaluation needs no gradients; no_grad avoids building the autograd graph.
model.eval()
with torch.no_grad():
    pred = model(data).argmax(dim=1)  # Predictions for all nodes

# Accuracy on the training nodes
train_correct = (pred[data.train_mask] == data.y[data.train_mask]).sum().item()
train_acc = train_correct / data.train_mask.sum().item()

# Accuracy on the test nodes
test_correct = (pred[data.test_mask] == data.y[data.test_mask]).sum().item()
test_acc = test_correct / data.test_mask.sum().item()

cora_GIN = {
    'train_accuracy': train_acc,
    'test_accuracy': test_acc
}
cora_GIN

#### IMDB

In [None]:
# Load the IMDB-BINARY TU dataset, stored under /tmp/IMDB.
imdb_dataset = TUDataset(
    root='/tmp/IMDB',
    name='IMDB-BINARY',
)

In [None]:
# Graphs that come without node features get a constant all-ones feature
# vector of width 3 so that feature-based models can be applied.
modified_graphs = []
for data in imdb_dataset:
    if data.x is None:
        num_nodes = data.num_nodes
        one_features = torch.ones((num_nodes, 3))
        data.x = one_features
    modified_graphs.append(data)

# Shuffle with a dedicated, fixed-seed RNG. The global `random` state differs
# from cell to cell, so using it would give every model a different
# train/test split and make their accuracies incomparable.
random.Random(42).shuffle(modified_graphs)

# 80/20 train/test split
split_idx = int(len(modified_graphs) * 0.8)
train_dataset = modified_graphs[:split_idx]
test_dataset = modified_graphs[split_idx:]

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

model = GIN_Graph(3, 2)
model.to(device)

# Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

# Mini-batch training for a fixed number of epochs
for epoch in range(400):
    model.train()
    for batch in train_loader:
        batch = batch.to(device)
        optimizer.zero_grad()
        out = model(batch)
        loss = F.nll_loss(out, batch.y)
        loss.backward()
        optimizer.step()

model.eval()

train_accuracy = evaluate(train_loader)
test_accuracy = evaluate(test_loader)

# Save the accuracies in a dictionary
accuracies = {
    'train_accuracy': train_accuracy,
    'test_accuracy': test_accuracy
}
imdb_acc_GIN = accuracies

print(f"Accuracies: {accuracies}")

### ENZYME

In [None]:
# Load the ENZYMES TU dataset, stored under /tmp/ENZYMES.
enzyme_dataset = TUDataset(
    root='/tmp/ENZYMES',
    name='ENZYMES',
)

In [None]:
# 80/20 split with a fixed random_state so the split is repeatable.
train_dataset, test_dataset = train_test_split(
    enzyme_dataset,
    test_size=0.2,
    random_state=42,
)

train_loader = DataLoader(train_dataset, shuffle=True, batch_size=32)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=32)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# GIN graph classifier: 3 input features per node, 6 output classes.
model = GIN_Graph(3, 6).to(device)

optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.01,
    weight_decay=5e-4,
)

# Mini-batch training for a fixed number of epochs.
for epoch in range(400):
    model.train()
    for batch in train_loader:
        batch.to(device)
        optimizer.zero_grad()
        out = model(batch)
        loss = F.nll_loss(input=out, target=batch.y)
        loss.backward()
        optimizer.step()

model.eval()

# Accuracy on both splits via the shared evaluate() helper.
train_accuracy = evaluate(train_loader)
test_accuracy = evaluate(test_loader)

accuracies = dict(train_accuracy=train_accuracy, test_accuracy=test_accuracy)

print(f"Accuracies: {accuracies}")
enzyme_acc_GIN = accuracies

### LRGB

In [None]:
# Train and test splits of the LRGB Peptides-func dataset.
pep_train = LRGBDataset(
    root='path/to/data',
    name='Peptides-func',
    split='train',
)
pep_test = LRGBDataset(
    root='path/to/data',
    name='Peptides-func',
    split='test',
)

# Mini-batch loaders; only the training split is shuffled.
train_loader = DataLoader(pep_train, shuffle=True, batch_size=32)
test_loader = DataLoader(pep_test, shuffle=False, batch_size=32)

In [None]:

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# GIN graph classifier: 9 input features per node, 10 output classes.
model = GIN_Graph(9, 10).to(device)

optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.01,
    weight_decay=5e-4,
)

# Created but not used by the loop below, which calls F.nll_loss directly.
criterion = torch.nn.CrossEntropyLoss()

# Mini-batch training; the last batch loss is reported every 50 epochs.
for epoch in range(400):
    model.train()
    for batch in train_loader:
        batch.to(device)
        optimizer.zero_grad()
        out = model(batch)
        # 2-D targets are reduced to a single class index per graph.
        target = batch.y.argmax(dim=1) if batch.y.ndim > 1 else batch.y

        loss = F.nll_loss(out, target)

        loss.backward()
        optimizer.step()
    if epoch % 50 == 0:
        print(f"Epoch {epoch} | Loss: {loss.item()}")

model.eval()

# Accuracy on both splits via the shared evaluate() helper.
train_accuracy = evaluate(train_loader)
test_accuracy = evaluate(test_loader)

accuracies = dict(train_accuracy=train_accuracy, test_accuracy=test_accuracy)

print(f"Accuracies: {accuracies}")
pascal_acc_GIN = accuracies

## GATv2

### CORA

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GATv2(dataset.num_node_features, dataset.num_classes).to(device)
data = dataset[0].to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

# Full-batch training: the whole graph is fed each step and the loss is
# restricted to the nodes selected by train_mask.
model.train()
for epoch in range(500):
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()

# Evaluation needs no gradients; no_grad avoids building the autograd graph.
model.eval()
with torch.no_grad():
    pred = model(data).argmax(dim=1)  # Predictions for all nodes

# Accuracy on the training nodes
train_correct = (pred[data.train_mask] == data.y[data.train_mask]).sum().item()
train_acc = train_correct / data.train_mask.sum().item()

# Accuracy on the test nodes
test_correct = (pred[data.test_mask] == data.y[data.test_mask]).sum().item()
test_acc = test_correct / data.test_mask.sum().item()

cora_GAT = {
    'train_accuracy': train_acc,
    'test_accuracy': test_acc
}
cora_GAT

### IMDB

In [None]:
# Graphs that come without node features get a constant all-ones feature
# vector of width 3 so that feature-based models can be applied.
modified_graphs = []
for data in imdb_dataset:
    if data.x is None:
        num_nodes = data.num_nodes
        one_features = torch.ones((num_nodes, 3))
        data.x = one_features
    modified_graphs.append(data)

# Shuffle with a dedicated, fixed-seed RNG. The global `random` state differs
# from cell to cell, so using it would give every model a different
# train/test split and make their accuracies incomparable.
random.Random(42).shuffle(modified_graphs)

# 80/20 train/test split
split_idx = int(len(modified_graphs) * 0.8)
train_dataset = modified_graphs[:split_idx]
test_dataset = modified_graphs[split_idx:]

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

model = GATv2_Graph(num_node_features=3, num_classes=2)
model.to(device)

# Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

# Mini-batch training for a fixed number of epochs
for epoch in range(400):
    model.train()
    for batch in train_loader:
        batch = batch.to(device)
        optimizer.zero_grad()
        out = model(batch)
        loss = F.nll_loss(out, batch.y)
        loss.backward()
        optimizer.step()

model.eval()

train_accuracy = evaluate(train_loader)
test_accuracy = evaluate(test_loader)

# Save the accuracies in a dictionary
accuracies = {
    'train_accuracy': train_accuracy,
    'test_accuracy': test_accuracy
}
imdb_acc_GAT = accuracies

print(f"Accuracies: {accuracies}")

### ENZYME

In [None]:
# 80/20 split with a fixed random_state so the split is repeatable.
train_dataset, test_dataset = train_test_split(
    enzyme_dataset,
    test_size=0.2,
    random_state=42,
)

train_loader = DataLoader(train_dataset, shuffle=True, batch_size=32)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=32)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# GATv2 graph classifier: 3 input features per node, 6 output classes.
model = GATv2_Graph(num_node_features=3, num_classes=6).to(device)

optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.01,
    weight_decay=5e-4,
)

# Mini-batch training for a fixed number of epochs.
for epoch in range(400):
    model.train()
    for batch in train_loader:
        batch.to(device)
        optimizer.zero_grad()
        out = model(batch)
        loss = F.nll_loss(input=out, target=batch.y)
        loss.backward()
        optimizer.step()

model.eval()

# Accuracy on both splits via the shared evaluate() helper.
train_accuracy = evaluate(train_loader)
test_accuracy = evaluate(test_loader)

accuracies = dict(train_accuracy=train_accuracy, test_accuracy=test_accuracy)

enzyme_acc_GAT = accuracies
print(f"Accuracies: {accuracies}")

#### LRGB

In [None]:
# Mini-batch loaders for Peptides-func; only the training split is shuffled.
train_loader = DataLoader(pep_train, shuffle=True, batch_size=32)
test_loader = DataLoader(pep_test, shuffle=False, batch_size=32)

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# GATv2 graph classifier: 9 input features per node, 10 output classes.
model = GATv2_Graph(num_node_features=9, num_classes=10).to(device)

optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.01,
    weight_decay=5e-4,
)

# Created but not used by the loop below, which calls F.nll_loss directly.
criterion = torch.nn.CrossEntropyLoss()

# Mini-batch training; the last batch loss is reported every 50 epochs.
for epoch in range(400):
    model.train()
    for batch in train_loader:
        batch.to(device)
        optimizer.zero_grad()
        out = model(batch)
        # 2-D targets are reduced to a single class index per graph.
        target = batch.y.argmax(dim=1) if batch.y.ndim > 1 else batch.y

        loss = F.nll_loss(out, target)

        loss.backward()
        optimizer.step()
    if epoch % 50 == 0:
        print(f"Epoch {epoch} | Loss: {loss.item()}")

model.eval()

# Accuracy on both splits via the shared evaluate() helper.
train_accuracy = evaluate(train_loader)
test_accuracy = evaluate(test_loader)

accuracies = dict(train_accuracy=train_accuracy, test_accuracy=test_accuracy)

print(f"Accuracies: {accuracies}")
pascal_acc_GAT = accuracies

### GPS

### CORA

In [None]:

dataset_path = '/tmp/Cora/Cora/'
processed_path = os.path.join(dataset_path, 'processed')

# Remove any previously processed files before reloading, presumably so that
# the pre_transform below is actually applied when the dataset is rebuilt.
if not os.path.exists(processed_path):
    print("Dataset not found or not loaded yet.")
else:
    print("Dataset is already loaded. Deleting processed files...")
    shutil.rmtree(processed_path)
    print("Processed files deleted.")

# Reload Cora with a 10-step random-walk positional encoding pre-transform.
transform = T.AddRandomWalkPE(10)
dataset = Planetoid(
    root='/tmp/Cora',
    name='Cora',
    pre_transform=transform,
)

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GPS_Node(dataset.num_node_features, dataset.num_classes).to(device)
data = dataset[0].to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

# Full-batch training: the whole graph is fed each step and the loss is
# restricted to the nodes selected by train_mask.
model.train()
for epoch in range(200):
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()

# Evaluation needs no gradients; no_grad avoids building the autograd graph.
model.eval()
with torch.no_grad():
    pred = model(data).argmax(dim=1)  # Predictions for all nodes

# Accuracy on the training nodes
train_correct = (pred[data.train_mask] == data.y[data.train_mask]).sum().item()
train_acc = train_correct / data.train_mask.sum().item()

# Accuracy on the test nodes
test_correct = (pred[data.test_mask] == data.y[data.test_mask]).sum().item()
test_acc = test_correct / data.test_mask.sum().item()

accuracy = {
    'train_accuracy': train_acc,
    'test_accuracy': test_acc
}

cora_GPS = accuracy
cora_GPS

### IMDB

In [None]:
dataset_path = '/tmp/IMDB/IMDB-BINARY/'
processed_path = os.path.join(dataset_path, 'processed')

# Remove any previously processed files before reloading, presumably so that
# the pre_transform below is actually applied when the dataset is rebuilt.
if os.path.exists(processed_path):
    print("Dataset is already loaded. Deleting processed files...")
    shutil.rmtree(processed_path)
    print("Processed files deleted.")
else:
    print("Dataset not found or not loaded yet.")

transform = T.AddRandomWalkPE(10)

imdb_dataset = TUDataset(root='/tmp/IMDB', name='IMDB-BINARY', pre_transform=transform)

# Graphs that come without node features get a constant all-ones feature
# vector of width 3 so that feature-based models can be applied.
modified_graphs = []
for data in imdb_dataset:
    if data.x is None:
        num_nodes = data.num_nodes
        one_features = torch.ones((num_nodes, 3))
        data.x = one_features
    modified_graphs.append(data)

# Shuffle with a dedicated, fixed-seed RNG. The global `random` state differs
# from cell to cell, so using it would give every model a different
# train/test split and make their accuracies incomparable.
random.Random(42).shuffle(modified_graphs)

# 80/20 train/test split
split_idx = int(len(modified_graphs) * 0.8)
train_dataset = modified_graphs[:split_idx]
test_dataset = modified_graphs[split_idx:]

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [None]:
# GPS graph classifier: 3 input features per node, 2 output classes.
model = GPS_Graph(3, 2).to(device)

optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.01,
    weight_decay=5e-4,
)

# Mini-batch training for a fixed number of epochs.
for epoch in range(400):
    model.train()
    for batch in train_loader:
        batch.to(device)
        optimizer.zero_grad()
        out = model(batch)
        loss = F.nll_loss(input=out, target=batch.y)
        loss.backward()
        optimizer.step()

model.eval()

# Accuracy on both splits via the shared evaluate() helper.
train_accuracy = evaluate(train_loader)
test_accuracy = evaluate(test_loader)

accuracies = dict(train_accuracy=train_accuracy, test_accuracy=test_accuracy)
imdb_acc_GPS = accuracies

print(f"Accuracies: {accuracies}")

### ENZYME

In [None]:
dataset_path = '/tmp/ENZYMES/ENZYMES'
processed_path = os.path.join(dataset_path, 'processed')

# Remove any previously processed files before reloading, presumably so that
# the pre_transform below is actually applied when the dataset is rebuilt.
if not os.path.exists(processed_path):
    print("Dataset not found or not loaded yet.")
else:
    print("Dataset is already loaded. Deleting processed files...")
    shutil.rmtree(processed_path)
    print("Processed files deleted.")

# Reload ENZYMES with a 10-step random-walk positional encoding pre-transform.
transform = T.AddRandomWalkPE(10)

enzyme_dataset = TUDataset(
    root='/tmp/ENZYMES',
    name='ENZYMES',
    pre_transform=transform,
)


In [None]:
# 80/20 split with a fixed random_state so the split is repeatable.
train_dataset, test_dataset = train_test_split(
    enzyme_dataset,
    test_size=0.2,
    random_state=42,
)

# Only the training split is shuffled.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=32)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=32)

In [None]:
# GPS graph classifier: 3 input features per node, 6 output classes.
model = GPS_Graph(3, 6).to(device)

optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.01,
    weight_decay=5e-4,
)

# Mini-batch training for a fixed number of epochs.
for epoch in range(400):
    model.train()
    for batch in train_loader:
        batch.to(device)
        optimizer.zero_grad()
        out = model(batch)
        loss = F.nll_loss(input=out, target=batch.y)
        loss.backward()
        optimizer.step()

model.eval()

# Accuracy on both splits via the shared evaluate() helper.
train_accuracy = evaluate(train_loader)
test_accuracy = evaluate(test_loader)

accuracies = dict(train_accuracy=train_accuracy, test_accuracy=test_accuracy)
enzyme_acc_GPS = accuracies

print(f"Accuracies: {accuracies}")

## LRGB

In [None]:
dataset_path = 'path/to/data/peptides-func/'
processed_path = os.path.join(dataset_path, 'processed')

# Remove any previously processed files before reloading, presumably so that
# the pre_transform below is actually applied when the dataset is rebuilt.
if not os.path.exists(processed_path):
    print("Dataset not found or not loaded yet.")
else:
    print("Dataset is already loaded. Deleting processed files...")
    shutil.rmtree(processed_path)
    print("Processed files deleted.")

# Reload both Peptides-func splits with a 10-step random-walk positional
# encoding pre-transform.
transform = T.AddRandomWalkPE(10)

pep_train = LRGBDataset(
    root='path/to/data',
    name='Peptides-func',
    split='train',
    pre_transform=transform,
)
pep_test = LRGBDataset(
    root='path/to/data',
    name='Peptides-func',
    split='test',
    pre_transform=transform,
)

In [None]:
# Mini-batch loaders for Peptides-func; only the training split is shuffled.
train_loader = DataLoader(pep_train, shuffle=True, batch_size=32)
test_loader = DataLoader(pep_test, shuffle=False, batch_size=32)

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# GPS graph classifier: 9 input features per node, 10 output classes.
model = GPS_Graph(9, 10).to(device)

optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.01,
    weight_decay=5e-4,
)

# Created but not used by the loop below, which calls F.nll_loss directly.
criterion = torch.nn.CrossEntropyLoss()

# Mini-batch training for a fixed number of epochs.
for epoch in range(400):
    model.train()
    for batch in train_loader:
        batch.to(device)
        optimizer.zero_grad()
        out = model(batch)
        # 2-D targets are reduced to a single class index per graph.
        target = batch.y.argmax(dim=1) if batch.y.ndim > 1 else batch.y

        loss = F.nll_loss(out, target)

        loss.backward()
        optimizer.step()

model.eval()

# Accuracy on both splits via the shared evaluate() helper.
train_accuracy = evaluate(train_loader)
test_accuracy = evaluate(test_loader)

accuracies = dict(train_accuracy=train_accuracy, test_accuracy=test_accuracy)

print(f"Accuracies: {accuracies}")
pascal_acc_GPS = accuracies