In [None]:
import pickle
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, global_mean_pool
from torch_geometric.loader import DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import numpy as np
from tqdm import tqdm

# Location of the pickled dataset. Kept in a single named constant so the
# notebook can be pointed at another copy by editing one line.
# NOTE(review): this is an absolute path on one machine — consider making it
# relative to a configurable data directory.
DATASET_PATH = '/Users/nasibhuseynzade/Downloads/zinc_dataset.pkl'

# SECURITY: pickle.load can execute arbitrary code embedded in the file, so
# only load pickles that come from a trusted source.
# The result is later indexed (dataset[0].x) and passed to train_test_model;
# presumably a list of torch_geometric graph objects — verify against the
# script that produced the pickle.
with open(DATASET_PATH, 'rb') as f:
    dataset = pickle.load(f)

class GNN(torch.nn.Module):
    """Graph-level regressor: stacked GCN layers, mean pooling, linear head.

    Args:
        num_node_features: Size of each input node feature vector.
        hidden_dim: Output width of every GCN layer (and input width of the
            final linear layer).
        depth: Requested number of GCN layers. One input layer is always
            created, followed by ``depth - 1`` hidden layers, so values below
            1 behave like ``depth=1``.
        dropout: Dropout probability applied after each GCN layer while the
            module is in training mode. Defaults to 0.5, the value that was
            previously hard-coded.

    Raises:
        ValueError: If ``dropout`` is outside ``[0, 1]``.
    """

    def __init__(self, num_node_features, hidden_dim=64, depth=3, dropout=0.5):
        super().__init__()

        # Fail at construction time instead of on the first forward pass.
        if not 0.0 <= dropout <= 1.0:
            raise ValueError(
                f"dropout must be a probability in [0, 1], got {dropout}"
            )
        self.dropout = dropout

        # GCN stack: one input layer, then (depth - 1) hidden-to-hidden layers.
        self.convs = nn.ModuleList()
        self.convs.append(GCNConv(num_node_features, hidden_dim))
        for _ in range(depth - 1):
            self.convs.append(GCNConv(hidden_dim, hidden_dim))

        # Regression head: one scalar per pooled graph embedding.
        self.lin = nn.Linear(hidden_dim, 1)

    def forward(self, x, edge_index, batch):
        """Return one prediction per graph in the batch.

        Args:
            x: Node feature tensor; cast to float before use.
            edge_index: Graph connectivity passed to every GCN layer.
            batch: Per-node graph assignment handed to ``global_mean_pool``.

        Returns:
            Output of the final linear layer, whose last dimension has size 1.
        """
        x = x.float()  # Node features may arrive as integer tensors.

        # Each layer: graph convolution -> ReLU -> dropout (training only).
        for conv in self.convs:
            x = conv(x, edge_index)
            x = F.relu(x)
            x = F.dropout(x, p=self.dropout, training=self.training)

        # Collapse node embeddings to one vector per graph, then regress.
        x = global_mean_pool(x, batch)
        x = self.lin(x)
        return x



In [None]:
def train_test_model(model, dataset, num_epochs=4, batch_size=32, learning_rate=0.0005):
    """Train ``model`` on an 80/20 split of ``dataset`` and score it each epoch.

    The dataset is split once with a fixed ``random_state`` of 42. After every
    training epoch the model is evaluated on the held-out 20% and the R2 score
    is recorded and printed together with the mean training loss.

    Args:
        model: Module called as ``model(x, edge_index, batch)``. It is moved
            in place to the device used for the batches (CUDA when available,
            otherwise CPU).
        dataset: Collection of graph samples accepted by ``train_test_split``
            and ``DataLoader``; each batch must expose ``x``, ``y``,
            ``edge_index``, ``batch`` and ``num_graphs``.
        num_epochs: Number of train/evaluate cycles.
        batch_size: Number of graphs per mini-batch for both loaders.
        learning_rate: Learning rate of the Adam optimizer.

    Returns:
        List with one test-set R2 score per epoch, in epoch order.
    """
    train_dataset, test_dataset = train_test_split(dataset, test_size=0.2, random_state=42)

    # Only the training loader shuffles; evaluation order does not matter.
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Batches are moved to `device` below, so the model must live there too;
    # otherwise a CPU model on a CUDA machine fails with a device mismatch.
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    r2_values = []

    for epoch in tqdm(range(num_epochs)):
        # ---- training pass ----
        model.train()
        total_loss = 0.0
        graphs_seen = 0

        for data in train_loader:
            data = data.to(device)
            # Targets become a float column so they line up with the model output.
            target = data.y.float().view(-1, 1)

            optimizer.zero_grad()
            out = model(data.x.float(), data.edge_index, data.batch)
            loss = F.mse_loss(out, target)
            loss.backward()
            optimizer.step()

            # Weight each batch loss by its graph count so the epoch average
            # is per graph, not per batch.
            total_loss += loss.item() * data.num_graphs
            graphs_seen += data.num_graphs

        train_loss = total_loss / max(graphs_seen, 1)

        # ---- evaluation pass ----
        model.eval()
        y_true = []
        y_pred = []

        with torch.no_grad():
            for data in test_loader:
                data = data.to(device)
                target = data.y.float().view(-1, 1)

                out = model(data.x.float(), data.edge_index, data.batch)
                y_true.append(target.cpu().numpy())
                y_pred.append(out.cpu().numpy())

        y_true = np.concatenate(y_true)
        y_pred = np.concatenate(y_pred)

        r2_value = r2_score(y_true, y_pred)
        r2_values.append(r2_value)

        print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, R2 Value: {r2_value:.4f}')

    return r2_values



In [None]:

# Seed PyTorch's global RNG so weight initialisation, dropout masks and the
# training loader's shuffling are repeatable after "Restart & Run All".
# 42 mirrors the random_state already used for the train/test split.
# NOTE(review): some GPU kernels may still be non-deterministic.
torch.manual_seed(42)

# Build the model on the GPU when one is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# The input width is read from the first sample's node-feature matrix.
model = GNN(num_node_features=dataset[0].x.shape[1], hidden_dim=64).to(device)
# One test-set R2 score per epoch.
r2_values = train_test_model(model, dataset, num_epochs=4)