In [None]:
from torch_geometric.datasets import ZINC

# Load the ZINC dataset from a local root directory.
# NOTE(review): `subset=True` is not passed here, so the smaller subset is NOT
# what gets requested -- TODO confirm which variant was intended.
zinc_root = '/Users/nasibhuseynzade/Downloads/ZINC_dataset'
dataset = ZINC(root=zinc_root)
print(f"Dataset size: {len(dataset)}")
print(dataset[0])  # Show the first graph as a quick sanity check


In [None]:
import pickle
# Restore a previously pickled dataset; this rebinds `dataset`.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
pickle_path = '/Users/nasibhuseynzade/Downloads/zinc_dataset.pkl'
with open(pickle_path, 'rb') as pkl_file:
    dataset = pickle.load(pkl_file)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, global_mean_pool
from torch_geometric.loader import DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import numpy as np
from tqdm import tqdm

class GNN(torch.nn.Module):
    """Graph-level regressor: three GCN layers, mean pooling, linear head.

    Args:
        num_node_features: Number of input features per node.
        hidden_channels: Output width of each GCN layer (and input width of
            the final linear layer).
        dropout: Dropout probability applied after the first and second GCN
            layers while the module is in training mode. Defaults to 0.5,
            the value that used to be hard-coded.

    Raises:
        ValueError: If ``dropout`` is outside ``[0, 1]``.
    """

    def __init__(self, num_node_features, hidden_channels, dropout=0.5):
        super().__init__()
        # Fail at construction time instead of on the first forward pass.
        if not 0.0 <= dropout <= 1.0:
            raise ValueError(f"dropout must be in [0, 1], got {dropout}")
        self.dropout = dropout
        self.conv1 = GCNConv(num_node_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.conv3 = GCNConv(hidden_channels, hidden_channels)
        self.lin = nn.Linear(hidden_channels, 1)

    def forward(self, x, edge_index, batch):
        """Predict one scalar per graph in the batch.

        Args:
            x: Node feature matrix; cast to float before the first layer.
            edge_index: Graph connectivity passed to every GCN layer.
            batch: Node-to-graph assignment consumed by the mean pooling.

        Returns:
            The output of the final linear layer applied to the pooled graph
            embeddings (a single output column).
        """
        # Node features may arrive as integer tensors; the layers need floats.
        x = x.float()

        # First two stages share the same conv -> ReLU -> dropout pattern.
        for conv in (self.conv1, self.conv2):
            x = F.relu(conv(x, edge_index))
            x = F.dropout(x, p=self.dropout, training=self.training)

        # No activation/dropout after the last conv: its output is pooled directly.
        x = self.conv3(x, edge_index)
        x = global_mean_pool(x, batch)
        return self.lin(x)

def train_model(model, train_loader, optimizer, device):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: Module called as ``model(x, edge_index, batch)``.
        train_loader: Iterable of batches; must expose ``.dataset`` with a length.
        optimizer: Optimizer stepping the model's parameters.
        device: Device each batch is moved to before the forward pass.

    Returns:
        Sum of per-batch MSE losses weighted by ``num_graphs``, divided by
        ``len(train_loader.dataset)``.
    """
    model.train()
    weighted_loss_sum = 0

    for batch in train_loader:
        batch = batch.to(device)
        # Features and targets as float; targets reshaped to a single column
        # so they line up with the model output.
        batch.x, batch.y = batch.x.float(), batch.y.float().view(-1, 1)

        optimizer.zero_grad()
        prediction = model(batch.x, batch.edge_index, batch.batch)
        batch_loss = F.mse_loss(prediction, batch.y)
        batch_loss.backward()
        optimizer.step()

        # mse_loss is a per-batch mean, so weight it by the batch's graph count.
        weighted_loss_sum += batch_loss.item() * batch.num_graphs

    n_graphs = len(train_loader.dataset)
    return weighted_loss_sum / n_graphs

def evaluate(model, loader, device):
    """Compute the R² score of ``model`` over every batch in ``loader``.

    Args:
        model: Module called as ``model(x, edge_index, batch)``.
        loader: Iterable of batches to score.
        device: Device each batch is moved to before the forward pass.

    Returns:
        ``r2_score`` of the concatenated targets versus the concatenated
        predictions.
    """
    model.eval()
    targets, predictions = [], []

    # Inference only: no gradients are tracked.
    with torch.no_grad():
        for batch in loader:
            batch = batch.to(device)
            # Features and targets as float; targets as a single column.
            batch.x = batch.x.float()
            batch.y = batch.y.float().view(-1, 1)

            output = model(batch.x, batch.edge_index, batch.batch)
            targets.append(batch.y.cpu().numpy())
            predictions.append(output.cpu().numpy())

    return r2_score(np.concatenate(targets), np.concatenate(predictions))


# Training configuration
num_epochs = 100        # passes over the training loader
batch_size = 32         # graphs per mini-batch for all three loaders
hidden_channels = 64    # width handed to the GNN constructor
learning_rate = 1e-3    # initial Adam learning rate

# Seed torch so weight initialisation, DataLoader shuffling and dropout are
# repeatable across runs (GPU kernels may still be nondeterministic). The same
# seed drives both dataset splits below.
seed = 42
torch.manual_seed(seed)

# Cast features/targets to float up front.
# NOTE(review): this only sticks if iterating `dataset` yields the stored
# objects (e.g. a plain list); train_model/evaluate cast every batch anyway.
for data in dataset:
    data.x = data.x.float()
    data.y = data.y.float()

# Split dataset: 20% held out for test, then 20% of the remainder for
# validation (roughly 64% / 16% / 20% train / val / test).
train_dataset, test_dataset = train_test_split(dataset, test_size=0.2, random_state=seed)
train_dataset, val_dataset = train_test_split(train_dataset, test_size=0.2, random_state=seed)

# Create data loaders (only the training loader shuffles)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

# Initialize model on GPU when available, otherwise CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Input width is read from the first graph's node-feature matrix.
model = GNN(num_node_features=dataset[0].x.shape[1], hidden_channels=hidden_channels).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# mode='max' because the monitored quantity (validation R²) should increase;
# the learning rate is halved after 10 epochs without improvement.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max',
                                                         factor=0.5, patience=10)

# R² has no lower bound (it is negative whenever the model does worse than
# predicting the mean), so the running best must start at -inf rather than 0.
# Starting at 0 meant that a run whose validation R² never became positive
# wrote no checkpoint at all, and the later load of 'best_model.pt' either
# failed or picked up a stale file from an earlier run.
best_val_score = float('-inf')
for epoch in tqdm(range(num_epochs)):
    train_loss = train_model(model, train_loader, optimizer, device)
    val_r2 = evaluate(model, val_loader, device)

    # The scheduler monitors validation R² (configured with mode='max').
    scheduler.step(val_r2)

    # Checkpoint only when validation R² improves on the best seen so far.
    if val_r2 > best_val_score:
        best_val_score = val_r2
        torch.save(model.state_dict(), 'best_model.pt')

    # Report every 10th epoch.
    if (epoch + 1) % 10 == 0:
        print(f'Epoch: {epoch+1:03d}')
        print(f'Train Loss: {train_loss:.4f}')
        print(f'Validation R²: {val_r2:.4f}')
        print('------------------------')

# Load best model and evaluate on test set.
# map_location keeps the load working when the checkpoint was written from a
# different device than the one in use now (e.g. saved on GPU, reloaded on CPU).
model.load_state_dict(torch.load('best_model.pt', map_location=device))
test_r2 = evaluate(model, test_loader, device)
print(f'\nFinal Test R²: {test_r2:.4f}')
