## GNN-based FEA Surrogate Model
Experiments with basic GNN architectures on the repaired dataset, built with
[PyTorch Geometric](https://github.com/pyg-team/pytorch_geometric).

Using full dataset with batching.

Currently: GCN

In [9]:
# Imports
import os
import torch
from torch_geometric.data import Dataset
import glob
from torch_geometric.loader import DataLoader
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, global_mean_pool
import torch.optim as optim
from torch.utils.data import random_split

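Prevent memory issues via lazy loading: each graph is read from disk only when it is requested, instead of loading the whole dataset up front.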

In [None]:
# Custom Class for Loading Graphs from Disk
class ProcessedGraphDataset(Dataset):
    """Lazily loads pre-processed graphs stored as individual ``.pt`` files.

    Only the list of file paths is kept in memory; a graph is read from disk
    with ``torch.load`` each time it is requested through ``get``.

    Args:
        root: Directory that directly contains the ``*.pt`` graph files.
        transform: Forwarded unchanged to the base ``Dataset`` constructor.
        pre_transform: Forwarded unchanged to the base ``Dataset`` constructor.

    Raises:
        ValueError: If ``root`` contains no ``*.pt`` files.
    """

    def __init__(self, root, transform=None, pre_transform=None):
        super().__init__(root, transform, pre_transform)
        # glob returns paths in arbitrary, filesystem-dependent order. Sorting
        # keeps the index -> file mapping stable across runs and machines, so
        # any index-based split of this dataset is repeatable.
        self.processed_files = sorted(glob.glob(os.path.join(root, '*.pt')))

        if not self.processed_files:
            raise ValueError("No processed graph files found.")

        # Load one sample graph to read the node-feature dimensionality.
        # NOTE: torch.load unpickles the file; only point this at trusted data.
        sample_graph = torch.load(self.processed_files[0])
        self._num_node_features = sample_graph.num_node_features
        self._num_classes = 1  # Assuming regression

    @property
    def num_node_features(self):
        """Node-feature count read from the first graph file at construction."""
        return self._num_node_features

    @property
    def num_classes(self):
        """Always 1: the target is treated as a single regression value."""
        return self._num_classes

    def len(self):
        """Return the number of graph files found under ``root``."""
        return len(self.processed_files)

    def get(self, idx):
        """Load and return the graph stored in the ``idx``-th file."""
        return torch.load(self.processed_files[idx])

In [None]:
from torch.utils.data import random_split

# Initialize the dataset
dataset = ProcessedGraphDataset(root='../data/processed/processed_graphs')

# 80% train / 20% test
train_length = int(len(dataset) * 0.8)
test_length = len(dataset) - train_length

# Seed the split explicitly. Without a generator, random_split draws from the
# global RNG, so the train/test partition changes every time the cell runs.
split_generator = torch.Generator().manual_seed(42)
train_dataset, test_dataset = random_split(
    dataset, [train_length, test_length], generator=split_generator
)

In [16]:
# Same Model (GCNConv)
class QualityPredictor(torch.nn.Module):
    """Two GCN layers, global mean pooling, and a linear head: one scalar per graph.

    Args:
        num_node_features: Size of each node's input feature vector.
        hidden_channels: Output width of the first GCN layer (default 64).
        embedding_channels: Output width of the second GCN layer, which is
            also the size of the pooled graph embedding (default 32).
    """

    def __init__(self, num_node_features, hidden_channels=64, embedding_channels=32):
        super().__init__()
        # Attribute names (conv1 / conv2 / fc) are unchanged so that state
        # dicts saved with the default widths still load.
        self.conv1 = GCNConv(in_channels=num_node_features, out_channels=hidden_channels)
        self.conv2 = GCNConv(in_channels=hidden_channels, out_channels=embedding_channels)
        self.fc = torch.nn.Linear(in_features=embedding_channels, out_features=1)

    def forward(self, data):
        """Predict one value per graph in ``data``.

        Args:
            data: Batched graph object exposing ``x``, ``edge_index`` and
                ``batch`` attributes.

        Returns:
            A 1-D tensor of predictions, flattened with ``view(-1)``.
        """
        x, edge_index, batch = data.x, data.edge_index, data.batch

        # Apply GCN layers with ReLU activation
        x = F.relu(self.conv1(x, edge_index))
        x = F.relu(self.conv2(x, edge_index))

        # Global mean pooling to get graph-level embeddings
        x = global_mean_pool(x, batch)

        # Final fully connected layer
        x = self.fc(x)

        # Output is a single value per graph
        return x.view(-1)

In [None]:
# Initialize the dataset
dataset = ProcessedGraphDataset(root='../data/processed/processed_graphs')

# Split the dataset (80% train / 20% test)
train_length = int(len(dataset) * 0.8)
test_length = len(dataset) - train_length
# Seed the split explicitly. Without a generator, random_split draws from the
# global RNG, so the train/test partition changes every time the cell runs.
split_generator = torch.Generator().manual_seed(42)
train_dataset, test_dataset = random_split(
    dataset, [train_length, test_length], generator=split_generator
)

# Create DataLoaders
batch_size = 4
num_workers = 0
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

In [28]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Build the network, then move its parameters onto the chosen device
model = QualityPredictor(num_node_features=dataset.num_node_features)
model = model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# L1 (absolute-error) loss as the training objective
criterion = torch.nn.L1Loss()
num_epochs = 10

In [29]:
# Learning Rate Scheduler
from torch.optim.lr_scheduler import ReduceLROnPlateau

# Multiply the learning rate by `factor` when the monitored (minimised)
# value has stopped improving for longer than `patience` epochs.
# NOTE(review): `verbose` is deprecated in recent PyTorch releases — confirm
# against the installed version.
scheduler = ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.5,
    patience=5,
    verbose=True,
)

# Early-stopping bookkeeping
best_val_loss = float('inf')  # lowest validation loss seen so far
patience_counter = 0          # consecutive epochs without a new best
max_patience = 10             # stop once the counter reaches this value

# Per-epoch loss history
training_losses, validation_losses = [], []

#### Sample Training Loop

Time: 472m 36.9s

In [32]:
# Training loop
# NOTE(review): with num_epochs = 10 and max_patience = 10 the early-stopping
# branch below cannot fire: the first epoch always beats the initial `inf`,
# leaving at most 9 non-improving epochs. Lower max_patience or raise
# num_epochs if early stopping is meant to be active.
num_epochs = 10
for epoch in range(num_epochs):
    # Training phase
    model.train()
    total_train_loss = 0
    for batch in train_loader:
        batch = batch.to(device)
        optimizer.zero_grad()
        out = model(batch)
        loss = criterion(out, batch.y.view(-1))
        loss.backward()
        optimizer.step()
        # criterion yields the batch mean; weight it by the graph count so the
        # epoch figure is a per-graph average even when the last batch is short.
        total_train_loss += loss.item() * batch.num_graphs
    avg_train_loss = total_train_loss / len(train_loader.dataset)
    training_losses.append(avg_train_loss)

    # Validation phase
    model.eval()
    total_val_loss = 0
    with torch.no_grad():
        for batch in test_loader:
            batch = batch.to(device)
            out = model(batch)
            val_loss = criterion(out, batch.y.view(-1))
            total_val_loss += val_loss.item() * batch.num_graphs
    avg_val_loss = total_val_loss / len(test_loader.dataset)
    validation_losses.append(avg_val_loss)

    # Release cached GPU memory once per epoch. Doing this after every batch
    # (as before) only adds overhead: the allocator has to re-request the same
    # memory on the very next step.
    torch.cuda.empty_cache()

    print(f'Epoch {epoch+1}, Training Loss: {avg_train_loss:.4f}, Validation Loss: {avg_val_loss:.4f}')

    # Step the scheduler on the validation loss it is set up to monitor
    scheduler.step(avg_val_loss)

    # Early stopping: checkpoint on every new best, otherwise count the miss
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        patience_counter = 0
        torch.save(model.state_dict(), 'best_model.pth')
    else:
        patience_counter += 1
        if patience_counter >= max_patience:
            print("Early stopping triggered.")
            break

KeyboardInterrupt: 

#### Results

Epoch 1, Training Loss: 3.2134, Validation Loss: 3.1544
Epoch 2, Training Loss: 3.1943, Validation Loss: 3.1501
Epoch 3, Training Loss: 3.1926, Validation Loss: 3.1477
Epoch 4, Training Loss: 3.1911, Validation Loss: 3.1485
Epoch 5, Training Loss: 3.1896, Validation Loss: 3.1479
Epoch 6, Training Loss: 3.1888, Validation Loss: 3.1453
Epoch 7, Training Loss: 3.1880, Validation Loss: 3.1442
Epoch 8, Training Loss: 3.1871, Validation Loss: 3.1458
Epoch 9, Training Loss: 3.1866, Validation Loss: 3.1470
Epoch 10, Training Loss: 3.1860, Validation Loss: 3.1427

In [31]:
from sklearn.metrics import mean_absolute_error

# Collect per-graph targets and predictions over the held-out loader
model.eval()
true_values = []
predicted_values = []
with torch.no_grad():
    for data in test_loader:
        data = data.to(device)
        output = model(data)
        # Flatten the targets the same way the training loop does
        # (batch.y.view(-1)), so targets and predictions are both collected
        # as flat per-graph values.
        true_values.extend(data.y.view(-1).cpu().tolist())
        predicted_values.extend(output.cpu().tolist())

mae = mean_absolute_error(true_values, predicted_values)
print(f"Test MAE: {mae:.4f}")

Test MAE: 3.1427
