In [None]:
import torch
from torch_geometric.datasets import AirfRANS
from torch_geometric.loader import DataLoader # We'll use this later for batching

# Root directory for the AirfRANS dataset.
# The dataset class downloads, extracts and processes its files under this path
# (relative to the notebook's working directory) the first time it is used.
DATA_ROOT = './data/AirfRANS'


In [None]:
# Load both AirfRANS splits for the 'full' task without any transform.
# `train=True` selects the training split and `train=False` the test split.
# Graph construction and normalization are added in a later cell.
print("Loading AirfRANS training dataset...")
train_dataset = AirfRANS(
    root=DATA_ROOT,
    task='full',
    train=True,
    transform=None,
)
print(f"Loaded {len(train_dataset)} training samples.")

print("Loading AirfRANS test dataset...")
test_dataset = AirfRANS(
    root=DATA_ROOT,
    task='full',
    train=False,
    transform=None,
)
print(f"Loaded {len(test_dataset)} test samples.")

# Show the repr of the first training sample to see which attributes it carries.
print("\nInspecting the first training data sample:")
first_data_sample = train_dataset[0]
print(first_data_sample)

Loading AirfRANS training dataset...


Downloading https://data.isir.upmc.fr/extrality/pytorch_geometric/AirfRANS.zip
Extracting data\AirfRANS\raw\AirfRANS.zip
Processing...
Done!


Loaded 800 training samples.
Loading AirfRANS test dataset...


Processing...
Done!


Loaded 200 test samples.

Inspecting the first training data sample:
Data(x=[181794, 5], y=[181794, 4], pos=[181794, 2], surf=[181794], name='airFoil2D_SST_36.622_11.319_3.941_5.424_1.0_16.283')


In [None]:
# What a PyTorch Geometric `Data` object holds for one AirfRANS sample:
# - data.x   : node features, shape [num_nodes, 5]
#              (inlet velocity vx/vy, distance to the airfoil, airfoil normals nx/ny).
# - data.pos : node coordinates, shape [num_nodes, 2] for (x, y).
# - data.y   : node targets, shape [num_nodes, 4]
#              (velocity vx/vy, pressure / specific mass, turbulent kinematic viscosity).
# - data.edge_index : connectivity in COO format, shape [2, num_edges].
#              The raw samples are point clouds and carry no `edge_index` yet;
#              edges are added by a graph transform in the next step.
# - data.num_nodes / data.num_edges : node and edge counts (0 edges until then).

print(f"Shape of node features (data.x): {first_data_sample.x.size()}")
print(f"Shape of node positions (data.pos): {first_data_sample.pos.size()}")
print(f"Shape of node targets (data.y): {first_data_sample.y.size()}")
print(f"Number of nodes in the first sample: {first_data_sample.num_nodes}")
# getattr with a default keeps the original defensive read of `num_edges`.
print(f"Number of edges in the first sample (initially 0): {getattr(first_data_sample, 'num_edges', 0)}")

# Input width comes from the dataset; output width is the second dimension of `y`.
print(f"\nNumber of original node features (in_channels): {train_dataset.num_node_features}")
print(f"Number of original output targets (out_channels): {train_dataset[0].y.size(1)}")


Shape of node features (data.x): torch.Size([181794, 5])
Shape of node positions (data.pos): torch.Size([181794, 2])
Shape of node targets (data.y): torch.Size([181794, 4])
Number of nodes in the first sample: 181794
Number of edges in the first sample (initially 0): 0

Number of original node features (in_channels): 5
Number of original output targets (out_channels): 4


In [None]:
from torch_geometric.transforms import Compose, KNNGraph, RadiusGraph
from sklearn.preprocessing import StandardScaler
import numpy as np
import torch
from torch_geometric.datasets import AirfRANS
from torch_geometric.loader import DataLoader # Make sure DataLoader is imported for later use

# Define a root directory for the dataset (ensure this is defined in your notebook)
# DATA_ROOT = './data/AirfRANS'

# --- Custom Normalization Class ---
class CustomNormalize(object):
    """Transform that standardizes ``data.x`` and ``data.y`` with pre-fitted scalers.

    Args:
        x_scaler: Fitted scaler for node features. Only its
            ``transform(array)`` method is used.
        y_scaler: Fitted scaler for node targets, used the same way.

    Calling the instance replaces ``data.x`` and ``data.y`` with float32
    tensors holding the scaled values and returns the same ``data`` object.
    """

    def __init__(self, x_scaler, y_scaler):
        self.x_scaler = x_scaler
        self.y_scaler = y_scaler

    @staticmethod
    def _standardize(scaler, tensor):
        """Scale ``tensor`` through ``scaler`` and return a float32 tensor on its device."""
        # The scaler works on NumPy arrays, so the values take a round trip
        # through host memory. `detach()` is required because `.numpy()`
        # refuses tensors that are part of an autograd graph.
        scaled = scaler.transform(tensor.detach().cpu().numpy())
        # Put the result back where the input lived so `x`/`y` stay on the
        # same device as the remaining attributes of the sample.
        return torch.tensor(scaled, dtype=torch.float, device=tensor.device)

    def __call__(self, data):
        data.x = self._standardize(self.x_scaler, data.x)
        data.y = self._standardize(self.y_scaler, data.y)
        return data

# --- Data Loading and Preprocessing Function ---
def load_and_preprocess_airfrans(graph_transform_type='knn', k=10, r=0.1, data_root=None):
    """Load AirfRANS ('full' task) with graph construction and standardization.

    Args:
        graph_transform_type: ``'knn'`` to build edges with ``KNNGraph(k=k)`` or
            ``'radius'`` to build them with ``RadiusGraph(r=r)``.
        k: Number of neighbours for the k-NN graph.
        r: Radius for the radius graph.
        data_root: Dataset root directory. When ``None`` (the default) the
            notebook-level ``DATA_ROOT`` is used, which matches the previous
            behaviour of this function.

    Returns:
        ``(train_dataset, test_dataset, x_scaler, y_scaler)`` where both
        datasets apply the graph transform followed by normalization, and the
        scalers are the ``StandardScaler`` objects fitted on the training split.

    Raises:
        ValueError: If ``graph_transform_type`` is neither ``'knn'`` nor ``'radius'``.
    """
    print(f"Loading AirfRANS dataset with {graph_transform_type} graph transform...")

    if graph_transform_type == 'knn':
        graph_transform = KNNGraph(k=k)
        print(f"Using KNNGraph with k={k}")
    elif graph_transform_type == 'radius':
        graph_transform = RadiusGraph(r=r)
        print(f"Using RadiusGraph with r={r}")
    else:
        raise ValueError("graph_transform_type must be 'knn' or 'radius'")

    if data_root is None:
        # Backward-compatible fallback to the notebook-level constant.
        data_root = DATA_ROOT

    # Step 1: load the untransformed training split. The scalers are fitted on
    # training data only so no test statistics leak into the normalization.
    initial_train_dataset = AirfRANS(root=data_root, task='full', train=True, transform=None)
    num_train_samples = len(initial_train_dataset)

    x_scaler = StandardScaler()
    y_scaler = StandardScaler()

    print("Fitting scalers incrementally using partial_fit()...")
    # One sample at a time: partial_fit keeps running statistics, so the whole
    # training set never has to be held in a single array.
    for i, data in enumerate(initial_train_dataset):
        x_scaler.partial_fit(data.x.numpy())
        y_scaler.partial_fit(data.y.numpy())
        if (i + 1) % 100 == 0 or (i + 1) == num_train_samples:
            print(f"  Processed {i + 1}/{num_train_samples} samples for scaler fitting.")

    normalize_transform = CustomNormalize(x_scaler, y_scaler)

    # Step 2: graph construction first, then normalization. The test split
    # deliberately reuses the scalers fitted on the training split.
    train_transform_final = Compose([graph_transform, normalize_transform])
    test_transform_final = Compose([graph_transform, normalize_transform])

    # Step 3: re-open both splits with the composed transform attached.
    print("Applying graph construction and normalization to datasets...")
    train_dataset_preprocessed = AirfRANS(root=data_root, task='full', train=True, transform=train_transform_final)
    test_dataset_preprocessed = AirfRANS(root=data_root, task='full', train=False, transform=test_transform_final)

    print(f"Loaded {len(train_dataset_preprocessed)} training samples after preprocessing.")
    print(f"Loaded {len(test_dataset_preprocessed)} test samples after preprocessing.")

    return train_dataset_preprocessed, test_dataset_preprocessed, x_scaler, y_scaler

# --- Run the Day 1 preprocessing ---
# DATA_ROOT is assigned again here so that this cell can be run on its own.
DATA_ROOT = './data/AirfRANS'

# Baseline connectivity: k-nearest-neighbour graph with k=10.
(
    train_dataset_preprocessed,
    test_dataset_preprocessed,
    x_scaler_day1,
    y_scaler_day1,
) = load_and_preprocess_airfrans(graph_transform_type='knn', k=10)

# Look at the first sample again: it should now carry `edge_index`
# and hold normalized `x` / `y`.
print("\nInspecting the first training data sample AFTER preprocessing:")
first_data_sample_preprocessed = train_dataset_preprocessed[0]
print(first_data_sample_preprocessed)

print(f"Number of nodes in the first sample: {first_data_sample_preprocessed.num_nodes}")
print(f"Number of edges in the first sample: {first_data_sample_preprocessed.num_edges}")
print(f"Shape of normalized node features (data.x): {first_data_sample_preprocessed.x.shape}")
print(f"Shape of normalized node targets (data.y): {first_data_sample_preprocessed.y.shape}")

Loading AirfRANS dataset with knn graph transform...
Using KNNGraph with k=10
Fitting scalers incrementally using partial_fit()...
  Processed 100/800 samples for scaler fitting.
  Processed 200/800 samples for scaler fitting.
  Processed 300/800 samples for scaler fitting.
  Processed 400/800 samples for scaler fitting.
  Processed 500/800 samples for scaler fitting.
  Processed 600/800 samples for scaler fitting.
  Processed 700/800 samples for scaler fitting.
  Processed 800/800 samples for scaler fitting.
Applying graph construction and normalization to datasets...
Loaded 800 training samples after preprocessing.
Loaded 200 test samples after preprocessing.

Inspecting the first training data sample AFTER preprocessing:
Data(x=[181794, 5], y=[181794, 4], pos=[181794, 2], surf=[181794], name='airFoil2D_SST_36.622_11.319_3.941_5.424_1.0_16.283', edge_index=[2, 1817940])
Number of nodes in the first sample: 181794
Number of edges in the first sample: 1817940
Shape of normalized node f

In [None]:
# Small slices for quick experiments: the first 5 training and first 2 test samples.
# NOTE(review): slicing a PyTorch Geometric dataset presumably returns a dataset
# restricted to the selected indices rather than a torch `Subset` - confirm
# against the installed version.
train_dataset_subset, test_dataset_subset = (
    train_dataset_preprocessed[:5],
    test_dataset_preprocessed[:2],
)

In [None]:
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
import torch # Ensure torch is imported

# --- Baseline GNN Model (GCN) ---
# This class defines a simple GCN architecture for node-level regression.
class GCNBaseline(nn.Module):
    """Baseline GCN for node-level regression.

    Three ``GCNConv`` layers, each followed by ReLU and dropout (p=0.5),
    then a linear layer mapping hidden features to the output targets.

    Args:
        in_channels: Number of input node features.
        hidden_channels: Width of every hidden layer.
        out_channels: Number of regression targets per node.
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        # Graph convolutions: input -> hidden, then two hidden -> hidden layers.
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.conv3 = GCNConv(hidden_channels, hidden_channels)

        # Output head projecting hidden features to the targets.
        self.lin = nn.Linear(hidden_channels, out_channels)

    def forward(self, data):
        """Return per-node predictions for ``data`` (reads ``data.x`` and ``data.edge_index``)."""
        hidden, edges = data.x, data.edge_index

        # Every convolution is followed by the same ReLU + dropout pair;
        # dropout is only active while the module is in training mode.
        for conv in (self.conv1, self.conv2, self.conv3):
            hidden = conv(hidden, edges)
            hidden = F.relu(hidden)
            hidden = F.dropout(hidden, p=0.5, training=self.training)

        return self.lin(hidden)

# Build the baseline model.
# Input and output widths are read from the preprocessed training dataset, so
# the preprocessing cell must have been run before this one.
in_channels = train_dataset_preprocessed.num_node_features
out_channels = train_dataset_preprocessed[0].y.size(1)  # number of target columns
hidden_channels = 64  # hidden layer width

model = GCNBaseline(
    in_channels,
    hidden_channels,
    out_channels,
)

# Printing the module lists its layers.
print(f"GNN Model Architecture:\n{model}")

# Prefer the GPU when one is available, otherwise stay on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model = model.to(device)
print(f"Model moved to device: {device}")

GNN Model Architecture:
GCNBaseline(
  (conv1): GCNConv(5, 64)
  (conv2): GCNConv(64, 64)
  (conv3): GCNConv(64, 64)
  (lin): Linear(in_features=64, out_features=4, bias=True)
)
Model moved to device: cpu


References:

torch_geometric.nn.GCNConv documentation: PyTorch Geometric Docs
https://pytorch-geometric.readthedocs.io/en/latest/modules/nn.html#torch_geometric.nn.conv.GCNConv

Graph Convolutional Networks (original paper by Kipf and Welling): Semi-Supervised Classification with Graph Convolutional Networks
https://arxiv.org/abs/1609.02907

In [None]:
from torch_geometric.loader import DataLoader # Ensure DataLoader is imported
import torch.optim as optim # For the optimizer

# --- DataLoaders ---
# Each sample is a very large graph, so one graph per batch keeps memory usage
# manageable. The loaders wrap the small subsets created above for a fast run;
# pass `train_dataset_preprocessed` / `test_dataset_preprocessed` instead to
# use the full splits.
batch_size = 1
train_loader = DataLoader(
    train_dataset_subset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = DataLoader(
    test_dataset_subset,
    batch_size=batch_size,
    shuffle=False,
)

print(f"\nDataLoaders created with batch_size={batch_size}.")

# --- Training and Testing Functions ---
# We'll define functions to encapsulate the training and testing logic for one epoch.

def train():
    """Run one optimization pass over ``train_loader`` and return the mean loss.

    Uses the notebook-level ``model``, ``optimizer``, ``criterion``, ``device``
    and ``train_loader``. Each batch loss is weighted by the number of graphs
    in the batch and the sum is divided by the number of graphs in the
    loader's dataset, so the result is an average per graph.
    """
    model.train()  # training mode: dropout is active
    running_loss = 0.0
    for batch in train_loader:
        batch = batch.to(device)  # keep the batch on the model's device
        optimizer.zero_grad()     # discard gradients from the previous step
        batch_loss = criterion(model(batch), batch.y)
        batch_loss.backward()
        optimizer.step()
        running_loss += batch_loss.item() * batch.num_graphs
    return running_loss / len(train_loader.dataset)

def test(loader):
    """Evaluate the notebook-level ``model`` on ``loader`` and return the mean loss.

    Gradients are disabled and the model is put in evaluation mode. Batch
    losses are weighted by the number of graphs per batch and averaged over
    the number of graphs in ``loader.dataset``.
    """
    model.eval()  # evaluation mode: dropout is disabled
    weighted_losses = []
    with torch.no_grad():
        for batch in loader:
            batch = batch.to(device)
            batch_loss = criterion(model(batch), batch.y)
            weighted_losses.append(batch_loss.item() * batch.num_graphs)
    return sum(weighted_losses) / len(loader.dataset)

# --- Optimizer and loss ---
# `model` and `device` come from the model cell above; run it first.
optimizer = optim.Adam(model.parameters(), lr=1e-3)
criterion = torch.nn.MSELoss()  # mean squared error for the regression targets

print("\nOptimizer and Loss Function configured.")

# --- Short training run with per-epoch evaluation ---
epochs = 2  # deliberately tiny: this is a smoke test of the pipeline

print("\n--- Starting Training Loop ---")
print(f"Training for {epochs} epochs on {device}.")

# Loss history, kept so the curves can be plotted later.
train_losses, test_losses = [], []

for epoch in range(1, epochs + 1):
    # Train first, then evaluate on the test loader with the updated weights.
    train_loss, test_loss = train(), test(test_loader)
    train_losses.append(train_loss)
    test_losses.append(test_loss)
    print(f'Epoch: {epoch:03d}, Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}')


print("Environment setup, data loading, initial preprocessing, baseline GNN model, and basic training loop implemented.")
# With only two epochs on a handful of samples the test loss is not guaranteed
# to go down; the recorded run below shows it rising slightly.
print("You should see the test loss decreasing over epochs, indicating the model is learning!")


DataLoaders created with batch_size=1.

Optimizer and Loss Function configured.

--- Starting Training Loop ---
Training for 2 epochs on cpu.
Epoch: 001, Train Loss: 0.4781, Test Loss: 0.5047
Epoch: 002, Train Loss: 0.4759, Test Loss: 0.5060
Environment setup, data loading, initial preprocessing, baseline GNN model, and basic training loop implemented.
You should see the test loss decreasing over epochs, indicating the model is learning!


References

torch_geometric.loader.DataLoader documentation: PyTorch Geometric Docs
https://pytorch-geometric.readthedocs.io/en/latest/modules/loader.html#torch_geometric.loader.DataLoader

PyTorch Optimizers (e.g., torch.optim.Adam): PyTorch Docs
https://pytorch.org/docs/stable/optim.html

PyTorch Loss Functions (e.g., torch.nn.MSELoss): PyTorch Docs
https://pytorch.org/docs/stable/nn.html#loss-functions

In [None]:
import os

# Show where the relative dataset directory resolves to from the current
# working directory.
print(os.path.abspath(os.path.join('.', 'data', 'AirfRANS')))

c:\Users\phani\OneDrive\Aerodynamic-_Prediction_with_GNN\data\AirfRANS


In [None]:
# Graph Construction & Model Refinement - Step 1: Graph Construction Comparison

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.datasets import AirfRANS
from torch_geometric.transforms import Compose, KNNGraph, RadiusGraph
from torch_geometric.loader import DataLoader
from sklearn.preprocessing import StandardScaler
import numpy as np
from torch_geometric.nn import GCNConv # Assuming GCNBaseline is still used for comparison

# Define a root directory for the dataset (ensure this is defined in your notebook)
# DATA_ROOT = './data/AirfRANS'

# --- Custom Normalization Class (from previous) ---
class CustomNormalize(object):
    """Transform that standardizes ``data.x`` and ``data.y`` with pre-fitted scalers.

    Args:
        x_scaler: Fitted scaler for node features. Only its
            ``transform(array)`` method is used.
        y_scaler: Fitted scaler for node targets, used the same way.

    Calling the instance replaces ``data.x`` and ``data.y`` with float32
    tensors holding the scaled values and returns the same ``data`` object.
    """

    def __init__(self, x_scaler, y_scaler):
        self.x_scaler = x_scaler
        self.y_scaler = y_scaler

    @staticmethod
    def _standardize(scaler, tensor):
        """Scale ``tensor`` through ``scaler`` and return a float32 tensor on its device."""
        # `.numpy()` refuses tensors that are part of an autograd graph, hence
        # the detach before the host round trip.
        scaled = scaler.transform(tensor.detach().cpu().numpy())
        # Rebuild on the input's device so `x`/`y` stay alongside the other
        # attributes of the sample.
        return torch.tensor(scaled, dtype=torch.float, device=tensor.device)

    def __call__(self, data):
        data.x = self._standardize(self.x_scaler, data.x)
        data.y = self._standardize(self.y_scaler, data.y)
        return data

# --- Data Loading and Preprocessing Function (modified from previous; includes the fix for MemoryError) ---
# This function now also returns the scalers, which are needed for denormalization later
def load_and_preprocess_airfrans(graph_transform_type='knn', k=10, r=0.1, data_root='./data/AirfRANS'):
    """Load AirfRANS ('full' task) with graph construction and standardization.

    Args:
        graph_transform_type: ``'knn'`` for ``KNNGraph(k=k)`` or ``'radius'``
            for ``RadiusGraph(r=r)``.
        k: Number of neighbours for the k-NN graph.
        r: Radius for the radius graph.
        data_root: Dataset root directory.

    Returns:
        ``(train_dataset, test_dataset, x_scaler, y_scaler)``; the scalers are
        the ``StandardScaler`` objects fitted on the training split.

    Raises:
        ValueError: If ``graph_transform_type`` is neither ``'knn'`` nor ``'radius'``.
    """
    print(f"Loading AirfRANS dataset with {graph_transform_type} graph transform...")

    # Reject unknown graph types before building anything.
    if graph_transform_type not in ('knn', 'radius'):
        raise ValueError("graph_transform_type must be 'knn' or 'radius'")

    if graph_transform_type == 'knn':
        graph_transform = KNNGraph(k=k)
        print(f"Using KNNGraph with k={k}")
    else:
        graph_transform = RadiusGraph(r=r)
        print(f"Using RadiusGraph with r={r}")

    # The scalers see the untransformed training split only.
    raw_train = AirfRANS(root=data_root, task='full', train=True, transform=None)
    total = len(raw_train)

    x_scaler = StandardScaler()
    y_scaler = StandardScaler()

    print("Fitting scalers incrementally using partial_fit()...")
    for count, sample in enumerate(raw_train, start=1):
        # Sample-by-sample updates keep memory bounded.
        x_scaler.partial_fit(sample.x.numpy())
        y_scaler.partial_fit(sample.y.numpy())
        if count % 100 == 0 or count == total:
            print(f"  Processed {count}/{total} samples for scaler fitting.")

    normalize_transform = CustomNormalize(x_scaler, y_scaler)

    # Graph construction first, then normalization; both splits share the
    # scalers fitted on the training data.
    train_transform_final = Compose([graph_transform, normalize_transform])
    test_transform_final = Compose([graph_transform, normalize_transform])

    train_dataset_preprocessed = AirfRANS(
        root=data_root, task='full', train=True, transform=train_transform_final
    )
    test_dataset_preprocessed = AirfRANS(
        root=data_root, task='full', train=False, transform=test_transform_final
    )

    print(f"Loaded {len(train_dataset_preprocessed)} training samples after preprocessing.")
    print(f"Loaded {len(test_dataset_preprocessed)} test samples after preprocessing.")

    return train_dataset_preprocessed, test_dataset_preprocessed, x_scaler, y_scaler

# --- Baseline GNN Model (GCN, from previous - used for comparison) ---
class GCNBaseline(nn.Module):
    """Configurable-depth GCN for node-level regression.

    Args:
        in_channels: Number of input node features.
        hidden_channels: Width of every hidden layer.
        out_channels: Number of regression targets per node.
        num_layers: Number of ``GCNConv`` layers; must be at least 1.
        dropout_rate: Dropout probability applied after every convolution
            except the last one.

    Raises:
        ValueError: If ``num_layers`` is smaller than 1. Without this check
            one layer would still be built while ``self.num_layers`` recorded
            a different value, and the dropout condition in ``forward`` would
            no longer match the real depth.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, num_layers=3, dropout_rate=0.5):
        if num_layers < 1:
            raise ValueError(f"num_layers must be >= 1, got {num_layers}")
        super().__init__()
        self.num_layers = num_layers
        self.dropout_rate = dropout_rate

        # First layer maps input features to the hidden width; the remaining
        # num_layers - 1 layers are hidden -> hidden.
        self.convs = nn.ModuleList()
        self.convs.append(GCNConv(in_channels, hidden_channels))
        for _ in range(num_layers - 1):
            self.convs.append(GCNConv(hidden_channels, hidden_channels))

        # Output head projecting hidden features to the targets.
        self.lin = nn.Linear(hidden_channels, out_channels)

    def forward(self, data):
        """Return per-node predictions for ``data`` (reads ``data.x`` and ``data.edge_index``)."""
        x, edge_index = data.x, data.edge_index

        for i, conv in enumerate(self.convs):
            x = conv(x, edge_index)
            x = F.relu(x)
            # No dropout directly before the linear head.
            if i < self.num_layers - 1:
                x = F.dropout(x, p=self.dropout_rate, training=self.training)
        x = self.lin(x)
        return x

# --- Training and Testing Functions (from previous) ---
def train_model(model, train_loader, optimizer, criterion, device):
    """Train ``model`` for one pass over ``train_loader`` and return the mean loss.

    Each batch is moved to ``device``, a forward/backward pass is run and the
    optimizer is stepped. Batch losses are weighted by ``num_graphs`` and the
    sum is divided by ``len(train_loader.dataset)``, giving an average per graph.
    """
    model.train()
    running_loss = 0.0
    for batch in train_loader:
        batch = batch.to(device)
        optimizer.zero_grad()
        batch_loss = criterion(model(batch), batch.y)
        batch_loss.backward()
        optimizer.step()
        running_loss += batch_loss.item() * batch.num_graphs
    return running_loss / len(train_loader.dataset)

def test_model(model, loader, criterion, device):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for data in loader:
            data = data.to(device)
            out = model(data)
            loss = criterion(out, data.y)
            total_loss += loss.item() * data.num_graphs
    return total_loss / len(loader.dataset)

# --- Main execution ---
# Prefer the GPU when one is available, otherwise stay on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using device: {device}")

# Dataset root (adjust to your environment).
DATA_ROOT = './data/AirfRANS'

# --- Experiment 1.1: KNNGraph (k=10, same setting as the previous section) ---
print("\n--- Experiment 1.1: KNNGraph (k=10) ---")
train_dataset_knn10, test_dataset_knn10, _, _ = load_and_preprocess_airfrans(
    graph_transform_type='knn',
    k=10,
    data_root=DATA_ROOT,
)

# Analyze graph properties
def analyze_graph_properties(dataset, name):
    """Print size statistics for the graphs in ``dataset``.

    Iterates over every sample once, summing ``num_nodes`` and ``num_edges``,
    then prints the number of graphs, the average nodes and edges per graph,
    and the ratio of total edges to total nodes (reported as average degree).

    Args:
        dataset: Sized iterable of samples exposing ``num_nodes`` and ``num_edges``.
        name: Label used in the printed header.

    An empty dataset prints zeros instead of raising ``ZeroDivisionError``.
    """
    num_graphs = len(dataset)
    num_nodes_total = 0
    num_edges_total = 0
    for data in dataset:
        num_nodes_total += data.num_nodes
        num_edges_total += data.num_edges

    # Guard every ratio, not only the degree, so empty input is handled uniformly.
    avg_nodes_per_graph = (num_nodes_total / num_graphs) if num_graphs > 0 else 0
    avg_edges_per_graph = (num_edges_total / num_graphs) if num_graphs > 0 else 0
    avg_degree = (num_edges_total / num_nodes_total) if num_nodes_total > 0 else 0

    print(f"--- {name} Graph Properties ---")
    print(f"Total graphs: {num_graphs}")
    print(f"Average nodes per graph: {avg_nodes_per_graph:.2f}")
    print(f"Average edges per graph: {avg_edges_per_graph:.2f}")
    print(f"Average degree: {avg_degree:.2f}")

analyze_graph_properties(train_dataset_knn10, name="KNNGraph (k=10) Training Set")

# --- Experiment 1.2: KNNGraph with k=20 (denser connectivity) ---
print("\n--- Experiment 1.2: KNNGraph (k=20) ---")
train_dataset_knn20, test_dataset_knn20, _, _ = load_and_preprocess_airfrans(
    graph_transform_type='knn',
    k=20,
    data_root=DATA_ROOT,
)
analyze_graph_properties(train_dataset_knn20, name="KNNGraph (k=20) Training Set")

# --- Experiment 1.3: RadiusGraph with a small radius (r=0.01) ---
# A suitable `r` depends on the scale of the `pos` coordinates; a very small
# radius can leave nodes disconnected.
# NOTE(review): the coordinate range of `pos` is not checked in this notebook -
# inspect it before settling on a radius.
print("\n--- Experiment 1.3: RadiusGraph (r=0.01) ---")
train_dataset_radius0_01, test_dataset_radius0_01, _, _ = load_and_preprocess_airfrans(
    graph_transform_type='radius',
    r=0.01,
    data_root=DATA_ROOT,
)
analyze_graph_properties(train_dataset_radius0_01, name="RadiusGraph (r=0.01) Training Set")

# --- Experiment 1.4: RadiusGraph with a larger radius (r=0.05, likely more edges) ---
print("\n--- Experiment 1.4: RadiusGraph (r=0.05) ---")
train_dataset_radius0_05, test_dataset_radius0_05, _, _ = load_and_preprocess_airfrans(
    graph_transform_type='radius',
    r=0.05,
    data_root=DATA_ROOT,
)
analyze_graph_properties(train_dataset_radius0_05, name="RadiusGraph (r=0.05) Training Set")


# --- Quick training run per graph type, for comparison ---
# These module-level settings are read by `run_comparison_train` below.
print("\n--- Running quick training for graph type comparison ---")
epochs_compare = 2 # very short run: only meant as a first comparison
hidden_channels_compare = 32 # hidden width of the comparison model
dropout_rate_compare = 0.3 # dropout probability passed to the model
learning_rate_compare = 0.001 # learning rate passed to Adam
batch_size_compare = 1 # one graph per batch because each graph is very large

# Mean squared error, shared by every comparison run.
criterion = torch.nn.MSELoss()

def run_comparison_train(train_ds, test_ds, name, n_train=5, n_test=2, seed=None):
    """Train a fresh ``GCNBaseline`` briefly on a slice of the data and print the losses.

    Args:
        train_ds: Preprocessed training dataset; supplies ``num_node_features``
            and the target width read from its first sample.
        test_ds: Preprocessed test dataset.
        name: Label printed in the run header.
        n_train: Number of leading training samples to use (default 5, the
            value that was previously hard-coded).
        n_test: Number of leading test samples to use (default 2, likewise).
        seed: Optional integer passed to ``torch.manual_seed`` before the
            loaders and model are created, so that different graph types can
            be compared from the same starting point. ``None`` leaves the
            random state untouched (previous behaviour).

    Returns:
        List of ``(train_loss, test_loss)`` tuples, one per epoch.

    Reads the notebook-level ``batch_size_compare``, ``hidden_channels_compare``,
    ``dropout_rate_compare``, ``learning_rate_compare``, ``epochs_compare``,
    ``criterion`` and ``device``.
    """
    if seed is not None:
        torch.manual_seed(seed)

    # Small leading slices keep the comparison quick.
    train_loader = DataLoader(train_ds[:n_train], batch_size=batch_size_compare, shuffle=True)
    test_loader = DataLoader(test_ds[:n_test], batch_size=batch_size_compare, shuffle=False)

    model = GCNBaseline(train_ds.num_node_features, hidden_channels_compare, train_ds[0].y.shape[1],
                        num_layers=3, dropout_rate=dropout_rate_compare).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate_compare)

    history = []
    print(f"\n--- Training with {name} ---")
    for epoch in range(1, epochs_compare + 1):
        train_loss = train_model(model, train_loader, optimizer, criterion, device)
        test_loss = test_model(model, test_loader, criterion, device)
        history.append((train_loss, test_loss))
        print(f'  Epoch: {epoch:03d}, Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}')
    return history

# Run the same short training once per graph construction, in a fixed order.
for _cmp_train, _cmp_test, _cmp_label in (
    (train_dataset_knn10, test_dataset_knn10, "KNNGraph (k=10)"),
    (train_dataset_knn20, test_dataset_knn20, "KNNGraph (k=20)"),
    (train_dataset_radius0_01, test_dataset_radius0_01, "RadiusGraph (r=0.01)"),
    (train_dataset_radius0_05, test_dataset_radius0_05, "RadiusGraph (r=0.05)"),
):
    run_comparison_train(_cmp_train, _cmp_test, _cmp_label)

print("\n Graph Construction Comparison complete. Analyze the graph properties and initial training losses to inform your choice for further refinement.")

Using device: cpu

--- Experiment 1.1: KNNGraph (k=10) ---
Loading AirfRANS dataset with knn graph transform...
Using KNNGraph with k=10


Downloading https://data.isir.upmc.fr/extrality/pytorch_geometric/AirfRANS.zip
Extracting data\AirfRANS\raw\AirfRANS.zip
Processing...
Done!


Fitting scalers incrementally using partial_fit()...
  Processed 100/800 samples for scaler fitting.
  Processed 200/800 samples for scaler fitting.
  Processed 300/800 samples for scaler fitting.
  Processed 400/800 samples for scaler fitting.
  Processed 500/800 samples for scaler fitting.
  Processed 600/800 samples for scaler fitting.
  Processed 700/800 samples for scaler fitting.
  Processed 800/800 samples for scaler fitting.


Processing...
Done!


Loaded 800 training samples after preprocessing.
Loaded 200 test samples after preprocessing.
--- KNNGraph (k=10) Training Set Graph Properties ---
Total graphs: 800
Average nodes per graph: 179908.76
Average edges per graph: 1799087.62
Average degree: 10.00

--- Experiment 1.2: KNNGraph (k=20) ---
Loading AirfRANS dataset with knn graph transform...
Using KNNGraph with k=20
Fitting scalers incrementally using partial_fit()...
  Processed 100/800 samples for scaler fitting.
  Processed 200/800 samples for scaler fitting.
  Processed 300/800 samples for scaler fitting.
  Processed 400/800 samples for scaler fitting.
  Processed 500/800 samples for scaler fitting.
  Processed 600/800 samples for scaler fitting.
  Processed 700/800 samples for scaler fitting.
  Processed 800/800 samples for scaler fitting.
Loaded 800 training samples after preprocessing.
Loaded 200 test samples after preprocessing.
--- KNNGraph (k=20) Training Set Graph Properties ---
Total graphs: 800
Average nodes per g

KeyboardInterrupt: 

In [None]:
# (Stray scratch input removed: the bare name `kkk` is undefined and raised
# NameError whenever the notebook was run top to bottom.)