In [2]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import kneighbors_graph
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# -----------------------
# Load & preprocess data
# -----------------------
dataset_path = "../../data/data.csv"
data = pd.read_csv(dataset_path)

# Fill missing numeric values with mean
num_cols = data.select_dtypes(include=[np.number]).columns
data[num_cols] = data[num_cols].fillna(data[num_cols].mean())

# Target & features
target_column = "Sint"
drop_cols = ["ID", "Unnamed: 0", "e_Sint", target_column]
feature_columns = [c for c in data.columns if c not in drop_cols]

X = data[feature_columns].values
y = data[target_column].values

# Scale
scaler_X = StandardScaler()
scaler_y = StandardScaler()

X_scaled = scaler_X.fit_transform(X)
y_scaled = scaler_y.fit_transform(y.reshape(-1, 1)).flatten()

# Train-test split
X_train_all, X_test_all, y_train, y_test = train_test_split(
    X_scaled, y_scaled, test_size=0.2, random_state=42
)

# -----------------------
# Graph construction
# -----------------------
k = 5
adj_matrix = kneighbors_graph(
    X_train_all, n_neighbors=k, mode="connectivity", include_self=False
)
edge_index = np.vstack(adj_matrix.nonzero())
edge_index = torch.LongTensor(edge_index)

# PyTorch Geometric Data
X_tensor = torch.FloatTensor(X_train_all)
y_tensor = torch.FloatTensor(y_train)
data_graph = Data(x=X_tensor, edge_index=edge_index, y=y_tensor)


In [8]:
# -----------------------
# GNN Model
# -----------------------
class GCNRegressor(nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels=1):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = torch.relu(x)
        x = self.conv2(x, edge_index)
        return x

def train_gnn(data, epochs=20, lr=0.01, hidden_channels=64):
    model = GCNRegressor(
        in_channels=data.x.shape[1],
        hidden_channels=hidden_channels,
        out_channels=1
    )
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        out = model(data.x, data.edge_index).squeeze()
        loss = criterion(out, data.y)
        loss.backward()
        optimizer.step()
        print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}")
    return model

# -----------------------
# Train
# -----------------------
model = train_gnn(data_graph, epochs=60, lr=0.01, hidden_channels=64)


Epoch 1/60, Loss: 2.3375
Epoch 2/60, Loss: 1.4358
Epoch 3/60, Loss: 0.9487
Epoch 4/60, Loss: 0.7871
Epoch 5/60, Loss: 0.7929
Epoch 6/60, Loss: 0.7974
Epoch 7/60, Loss: 0.7424
Epoch 8/60, Loss: 0.6517
Epoch 9/60, Loss: 0.5637
Epoch 10/60, Loss: 0.5004
Epoch 11/60, Loss: 0.4667
Epoch 12/60, Loss: 0.4577
Epoch 13/60, Loss: 0.4642
Epoch 14/60, Loss: 0.4764
Epoch 15/60, Loss: 0.4873
Epoch 16/60, Loss: 0.4923
Epoch 17/60, Loss: 0.4895
Epoch 18/60, Loss: 0.4795
Epoch 19/60, Loss: 0.4647
Epoch 20/60, Loss: 0.4489
Epoch 21/60, Loss: 0.4350
Epoch 22/60, Loss: 0.4250
Epoch 23/60, Loss: 0.4194
Epoch 24/60, Loss: 0.4173
Epoch 25/60, Loss: 0.4174
Epoch 26/60, Loss: 0.4179
Epoch 27/60, Loss: 0.4177
Epoch 28/60, Loss: 0.4167
Epoch 29/60, Loss: 0.4147
Epoch 30/60, Loss: 0.4123
Epoch 31/60, Loss: 0.4098
Epoch 32/60, Loss: 0.4077
Epoch 33/60, Loss: 0.4061
Epoch 34/60, Loss: 0.4049
Epoch 35/60, Loss: 0.4042
Epoch 36/60, Loss: 0.4037
Epoch 37/60, Loss: 0.4033
Epoch 38/60, Loss: 0.4026
Epoch 39/60, Loss: 0.

In [9]:
# -----------------------
# Predict
# -----------------------
def predict_gnn(model, X_test, k=5):
    model.eval()
    X_test_tensor = torch.FloatTensor(X_test)

    # Build adjacency for test set
    test_adj = kneighbors_graph(X_test, n_neighbors=k, mode="connectivity", include_self=False)
    test_edge_index = np.vstack(test_adj.nonzero())
    test_edge_index = torch.LongTensor(test_edge_index)

    test_data = Data(x=X_test_tensor, edge_index=test_edge_index)

    with torch.no_grad():
        preds = model(test_data.x, test_data.edge_index).squeeze().numpy()
    return preds

gnn_predictions = predict_gnn(model, X_test_all, k=5)

# -----------------------
# Evaluation
# -----------------------
mse = mean_squared_error(y_test, gnn_predictions)
mae = mean_absolute_error(y_test, gnn_predictions)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, gnn_predictions)

alpha = np.max(y_scaled) - np.min(y_scaled)
mae_acc  = (1 - mae / alpha) * 100
rmse_acc = (1 - rmse / alpha) * 100
mse_acc  = (1 - mse / alpha) * 100

print("\n--- Evaluation Metrics (Test) ---")
print(f"MSE  : {mse:.4f}")
print(f"MAE  : {mae:.4f}, Accuracy: {mae_acc:.2f}%")
print(f"RMSE : {rmse:.4f}, Accuracy: {rmse_acc:.2f}%")
print(f"R^2  : {r2:.4f}")
print(f"MSE Accuracy : {mse_acc:.2f}%")


--- Evaluation Metrics (Test) ---
MSE  : 0.6800
MAE  : 0.1660, Accuracy: 99.64%
RMSE : 0.8246, Accuracy: 98.21%
R^2  : 0.3495
MSE Accuracy : 98.53%
