# In [2]:
# 📘 Notebook: 03_train_model.ipynb
# Entrenamiento de un modelo GNN con PyTorch Geometric

import torch
from torch_geometric.nn import GCNConv, global_mean_pool
from torch_geometric.loader import DataLoader
import torch.nn.functional as F
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt

# Load the previously saved graphs.
# The path is relative to the current working directory, so a FileNotFoundError
# here means the file is not at data/gnn_graphs.pt as seen from wherever the
# notebook/script was launched.
# The file is presumably a pickled list of graph objects (it is sliced and fed
# to DataLoader below), not a bare tensor/state_dict. PyTorch >= 2.6 defaults to
# weights_only=True, which refuses to unpickle arbitrary classes, so full
# unpickling is requested explicitly.
# NOTE(security): weights_only=False executes pickle code; only load files you
# created yourself or otherwise trust.
data_list = torch.load("data/gnn_graphs.pt", weights_only=False)

# Split the dataset into train/test.
# Sequential 80/20 split with no shuffling: the first 80% of graphs (in file
# order) are used for training and the remainder for testing.
split = int(0.8 * len(data_list))
train_data = data_list[:split]
test_data = data_list[split:]

# Only the training loader reshuffles each epoch; test order stays fixed.
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False)

# Model architecture
class GNNModel(torch.nn.Module):
    """Graph-level regressor: two GCN layers, mean pooling, linear head.

    Node features of width 1 are lifted to 32 and then 64 channels by the two
    ``GCNConv`` layers (each followed by ReLU), averaged per graph with
    ``global_mean_pool`` and mapped to a single output value by ``lin``.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in this order so parameter initialisation (and
        # therefore RNG consumption) is stable.
        self.conv1 = GCNConv(1, 32)
        self.conv2 = GCNConv(32, 64)
        self.lin = torch.nn.Linear(64, 1)

    def forward(self, data):
        """Return the output of the linear head for the batch ``data``.

        ``data`` must expose ``x``, ``edge_index`` and ``batch``.
        """
        hidden = data.x
        # Both convolution stages share the same connectivity and activation.
        for conv in (self.conv1, self.conv2):
            hidden = F.relu(conv(hidden, data.edge_index))
        pooled = global_mean_pool(hidden, data.batch)
        return self.lin(pooled)

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model = GNNModel()
model = model.to(device)  # Module.to moves parameters and returns the module
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

# Model training
def train():
    """Run one training epoch and return the mean per-batch MSE loss.

    Operates on the module-level ``model``, ``optimizer``, ``train_loader``
    and ``device``.

    Returns:
        float: Sum of the per-batch losses divided by the number of batches.
    """
    model.train()
    total_loss = 0
    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()
        # The linear head emits a (num_graphs, 1) column. Flatten prediction
        # and target to the same 1-D shape (as evaluate() does) so that
        # mse_loss cannot silently broadcast a (B, 1) vs (B,) pair to (B, B).
        out = model(data).view(-1)
        target = data.y.view(-1)
        loss = F.mse_loss(out, target)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(train_loader)

# Model evaluation
def evaluate(loader):
    """Collect targets and predictions for every batch in ``loader``.

    Puts the module-level ``model`` in eval mode and runs it without gradient
    tracking on the module-level ``device``.

    Returns:
        tuple[list, list]: ``(y_true, y_pred)`` as flat lists of NumPy scalars,
        in loader order.
    """
    model.eval()
    targets = []
    predictions = []
    with torch.no_grad():
        for batch in loader:
            batch = batch.to(device)
            # Flatten to 1-D and move to host memory before converting.
            batch_pred = model(batch).view(-1).cpu().numpy()
            batch_true = batch.y.view(-1).cpu().numpy()
            targets += list(batch_true)
            predictions += list(batch_pred)
    return targets, predictions

# Train for 20 epochs, recording the mean batch loss of each epoch.
losses = []
for epoch in range(1, 21):
    loss = train()
    losses.append(loss)
    print(f"Epoch {epoch}, Loss: {loss:.4f}")

# Final evaluation on the test loader.
y_true, y_pred = evaluate(test_loader)
mae = mean_absolute_error(y_true, y_pred)
# Take the square root of the MSE explicitly: the `squared=False` keyword was
# deprecated in scikit-learn 1.4 and removed in 1.6, whereas this form works
# on every version.
rmse = mean_squared_error(y_true, y_pred) ** 0.5

print(f"MAE: {mae:.4f}")
print(f"RMSE: {rmse:.4f}")

# Training-loss curve.
plt.figure()
plt.plot(losses)
plt.title("Training Loss over Epochs")
plt.xlabel("Epoch")
plt.ylabel("MSE Loss")
plt.grid(True)
# Save BEFORE show(): once show() returns the current figure is typically
# closed/reset, so a later savefig() would write an empty image.
plt.savefig("docs/training_loss.png")
plt.show()

# Predicted vs. actual scatter plot.
plt.figure()
plt.scatter(y_true, y_pred, alpha=0.6)
# Dashed red diagonal marks perfect predictions (predicted == actual).
plt.plot([min(y_true), max(y_true)], [min(y_true), max(y_true)], color='red', linestyle='--')
plt.title("Predicted vs Actual Formation Energy")
plt.xlabel("Actual")
plt.ylabel("Predicted")
plt.grid(True)
# Each figure is saved to its own file while it is still the current figure.
plt.savefig("docs/pred_vs_actual.png")
plt.show()

# Captured cell output: FileNotFoundError: [Errno 2] No such file or directory: 'data/gnn_graphs.pt'