In [1]:
from ogb.lsc.pcqm4mv2_pyg import PygPCQM4Mv2Dataset
from ogb.utils import smiles2graph

pyg_dataset = PygPCQM4Mv2Dataset(root="C://Users//phd//Fernando//ОИЯИ//Model", smiles2graph=smiles2graph)

  if osp.exists(f) and torch.load(f) != _repr(self.pre_transform):
  if osp.exists(f) and torch.load(f) != _repr(self.pre_filter):
  self.data, self.slices = torch.load(self.processed_paths[0])


In [2]:
from torch_geometric.loader import DataLoader

split_dict = pyg_dataset.get_idx_split()
train_idx = split_dict['train']
valid_idx = split_dict['valid']
#test_idx = split_dict['test-dev']

train_loader = DataLoader(pyg_dataset[train_idx], batch_size=128, shuffle=True, num_workers=4, pin_memory=True)
valid_loader = DataLoader(pyg_dataset[valid_idx], batch_size=128, shuffle=False, num_workers=4, pin_memory=True)
#test_loader = DataLoader(pyg_dataset[test_idx], batch_size=64, shuffle=False, num_workers=4, pin_memory=True)

for batch_idx, batch in enumerate(train_loader):  # Iterar sobre los batches
    print(f"[Dataloader] Tipo de batch {batch_idx}: {type(batch)}")  # Ver el tipo
    print(batch)  # Opcional: Ver el contenido del batch
    break  # Solo imprimimos el primer batch para no llenar la consola

  split_dict = replace_numpy_with_torchtensor(torch.load(osp.join(self.root, 'split_dict.pt')))


[Dataloader] Tipo de batch 0: <class 'torch_geometric.data.batch.DataBatch'>
DataBatch(edge_index=[2, 3678], edge_attr=[3678, 3], x=[1788, 9], y=[128], batch=[1788], ptr=[129])


In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GATv2Conv, global_add_pool, global_mean_pool
from torch.nn import Linear, Sequential
from ogb.graphproppred.mol_encoder import AtomEncoder, BondEncoder

class GATv2_Molecular(torch.nn.Module):
    def __init__(self, node_dim=9, edge_dim=3, hidden_dim=256, heads=8,
                 n_layers=4, dropout_rate=0.4, use_intermediate_states=True):
        super(GATv2_Molecular, self).__init__()
        
        self.n_layers = n_layers
        self.hidden_dim = hidden_dim
        self.use_intermediate_states = use_intermediate_states
        
        # Embedding inicial
        self.node_encoder = AtomEncoder(hidden_dim)
        self.edge_encoder = BondEncoder(hidden_dim)
        
        # Capas GATv2
        self.conv_layers = nn.ModuleList()
        self.batch_norms = nn.ModuleList()
        self.layer_norms = nn.ModuleList()
        
        # Primera capa GATv2
        self.conv_layers.append(
            GATv2Conv(
                in_channels=hidden_dim,
                out_channels=hidden_dim // heads,
                heads=heads,
                edge_dim=hidden_dim,
                dropout=dropout_rate,
                concat=True
            )
        )
        
        # Capas intermedias
        for _ in range(n_layers - 1):
            self.conv_layers.append(
                GATv2Conv(
                    in_channels=hidden_dim,
                    out_channels=hidden_dim // heads,
                    heads=heads,
                    edge_dim=hidden_dim,
                    dropout=dropout_rate,
                    concat=True
                )
            )
            
        # Normalización para cada capa
        for _ in range(n_layers):
            self.batch_norms.append(nn.BatchNorm1d(hidden_dim))
            self.layer_norms.append(nn.LayerNorm(hidden_dim))
        
        # Red más profunda para predicción final
        pred_input_dim = hidden_dim * 2  # Para mean y sum pooling
        if use_intermediate_states:
            pred_input_dim += hidden_dim  # Para estados intermedios agregados
            
        self.prediction_network = Sequential(
            Linear(pred_input_dim, hidden_dim),
            nn.LayerNorm(hidden_dim),
            nn.ELU(),
            nn.Dropout(dropout_rate),
            Linear(hidden_dim, hidden_dim // 2),
            nn.LayerNorm(hidden_dim // 2),
            nn.ELU(),
            nn.Dropout(dropout_rate),
            Linear(hidden_dim // 2, 1)
        )
        
        self.dropout = nn.Dropout(dropout_rate)
        
    def _aggregate_intermediate_states(self, states, batch):
        """Agrega estados intermedios usando atención"""
        # Convertir lista de estados a tensor
        states_tensor = torch.stack([global_mean_pool(state, batch) for state in states])
        
        # Calcular atención sobre estados
        weights = F.softmax(torch.mean(states_tensor, dim=2), dim=0)
        
        # Aplicar atención y agregar estados
        weighted_states = torch.sum(states_tensor * weights.unsqueeze(-1), dim=0)
        return weighted_states
        
    def forward(self, data, return_attention=False):
        x, edge_index, edge_attr, batch = data.x, data.edge_index, data.edge_attr, data.batch
        
        # Embeddings iniciales
        x = self.node_encoder(x)
        edge_attr = self.edge_encoder(edge_attr)
        
        # Lista para estados intermedios
        intermediate_states = []
        attention_weights = [] if return_attention else None
        
        # Propagación a través de capas GATv2
        for i in range(self.n_layers):
            x_residual = x
            
            # Aplicar capa GATv2
            if return_attention:
                x, attention = self.conv_layers[i](x, edge_index, edge_attr, 
                                                 return_attention_weights=True)
                attention_weights.append(attention)
            else:
                x = self.conv_layers[i](x, edge_index, edge_attr)
                
            x = self.batch_norms[i](x)
            x = self.layer_norms[i](x)
            x = F.elu(x)
            x = self.dropout(x)
            
            # Conexión residual
            x = x + x_residual
            
            # Guardar estado intermedio
            intermediate_states.append(x)
        
        # Agregación global combinando mean y sum pooling
        x_mean = global_mean_pool(x, batch)
        x_sum = global_add_pool(x, batch)
        
        # Combinar features
        features = [x_mean, x_sum]
        
        # Agregar estados intermedios si está activado
        if self.use_intermediate_states:
            x_intermediate = self._aggregate_intermediate_states(intermediate_states, batch)
            features.append(x_intermediate)
        
        # Concatenar todas las features
        x = torch.cat(features, dim=1)
        
        # Predicción final usando la red más profunda
        x = self.prediction_network(x)
        
        if return_attention:
            return x.squeeze(), attention_weights
        return x.squeeze()
    
    def get_attention_weights(self, data):
        """
        Calcula los pesos de atención solo cuando es necesario.
        Retorna una tupla (predicción, pesos de atención)
        """
        return self.forward(data, return_attention=True)

In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GATv2_Molecular(
    node_dim=9,  # Dimensión de características de nodos
    edge_dim=3,  # Dimensión de características de aristas
    hidden_dim=256,  # Dimensión oculta
    heads=8,  # Número de cabezas de atención
    n_layers=4,  # Número de capas
    dropout_rate=0.4  # Tasa de dropout
).to(device)

In [5]:
import torch
import torch.nn as nn
from torch.optim import AdamW
from tqdm import tqdm
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score

def initialize_training(model, learning_rate=0.001):
    """
    Inicializa el criterio, optimizador y scheduler para el entrenamiento
    """
    criterion = nn.MSELoss()
    optimizer = AdamW(model.parameters(), lr=learning_rate, weight_decay=1e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode='min',
        factor=0.7,
        patience=3,
        min_lr=1e-4,
    )
    return criterion, optimizer, scheduler

def train_regression(model, data_loader, criterion, optimizer, device):
    """
    Función de entrenamiento por época
    """
    model.train()
    total_loss = 0
    total_relative_error = 0
    predictions, actual = [], []
    
    pbar = tqdm(data_loader, desc='Training')
    for batch in pbar:
        batch = batch.to(device)
        optimizer.zero_grad(set_to_none=True)
        
        # Forward pass - GATGNN espera un objeto data directamente
        output = model(batch)
        
        # Asegurar dimensiones correctas
        y_true = batch.y.float().view(-1, 1)
        
        # Compute loss
        loss = criterion(output.view(-1, 1), y_true)
        total_loss += loss.item()
        
        # Calcular error relativo porcentual
        pred_np = output.detach().cpu().numpy()
        true_np = y_true.cpu().numpy()
        
        # Guardar predicciones para métricas
        predictions.extend(pred_np)
        actual.extend(true_np)
        
        # Calcular error relativo
        errors = pred_np - true_np
        rms_error = np.sqrt(np.mean(np.square(errors)))
        mean_y = np.mean(np.abs(true_np))
        relative_error_percent = (rms_error / mean_y) * 100 if mean_y != 0 else 0
        total_relative_error += relative_error_percent
        
        # Backward pass con gradient clipping
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        
        # Actualizar barra de progreso
        pbar.set_postfix({
            'loss': f'{loss.item():.4f}',
            'rel_error': f'{relative_error_percent:.2f}%'
        })
    
    # Calcular métricas finales
    avg_loss = total_loss / len(data_loader)
    avg_relative_error = total_relative_error / len(data_loader)
    r2 = r2_score(actual, predictions)
    rmse = np.sqrt(mean_squared_error(actual, predictions))
    
    metrics = {
        'loss': avg_loss,
        'relative_error': avg_relative_error,
        'r2': r2,
        'rmse': rmse
    }
    
    return metrics

def evaluate_regression(model, data_loader, criterion, device):
    """
    Función de evaluación
    """
    model.eval()
    total_loss = 0
    total_relative_error = 0
    predictions, actual = [], []
    
    with torch.no_grad():
        for batch in tqdm(data_loader, desc='Evaluating'):
            batch = batch.to(device)
            
            # Forward pass
            output = model(batch)
            y_true = batch.y.float().view(-1, 1)
            
            # Compute loss
            loss = criterion(output.view(-1, 1), y_true)
            total_loss += loss.item()
            
            # Guardar predicciones
            pred_np = output.cpu().numpy()
            true_np = y_true.cpu().numpy()
            predictions.extend(pred_np)
            actual.extend(true_np)
            
            # Calcular error relativo
            errors = pred_np - true_np
            rms_error = np.sqrt(np.mean(np.square(errors)))
            mean_y = np.mean(np.abs(true_np))
            relative_error_percent = (rms_error / mean_y) * 100 if mean_y != 0 else 0
            total_relative_error += relative_error_percent
    
    # Calcular métricas finales
    avg_loss = total_loss / len(data_loader)
    avg_relative_error = total_relative_error / len(data_loader)
    r2 = r2_score(actual, predictions)
    rmse = np.sqrt(mean_squared_error(actual, predictions))
    
    metrics = {
        'loss': avg_loss,
        'relative_error': avg_relative_error,
        'r2': r2,
        'rmse': rmse,
        'predictions': predictions,
        'actual': actual
    }
    
    return metrics

In [6]:
def train_model(model, train_loader, valid_loader, device, 
                num_epochs=10, early_stopping_patience=5,
                learning_rate=0.001):
    """
    Función principal de entrenamiento
    """
    # Inicializar componentes de entrenamiento
    criterion, optimizer, scheduler = initialize_training(model, learning_rate)
    
    # Inicializar seguimiento de métricas
    best_val_loss = float('inf')
    no_improve = 0
    history = {
        'train_loss': [], 'train_relative_error': [], 'train_r2': [], 'train_rmse': [],
        'val_loss': [], 'val_relative_error': [], 'val_r2': [], 'val_rmse': []
    }
    
    for epoch in range(num_epochs):
        print(f'\nEpoch {epoch+1}/{num_epochs}')
        
        # Training phase
        train_metrics = train_regression(model, train_loader, criterion, optimizer, device)
        
        # Validation phase
        val_metrics = evaluate_regression(model, valid_loader, criterion, device)
        
        # Actualizar history
        for key in ['loss', 'relative_error', 'r2', 'rmse']:
            history[f'train_{key}'].append(train_metrics[key])
            history[f'val_{key}'].append(val_metrics[key])
        
        # Print metrics
        print(f"Train - Loss: {train_metrics['loss']:.4f}, Rel Error: {train_metrics['relative_error']:.2f}%, R2: {train_metrics['r2']:.4f}, RMSE: {train_metrics['rmse']:.4f}")
        print(f"Valid - Loss: {val_metrics['loss']:.4f}, Rel Error: {val_metrics['relative_error']:.2f}%, R2: {val_metrics['r2']:.4f}, RMSE: {val_metrics['rmse']:.4f}")
        
        # Learning rate scheduling
        scheduler.step(val_metrics['loss'])
        
        # Early stopping check
        if val_metrics['loss'] < best_val_loss:
            best_val_loss = val_metrics['loss']
            no_improve = 0
            # Save best model
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': best_val_loss,
                'metrics': val_metrics
            }, 'best_model.pt')
            print("Model improved and saved!")
        else:
            no_improve += 1
            if no_improve >= early_stopping_patience:
                print(f'Early stopping triggered after {epoch+1} epochs')
                break
    
    return history

In [7]:
def train_gatgnn(model, train_loader, valid_loader, device, **kwargs):
    print("Starting GATGNN training...")
    history = train_model(
        model=model,
        train_loader=train_loader,
        valid_loader=valid_loader,
        device=device,
        **kwargs
    )
    return history

history = train_gatgnn(
    model=model,
    train_loader=train_loader,
    valid_loader=valid_loader,
    device=device,
    num_epochs=10,
    early_stopping_patience=5,
    learning_rate=0.001
)

Starting GATGNN training...

Epoch 1/50


Training: 100%|█████████████| 26396/26396 [25:30<00:00, 17.25it/s, loss=0.3129, rel_error=31.53%]
Evaluating: 100%|██████████████████████████████████████████████| 575/575 [00:41<00:00, 14.02it/s]


Train - Loss: 0.2354, Rel Error: 28.03%, R2: 0.8257, RMSE: 0.4852
Valid - Loss: 0.1539, Rel Error: 29.33%, R2: 0.9018, RMSE: 0.3918
Model improved and saved!

Epoch 2/50


Training: 100%|█████████████| 26396/26396 [25:40<00:00, 17.14it/s, loss=0.1547, rel_error=30.87%]
Evaluating: 100%|██████████████████████████████████████████████| 575/575 [00:41<00:00, 13.91it/s]


Train - Loss: 0.1329, Rel Error: 28.04%, R2: 0.9016, RMSE: 0.3646
Valid - Loss: 0.1322, Rel Error: 29.15%, R2: 0.9158, RMSE: 0.3628
Model improved and saved!

Epoch 3/50


Training: 100%|█████████████| 26396/26396 [25:23<00:00, 17.32it/s, loss=0.0936, rel_error=24.30%]
Evaluating: 100%|██████████████████████████████████████████████| 575/575 [00:41<00:00, 13.92it/s]


Train - Loss: 0.1198, Rel Error: 28.10%, R2: 0.9113, RMSE: 0.3461
Valid - Loss: 0.1360, Rel Error: 28.88%, R2: 0.9133, RMSE: 0.3681

Epoch 4/50


Training: 100%|█████████████| 26396/26396 [25:47<00:00, 17.06it/s, loss=0.0731, rel_error=29.97%]
Evaluating: 100%|██████████████████████████████████████████████| 575/575 [00:41<00:00, 13.91it/s]


Train - Loss: 0.1133, Rel Error: 28.13%, R2: 0.9161, RMSE: 0.3367
Valid - Loss: 0.1185, Rel Error: 29.95%, R2: 0.9245, RMSE: 0.3434
Model improved and saved!

Epoch 5/50


Training: 100%|█████████████| 26396/26396 [26:05<00:00, 16.86it/s, loss=0.1094, rel_error=22.39%]
Evaluating: 100%|██████████████████████████████████████████████| 575/575 [00:41<00:00, 13.73it/s]


Train - Loss: 0.1091, Rel Error: 28.15%, R2: 0.9192, RMSE: 0.3304
Valid - Loss: 0.1259, Rel Error: 29.76%, R2: 0.9198, RMSE: 0.3541

Epoch 6/50


Training: 100%|█████████████| 26396/26396 [26:17<00:00, 16.73it/s, loss=0.0667, rel_error=25.05%]
Evaluating: 100%|██████████████████████████████████████████████| 575/575 [00:41<00:00, 13.79it/s]


Train - Loss: 0.1063, Rel Error: 28.16%, R2: 0.9213, RMSE: 0.3260
Valid - Loss: 0.1092, Rel Error: 29.52%, R2: 0.9305, RMSE: 0.3296
Model improved and saved!

Epoch 7/50


Training: 100%|█████████████| 26396/26396 [26:19<00:00, 16.71it/s, loss=0.1386, rel_error=31.35%]
Evaluating: 100%|██████████████████████████████████████████████| 575/575 [00:43<00:00, 13.28it/s]


Train - Loss: 0.1043, Rel Error: 28.17%, R2: 0.9228, RMSE: 0.3229
Valid - Loss: 0.1081, Rel Error: 29.31%, R2: 0.9312, RMSE: 0.3280
Model improved and saved!

Epoch 8/50


Training: 100%|█████████████| 26396/26396 [26:37<00:00, 16.52it/s, loss=0.1799, rel_error=29.79%]
Evaluating: 100%|██████████████████████████████████████████████| 575/575 [00:43<00:00, 13.09it/s]


Train - Loss: 0.1028, Rel Error: 28.18%, R2: 0.9239, RMSE: 0.3206
Valid - Loss: 0.1102, Rel Error: 29.63%, R2: 0.9298, RMSE: 0.3311

Epoch 9/50


Training: 100%|█████████████| 26396/26396 [26:07<00:00, 16.84it/s, loss=0.0713, rel_error=25.42%]
Evaluating: 100%|██████████████████████████████████████████████| 575/575 [00:43<00:00, 13.27it/s]


Train - Loss: 0.1014, Rel Error: 28.18%, R2: 0.9249, RMSE: 0.3185
Valid - Loss: 0.0992, Rel Error: 29.39%, R2: 0.9368, RMSE: 0.3141
Model improved and saved!

Epoch 10/50


Training: 100%|█████████████| 26396/26396 [26:08<00:00, 16.83it/s, loss=0.0599, rel_error=27.54%]
Evaluating: 100%|██████████████████████████████████████████████| 575/575 [00:43<00:00, 13.22it/s]


Train - Loss: 0.1005, Rel Error: 28.19%, R2: 0.9256, RMSE: 0.3170
Valid - Loss: 0.1034, Rel Error: 29.36%, R2: 0.9341, RMSE: 0.3208

Epoch 11/50


Training: 100%|█████████████| 26396/26396 [26:07<00:00, 16.84it/s, loss=0.0712, rel_error=27.47%]
Evaluating: 100%|██████████████████████████████████████████████| 575/575 [00:43<00:00, 13.22it/s]


Train - Loss: 0.0998, Rel Error: 28.19%, R2: 0.9261, RMSE: 0.3159
Valid - Loss: 0.1091, Rel Error: 29.62%, R2: 0.9305, RMSE: 0.3296

Epoch 12/50


Training: 100%|█████████████| 26396/26396 [26:13<00:00, 16.77it/s, loss=0.0897, rel_error=24.48%]
Evaluating: 100%|██████████████████████████████████████████████| 575/575 [00:43<00:00, 13.11it/s]


Train - Loss: 0.0991, Rel Error: 28.20%, R2: 0.9266, RMSE: 0.3148
Valid - Loss: 0.1002, Rel Error: 29.30%, R2: 0.9362, RMSE: 0.3158

Epoch 13/50


Training: 100%|█████████████| 26396/26396 [26:10<00:00, 16.81it/s, loss=0.1445, rel_error=25.94%]
Evaluating: 100%|██████████████████████████████████████████████| 575/575 [00:43<00:00, 13.17it/s]


Train - Loss: 0.0987, Rel Error: 28.20%, R2: 0.9269, RMSE: 0.3142
Valid - Loss: 0.1048, Rel Error: 29.27%, R2: 0.9332, RMSE: 0.3230

Epoch 14/50


Training: 100%|█████████████| 26396/26396 [25:48<00:00, 17.04it/s, loss=0.1009, rel_error=25.88%]
Evaluating: 100%|██████████████████████████████████████████████| 575/575 [00:43<00:00, 13.37it/s]


Train - Loss: 0.0952, Rel Error: 28.21%, R2: 0.9295, RMSE: 0.3085
Valid - Loss: 0.0968, Rel Error: 29.56%, R2: 0.9383, RMSE: 0.3104
Model improved and saved!

Epoch 15/50


Training: 100%|█████████████| 26396/26396 [25:45<00:00, 17.08it/s, loss=0.0612, rel_error=23.24%]
Evaluating: 100%|██████████████████████████████████████████████| 575/575 [00:43<00:00, 13.20it/s]


Train - Loss: 0.0938, Rel Error: 28.22%, R2: 0.9305, RMSE: 0.3063
Valid - Loss: 0.0932, Rel Error: 29.57%, R2: 0.9406, RMSE: 0.3046
Model improved and saved!

Epoch 16/50


Training: 100%|█████████████| 26396/26396 [25:58<00:00, 16.94it/s, loss=0.0760, rel_error=24.98%]
Evaluating: 100%|██████████████████████████████████████████████| 575/575 [00:45<00:00, 12.67it/s]


Train - Loss: 0.0904, Rel Error: 28.24%, R2: 0.9331, RMSE: 0.3006
Valid - Loss: 0.0886, Rel Error: 29.46%, R2: 0.9436, RMSE: 0.2968

Epoch 24/50


Training: 100%|█████████████| 26396/26396 [25:56<00:00, 16.96it/s, loss=0.0906, rel_error=26.37%]
Evaluating: 100%|██████████████████████████████████████████████| 575/575 [00:43<00:00, 13.28it/s]


Train - Loss: 0.0900, Rel Error: 28.24%, R2: 0.9334, RMSE: 0.3000
Valid - Loss: 0.0892, Rel Error: 29.93%, R2: 0.9432, RMSE: 0.2979

Epoch 25/50


Training: 100%|█████████████| 26396/26396 [25:58<00:00, 16.93it/s, loss=0.0731, rel_error=27.77%]
Evaluating: 100%|██████████████████████████████████████████████| 575/575 [00:41<00:00, 13.71it/s]


Train - Loss: 0.0894, Rel Error: 28.24%, R2: 0.9338, RMSE: 0.2990
Valid - Loss: 0.0878, Rel Error: 29.75%, R2: 0.9442, RMSE: 0.2954

Epoch 28/50


Training: 100%|█████████████| 26396/26396 [25:51<00:00, 17.01it/s, loss=0.0762, rel_error=28.05%]
Evaluating: 100%|██████████████████████████████████████████████| 575/575 [00:43<00:00, 13.24it/s]

Train - Loss: 0.0870, Rel Error: 28.25%, R2: 0.9355, RMSE: 0.2950
Valid - Loss: 0.0847, Rel Error: 29.65%, R2: 0.9461, RMSE: 0.2902
Early stopping triggered after 30 epochs





In [None]:
# Asumiendo que tienes tus datos en variables
y_true = ... # Tus valores reales del gap HOMO-LUMO
y_pred = ... # Tus predicciones del modelo

# Crear la visualización
fig = plot_prediction_analysis(y_true, y_pred, save_path='predicciones_homo_lumo.png')
plt.show()