In [1]:
import torch
from torch_geometric.data import Data, DataLoader
from torch_geometric.nn import GATConv
import torch.nn.functional as F
import os
from tqdm import tqdm

In [3]:
import torch
import random

def embaralhar_grafos(input_path, output_path, seed=None):
    """Shuffle the graphs and vectors of a ``.pt`` file while keeping pairs aligned.

    The input file must hold a dict with the keys ``'grafos'`` and ``'vetores'``.
    Both sequences are reordered with one shared random permutation and written
    to ``output_path`` as plain lists under the same two keys.

    Args:
        input_path: Path of the ``.pt`` file read with ``torch.load``.
        output_path: Path the shuffled dict is written to with ``torch.save``.
        seed: Optional seed for a private ``random.Random`` generator, which
            makes the permutation reproducible. With ``None`` (the default)
            the global ``random`` state is used.

    Raises:
        ValueError: If ``'grafos'`` and ``'vetores'`` differ in length.
    """
    # NOTE(review): torch.load unpickles the file; only use it on trusted files.
    data = torch.load(input_path)
    grafos = data['grafos']
    vetores = data['vetores']

    # zip() would silently drop the unmatched tail, so refuse mismatched input.
    if len(grafos) != len(vetores):
        raise ValueError(
            f"'grafos' has {len(grafos)} items but 'vetores' has {len(vetores)}"
        )

    # Shuffle a list of positions instead of the zipped pairs: the resulting
    # order is the same for a given RNG state, and an empty dataset no longer
    # fails when unpacking zip(*[]).
    ordem = list(range(len(grafos)))
    gerador = random if seed is None else random.Random(seed)
    gerador.shuffle(ordem)

    grafos_embaralhados = [grafos[i] for i in ordem]
    vetores_embaralhados = [vetores[i] for i in ordem]

    # Persist the shuffled copy.
    torch.save({'grafos': grafos_embaralhados, 'vetores': vetores_embaralhados}, output_path)
    print(f"Arquivo embaralhado salvo em: {output_path}")

# Example usage: read the original file and write a shuffled copy next to it.
input_path, output_path = 'af.pt', 'af_embaralhado.pt'  # original file, shuffled file
embaralhar_grafos(input_path=input_path, output_path=output_path)


  data = torch.load(input_path)


Arquivo embaralhado salvo em: af_embaralhado.pt


In [9]:
import torch
from torch_geometric.data import Data, DataLoader
from torch_geometric.nn import GCNConv
import torch.nn.functional as F
import os
from tqdm import tqdm


# Função para carregar os grafos salvos
def carregar_grafos_visibilidade(file_path):
    """Load the saved graphs and wrap each one in a ``Data`` object.

    The file is read with ``torch.load`` and its ``'grafos'`` entry is iterated;
    every graph must provide ``'src'`` and ``'dst'`` tensors holding the edge
    endpoints.

    Node features are not read from the file: each node receives one feature
    drawn from ``torch.rand``, so the result changes between calls unless the
    torch RNG is seeded beforehand.

    Args:
        file_path: Path of the ``.pt`` file to load.

    Returns:
        A list with one ``Data`` object per stored graph, in file order.
    """
    def _para_data(grafo):
        # Stack the endpoints into the 2 x E layout used for edge_index.
        edge_index = torch.stack([grafo['src'], grafo['dst']], dim=0)
        # The node count is derived as the largest endpoint index plus one.
        num_nodes = int(edge_index.max()) + 1
        features = torch.rand((num_nodes, 1))
        return Data(x=features, edge_index=edge_index)

    return [_para_data(grafo) for grafo in torch.load(file_path)['grafos']]


# Modelo GCN-AE
class GCNAutoencoder(torch.nn.Module):
    """Two-layer GCN encoder followed by a linear per-edge decoder.

    The encoder maps node features to ``out_channels``-dimensional embeddings.
    The decoder concatenates the embeddings of both endpoints of every edge in
    ``edge_index`` and maps them to one logit per edge.

    Args:
        in_channels: Number of input features per node.
        hidden_channels: Width of the first GCN layer.
        out_channels: Size of the node embeddings produced by the encoder.
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        # Attribute names are part of the saved state_dict keys; keep them stable.
        self.encoder1 = GCNConv(in_channels, hidden_channels)
        self.encoder2 = GCNConv(hidden_channels, out_channels)
        self.decoder = torch.nn.Linear(out_channels * 2, 1)  # scores one edge

    def forward(self, x, edge_index):
        """Encode the nodes and score every edge listed in ``edge_index``.

        Returns:
            A tuple ``(z, adj_reconstructed)``: ``z`` holds the node embeddings
            and ``adj_reconstructed`` holds one raw logit per column of
            ``edge_index`` (always 1-D, including the single-edge case).
        """
        # Encoder: two GCN layers with a ReLU in between.
        z = F.relu(self.encoder1(x, edge_index))
        z = self.encoder2(z, edge_index)

        # Decoder: only the pairs present in edge_index are scored.
        row, col = edge_index
        edge_features = torch.cat([z[row], z[col]], dim=1)
        # squeeze(-1) drops only the feature axis; a bare squeeze() would also
        # drop the edge axis when there is exactly one edge.
        adj_reconstructed = self.decoder(edge_features).squeeze(-1)
        return z, adj_reconstructed


# Função de perda
def loss_function(reconstructed, edge_index, num_nodes):
    """Binary cross-entropy of the edge logits against an all-ones target.

    Args:
        reconstructed: Raw logits, one per edge in ``edge_index``.
        edge_index: Edges that were scored. Kept for interface compatibility;
            not needed to build the target.
        num_nodes: Number of nodes. Kept for interface compatibility; unused.

    Returns:
        Scalar tensor with the mean loss over the scored edges.

    NOTE(review): only existing edges are scored, so the target is 1 for every
    entry and the loss can be driven to zero by predicting "edge" everywhere.
    Scoring sampled non-edges as negatives would be needed for the objective to
    constrain the embeddings.
    """
    # Every scored pair comes from edge_index, so its adjacency entry is 1 by
    # construction. Building the target directly avoids allocating a dense
    # num_nodes x num_nodes matrix per batch and also accepts a 0-dim input.
    target = torch.ones_like(reconstructed)
    # The logits form fuses sigmoid and log for numerical stability.
    return F.binary_cross_entropy_with_logits(reconstructed, target)


# Avaliar métricas do autoencoder
def calcular_metricas(reconstructed, edge_index, embeddings):
    """Compute two monitoring metrics for the autoencoder.

    Args:
        reconstructed: Raw edge logits produced by the decoder.
        edge_index: Accepted for interface compatibility; not used.
        embeddings: Node embeddings, one row per node.

    Returns:
        A tuple ``(precision, feature_variance)`` of Python floats.
        ``precision`` is the fraction of scored edges whose sigmoid probability
        exceeds 0.5. ``feature_variance`` is the variance of each embedding
        dimension across nodes, averaged over dimensions.
    """
    # The reference label is "edge present" for every scored pair, so agreeing
    # with the label is the same as being predicted as an edge.
    previstas = torch.sigmoid(reconstructed).gt(0.5)
    precision = previstas.to(torch.float32).mean().item()

    feature_variance = torch.var(embeddings, dim=0).mean().item()
    return precision, feature_variance


# Treinar o modelo GCN-AE
def treinar_gcn_ae(train_loader, val_loader, in_channels=1, hidden_channels=64, out_channels=16, epochs=20, lr=0.001, save_path="model/gcn_ae_model.pth"):
    """Train a ``GCNAutoencoder`` and save its weights.

    Args:
        train_loader: Iterable of training batches exposing ``x``,
            ``edge_index``, ``num_nodes`` and ``to(device)``.
        val_loader: Iterable of validation batches with the same interface.
        in_channels: Number of input features per node.
        hidden_channels: Width of the first encoder layer.
        out_channels: Size of the node embeddings.
        epochs: Number of passes over ``train_loader``.
        lr: Learning rate for the Adam optimizer.
        save_path: File the model ``state_dict`` is written to. Parent
            directories are created when the path contains any.

    Returns:
        The trained model, left in eval mode by the last validation pass when
        ``epochs`` is at least 1.
    """
    def _media(valores):
        # Mean of a list; NaN instead of ZeroDivisionError for an empty loader.
        return sum(valores) / len(valores) if valores else float('nan')

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = GCNAutoencoder(in_channels, hidden_channels, out_channels).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    print(f"Iniciando o treinamento por {epochs} épocas.")
    for epoch in range(epochs):
        model.train()
        # The reported loss is the sum over batches, not the per-batch mean.
        total_loss = 0
        precisions, variances = [], []

        for data in tqdm(train_loader, desc=f"Treinando Epoch {epoch+1}/{epochs}"):
            data = data.to(device)
            optimizer.zero_grad()
            z, reconstructed_adj = model(data.x, data.edge_index)
            loss = loss_function(reconstructed_adj, data.edge_index, data.num_nodes)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

            precision, variance = calcular_metricas(reconstructed_adj, data.edge_index, z)
            precisions.append(precision)
            variances.append(variance)

        print(f"Epoch {epoch+1}/{epochs} - Loss: {total_loss:.4f}, Precision: {_media(precisions):.4f}, Variance: {_media(variances):.4f}")

        # Validation pass: no gradient tracking, no parameter updates.
        model.eval()
        val_loss, val_precisions, val_variances = 0, [], []
        with torch.no_grad():
            for data in val_loader:
                data = data.to(device)
                z, reconstructed_adj = model(data.x, data.edge_index)
                loss = loss_function(reconstructed_adj, data.edge_index, data.num_nodes)
                val_loss += loss.item()
                precision, variance = calcular_metricas(reconstructed_adj, data.edge_index, z)
                val_precisions.append(precision)
                val_variances.append(variance)

        print(f"Validation - Loss: {val_loss:.4f}, Precision: {_media(val_precisions):.4f}, Variance: {_media(val_variances):.4f}")

    # Save the weights. os.makedirs('') raises, so only create the parent
    # directory when save_path actually has one.
    save_dir = os.path.dirname(save_path)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
    torch.save(model.state_dict(), save_path)
    print(f"Modelo salvo em: {save_path}")
    return model


# Main script
if __name__ == "__main__":
    # Alternative dataset location, kept for reference:
    #file_path = '/scratch/arturxavier/Clustering-Paper/Grafo/af.pt'
    file_path = '/scratch/guilherme.evangelista/Clustering-Paper/Grafo/af_embaralhado.pt'

    # Load every stored graph.
    print("Carregando grafos...")
    dataset = carregar_grafos_visibilidade(file_path)
    print(f"Total de grafos carregados: {len(dataset)}")

    # 80/20 train/validation split, taken in stored order.
    train_dataset, val_dataset = (
        dataset[:int(len(dataset) * 0.8)],
        dataset[int(len(dataset) * 0.8):],
    )
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

    # Train the autoencoder with the hyperparameters spelled out explicitly.
    print("Treinando o autoencoder...")
    modelo_treinado = treinar_gcn_ae(
        train_loader,
        val_loader,
        in_channels=1,
        hidden_channels=64,
        out_channels=16,
        epochs=20,
        lr=0.001,
    )


Carregando grafos...


  data = torch.load(file_path)


Total de grafos carregados: 10261
Treinando o autoencoder...
Iniciando o treinamento por 20 épocas.


Treinando Epoch 1/20: 100%|██████████| 257/257 [00:03<00:00, 81.41it/s]


Epoch 1/20 - Loss: 30.4011, Precision: 0.9707, Variance: 0.2194
Validation - Loss: 0.2448, Precision: 1.0000, Variance: 0.4203


Treinando Epoch 2/20: 100%|██████████| 257/257 [00:03<00:00, 81.61it/s]


Epoch 2/20 - Loss: 0.4862, Precision: 1.0000, Variance: 0.5236
Validation - Loss: 0.0609, Precision: 1.0000, Variance: 0.6161


Treinando Epoch 3/20: 100%|██████████| 257/257 [00:03<00:00, 81.63it/s]


Epoch 3/20 - Loss: 0.1629, Precision: 1.0000, Variance: 0.6789
Validation - Loss: 0.0273, Precision: 1.0000, Variance: 0.7458


Treinando Epoch 4/20: 100%|██████████| 257/257 [00:03<00:00, 81.69it/s]


Epoch 4/20 - Loss: 0.0819, Precision: 1.0000, Variance: 0.7920
Validation - Loss: 0.0154, Precision: 1.0000, Variance: 0.8467


Treinando Epoch 5/20: 100%|██████████| 257/257 [00:03<00:00, 81.63it/s]


Epoch 5/20 - Loss: 0.0490, Precision: 1.0000, Variance: 0.8833
Validation - Loss: 0.0098, Precision: 1.0000, Variance: 0.9309


Treinando Epoch 6/20: 100%|██████████| 257/257 [00:03<00:00, 81.61it/s]


Epoch 6/20 - Loss: 0.0323, Precision: 1.0000, Variance: 0.9616
Validation - Loss: 0.0067, Precision: 1.0000, Variance: 1.0047


Treinando Epoch 7/20: 100%|██████████| 257/257 [00:03<00:00, 81.63it/s]


Epoch 7/20 - Loss: 0.0226, Precision: 1.0000, Variance: 1.0306
Validation - Loss: 0.0048, Precision: 1.0000, Variance: 1.0710


Treinando Epoch 8/20: 100%|██████████| 257/257 [00:03<00:00, 81.64it/s]


Epoch 8/20 - Loss: 0.0166, Precision: 1.0000, Variance: 1.0936
Validation - Loss: 0.0036, Precision: 1.0000, Variance: 1.1319


Treinando Epoch 9/20: 100%|██████████| 257/257 [00:03<00:00, 81.53it/s]


Epoch 9/20 - Loss: 0.0125, Precision: 1.0000, Variance: 1.1523
Validation - Loss: 0.0027, Precision: 1.0000, Variance: 1.1888


Treinando Epoch 10/20: 100%|██████████| 257/257 [00:03<00:00, 81.61it/s]


Epoch 10/20 - Loss: 0.0097, Precision: 1.0000, Variance: 1.2068
Validation - Loss: 0.0021, Precision: 1.0000, Variance: 1.2426


Treinando Epoch 11/20: 100%|██████████| 257/257 [00:03<00:00, 81.55it/s]


Epoch 11/20 - Loss: 0.0076, Precision: 1.0000, Variance: 1.2591
Validation - Loss: 0.0017, Precision: 1.0000, Variance: 1.2938


Treinando Epoch 12/20: 100%|██████████| 257/257 [00:03<00:00, 81.59it/s]


Epoch 12/20 - Loss: 0.0061, Precision: 1.0000, Variance: 1.3095
Validation - Loss: 0.0014, Precision: 1.0000, Variance: 1.3431


Treinando Epoch 13/20: 100%|██████████| 257/257 [00:03<00:00, 81.63it/s]


Epoch 13/20 - Loss: 0.0049, Precision: 1.0000, Variance: 1.3571
Validation - Loss: 0.0011, Precision: 1.0000, Variance: 1.3909


Treinando Epoch 14/20: 100%|██████████| 257/257 [00:03<00:00, 81.57it/s]


Epoch 14/20 - Loss: 0.0040, Precision: 1.0000, Variance: 1.4041
Validation - Loss: 0.0009, Precision: 1.0000, Variance: 1.4373


Treinando Epoch 15/20: 100%|██████████| 257/257 [00:03<00:00, 81.60it/s]


Epoch 15/20 - Loss: 0.0033, Precision: 1.0000, Variance: 1.4497
Validation - Loss: 0.0008, Precision: 1.0000, Variance: 1.4828


Treinando Epoch 16/20: 100%|██████████| 257/257 [00:03<00:00, 81.62it/s]


Epoch 16/20 - Loss: 0.0028, Precision: 1.0000, Variance: 1.4945
Validation - Loss: 0.0006, Precision: 1.0000, Variance: 1.5274


Treinando Epoch 17/20: 100%|██████████| 257/257 [00:03<00:00, 81.56it/s]


Epoch 17/20 - Loss: 0.0023, Precision: 1.0000, Variance: 1.5385
Validation - Loss: 0.0005, Precision: 1.0000, Variance: 1.5715


Treinando Epoch 18/20: 100%|██████████| 257/257 [00:03<00:00, 81.54it/s]


Epoch 18/20 - Loss: 0.0019, Precision: 1.0000, Variance: 1.5813
Validation - Loss: 0.0004, Precision: 1.0000, Variance: 1.6150


Treinando Epoch 19/20: 100%|██████████| 257/257 [00:03<00:00, 81.45it/s]


Epoch 19/20 - Loss: 0.0016, Precision: 1.0000, Variance: 1.6237
Validation - Loss: 0.0004, Precision: 1.0000, Variance: 1.6581


Treinando Epoch 20/20: 100%|██████████| 257/257 [00:03<00:00, 81.53it/s]


Epoch 20/20 - Loss: 0.0014, Precision: 1.0000, Variance: 1.6679
Validation - Loss: 0.0003, Precision: 1.0000, Variance: 1.7011
Modelo salvo em: model/gcn_ae_model.pth


In [17]:
import torch
from torch_geometric.data import Data
from tqdm import tqdm


# Model definition (must match the architecture used for training so the saved state_dict can be loaded)
class GCNAutoencoder(torch.nn.Module):
    """Two-layer GCN encoder with a linear per-edge decoder.

    This repeats the class defined in the training cell. The layer names and
    sizes must stay identical to the trained model so its ``state_dict`` loads.

    Args:
        in_channels: Number of input features per node.
        hidden_channels: Width of the first GCN layer.
        out_channels: Size of the node embeddings produced by the encoder.
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.encoder1 = GCNConv(in_channels, hidden_channels)
        self.encoder2 = GCNConv(hidden_channels, out_channels)
        self.decoder = torch.nn.Linear(out_channels * 2, 1)

    def forward(self, x, edge_index):
        """Return ``(z, adj_reconstructed)``: node embeddings and one logit per edge."""
        hidden = F.relu(self.encoder1(x, edge_index))
        z = self.encoder2(hidden, edge_index)
        row, col = edge_index
        edge_features = torch.cat([z[row], z[col]], dim=1)
        # squeeze(-1) removes only the feature axis, so a single-edge graph
        # still yields a 1-D tensor instead of a 0-dim scalar.
        adj_reconstructed = self.decoder(edge_features).squeeze(-1)
        return z, adj_reconstructed


def processar_grafos(autoencoder_path, input_file, output_file, in_channels=1, hidden_channels=64, out_channels=16):
    """Encode every graph of a file with a trained autoencoder and save the result.

    For each graph in the ``'grafos'`` entry of ``input_file`` the encoder
    produces node embeddings, which replace the node features in the saved
    ``Data`` object.

    Args:
        autoencoder_path: Path of the ``state_dict`` saved after training.
        input_file: ``.pt`` file whose ``'grafos'`` entry lists graphs with
            ``'src'`` and ``'dst'`` edge endpoint tensors.
        output_file: Path the dict ``{'grafos': [...]}`` of ``Data`` objects is
            written to.
        in_channels: Number of input features per node; must match training.
        hidden_channels: Width of the first encoder layer; must match training.
        out_channels: Embedding size; must match training.

    NOTE(review): the input node features are drawn from ``torch.rand`` on every
    call, so the saved embeddings depend on the RNG state and differ between
    runs unless the torch RNG is seeded.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = GCNAutoencoder(in_channels, hidden_channels, out_channels).to(device)
    # map_location lets a checkpoint saved on GPU be restored on a CPU-only machine.
    model.load_state_dict(torch.load(autoencoder_path, map_location=device))
    model.eval()

    # Load the graphs onto the CPU; each one is moved to `device` only for the
    # forward pass, and the saved tensors are CPU tensors either way.
    grafos = torch.load(input_file, map_location='cpu')['grafos']
    novos_grafos = []

    with torch.no_grad():
        for grafo in tqdm(grafos, desc="Processando grafos"):
            edge_index = torch.stack([grafo['src'], grafo['dst']], dim=0)
            # The node count is derived as the largest endpoint index plus one.
            num_nodes = int(edge_index.max()) + 1
            x = torch.rand((num_nodes, in_channels))

            amostra = Data(x=x, edge_index=edge_index).to(device)
            z, _ = model(amostra.x, amostra.edge_index)
            novos_grafos.append(Data(x=z.cpu(), edge_index=edge_index.cpu()))

    torch.save({'grafos': novos_grafos}, output_file)
    print(f"Grafos processados salvos em: {output_file}")


if __name__ == "__main__":
    autoencoder_path = "model/gcn_ae_model.pth"
    # NOTE(review): input and output point to the same file, so the raw graphs
    # are overwritten by the processed ones and this cell cannot be re-run on
    # the original data. Confirm this is intended.
    input_file = output_file = "grafos/umdavb.pt"

    print("Processando grafos da classe...")
    processar_grafos(
        autoencoder_path=autoencoder_path,
        input_file=input_file,
        output_file=output_file,
    )


Processando grafos da classe...


  model.load_state_dict(torch.load(autoencoder_path))
  data = torch.load(input_file)


Processando grafos: 100%|██████████| 994/994 [00:00<00:00, 1055.29it/s]


Grafos processados salvos em: grafos/umdavb.pt
