In [1]:
import torch
from torch import nn
import torch_geometric
from torch_geometric.nn import SAGEConv, GraphConv
from torch_geometric.data import Data
import networkx as nx
import numpy as np
import pandas as pd 
import pickle
import pdb
from torch_geometric.loader import DataLoader
import torch.optim as optim
import torch.nn.functional as F
from torch_geometric.utils import to_dense_adj, subgraph, k_hop_subgraph
import matplotlib.pyplot as plt
from tools.combine_nx_to_dataloader import GraphDataset
torch.manual_seed(42)

<torch._C.Generator at 0x114352af0>

In [2]:
class GraphEncoder(nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super(GraphEncoder, self).__init__()
        self.conv1 = SAGEConv(in_channels, hidden_channels)
        self.conv2 = SAGEConv(hidden_channels, hidden_channels * 2)
        self.conv3 = SAGEConv(hidden_channels * 2, hidden_channels)  # Additional hidden layer
        self.conv4 = nn.Linear(hidden_channels, out_channels)  # Final Layer

    def forward(self, x, edge_index):
        x = F.relu(self.conv1(x, edge_index))
        x = F.relu(self.conv2(x, edge_index))
        x = F.relu(self.conv3(x, edge_index))
        x = self.conv4(x)
        return x

class GraphDecoder(nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super(GraphDecoder, self).__init__()
        # Assuming the encoded features are to be decoded back to original feature size
        self.conv1 = SAGEConv(out_channels, hidden_channels * 2)
        self.conv2 = SAGEConv(hidden_channels * 2, hidden_channels * 2)  # Mimic encoder complexity
        self.conv3 = SAGEConv(hidden_channels * 2, hidden_channels)  # Additional hidden layer
        self.conv4 = nn.Linear(hidden_channels, in_channels)  # Additional hidden layer to output size

    def forward(self, z, edge_index):
        # pdb.set_trace()
        z = F.relu(self.conv1(z, edge_index))
        z = F.relu(self.conv2(z, edge_index))
        z = F.relu(self.conv3(z, edge_index))
        z = self.conv4(z)
        return z

class MaskedGraphAutoencoder(nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super(MaskedGraphAutoencoder, self).__init__()
        self.encoder = GraphEncoder(in_channels, hidden_channels, out_channels)
        self.decoder = GraphDecoder(in_channels, hidden_channels, out_channels)

    def forward(self, x, edge_index):
        x_masked = x #* mask
        z = self.encoder(x_masked, edge_index)
        x_reconstructed = self.decoder(z, edge_index)
        return x_reconstructed, z

In [3]:
def train(model, data_loader, optimizer):
    model.train()
    total_loss = 0
    embeddings = []
    origs = []
    for data in data_loader:
        optimizer.zero_grad()
        reconstructed_x, embed = model(data[0][0], data[1][0])
        origs.append(data[0][0])
        # pdb.set_trace()
        loss = loss_function(reconstructed_x, data[0][0])
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        embeddings.append(embed.detach())
    return total_loss / len(data_loader), embeddings, origs

# Assuming a loss function appropriate for node feature reconstruction, e.g., MSE for continuous features
def loss_function(reconstructed_x, original_x):
    return F.mse_loss(reconstructed_x, original_x)


In [4]:
# Example usage
if __name__ == "__main__":
    # data = 
    # for node_features, edge_index, edge_features in dataloader:

    # pdb.set_trace()
    # cuda_available = torch.cuda.is_available()
    # device = torch.device("cuda" if cuda_available else "cpu")
    folder_graph = './graphs/graphsage_graph/single_graphs/'
    fname = 'graph_list_with_features.pth'

    hC = 64
    inC = 40 #len(G.nodes[0]['x'])
    # print('dimension of nodes: ',inC)
    '''
    print(f"radius: {nx.radius(G)}")
    print(f"diameter: {nx.diameter(G)}")
    # print(f"eccentricity: {nx.eccentricity(G)}")
    print(f"center: {nx.center(G)}")
    # print(f"periphery: {nx.periphery(G)}")
    # print(f"density: {nx.density(G)}")
    print(f"max degree: {np.max(G.degree())}")
    '''
    
    # trainG, testG = get_test_train(G, num_test_nodes=0)
    # Convert networkx graph to PyTorch Geometric graph
    # trainPyG = networkx_to_pyg_graph(trainG, 'x', 'weight')
    # pdb.set_trace()
    # subgraphs = create_subgraphs(pyg_graph, num_subgraphs=1, num_hops=2)
    # pdb.set_trace()
    # testPyG = get_test_train(testG, 'x')
    # data_loader = DataLoader(subgraphs, batch_size=1, shuffle=True)
    # pdb.set_trace()
    graph_list = torch.load(fname)
    # pdb.set_trace()
    dataset = GraphDataset(graph_list)
    dataloader = DataLoader(dataset, batch_size=1, shuffle=True)
    actuals = []
    final_loss = []
    embeddings = []
    for outchannels in range(2,6):
        model = MaskedGraphAutoencoder(in_channels=inC, hidden_channels=hC, out_channels=outchannels)
        optimizer = optim.Adam(model.parameters(), lr=0.01)
        losses = []
        embeds = []
        acts = []
        for epoch in range(1, 10):  # Number of epochs
            loss, embeds, orig = train(model, dataloader, optimizer)
            embeds.append(embeds)
            acts.append(orig)
            print(f'Epoch {epoch}, Loss: {loss:.4f}')
            losses.append(loss)
        embeddings.append(embeds)
        final_loss.append(losses)
        actuals.append(acts)
        # Assuming encoder and decoder are your model's components
        encoder_state_dict = model.encoder.state_dict()
        decoder_state_dict = model.decoder.state_dict()

        # Save the state dictionaries
        torch.save(encoder_state_dict, 'encoder_state_dict'+str(outchannels)+'.pth')
        torch.save(decoder_state_dict, 'decoder_state_dict'+str(outchannels)+'.pth')

    # print(final_loss)
    np.save('gaelossarray_sage_no_weights_3_5_with_embeds.npy', final_loss)
    # pdb.set_trace()
    fl = open('embeddings_3_5.pickle', 'wb')
    pickle.dump(embeddings, fl)
    fl.close()

    fl = open('actuals_3_5.pickle', 'wb')
    pickle.dump(actuals, fl)
    fl.close()


    # np.save('embeddings.npy', embeddings.numpy()) 
    # print(range(5,20))
        

Epoch 1, Loss: 0.2227
Epoch 2, Loss: 0.1533
Epoch 3, Loss: 0.1434
Epoch 4, Loss: 0.1319
Epoch 5, Loss: 0.1279
Epoch 6, Loss: 0.1274
Epoch 7, Loss: 0.1242
Epoch 8, Loss: 0.1275
Epoch 9, Loss: 0.1258
Epoch 1, Loss: 0.2440
Epoch 2, Loss: 0.1477
Epoch 3, Loss: 0.1353
Epoch 4, Loss: 0.4290
Epoch 5, Loss: 0.4236
Epoch 6, Loss: 0.4226
Epoch 7, Loss: 0.2741
Epoch 8, Loss: 0.1663
Epoch 9, Loss: 0.1487
Epoch 1, Loss: 0.2080
Epoch 2, Loss: 0.2457
Epoch 3, Loss: 0.4354
Epoch 4, Loss: 0.3574
Epoch 5, Loss: 0.2934
Epoch 6, Loss: 0.2561
Epoch 7, Loss: 0.3481
Epoch 8, Loss: 0.4447
Epoch 9, Loss: 0.4413
Epoch 1, Loss: 0.2073
Epoch 2, Loss: 0.1657
Epoch 3, Loss: 0.1166
Epoch 4, Loss: 0.1091
Epoch 5, Loss: 0.1197
Epoch 6, Loss: 0.1052
Epoch 7, Loss: 0.1032
Epoch 8, Loss: 0.1036
Epoch 9, Loss: 0.1197


In [5]:
del encoder_state_dict
del decoder_state_dict
del dataloader
del dataset
del embeddings
del embeds
del graph_list 

import gc
gc.collect()

677420

In [6]:
del model
del final_loss
del losses
gc.collect()

564

In [7]:
encoder_dict = torch.load('encoder_state_dict3.pth')
decoder_dict = torch.load('decoder_state_dict3.pth')



In [8]:
in_channels = 40
hidden_channels = 64
out_channels = 3
GAE = MaskedGraphAutoencoder(in_channels, hidden_channels, out_channels)
GAE.encoder.load_state_dict(encoder_dict)
GAE.decoder.load_state_dict(decoder_dict)

fil =  open('embeddings_3_5.pickle', 'rb')
embed = pickle.load(fil)   
fil.close() 
emb = embed[1]
# print(len(emb[0]))
fil =  open('actuals_3_5.pickle', 'rb')
original = pickle.load(fil)   
fil.close() 

63


In [9]:
e = emb[0][0].tolist()
print(emb[0])
# print(e[0].tolist())
print(original[0][0][0][0])

tensor([[0.6899, 3.8092],
        [0.8655, 4.8483],
        [0.6520, 3.5848],
        [0.6274, 3.4393],
        [0.6382, 3.5034],
        [0.6238, 3.4181],
        [0.5995, 3.2746],
        [0.8384, 4.6879],
        [0.8023, 4.4742],
        [0.5978, 3.2640],
        [0.4694, 2.5042],
        [0.5848, 3.1874],
        [0.4171, 2.1951],
        [0.5699, 3.0993],
        [0.7785, 4.3333],
        [0.7704, 4.2854],
        [0.6392, 3.5094],
        [0.6047, 3.3048],
        [0.5704, 3.1022],
        [0.5622, 3.0537],
        [0.3867, 2.0147],
        [0.3390, 1.7328],
        [0.2493, 1.2020],
        [0.3571, 1.8396],
        [0.3441, 1.7630],
        [0.3353, 1.7109],
        [0.7546, 4.1923],
        [0.3446, 1.7656],
        [0.7442, 4.1306],
        [0.3781, 1.9643],
        [0.3351, 1.7095],
        [0.2432, 1.1656],
        [0.5398, 2.9212],
        [0.3317, 1.6898],
        [0.7371, 4.0883],
        [0.5623, 3.0539],
        [0.3877, 2.0208],
        [0.3292, 1.6749],
        [0.2

In [None]:
torch.empty((2,1), dtype=torch.int)

tensor([[0],
        [0]], dtype=torch.int32)

In [None]:
original_input = torch.tensor([[original[0][0][0][0].tolist()]])
print(original_input)

tensor([[[ 0.0000, -0.1500,  0.0000,  0.5810,  0.0000, -0.1500,  0.0000,
           0.5810,  0.0000, -0.1500,  0.0000,  0.5810,  0.0000, -0.1500,
           0.0000,  0.5810,  0.0000, -0.1500,  0.0000,  0.5810,  0.0000,
          -0.1500,  0.0000,  0.5810,  3.0000, -0.1500,  0.0000,  0.5810,
           3.0000, -0.1500,  0.0000,  0.5810,  3.0000, -0.1500,  0.0000,
           0.5810,  3.0000, -0.1500,  0.0000,  0.5810]]])


In [None]:
# input = torch.tensor([[e]])
result = GAE.encoder(original_input, torch.empty((2,0), dtype=torch.int64))
print(original_input)
print(result)

tensor([[[ 0.0000, -0.1500,  0.0000,  0.5810,  0.0000, -0.1500,  0.0000,
           0.5810,  0.0000, -0.1500,  0.0000,  0.5810,  0.0000, -0.1500,
           0.0000,  0.5810,  0.0000, -0.1500,  0.0000,  0.5810,  0.0000,
          -0.1500,  0.0000,  0.5810,  3.0000, -0.1500,  0.0000,  0.5810,
           3.0000, -0.1500,  0.0000,  0.5810,  3.0000, -0.1500,  0.0000,
           0.5810,  3.0000, -0.1500,  0.0000,  0.5810]]])
tensor([[[0.0000, 0.0000, 6.6842]]], grad_fn=<ReluBackward0>)


In [None]:
input = torch.tensor([[e]])
result = GAE.decoder(input, torch.empty((2,0), dtype=torch.int64))
print(input)
print(result)
# for i in range(0,5):
#     print(e[i])
#     print(result[i])

tensor([[[0.0000, 0.0000, 0.5841, 0.0000]]])
tensor([[[0.0000, 0.0000, 0.0000, 0.0000, 0.3101, 0.0000, 0.0000, 0.6715,
          0.5578, 0.0000, 0.0000, 0.6770, 0.8943, 0.0000, 0.0000, 0.6568,
          1.3684, 0.0000, 0.0000, 0.0000, 1.8016, 0.0000, 0.0000, 0.6539,
          2.2844, 0.0000, 0.0000, 0.6016, 2.6953, 0.0000, 0.0000, 0.5784,
          3.4477, 0.2503, 0.3357, 0.0000, 4.9098, 1.0378, 1.0295, 0.0000]]],
       grad_fn=<ReluBackward0>)


In [None]:
result = GAE.decoder(e, torch.empty((2,0), dtype=torch.int64))
for i in range(0,5):
    print(e[i])
    print(result[i])

tensor([0.0000, 0.0000, 0.5841, 0.0000])
tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.3101, 0.0000, 0.0000, 0.6715, 0.5578,
        0.0000, 0.0000, 0.6770, 0.8943, 0.0000, 0.0000, 0.6568, 1.3684, 0.0000,
        0.0000, 0.0000, 1.8016, 0.0000, 0.0000, 0.6539, 2.2844, 0.0000, 0.0000,
        0.6016, 2.6953, 0.0000, 0.0000, 0.5784, 3.4477, 0.2503, 0.3357, 0.0000,
        4.9098, 1.0378, 1.0295, 0.0000], grad_fn=<SelectBackward0>)
tensor([0.0000, 0.0000, 0.1196, 0.0000])
tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.8237, 0.0000,
        0.0000, 0.0000, 0.8369, 0.0000, 0.0000, 0.0000, 0.7865, 0.3559, 0.0000,
        0.0000, 0.0000, 0.7825, 0.0000, 0.0000, 0.7615, 1.6295, 0.0000, 0.0000,
        0.6953, 2.0372, 0.0000, 0.0000, 0.7430, 2.3667, 0.0000, 0.0000, 0.0000,
        4.7823, 1.0200, 0.9818, 0.0000], grad_fn=<SelectBackward0>)
tensor([0.0000, 0.0000, 0.1196, 0.0000])
tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.8237, 0.0000,
        0.0000, 0.000