In [43]:
import pickle
import numpy as np
import pandas as pd
import torch 
from torch import nn
# import torch_geometric
from torch_geometric.nn import SAGEConv
# from torch_geometric.data import Data
import networkx as nx
import numpy as np
import pandas as pd 
import pickle
import pdb
from torch_geometric.loader import DataLoader
# import torch.optim as optim
import torch.nn.functional as F
# from torch_geometric.utils import to_dense_adj, subgraph, k_hop_subgraph
import matplotlib.pyplot as plt
from tools.combine_nx_to_dataloader import GraphDataset
from torch.optim.lr_scheduler import ReduceLROnPlateau


In [34]:
class Autoencoder(nn.Module):
    """1-D convolutional autoencoder for flat feature vectors.

    The encoder applies two stride-2 ``Conv1d`` layers (1 -> 16 -> 32
    channels), flattens the result and projects it to ``encoded_dim`` values.
    The decoder mirrors this with a linear layer followed by two stride-2
    ``ConvTranspose1d`` layers and a sigmoid, so every reconstructed value
    lies in (0, 1).

    Args:
        input_dim: Number of features per sample. All intermediate layer
            sizes are derived from it; the default of 40 gives the
            32 x 10 = 320 flattened size.
        encoded_dim: Size of the bottleneck representation.

    Raises:
        ValueError: If ``input_dim`` is smaller than 1.
    """

    def __init__(self, input_dim=40, encoded_dim=3):
        super(Autoencoder, self).__init__()
        if input_dim < 1:
            raise ValueError(f"input_dim must be >= 1, got {input_dim}")
        self.input_dim = input_dim

        # A Conv1d with kernel_size=3, stride=2, padding=1 maps a length L to
        # ceil(L / 2); the encoder stacks two of them.
        conv_len = (input_dim + 1) // 2
        conv_len = (conv_len + 1) // 2
        conv_channels = 32
        flat_dim = conv_channels * conv_len

        # Encoder: (batch, 1, input_dim) -> (batch, encoded_dim)
        self.encoder = nn.Sequential(
            nn.Conv1d(1, 16, kernel_size=3, stride=2, padding=1),
            nn.ReLU(True),
            nn.Conv1d(16, conv_channels, kernel_size=3, stride=2, padding=1),
            nn.ReLU(True),
            nn.Flatten(),
            nn.Linear(flat_dim, encoded_dim),
        )

        # Decoder: (batch, encoded_dim) -> (batch, 1, 4 * conv_len).
        # Each transposed conv (stride=2, padding=1, output_padding=1) doubles
        # the length.
        self.decoder = nn.Sequential(
            nn.Linear(encoded_dim, flat_dim),
            nn.ReLU(True),
            nn.Unflatten(1, (conv_channels, conv_len)),
            nn.ConvTranspose1d(conv_channels, 16, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.ReLU(True),
            nn.ConvTranspose1d(16, 1, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.Sigmoid(),  # keep reconstructed values between 0 and 1
        )

    def forward(self, x):
        """Encode and reconstruct a batch.

        Args:
            x: Tensor of shape ``(batch, input_dim)``.

        Returns:
            Tuple ``(recon, z)`` where ``recon`` has shape
            ``(batch, input_dim)`` and ``z`` has shape ``(batch, encoded_dim)``.
        """
        x = x.unsqueeze(1)  # add the channel dimension expected by Conv1d
        z = self.encoder(x)
        recon = self.decoder(z)
        recon = recon.squeeze(1)
        # The decoder emits 4 * conv_len values, which exceeds input_dim when
        # input_dim is not a multiple of 4; drop the surplus tail.
        recon = recon[:, :self.input_dim]
        return recon, z


In [44]:
class DeepSparseVAE(nn.Module):
    """Fully connected variational autoencoder with a sparsity penalty.

    Layer layout::

        encoder: input_dim -> hidden_dims[0] -> ... -> hidden_dims[-1]
        heads:   hidden_dims[-1] -> latent_dim   (one for mu, one for logvar)
        decoder: latent_dim -> hidden_dims[-1] -> ... -> hidden_dims[0]
                 -> input_dim -> sigmoid

    Args:
        input_dim: Number of features per (flattened) sample.
        hidden_dims: Sizes of the encoder's hidden layers, in order. The
            decoder uses the same sizes reversed. Must not be empty.
        latent_dim: Size of the latent code.
        sparsity_target: Target value for the mean activation of each unit in
            the last encoder layer.
        sparsity_weight: Multiplier applied to the sparsity penalty.

    Raises:
        ValueError: If ``hidden_dims`` is empty.
    """

    def __init__(self, input_dim=784, hidden_dims=[400, 200], latent_dim=20, sparsity_target=0.1, sparsity_weight=1e-3):
        super(DeepSparseVAE, self).__init__()
        # Copy so neither the shared default list nor a caller's list is aliased.
        hidden_dims = list(hidden_dims)
        if not hidden_dims:
            raise ValueError("hidden_dims must contain at least one layer size")
        self.input_dim = input_dim
        self.latent_dim = latent_dim
        self.sparsity_target = sparsity_target
        self.sparsity_weight = sparsity_weight

        # Encoder: starts at input_dim so the first layer accepts the data.
        encoder_sizes = [input_dim] + hidden_dims
        encoder_layers = []
        for fan_in, fan_out in zip(encoder_sizes[:-1], encoder_sizes[1:]):
            encoder_layers.append(nn.Linear(fan_in, fan_out))
            encoder_layers.append(nn.ReLU(True))
        self.encoder = nn.Sequential(*encoder_layers)
        self.fc_mu = nn.Linear(hidden_dims[-1], latent_dim)
        self.fc_logvar = nn.Linear(hidden_dims[-1], latent_dim)

        # Decoder: starts at latent_dim so the first layer accepts z.
        decoder_sizes = [latent_dim] + hidden_dims[::-1]
        decoder_layers = []
        for fan_in, fan_out in zip(decoder_sizes[:-1], decoder_sizes[1:]):
            decoder_layers.append(nn.Linear(fan_in, fan_out))
            decoder_layers.append(nn.ReLU(True))
        self.decoder = nn.Sequential(*decoder_layers, nn.Linear(decoder_sizes[-1], input_dim), nn.Sigmoid())

    def encode(self, x):
        """Return ``(mu, logvar, h)`` for ``x`` of shape ``(batch, input_dim)``.

        ``h`` is the activation of the last encoder layer.
        """
        h = self.encoder(x)
        return self.fc_mu(h), self.fc_logvar(h), h

    def reparameterize(self, mu, logvar):
        """Draw ``z = mu + eps * exp(0.5 * logvar)`` with standard-normal ``eps``."""
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def decode(self, z):
        """Map a latent code to a reconstruction with values in (0, 1)."""
        return self.decoder(z)

    def forward(self, x):
        """Flatten ``x`` to ``(-1, input_dim)``, encode, sample and decode.

        Returns:
            Tuple ``(recon, mu, logvar, h)``.
        """
        mu, logvar, h = self.encode(x.view(-1, self.input_dim))
        z = self.reparameterize(mu, logvar)
        return self.decode(z), mu, logvar, h

    def loss_function(self, recon_x, x, mu, logvar, h):
        """Summed BCE reconstruction loss + KL to N(0, I) + weighted sparsity term.

        ``x`` is used as a binary-cross-entropy target, so its values are
        expected to lie in [0, 1].
        """
        BCE = F.binary_cross_entropy(recon_x, x.view(-1, self.input_dim), reduction='sum')
        KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
        # h comes out of a ReLU, so a unit's batch mean can be exactly 0 and
        # log(0) would make the loss infinite; clamp before taking the log.
        mean_activation = torch.clamp(h.mean(0), min=1e-8)
        target = torch.tensor([self.sparsity_target]).to(h.device)
        # With a 1-D input, 'batchmean' divides the summed divergence by the
        # number of hidden units.
        sparsity_loss = F.kl_div(mean_activation.log(), target, reduction='batchmean')
        sparsity_loss = self.sparsity_weight * sparsity_loss
        return BCE + KLD + sparsity_loss

# Demo instance: three hidden layers (400 -> 200 -> 100), 40 input features
# and a 3-dimensional latent space.
model = DeepSparseVAE(
    input_dim=40,
    hidden_dims=[400, 200, 100],
    latent_dim=3,
)


In [7]:
import os
import pickle

In [18]:

# Collect the feature vector stored under key 'x' for every node of every
# pickled graph in `folder_graph`, and save them as one flat list.
node_list = []
folder_graph = './graphs/graphsage_graph/single_graphs/'
metadata_file = folder_graph + 'metadata.csv'
succTimeFiles = folder_graph + 'graphMetadata.csv'

# os.listdir returns entries in arbitrary order; sorting keeps the order of
# node_list identical from run to run.
files = sorted(file for file in os.listdir(folder_graph) if file.endswith('.pickle'))
for file in files:
    # SECURITY: pickle.load can execute arbitrary code. Only run this on graph
    # files that come from a trusted source.
    # `with` guarantees the handle is closed even if unpickling fails.
    with open(folder_graph + file, 'rb') as fil:
        G = pickle.load(fil)
    # G.nodes(data=True) yields (node_id, attribute_dict) pairs.
    node_list.extend(attrs['x'] for _, attrs in G.nodes(data=True))

# Persist the node-feature list so later cells can reload it without
# re-reading every graph file.
torch.save(node_list, 'nodelist.pth')

In [45]:
# Select the compute device: CUDA when PyTorch reports it as available,
# otherwise the CPU.
cuda_available = torch.cuda.is_available()
if cuda_available:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

folder_graph = './graphs/graphsage_graph/single_graphs/'
fname = 'nodelist.pth'

# Reload the saved node-feature list and convert it to a float32 tensor.
nodelist = torch.load(fname)
data_nodes = torch.tensor(nodelist, dtype=torch.float32)
from torch.utils.data import Dataset, DataLoader
class CustomDataset(Dataset):
    """Thin ``Dataset`` wrapper around an indexable collection of samples."""

    def __init__(self, data):
        # Keep a reference to the samples; nothing is copied.
        self.data = data

    def __len__(self):
        """Number of samples held by the dataset."""
        n_samples = len(self.data)
        return n_samples

    def __getitem__(self, index):
        """Fetch the sample stored at position ``index``."""
        sample = self.data[index]
        return sample
# Wrap the node-feature tensor and serve it in shuffled mini-batches of 32.
dataset = CustomDataset(data_nodes)
dataloader = DataLoader(
    dataset,
    batch_size=32,
    shuffle=True,
)


In [42]:
import gc

# Train one DeepSparseVAE per latent size (3..9). For every size this records
# the mean loss of each epoch and the latent code of the last batch of each
# epoch. Requires `dataloader` and `device` from the data-loading cell.
if len(dataloader) == 0:
    raise ValueError("dataloader is empty; nothing to train on")

generated_embeds = []
losses = []
for ed in range(3, 10):
    autoencoder = DeepSparseVAE(input_dim=40, hidden_dims=[400, 200, 100], latent_dim=ed).to(device)
    # NOTE(review): only the MSE reconstruction error is optimised here; the
    # KL and sparsity terms of DeepSparseVAE.loss_function are not used.
    # Confirm this is intended.
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(autoencoder.parameters(), lr=0.1)
    # `verbose` is deprecated in recent PyTorch releases, so learning-rate
    # reductions are reported manually after scheduler.step below.
    scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.1, patience=5)

    loss_vec = []
    embed_vec = []
    for epoch in range(1, 50):  # epochs 1..49
        total_loss = 0.0
        for input_vector in dataloader:  # each batch is a (batch, features) tensor
            input_vector = input_vector.to(device)
            optimizer.zero_grad()
            # The model returns a tuple whose first two entries are the
            # reconstruction and the latent code (`z` for Autoencoder, `mu`
            # for DeepSparseVAE); any further entries are not needed here.
            outputs = autoencoder(input_vector)
            output, embed = outputs[0], outputs[1]
            loss = criterion(output, input_vector)  # target is the input itself
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        # Average loss for this epoch
        epoch_loss = total_loss / len(dataloader)
        # Detach before storing so the autograd graph of the batch is released.
        embed_vec.append(embed.detach().cpu())
        loss_vec.append(epoch_loss)

        # Step the scheduler on the epoch loss and report any LR reduction.
        lr_before = optimizer.param_groups[0]['lr']
        scheduler.step(epoch_loss)
        lr_after = optimizer.param_groups[0]['lr']
        if lr_after != lr_before:
            print(f'Epoch {epoch}: reducing learning rate to {lr_after:.4e}')

        gc.collect()
        print(f'Epoch {epoch}, Loss: {epoch_loss:.4f}')
    losses.append(loss_vec)
    generated_embeds.append(embed_vec)

NameError: name 'dataloader' is not defined

In [40]:
# Optional manual cleanup: uncomment the `del` lines for the objects that are
# no longer needed, then force a garbage-collection pass. `gc` is imported in
# the training cell, so that cell must have been run first.
# del nodelist
# del data_nodes
# del dataloader
# del dataset
# del node_list
# del autoencoder
# Left as a bare expression so the notebook displays gc.collect()'s return value.
gc.collect()


0