In [1]:
import pandas as pd
import networkx as nx
import torch
import random
from pathlib import Path
import numpy as np
from typing import Any

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)
print()

Using device: cpu



In [79]:
dataset = "/Users/vanshgupta/Desktop/AI_and_ML_reading_material/GraphGAN_Project/GraphGAN/bio-grid-human/bio-grid-human_dataset.txt1" #used to generate graph and get the number of nodes
filename = "/Users/vanshgupta/Desktop/AI_and_ML_reading_material/GraphGAN_Project/Emb_Data/Emb/biogrid-human/Struc2Vec/emb.txt" #used to read the file and get embeddings.
k = 5 # k -> Number of clusters 
node_embed_dim = 50 # Dimension of node feature vector

rng = np.random.default_rng(seed = 42) 

In [80]:
def add_noise(x : torch.Tensor, noise_ratio : float, node_embed_dim : int) -> torch.Tensor:
    """
    Adds Gaussian noise to a subset of nodes in a given tensor.

    This function introduces noise to a fraction of the nodes in the input tensor `x`. The noise is 
    generated from a normal distribution with mean 0 and standard deviation 1. The number of nodes 
    to be noised is determined by the `noise_ratio` parameter.

    Parameters:
        x (torch.Tensor): The input tensor of shape `(num_nodes, node_embed_dim)` representing 
                          node embeddings.
        noise_ratio (float): The fraction of nodes to which noise will be added. Should be a 
                             value between 0 and 1.
        node_embed_dim (int): The dimensionality of the node embeddings.

    Returns:
        torch.Tensor: The modified tensor `x` with noise added to a subset of nodes.

    """


    num_nodes = x.shape[0] #fetch the number of nodes
    num_nodes_to_add_noise = int(num_nodes * noise_ratio) #fetch the number of nodes that are to be noised
    total_nodes = torch.arange(0 , num_nodes).tolist() #gives a list of nodes
    nodes_to_noise = random.sample(total_nodes, num_nodes_to_add_noise) #gives the node-ids where noise is to be added.

    noise_tensor = torch.normal(mean = 0,
                                std = 1, 
                                size = (num_nodes_to_add_noise, node_embed_dim))
    
    x[nodes_to_noise] = noise_tensor
    return x 


def read_embeddings(filename: Path, n_node: int, n_embed: int) -> np.ndarray:
    """
    Reads pretrained node embeddings from a file and returns them as a numpy array.

    This function initializes an embedding matrix with random values and updates it with 
    the pretrained embeddings provided in the file. If a node does not have a pretrained 
    embedding in the file, its embedding will remain as the initialized random values.

    Parameters:
        filename (Path): Path to the file containing pretrained node embeddings.
                         The file format is expected to have node embeddings on each 
                         line in the format: `node_id dim_1 dim_2 ... dim_n`, with the 
                         first line typically skipped (header).
        n_node (int): The total number of nodes in the graph.
        n_embed (int): The embedding dimensionality for each node.

    Returns:
        np.ndarray: A numpy array of shape `(n_node, n_embed)` representing the node embeddings.

    """

    # Open the file and read all lines, skipping the first line (header)
    with open(filename, "r") as f:
        lines = f.readlines()[1:]

        # Create an embedding matrix initialized with random normal values
        embedding_matrix = rng.standard_normal(size=(n_node, n_embed))

        # Iterate over each line in the file (representing a node embedding)
        for line in lines:
            emd = line.split()  # Split the line into node ID and embedding values
            # Update the embedding matrix for the specific node ID
            embedding_matrix[int(emd[0]), :] = str_list_to_float(emd[1:])

        # Return the final embedding matrix
        return embedding_matrix

def save_embeddings(filepath : Path, graph : Any, embeddings : np.ndarray) -> None:
    """
    Saves node embeddings to a file in a specified format.

    This function takes a graph, its corresponding embeddings, and a file path. It maps the 
    embeddings to the graph nodes, combines the node indices with their respective embeddings, 
    and writes the data to the file. The output file includes a header indicating the total 
    number of nodes and the embedding dimensionality, followed by the embeddings for each node.

    Parameters:
        filepath (Path): Path to the file where embeddings will be saved.
        graph (Any): A graph object (e.g., from NetworkX) containing node information.
        embeddings (np.ndarray): A 2D numpy array of shape `(num_nodes, embedding_dim)` 
                                  containing node embeddings.

    File Format:
        The saved file has the following format:
        - The first line contains the number of nodes and the embedding dimensionality, 
          separated by a tab.
        - Each subsequent line corresponds to a node, containing the node ID followed 
          by its embedding values separated by spaces.
    """
    # Map the embeddings to the node indices in the graph
    new = embeddings[torch.tensor(np.array(graph.nodes()))]

    # Get the node indices and reshape them into a column vector
    index = np.array(graph.nodes()).reshape(-1, 1)

    # Combine node indices with their embeddings into a single matrix
    embedding_matrix = np.hstack([index, new])

    # Convert the embedding matrix into a list of strings for saving
    embedding_list = embedding_matrix.tolist()
    embedding_str = [str(int(emb[0])) + " " + " ".join([str(x) for x in emb[1:]]) + "\n"
                     for emb in embedding_list]

    # Write the embeddings to the file
    with open(filepath, "w+") as f:
        # Include the header with number of nodes and embedding dimensionality
        lines = [str(graph.number_of_nodes()) + "\t" + str(embeddings.shape[1]) + "\n"] + embedding_str
        f.writelines(lines)


def str_list_to_float(str_list : list[str]) -> list[float]:
    """Convert the string items of a list to float items"""
    return [float(item) for item in str_list]

In [81]:

df = pd.read_csv(dataset,
                sep = '\t',
                names = ["NodeIDfrom", "NodeIDto"])
#create the graph networkx object from the above dataframe

G = nx.from_pandas_edgelist(df = df,
                             source = "NodeIDfrom",
                             target = "NodeIDto",
                             create_using=nx.Graph())

X = read_embeddings(filename=filename,
                                     n_node = len(G),
                                     n_embed = 50)

print(f"Number of Nodes in the graph : {len(G)}")
print(f"The shape of the embedding vector {X.shape}")

Number of Nodes in the graph : 9436
The shape of the embedding vector (9436, 50)


In [82]:
### ------------------------------------------------------------ TO ADD NOISE ------------------------------------------------------------ ###
# Run this when you have to add noise to the original embeddings.
X = add_noise(x = X, noise_ratio=0.02, node_embed_dim=50)
X = np.array(X)

# Save the noise-induced embeddings  
error_emb_filename = "/Users/vanshgupta/Desktop/AI_and_ML_reading_material/GraphGAN_Project/Emb_Data/Emb/biogrid-human/Struc2Vec/emb_2%_error.txt"

new = X[torch.tensor(np.array(G.nodes()))]
index = np.array(G.nodes()).reshape(-1, 1)
embedding_matrix = np.hstack([index, new])
embedding_list = embedding_matrix.tolist()
embedding_str = [str(int(emb[0])) + " " + " ".join([str(x) for x in emb[1:]]) + "\n"
                  for emb in embedding_list]
with open(error_emb_filename, "w+") as f:
    lines = [str(G.number_of_nodes()) + "\t" + str(node_embed_dim) + "\n"] + embedding_str
    f.writelines(lines)

In [83]:
rng = np.random.default_rng(seed = 42)
X = np.array(X)
centroid = rng.standard_normal(size = (k, node_embed_dim))

In [84]:

#K-Means Algorithm for node embeddings
#the embediddings are 1xd dimensional.
class KMeansNodeClustering:

  def __init__(self, X, k, node_embed_dim):
      self.k = k #k is the number of cluster centres
      self.X = X #dataset consisting of node embeddings
      self.node_embed_dim = node_embed_dim #dimensions of node embeddings.
      self.centroid = None #these will be the centres of our distribution

  @staticmethod
  def euclidean_measure(centroid, node_embed):  # calculates the distance of the k-dimensional node from the centre
      return np.sqrt(np.sum((centroid - node_embed)**2, axis=1))

  def dimensional_mean(cluster_num, node_embed_dim, cluster_indices, cluster_centres): #calculates the mean of of the arrays, dimension-wise
        axis_centre = np.zeros(node_embed_dim)
        for i in range(node_embed_dim):
            y = 0
            tup = cluster_indices[cluster_num].shape
            shape = tup[0]
            for x in range(shape):
                y += X[cluster_indices[cluster_num][x]][0][i]
            mean = np.mean(y)
            axis_centre[i] += mean
        axis_centre = axis_centre/np.linalg.norm(axis_centre)
        return cluster_centres.append(axis_centre)

  def fit(self, max_iterations = 200):
      rng = np.random.default_rng(seed = 69)
      self.centroid = rng.standard_normal(size = (self.k, self.node_embed_dim))
      self.centroid  = (self.centroid)/(np.max(self.centroid))
      for _ in range(max_iterations):
          y = []

          for node_embed in self.X:
                distance = KMeansNodeClustering.euclidean_measure(node_embed = np.array(node_embed),
                                                                  centroid = centroid)
                cluster_num = np.argmin(distance)
                y.append(cluster_num)

          y = np.array(y) #stores the clustur number each of the nodes belong to

          cluster_indices = [] #to know which node belongs to which cluster

          for i in range(self.k):
                cluster_indices.append(np.argwhere(y == i)) #returns every node which belongs to the same cluster
          cluster_indices = np.array(cluster_indices, dtype = object)
          cluster_centres = [] #stores the centres of the clusters

          for j, indices in enumerate(cluster_indices): #cluster_indices contains the cluster numbers and the indices that belong to a particular cluster
                #i = cluster number
                #indices = indices of the nodes that belong to i.
                if len(indices) == 0:
                    cluster_centres.append(self.centroid[i])
                else:
                    KMeansNodeClustering.dimensional_mean(
                                                          cluster_num = j,
                                                          node_embed_dim=node_embed_dim,
                                                          cluster_indices = cluster_indices,
                                                          cluster_centres=cluster_centres)

          self.centroid = np.array(cluster_centres)
          y = np.array(y)
      return y, cluster_indices




In [85]:
clustered_nodes = KMeansNodeClustering(X = X,
                                       k = k,
                                       node_embed_dim = node_embed_dim)
#cluster_indices_id stores the number_id of the clusters each embeddings is related to
#cluster_indices stores all the nodes that belong to one particular cluster, for all the clusters.
cluster_indices_id, cluster_indices = clustered_nodes.fit()


In [86]:
centroid_array = clustered_nodes.centroid #contains the centroids of all the clusteres.
centroid_array

array([[-0.04203649, -0.02398007,  0.0543293 ,  0.03709349, -0.19481731,
        -0.23199867,  0.11306756,  0.09367199, -0.18545094, -0.1798814 ,
         0.14223191, -0.19863543, -0.01068128, -0.05251942, -0.02731857,
        -0.02791057,  0.13005896,  0.12065515, -0.23498105, -0.29573511,
        -0.06885951,  0.28131776,  0.31373932,  0.0564836 ,  0.02835249,
         0.17190833, -0.03749068,  0.04751545, -0.2127734 , -0.01121337,
         0.05824769, -0.09289639, -0.00321679, -0.08346256, -0.25157313,
         0.00293849,  0.09485431,  0.00072263, -0.06408919, -0.0857017 ,
         0.15478988,  0.04548804, -0.0902423 , -0.06233313,  0.31061596,
         0.13110521,  0.05435116, -0.17786423,  0.13030271,  0.06361567],
       [ 0.06533315,  0.07145934, -0.00340568,  0.01771328,  0.04248543,
        -0.13292006,  0.07279786,  0.28650912, -0.39514107, -0.0756019 ,
        -0.16507294, -0.25103149,  0.15143345,  0.23623347, -0.13639874,
         0.11304678, -0.14141157, -0.1043319 , -0.

In [87]:
cluster_indices = np.array(cluster_indices, dtype=object)
cluster_indices_array = np.array([torch.tensor(np.squeeze(cluster_indices[0], axis = 1)),
                                  torch.tensor(np.squeeze(cluster_indices[1], axis = 1)),
                                  torch.tensor(np.squeeze(cluster_indices[2], axis = 1)),
                                  torch.tensor(np.squeeze(cluster_indices[3], axis = 1)),
                                  torch.tensor(np.squeeze(cluster_indices[4], axis = 1)),
                                 ], dtype = object)


cluster_indices_array

  cluster_indices_array = np.array([torch.tensor(np.squeeze(cluster_indices[0], axis = 1)),


array([tensor([   0,    3,    4,  ..., 9433, 9434, 9435]),
       tensor([  12,   13,   14,  ..., 9420, 9426, 9427]),
       tensor([  27,   52,  137,  177,  195,  239,  336,  367,  386,  489,  504,  952,
                987, 1348, 1350, 1351, 1352, 1522, 3021, 3142, 3294, 3419, 3501, 3553,
               3632, 3677, 3681, 4126, 4258, 4807, 4808, 4809, 4810, 4811, 4812, 4814,
               5071, 5089, 5241, 5273, 5534, 5536, 5628, 5660, 5674, 5772, 6265, 6539,
               6809, 6810, 6888, 6924, 7192, 7529, 7540, 7939, 7941, 8698, 8722, 8777,
               8822, 8842, 8868, 8873, 8875, 8881, 8888, 8899, 8982, 9078])           ,
       tensor([   2,   18,   43,   44,   50,   54,   57,   65,   70,   80,   83,  110,
                181,  231,  237,  268,  328,  350,  390,  413,  436,  459,  505,  519,
                536,  572,  599,  619,  637,  655,  660,  688,  707,  733,  745,  761,
                763,  779,  792,  828,  832,  844,  889,  936,  989, 1003, 1005, 1117,
           

In [88]:
import torch

class FuzzyLayer:
    """
    This is the parent class which will contain variables for Antecedant, Inference and Consequent classes.

    Args:
        X: The dataset which contains all the embeddings. shape(no_of_nodes, node_embed_dim)
        centroid_array: contains all the centroids of all the clusters. shape(no_of_clusters, node_embed_dim)
        cluster_indices_array: contains all the indices that belong to one particular cluster. shape(no_of_clusters, *number_of_indices_per_cluster*)
                                                                                               number_of_indices_per_cluster varies therefore it has no particular shape
    """

    def __init__(self, X, centroid_array, cluster_indices_array):
        self.X = torch.tensor(X, device=device, dtype=torch.float64)
        self.centroid_array = torch.tensor(centroid_array, device=device, dtype=torch.float64)
        self.cluster_indices_array = cluster_indices_array

class Antecedant(FuzzyLayer):
    """
    Antecedant part of the fuzzy logic system, it gives us membership functions for the node embeddings.
    """
    def __init__(self, X, centroid_array, cluster_indices_array):
        super().__init__(X, centroid_array, cluster_indices_array)

    def dot_product(self):
        # Use matrix multiplication instead of nested loops for efficiency
        dot_products = torch.matmul(self.X, self.centroid_array.T)
        return dot_products

    def get_points(self, cluster_num, embed_axis):
        # Fetch points corresponding to the cluster_num and embed_axis
        cluster_ids = torch.tensor(self.cluster_indices_array[cluster_num], device=device)
        points = self.X[cluster_ids, embed_axis]
        return points

    def get_mean(self, cluster_num, embed_axis):
        # Directly access the mean from centroid array
        return self.centroid_array[cluster_num, embed_axis]

    def get_standard_deviation(self, cluster_num, embed_axis):
        # Compute the variance in a vectorized manner
        points = self.get_points(cluster_num, embed_axis)
        mean = self.get_mean(cluster_num, embed_axis)
        variance = torch.var(points)
        variance = torch.sqrt(variance)
        return variance

    def get_stddev_tensor(self, embed_axis):
        # Vectorize variance computation for all clusters
        stddev = torch.tensor([self.get_standard_deviation(cluster_num, embed_axis) for cluster_num in range(self.centroid_array.shape[0])], device=device)
        return stddev

    def gaussianMF(self, cluster_num, embed_axis, element):
        # Vectorized Gaussian Membership Function calculation
        mean = self.get_mean(cluster_num, embed_axis)
        stddev_tensor = self.get_stddev_tensor(embed_axis)
        if stddev_tensor[cluster_num] != 0:
            gaussian = (element - mean) ** 2 / (2 * (stddev_tensor[cluster_num] ** 2))
        else:
            return 0.0
        return torch.exp(-gaussian)

    def get_membership_array(self, embed_axis):
        # Preallocate the membership array
        membership_array = torch.zeros((self.X.shape[0], self.centroid_array.shape[0]), device=device)
        for node_embed_num in range(self.X.shape[0]):
            for i in range(self.centroid_array.shape[0]):
                membership = self.gaussianMF(cluster_num=i, embed_axis=embed_axis, element=self.X[node_embed_num, embed_axis])
                membership_array[node_embed_num, i] = membership
        return membership_array



In [89]:
b = Antecedant(X = X,
               centroid_array = centroid_array,
               cluster_indices_array = cluster_indices_array)


In [90]:
membership_array = []
for embed_axis in range(node_embed_dim):
    mem_array_per_axis = b.get_membership_array(embed_axis = embed_axis)
    membership_array.append(mem_array_per_axis)
membership_array = torch.stack(membership_array)
membership_array

  cluster_ids = torch.tensor(self.cluster_indices_array[cluster_num], device=device)


tensor([[[4.8005e-01, 7.1221e-01, 7.4032e-01, 9.4302e-01, 8.8516e-01],
         [5.1398e-01, 4.9810e-01, 9.4329e-01, 9.1783e-01, 9.4693e-01],
         [9.9959e-01, 9.7456e-01, 9.8215e-01, 9.9854e-01, 9.9809e-01],
         ...,
         [8.9800e-01, 8.3728e-01, 9.9987e-01, 9.8147e-01, 9.9597e-01],
         [7.6906e-01, 7.1622e-01, 9.9038e-01, 9.6240e-01, 9.8380e-01],
         [9.2085e-01, 8.6104e-01, 9.9993e-01, 9.8481e-01, 9.9757e-01]],

        [[3.0571e-06, 4.3126e-04, 6.4834e-03, 2.6206e-01, 5.4608e-02],
         [7.7162e-02, 3.0762e-01, 5.1135e-01, 7.4207e-01, 5.6244e-01],
         [2.3254e-02, 8.5883e-02, 1.9370e-01, 6.7957e-01, 4.2083e-01],
         ...,
         [7.3100e-01, 9.1068e-01, 9.6022e-01, 9.5968e-01, 9.3365e-01],
         [7.7662e-01, 9.3307e-01, 9.7227e-01, 9.6669e-01, 9.4634e-01],
         [8.5849e-01, 9.6842e-01, 9.8982e-01, 9.7860e-01, 9.6765e-01]],

        [[1.1245e-01, 2.2506e-01, 6.9831e-01, 6.0596e-01, 7.0057e-01],
         [3.4045e-05, 1.3654e-03, 1.7379e-01,

In [138]:
#membership_array = membership_array[:, :, [0, 1, 2, 3]]

In [None]:
#membership_array

tensor([[[0.7825, 0.7935, 0.9832, 0.9271],
         [0.4548, 0.5402, 0.7105, 0.7738],
         [0.9512, 0.9475, 0.9999, 0.9853],
         ...,
         [0.7364, 0.7520, 0.9747, 0.9096],
         [0.6389, 0.6637, 0.9527, 0.8697],
         [0.5685, 0.5995, 0.9333, 0.8383]],

        [[0.9994, 0.9729, 0.9529, 0.9939],
         [0.8489, 0.9733, 0.9984, 0.9703],
         [0.5832, 0.8158, 0.9865, 0.8572],
         ...,
         [0.7676, 0.9347, 0.9999, 0.9411],
         [0.9267, 0.9972, 0.9899, 0.9922],
         [0.9998, 0.9615, 0.9453, 0.9894]],

        [[0.9658, 0.8643, 0.9663, 0.8856],
         [0.9310, 0.9997, 0.8681, 0.9993],
         [0.9875, 0.9048, 0.9542, 0.9164],
         ...,
         [0.9981, 0.9363, 0.9417, 0.9407],
         [1.0000, 0.9520, 0.9338, 0.9531],
         [0.9584, 0.8528, 0.9692, 0.8769]],

        ...,

        [[0.5658, 0.8158, 0.8558, 0.8628],
         [0.7698, 0.9426, 0.9121, 0.9163],
         [0.9069, 0.8077, 1.0000, 1.0000],
         ...,
         [0.7643, 0.6

In [91]:
import torch
import numpy as np  # Only if absolutely necessary

class Consequent(FuzzyLayer):
    """
    This class generates the crisp embeddings using the membership values generated by the Antecedant class.

    Essentially, we fuzzified each of the embeddings for each node, clustered the nodes, found centroids,
    and calculated membership for each embedding. Now, we use that membership to output crisp memberships.

    To do that, we generate sets with variances and treat them as the consequent fuzzy set from which crisp output is generated.
    """

    def __init__(self, X, centroid_array, cluster_indices_array):
        super().__init__(X, centroid_array, cluster_indices_array)

    def get_average_membership_array(self, membership_array):
        """
        Calculates the average membership for each embedding dimension across all clusters.

        Args:
            membership_array (torch.Tensor): The membership values of shape (node_embed_dim, num_nodes, num_clusters).

        Returns:
            torch.Tensor: The average membership values of shape (node_embed_dim, num_nodes).
        """
        return torch.mean(membership_array, dim=2)  # shape = (node_embed_dim, num_nodes)

    def get_embed_axis_mean(self):
        """
        Calculates the mean of each embedding axis.

        Returns:
            torch.Tensor: The mean values for each embedding axis of shape (node_embed_dim,).
        """
        a = self.X.transpose(dim0=1, dim1=0) #shape = (50, 5242)
        return torch.mean(a, axis = 1) #shape = (50,)

    def get_embed_axis_variance(self):
        """
        Calculates the variance of each embedding axis.

        Returns:
            torch.Tensor: The variance values for each embedding axis of shape (node_embed_dim,).
        """
        a = self.X.transpose(dim0=1, dim1=0)
        mean = self.get_embed_axis_mean()
        variance = torch.zeros(size = (node_embed_dim,))
        for i in range(node_embed_dim):
            x = torch.sqrt(torch.mean((a[i] - mean[i])**2))
            variance[i] += x
        return variance #shape = (50,)

    def get_crisp_embeddings(self, average_membership_array):
        """
        Generates the crisp embeddings using the average membership values.

        Args:
            average_membership_array (torch.Tensor): The average membership values of shape (node_embed_dim, num_nodes).

        Returns:
            torch.Tensor: The crisp embeddings of shape (num_nodes, node_embed_dim).
        """
        a = self.X.transpose(dim0=1, dim1=0)
        mean = self.get_embed_axis_mean()
        variance = self.get_embed_axis_variance()
        new_X = torch.zeros(size = (node_embed_dim, self.X.shape[0]))
        for i in range(node_embed_dim):
            for j in range(self.X.shape[0]):
                if average_membership_array[i][j] == 0:
                    new_X[i][j] = a[i][j]
                else:
                    b = -1*torch.log(average_membership_array[i][j])
                    if(a[i][j] < 0):
                        b = -1*torch.sqrt(b)
                    else:
                        b = torch.sqrt(b)
                    b = b*variance[i] + mean[i]
                    new_X[i][j] = b
        new_X = torch.transpose(new_X, dim0=1, dim1=0)
        new_X.shape
        #new_X = new_X/np.linalg.norm(new_X, axis = 0)
        return new_X

In [92]:
c = Consequent(X = X,
               centroid_array=centroid_array,
               cluster_indices_array = cluster_indices_array)

In [93]:
average_membership = c.get_average_membership_array(membership_array = membership_array)
average_membership 
average_membership.shape


torch.Size([50, 9436])

In [94]:
crisp = c.get_crisp_embeddings(average_membership_array = average_membership)
norm = np.linalg.norm(crisp, axis = 1)
print(norm.shape)
for i in range(crisp.shape[0]):
    crisp[i] = crisp[i]
crisp


(9436,)


tensor([[ 0.2507, -0.8600,  0.5219,  ..., -1.3424,  0.3504,  0.5811],
        [-0.3088,  0.4770,  0.8776,  ...,  0.1443, -0.1686, -0.0680],
        [-0.0847, -0.5847, -0.2775,  ...,  0.8069,  0.4502, -0.0019],
        ...,
        [-0.1623,  0.1743, -0.0270,  ..., -0.3273,  0.3599,  0.1165],
        [-0.2193,  0.1557,  0.0896,  ..., -0.1586,  0.2853,  0.1086],
        [-0.1498,  0.1190,  0.0198,  ..., -0.1663,  0.2794,  0.1132]])

In [95]:
crisp[torch.tensor(np.array(G.nodes()))]

tensor([[ 0.2507, -0.8600,  0.5219,  ..., -1.3424,  0.3504,  0.5811],
        [-0.3088,  0.4770,  0.8776,  ...,  0.1443, -0.1686, -0.0680],
        [-0.0847, -0.5847, -0.2775,  ...,  0.8069,  0.4502, -0.0019],
        ...,
        [-0.1646,  0.1358,  0.0897,  ..., -0.3266,  0.3130,  0.1090],
        [-0.1773,  0.1142,  0.0874,  ..., -0.1209,  0.2371,  0.1081],
        [-0.1498,  0.1190,  0.0198,  ..., -0.1663,  0.2794,  0.1132]])

In [96]:
import os
embedding_filename = "/Users/vanshgupta/Desktop/AI_and_ML_reading_material/GraphGAN_Project/Emb_Data/Emb/biogrid-human/Struc2Vec/fuzzy_emb_2%_error.txt"
embeddings = crisp[torch.tensor(np.array(G.nodes()))]
index = np.array(G.nodes()).reshape(-1, 1)
embedding_matrix = np.hstack([index, embeddings])
embedding_list = embedding_matrix.tolist()
embedding_str = [str(int(emb[0])) + " " + " ".join([str(x) for x in emb[1:]]) + "\n"
                  for emb in embedding_list]
with open(embedding_filename, "w+") as f:
    lines = [str(G.number_of_nodes()) + "\t" + str(node_embed_dim) + "\n"] + embedding_str
    f.writelines(lines)
