In [None]:
import random
from pathlib import Path

import networkx as nx
import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse.linalg import inv

In [None]:
# Read a TSV file, build a list of edges from it, and create a graph object from those edges
def create_graph(file_path):
    '''
    Reads a file with a tab-separated edgelist and creates a NetworkX graph from it.

    Every non-blank line must hold exactly two tab-separated node labels.
    Labels are kept as strings. Blank lines (e.g. a trailing empty line at the
    end of the file) are skipped.

    Parameters:
        file_path (str): Path to the file containing the edge list.

    Returns:
        G (networkx.Graph): Undirected graph created from the edgelist

    Raises:
        ValueError: If a non-blank line does not consist of exactly two
            tab-separated fields.
    '''
    edges = [] # Empty list to store edges
    with open(file_path, 'r') as file: # Open the file containing the edgelist
        for line_number, line in enumerate(file, start=1):
            stripped = line.strip()
            if not stripped:
                # Blank lines carry no edge; skipping them avoids an opaque unpacking error
                continue
            fields = stripped.split('\t') # Split each line into its node labels
            if len(fields) != 2:
                raise ValueError(
                    f'{file_path}, line {line_number}: expected 2 tab-separated '
                    f'node labels, found {len(fields)}'
                )
            edges.append((fields[0], fields[1])) # Append edge to edgelist
    G = nx.Graph() # Create an empty graph object
    G.add_edges_from(edges) # Add edges to the graph
    return G

In [1]:
def effective_resistance_matrix(L):
    '''
    Computes the effective resistance matrix for a given Laplacian matrix L.

    The entry R[i, j] is (e_i - e_j)^T L^+ (e_i - e_j), where L^+ is the
    Moore-Penrose pseudo-inverse of L and e_i is the i-th unit vector.
    A pseudo-inverse is required because a graph Laplacian is singular
    (its rows sum to zero), so an ordinary inverse does not exist.

    Parameters:
        L (array-like or scipy sparse matrix): Square Laplacian matrix.

    Returns:
        R (numpy.ndarray): Dense matrix with the same shape as L holding the
            pairwise effective resistances; the diagonal is zero.

    Raises:
        ValueError: If L is not a square 2-D matrix.
    '''
    # Accept SciPy sparse matrices as well as dense arrays
    if hasattr(L, 'toarray'):
        L = L.toarray()
    L_dense = np.asarray(L, dtype=float)
    if L_dense.ndim != 2 or L_dense.shape[0] != L_dense.shape[1]:
        raise ValueError('L must be a square 2-D matrix')

    L_pinv = np.linalg.pinv(L_dense)

    # Expanding the quadratic form gives
    #   R[i, j] = L^+[i, i] + L^+[j, j] - L^+[i, j] - L^+[j, i],
    # which is evaluated for all pairs at once instead of looping over them.
    diagonal = np.diag(L_pinv)
    R = diagonal[:, None] + diagonal[None, :] - L_pinv - L_pinv.T
    np.fill_diagonal(R, 0.0) # The resistance between a node and itself is zero
    return R

In [2]:
def gsp_graph_sparsify(G, epsilon=0.1, max_tries=10, seed=None):
    '''
    Sparsifies a graph by sampling its edges with probability proportional
    to their effective resistance.

    Edges are drawn with replacement; the returned graph contains every node
    of G and each edge that was drawn at least once. If the sampled graph is
    not connected, epsilon is moved halfway towards its lower bound
    1/sqrt(N) (which increases the number of draws) and sampling is retried.

    Parameters:
        G (networkx.Graph): Graph to sparsify.
        epsilon (float): Sparsification parameter; must lie in [1/sqrt(N), 1],
            where N is the number of nodes of G. Smaller values draw more edges.
        max_tries (int): Maximum number of sampling attempts to obtain a
            connected graph.
        seed (int or None): If given, seeds a dedicated NumPy random generator
            for the edge sampling. If None, NumPy's global random state is used.

    Returns:
        H (networkx.Graph): Unweighted sparsified graph on the nodes of G.

    Raises:
        ValueError: If G has fewer than 3 nodes, has no edges with positive
            effective resistance, epsilon is out of range, or max_tries < 1.
        Warning: Raised (as an exception) if no connected graph was obtained
            within max_tries attempts.
    '''
    N = G.number_of_nodes()
    if N < 3:
        raise ValueError('Cannot sparsify a graph with less than 3 nodes')

    if epsilon < 1/np.sqrt(N) or epsilon > 1:
        raise ValueError('Epsilon out of required range')

    if max_tries < 1:
        raise ValueError('max_tries must be at least 1')

    edges = list(G.edges())
    if not edges:
        raise ValueError('Cannot sparsify a graph without edges')

    # Fix the node order explicitly so that matrix rows/columns can be mapped
    # back to node labels (labels may be strings or non-contiguous integers)
    nodes = list(G.nodes())
    index_of = {node: position for position, node in enumerate(nodes)}

    # Compute Laplacian matrix
    L = nx.laplacian_matrix(G, nodelist=nodes).toarray()

    # Compute resistance distances
    resistance_distances = np.asarray(effective_resistance_matrix(L), dtype=float)

    # Initialize the probability distribution for edge selection
    rows = [index_of[u] for u, _ in edges]
    cols = [index_of[v] for _, v in edges]
    # Clip tiny negative values caused by round-off; probabilities must be non-negative
    Pe = np.maximum(resistance_distances[rows, cols], 0.0)

    total = np.sum(Pe)
    if not total > 0:
        raise ValueError('Cannot sparsify a graph whose edges all have zero effective resistance')
    Pe = Pe / total  # Normalize to form a probability distribution

    # Constants of the sample-size formula (do not change between attempts)
    C0 = 1/30  # This constant can be adjusted
    C = 4 * C0

    sampler = np.random if seed is None else np.random.default_rng(seed)

    for attempt in range(max_tries):
        # Number of edge draws for the current epsilon
        q = int(round(9 * C**2 * N * np.log(N) / (epsilon**2)))

        # Choose random edges based on the probability distribution
        selected_edges_idx = sampler.choice(len(edges), size=q, p=Pe)

        H = nx.Graph()
        # Keep every original node so that the connectivity check below also
        # detects nodes that lost all of their edges
        H.add_nodes_from(nodes)
        H.add_edges_from(edges[idx] for idx in selected_edges_idx)

        # Check if the new graph is connected
        if nx.is_connected(H):
            break
        elif attempt == max_tries - 1:
            raise Warning('Despite attempts to reduce epsilon, sparsified graph is disconnected')
        else:
            # Move epsilon halfway towards its lower bound to draw more edges next time
            epsilon = epsilon - (epsilon - 1/np.sqrt(N)) / 2

    return H

# Sparsifying the graphs

In [1]:
# Input: path to the file holding the original, non-sparsified network
non_sparsified_network_path = 'path_to_non_sparsified_network_file'
# Output: folder in which the sparsified networks shall be saved
save_folder = 'folder_to_save_sparsified_networks'

In [None]:
original_graph = create_graph(non_sparsified_network_path) # Creating a networkx graph object

# Laplacian and effective resistance matrix of the original graph. They are kept as
# notebook-level variables; gsp_graph_sparsify derives its own from the graph it is given.
L = nx.laplacian_matrix(original_graph).toarray() # Get the Laplacian matrix of the graph as a dense array
R = effective_resistance_matrix(L) # Calculating the effective resistance matrix

# Base name of the input file (no folder, no extension), used to name the output files
file_name = Path(non_sparsified_network_path).stem

epsilons = [0.03, 0.05, 0.07, 0.08, 0.09, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.5, 0.6, 0.7] # List of epsilons for different sparsifications

# Iterate through all epsilons to get the differently strongly sparsified graphs.
# Note: gsp_graph_sparsify rejects any epsilon below 1/sqrt(number of nodes).
for epsilon in epsilons:
    sparsified_graph = gsp_graph_sparsify(original_graph, epsilon)
    nx.write_edgelist(sparsified_graph, f'{save_folder}/{file_name}_sparsified_epsilon_{epsilon}.tsv', delimiter='\t', data=True)