Random Node sampling

In [49]:
import networkx as nx
import random

def sample_graph(input_file, output_file, sample_size):
    """Write a uniformly random node sample of an edge-list graph to disk.

    The graph is loaded from ``input_file``, up to ``sample_size`` nodes are
    picked uniformly at random, and the subgraph restricted to those nodes is
    written to ``output_file`` as a plain edge list (no edge attributes).

    Args:
        input_file: Path of the edge-list file to read.
        output_file: Path the sampled edge list is written to.
        sample_size: Requested number of nodes; capped at the number of
            nodes present in the graph.
    """
    graph = nx.read_edgelist(input_file)

    # Materialise the node view so random.sample gets a real sequence.
    node_pool = list(graph.nodes)

    # Never ask for more nodes than the graph contains.
    keep_count = min(sample_size, len(node_pool))
    chosen = random.sample(node_pool, keep_count)

    # data=False keeps the output to bare "u v" pairs.
    nx.write_edgelist(graph.subgraph(chosen), output_file, data=False)

if __name__ == "__main__":
    # Number of nodes to keep in the sample.
    sample_size = 10000

    # Source edge list and destination for the sampled edge list.
    input_file_path = "soc-google-plus.txt"
    output_file_path = "sampled_graph.txt"

    # Draw the sample and write it to disk.
    sample_graph(
        input_file=input_file_path,
        output_file=output_file_path,
        sample_size=sample_size,
    )


Random PageRank Node sampling

In [61]:
import networkx as nx
import random

def read_network(file_path):
    """Load a graph from the edge-list text file at ``file_path``.

    Args:
        file_path: Path of the edge-list file to read.

    Returns:
        The graph object produced by ``nx.read_edgelist``.
    """
    return nx.read_edgelist(file_path)

def run_pagerank(graph):
    """Compute PageRank for ``graph`` using networkx's default settings.

    Args:
        graph: The graph to score.

    Returns:
        The result of ``nx.pagerank(graph)``; callers in this file use it as
        a node -> score mapping.
    """
    return nx.pagerank(graph)

def sample_graph(graph, pagerank_scores, sample_size):
    """Return a subgraph whose nodes are drawn with PageRank-proportional odds.

    Nodes are selected without replacement, and a node's chance of being
    picked grows with its PageRank score. (Sorting by score and then drawing
    uniformly would make the scores irrelevant to the outcome.)

    Args:
        graph: Graph object providing ``subgraph(nodes)``.
        pagerank_scores: Mapping of node -> non-negative PageRank score. Only
            nodes present in this mapping can be selected.
        sample_size: Requested number of nodes; capped at the number of
            scored nodes.

    Returns:
        ``graph.subgraph(sampled_nodes)`` for the selected nodes.

    Raises:
        ValueError: If ``sample_size`` is negative.
    """
    if sample_size < 0:
        raise ValueError("sample_size must be non-negative")

    def arrival_time(node):
        # Exponential race: each node gets an Exp(weight) arrival time and
        # the earliest arrivals win, which is equivalent to successive
        # weighted draws without replacement.
        weight = pagerank_scores[node]
        if weight > 0:
            return random.expovariate(weight)
        # Zero-weight nodes are only used once every positive-weight node
        # has been taken (ties keep the mapping's iteration order).
        return float("inf")

    # Cap by the scored population, which is what we actually draw from.
    target = min(sample_size, len(pagerank_scores))
    sampled_nodes = sorted(pagerank_scores, key=arrival_time)[:target]

    # Create a subgraph with only the sampled nodes
    return graph.subgraph(sampled_nodes)

if __name__ == "__main__":
    # Input edge list and the number of nodes to keep.
    file_path = "soc-google-plus.txt"
    sample_size = 10000

    # Load the graph and score every node with PageRank.
    network = read_network(file_path)
    pagerank_scores = run_pagerank(network)

    # Build the node sample from the scored graph.
    sampled_graph = sample_graph(
        graph=network,
        pagerank_scores=pagerank_scores,
        sample_size=sample_size,
    )

    # Persist the sampled subgraph as an edge list.
    nx.write_edgelist(sampled_graph, "sampled_graph_pagerank.txt")
