Random Node sampling

In [4]:
import networkx as nx
import random

def sample_graph(input_file, output_file, sample_size):
    # Read the network graph from the .txt file
    G = nx.read_edgelist(input_file)

    # Get a list of all nodes in the graph
    all_nodes = list(G.nodes)

    # Randomly select a subset of nodes based on the desired sample size
    sampled_nodes = random.sample(all_nodes, min(sample_size, len(all_nodes)))

    # Create a subgraph with only the sampled nodes
    sampled_graph = G.subgraph(sampled_nodes)

    # Save the sampled graph to a new .txt file without additional attributes
    nx.write_edgelist(sampled_graph, output_file, data=False)

if __name__ == "__main__":
    # Specify the input and output file paths
    input_file_path = "/home/noshamedevil/Documents/Assignments/SNACKS/SNACKS-Final-Project/Original_Data/soc-google-plus.txt"
    output_file_path = "/home/noshamedevil/Documents/Assignments/SNACKS/SNACKS-Final-Project/Sampled_Graphs/sampled_graph.txt"

    # Specify the desired sample size (number of nodes)
    sample_size = 10000

    # Call the function to sample the graph and save the result
    sample_graph(input_file_path, output_file_path, sample_size)
    print("done working")

done working


Random Pagerank

In [5]:
import networkx as nx
import random

def read_network(file_path):
    # Read the network graph from the .txt file
    G = nx.read_edgelist(file_path)
    return G

def run_pagerank(graph):
    # Run PageRank on the graph
    pagerank_scores = nx.pagerank(graph)
    return pagerank_scores

def sample_graph(graph, pagerank_scores, sample_size):
    # Get a list of nodes sorted by PageRank scores
    sorted_nodes = sorted(pagerank_scores, key=pagerank_scores.get, reverse=True)

    # Randomly select a subset of nodes based on the desired sample size
    sampled_nodes = random.sample(sorted_nodes, min(sample_size, len(graph)))

    # Create a subgraph with only the sampled nodes
    sampled_graph = graph.subgraph(sampled_nodes)

    return sampled_graph

if __name__ == "__main__":
    # Specify the path to the input .txt file for the network
    file_path = "/home/noshamedevil//Documents/Assignments/SNACKS/SNACKS-Final-Project/Original_Data/soc-google-plus.txt"

    # Read the network
    network = read_network(file_path)

    # Run PageRank
    pagerank_scores = run_pagerank(network)

    # Specify the desired sample size (number of nodes)
    sample_size = 10000

    # Sample the graph based on PageRank scores
    sampled_graph = sample_graph(network, pagerank_scores, sample_size)

    # Save the sampled graph to a new .txt file
    nx.write_edgelist(sampled_graph, "/home/noshamedevil//Documents/Assignments/SNACKS/SNACKS-Final-Project/Sampled_Graphs/sampled_graph_pagerank.txt")
    print("done working")

done working


Random Edge Selection

In [6]:
import networkx as nx
import random

def read_network(file_path):
    # Read the network graph from the .txt file
    G = nx.read_edgelist(file_path)
    return G

def random_edge_sampling(graph, sampling_fraction):
    # Get the list of edges
    all_edges = list(graph.edges())

    # Calculate the number of edges to sample
    num_edges_to_sample = int(len(all_edges) * sampling_fraction)

    # Randomly select edges based on the desired fraction
    sampled_edges = random.sample(all_edges, num_edges_to_sample)

    # Create a subgraph with only the sampled edges
    sampled_graph = nx.Graph(sampled_edges)

    return sampled_graph

if __name__ == "__main__":
    # Specify the path to the input .txt file for the network
    file_path = "/home/noshamedevil//Documents/Assignments/SNACKS/SNACKS-Final-Project/Original_Data/soc-google-plus.txt"

    # Read the network
    network = read_network(file_path)

    # Specify the sampling fraction (e.g., 0.1 for 10% sampling)
    sampling_fraction = 0.15

    # Perform random edge sampling
    sampled_graph = random_edge_sampling(network, sampling_fraction)

    # Save the sampled graph to a new .txt file
    nx.write_edgelist(sampled_graph, "/home/noshamedevil//Documents/Assignments/SNACKS/SNACKS-Final-Project/Sampled_Graphs/sampled_network_random_edge.txt")
    print("done working")

done working


Forest Fire

In [7]:
import networkx as nx
import random

def read_network(file_path):
    # Read the network graph from the .txt file
    G = nx.read_edgelist(file_path)
    return G

def forest_fire_sampling(graph, initial_nodes, depth_limit, p_burn, p_extend):
    sampled_graph = nx.Graph()

    burned_nodes = set()
    next_to_burn = set(initial_nodes)

    while next_to_burn and len(sampled_graph) < len(graph):
        current_burning = set(next_to_burn)
        next_to_burn = set()

        for node in current_burning:
            sampled_graph.add_node(node, **graph.nodes[node])
            burned_nodes.add(node)

            neighbors = set(graph.neighbors(node)) - burned_nodes
            sampled_neighbors = set(random.sample(neighbors, int(p_extend * len(neighbors))))

            sampled_graph.add_edges_from((node, neighbor) for neighbor in sampled_neighbors)
            next_to_burn.update(sampled_neighbors)

        next_to_burn = set(random.sample(next_to_burn, int(p_burn * len(next_to_burn))))

    return sampled_graph

if __name__ == "__main__":
    # Specify the path to the input .txt file for the network
    file_path = "/home/noshamedevil//Documents/Assignments/SNACKS/SNACKS-Final-Project/Original_Data/soc-google-plus.txt"

    # Read the network
    network = read_network(file_path)

    # Specify forest fire sampling parameters
    initial_nodes = random.sample(network.nodes(), 100)  # Example: Select 100 random nodes as initial nodes
    depth_limit = 35
    p_burn = 0.3  # Probability of an edge being burned
    p_extend = 0.1  # Probability of extending the fire

    # Perform forest fire sampling
    sampled_graph = forest_fire_sampling(network, initial_nodes, depth_limit, p_burn, p_extend)

    # Save the sampled graph to a new .txt file
    nx.write_edgelist(sampled_graph, "/home/noshamedevil//Documents/Assignments/SNACKS/SNACKS-Final-Project/Sampled_Graphs/sampled_forest_fire_network.txt")
    print("done working")

since Python 3.9 and will be removed in a subsequent version.
  initial_nodes = random.sample(network.nodes(), 100)  # Example: Select 100 random nodes as initial nodes
since Python 3.9 and will be removed in a subsequent version.
  sampled_neighbors = set(random.sample(neighbors, int(p_extend * len(neighbors))))
since Python 3.9 and will be removed in a subsequent version.
  next_to_burn = set(random.sample(next_to_burn, int(p_burn * len(next_to_burn))))


done working


Random Walk Sampling

In [8]:
import networkx as nx
import random

def read_network(file_path):
    # Read the network graph from the .txt file
    G = nx.read_edgelist(file_path)
    return G

def random_walk_sampling(graph, start_node, steps):
    sampled_nodes = [start_node]
    current_node = start_node

    for _ in range(steps):
        neighbors = list(graph.neighbors(current_node))
        if neighbors:
            next_node = random.choice(neighbors)
            sampled_nodes.append(next_node)
            current_node = next_node
        else:
            break  # Break if the current node has no neighbors

    sampled_graph = graph.subgraph(sampled_nodes)

    return sampled_graph

if __name__ == "__main__":
    # Specify the path to the input .txt file for the network
    file_path = "/home/noshamedevil//Documents/Assignments/SNACKS/SNACKS-Final-Project/Original_Data/soc-google-plus.txt"

    # Read the network
    network = read_network(file_path)

    # Specify random walk sampling parameters
    start_node = random.choice(list(network.nodes()))
    steps = 1000  # Adjust the number of steps as needed

    # Perform random walk sampling
    sampled_graph = random_walk_sampling(network, start_node, steps)

    # Save the sampled graph to a new .txt file
    nx.write_edgelist(sampled_graph, "/home/noshamedevil//Documents/Assignments/SNACKS/SNACKS-Final-Project/Sampled_Graphs/sampled_random_walk_network.txt")
    print("done working")

done working
