In [None]:
import networkx as nx
import random
import tarfile

# Method 1: Randomly selecting 100 nodes from graph 

In [None]:
# Unpack the downloaded archive and locate the data file inside it.
with tarfile.open("facebook.tar.gz", "r:gz") as tar:
    # Use the "data" extraction filter where this Python version provides it,
    # so archive members cannot be written outside facebook_data/.
    if hasattr(tarfile, "data_filter"):
        tar.extractall(path="facebook_data/", filter="data")
    else:
        tar.extractall(path="facebook_data/")
    file_list = tar.getnames()
    # Directory entries are listed as members too; only regular files can be
    # opened and parsed afterwards.
    regular_files = [member.name for member in tar.getmembers() if member.isfile()]

if not regular_files:
    raise FileNotFoundError("facebook.tar.gz contains no regular files")

# First regular file in archive order. This equals file_list[0] whenever the
# archive starts with a file rather than a directory entry.
facebook_file = "facebook_data/" + regular_files[0]

# Parse the data and form connections
# Build an undirected graph with one edge per "user1 user2" line of the file.
G = nx.Graph()
with open(facebook_file, 'r') as f:
    for line in f:
        fields = line.split()
        if not fields:
            # Blank lines (e.g. a trailing empty line) carry no edge; unpacking
            # them into two ids would raise ValueError.
            continue
        user1, user2 = map(int, fields)
        G.add_edge(user1, user2)

In [None]:
# Number of nodes to draw. Capped at the graph size because random.sample
# raises ValueError when asked for more items than the population holds.
sample_size = min(100, G.number_of_nodes())

new_G = nx.Graph()
selected_nodes = random.sample(list(G.nodes()), sample_size)

# Copy every edge of the original graph whose two endpoints were both selected.
# Each unordered pair is checked once, in selection order.
# NOTE: a selected node with no edge to another selected node is never added
# to new_G, so the node count printed below can be smaller than sample_size.
for i, u in enumerate(selected_nodes):
    for v in selected_nodes[i + 1:]:
        if G.has_edge(u, v):
            new_G.add_edge(u, v)

print(f"New Graph has {new_G.number_of_nodes()} nodes and {new_G.number_of_edges()} edges.")
nx.write_edgelist(new_G, "new_facebook_network.txt")

# Method 2: Random walk sampling

Start at a random node and perform a "random walk." At each step, with a given probability, either continue the walk to a neighboring node or stop the walk. If the walk stops, start another random walk at a node that hasn't been visited yet.

In [None]:
def random_walk_sampling(G, num_nodes):
    """Sample up to ``num_nodes`` nodes of ``G`` with self-avoiding random walks.

    A walk starts at a randomly chosen node that has not been sampled yet and
    repeatedly steps to a randomly chosen neighbor that has not been sampled
    yet. A walk ends when the current node has no such neighbor or when the
    node quota is reached; while the quota is still open, a new walk is
    started from another unsampled node.

    Args:
        G: Graph to sample from. It must provide ``nodes()`` and
            ``neighbors(node)``.
        num_nodes: Maximum number of nodes in the returned graph.

    Returns:
        A new ``nx.Graph`` containing every sampled node and the edges that
        the walks traversed. It holds exactly ``num_nodes`` nodes when ``G``
        has at least that many, otherwise all nodes of ``G``. A non-positive
        ``num_nodes`` yields an empty graph.
    """
    sampled_graph = nx.Graph()
    visited = set()

    # A random permutation of the nodes supplies the walk starting points:
    # skipping already-sampled entries picks a random unsampled start without
    # repeatedly re-drawing and removing from a list.
    candidates = list(G.nodes())
    random.shuffle(candidates)

    for start_node in candidates:
        if len(visited) >= num_nodes:
            break
        if start_node in visited:
            continue

        current_path = [start_node]
        visited.add(start_node)

        # Checking the quota before each step keeps the path from overshooting
        # it, so no extra node can slip into the result through a final edge.
        while len(visited) < num_nodes:
            # Path nodes are already in `visited`, so this one filter also
            # prevents the walk from revisiting its own path.
            neighbors = [n for n in G.neighbors(current_path[-1]) if n not in visited]
            if not neighbors:
                break

            next_node = random.choice(neighbors)
            current_path.append(next_node)
            visited.add(next_node)

        # Add the nodes explicitly: a walk that could not leave its start node
        # has no edge, yet the node still counts toward the sample.
        sampled_graph.add_nodes_from(current_path)
        sampled_graph.add_edges_from(zip(current_path, current_path[1:]))

    return sampled_graph