In [1]:
import random
import networkx as nx

In [2]:
# Read the network from disk.
# The file is stored as an edge list, and nodetype=int makes NetworkX
# convert every node label to an integer instead of keeping it as a string.
G = nx.read_edgelist(
    "../data/25_1.txt",
    nodetype=int,
)

In [3]:
# Seed the global random module so that the seed node and every step of the
# random walk are reproducible when the notebook is re-run from a fresh kernel.
# Change the value to draw a different sample.
random.seed(42)

# We need to keep track of the nodes and the edges we have sampled, so we store them in sets.
sampled_nodes = set()
sampled_edges = set()

# The exploration needs a seed node to start from; here we simply pick one
# uniformly at random among all nodes of the network.
curnode = random.choice(list(G.nodes))

In [4]:
# Random-walk sampling: walk from node to node and, the first time a node is
# visited, record it together with all of its connections.
# Tip: Remember that we have to sample 2000 nodes. This can be done in a while loop.
# Tip 2: Get the neighbors of the current node.

N_SAMPLES = 2000  # number of distinct nodes the walk has to visit

# Never ask for more nodes than the network contains, otherwise the loop could
# not terminate. NOTE: the walk can only reach nodes in the connected component
# of the seed node, so that component must hold at least `n_target` nodes.
n_target = min(N_SAMPLES, G.number_of_nodes())

# Strict "<": the loop stops as soon as exactly n_target nodes are sampled
# ("<=" would keep walking until one extra node had been added).
while len(sampled_nodes) < n_target:
    # First we get the neighbors of the node we're currently exploring
    neighbors = list(G.neighbors(curnode))

    # True only the first time we land on this node, i.e. when its
    # connections have not been added to sampled_edges yet.
    if curnode not in sampled_nodes:
        sampled_nodes.add(curnode)  # remember that we sampled this node

        # Store every edge of the node in a canonical (smaller, larger) form.
        # The network is undirected, so the same edge could otherwise be stored
        # once as (curnode, neighbor) and once as (neighbor, curnode).
        # Because ALL connections of a sampled node are kept, the sampled graph
        # ends up containing more nodes than the n_target we walked through.
        sampled_edges.update(
            (min(curnode, neighbor), max(curnode, neighbor))
            for neighbor in neighbors
        )

    # We move on to a random neighbor of the current node, because we're doing a random walk.
    curnode = random.choice(neighbors)


In [5]:
# Build a graph out of the sampled edges
G_smpl = nx.Graph()
G_smpl.add_edges_from(sampled_edges)

# Print the number of nodes in the sampled graph
print(G_smpl.number_of_nodes())

64294
