In [1]:
import random
import numpy as np
import pandas as pd
import networkx as nx
from collections import Counter
from scipy.stats import linregress

In [2]:
# Random-walk sampler implementing the logic from the previous question.
# Takes a graph and the number of nodes to sample; returns a graph built from
# every edge that touches a sampled node.
def rw(G, n):
   """Sample ``n`` nodes of ``G`` with a random walk and return the sampled graph.

   The walk starts at a uniformly random node and keeps stepping to a
   uniformly random neighbor of the current node. The first time a node is
   visited it counts as sampled and all of its incident edges are recorded.
   Because whole neighborhoods are recorded, the returned graph also contains
   the not-yet-visited neighbors of sampled nodes and will usually have more
   than ``n`` nodes.

   Parameters
   ----------
   G : graph exposing ``nodes`` and ``neighbors(node)`` (e.g. ``networkx.Graph``)
      Node labels must be mutually orderable, since every edge is stored as a
      ``(min, max)`` pair to avoid recording it twice.
   n : int
      Number of distinct nodes the walk has to visit.

   Returns
   -------
   networkx.Graph
      Graph built from the recorded edges.

   Raises
   ------
   ValueError
      If ``n`` is larger than the number of nodes in ``G``.
   IndexError
      If the walk stands on a node without neighbors (``random.choice`` is
      called on an empty list).

   Notes
   -----
   The walk only moves along edges, so if the start node lies in a connected
   component with fewer than ``n`` nodes the loop cannot terminate.
   """
   nodes = list(G.nodes)
   if n > len(nodes):
      # Without this guard the loop below could never collect n distinct nodes.
      raise ValueError("cannot sample %d nodes from a graph with %d nodes" % (n, len(nodes)))
   if n <= 0:
      return nx.Graph()
   sampled_nodes = set()
   sampled_edges = set()
   curnode = random.choice(nodes)
   # Strict '<': stop as soon as exactly n distinct nodes have been visited.
   while len(sampled_nodes) < n:
      neighbors = list(G.neighbors(curnode))
      if curnode not in sampled_nodes:
         sampled_nodes.add(curnode)
         # Normalise each edge to (smaller, larger) so (u, v) and (v, u) collapse.
         sampled_edges |= {(min(curnode, neighbor), max(curnode, neighbor)) for neighbor in neighbors}
      curnode = random.choice(neighbors)
   return nx.Graph(list(sampled_edges))

In [3]:
def ccdf(dd):
   """Build the complementary cumulative distribution of a value -> count mapping.

   Parameters
   ----------
   dd : mapping
      Maps each observed value ``k`` (e.g. a degree) to how often it occurs,
      such as a ``collections.Counter``.

   Returns
   -------
   pandas.DataFrame
      Columns ``"k"`` and ``"ccdf"``, sorted by ascending ``k``; ``"ccdf"`` is
      the share of all observations whose value is at least ``k``.
   """
   by_k = pd.DataFrame(list(dd.items()), columns = ("k", "count")).sort_values(by = "k")
   # Walk from the largest k downwards so the running total counts "k or more".
   from_top = by_k.sort_values(by = "k", ascending = False)
   total = from_top["count"].sum()
   from_top = from_top.assign(ccdf = from_top["count"].cumsum() / total)
   return from_top[["k", "ccdf"]].sort_values(by = "k")

def dd_exponent(degdistr):
   """Estimate the exponent of a CCDF as the slope of its log-log linear fit.

   Parameters
   ----------
   degdistr : pandas.DataFrame
      Table with a ``"k"`` column and a ``"ccdf"`` column.

   Returns
   -------
   float
      Slope of the least-squares line fitted to ``log10(ccdf)`` against
      ``log10(k)``.

   Notes
   -----
   Rows with a non-positive ``k`` or ``ccdf`` are left out of the fit: their
   logarithm is not finite, and a single such row (e.g. degree 0 from isolated
   nodes) would otherwise turn the slope into NaN.
   """
   points = degdistr[["k", "ccdf"]]
   points = points[(points["k"] > 0) & (points["ccdf"] > 0)]
   logcdf = np.log10(points)
   return linregress(logcdf["k"], logcdf["ccdf"]).slope

In [4]:
# Load the network from its edge list file.
# nodetype=int makes networkx convert every node label to an integer.
G = nx.read_edgelist("../data/25_1.txt", nodetype = int)



In [5]:
# Degree distribution of the original graph:
#   1. iterate the degree view, which yields (node, degree) pairs, keeping the degrees
#   2. tally how many nodes have each degree with collections.Counter
#   3. convert the tallies into a CCDF with ccdf()
dd = Counter(degree for _node, degree in G.degree)
G_ccdf = ccdf(dd)

print("Original Exponent: %1.4f" % (dd_exponent(G_ccdf),))

Original Exponent: -1.6013


In [6]:
# Draw N_SAMPLES random-walk samples of SAMPLE_SIZE nodes each and store the
# CCDF exponent of every sample in a list.
# This will take a while.
N_SAMPLES = 100
SAMPLE_SIZE = 2000
# Fix the seed of the random module so that re-running this cell repeats the
# same walks and therefore the same exponents.
random.seed(42)
smpl_exponents = []
for _ in range(N_SAMPLES):
   G_smpl = rw(G, SAMPLE_SIZE)
   G_smpl_ccdf = ccdf(Counter(dict(G_smpl.degree).values()))
   smpl_exponents.append(dd_exponent(G_smpl_ccdf))

In [7]:
# Summarise the sampled exponents by their mean and standard deviation
smpl_exponents_mean, smpl_exponents_std = (
   np.mean(smpl_exponents),
   np.std(smpl_exponents),
)
# The exponent of the samples differs from the original network's:
# about -1.125 vs about -1.601 in the recorded run.
print("Sample Exponent: %1.4f (+/- %1.4f)" % (smpl_exponents_mean, smpl_exponents_std))

Sample Exponent: -1.1251 (+/- 0.0097)
