In [1]:
import random
import numpy as np
import pandas as pd
import networkx as nx
from collections import Counter
from scipy.stats import linregress

In [2]:
# Function implementing the random walk logic from the previous question
def rw(G, n):
   """Sample a subgraph of G with a random walk that collects n distinct nodes.

   The walk starts at a uniformly random node and repeatedly hops to a
   uniformly random neighbor. The first time a node is visited it is marked
   as sampled and all of its incident edges are recorded, so the returned
   graph also contains the not-yet-visited neighbors of the sampled nodes.

   The walk stops as soon as n distinct nodes have been sampled, or earlier
   when no unsampled node is reachable any more (for instance when n exceeds
   the size of the start node's connected component). Without that second
   condition the walk would never terminate on such inputs.

   Parameters
   ----------
   G : graph exposing `.nodes` and `.neighbors(node)`. Assumed undirected;
       node labels must be mutually comparable because edges are stored as
       (min, max) pairs.
   n : number of distinct nodes to sample; n <= 0 yields an empty graph.

   Returns
   -------
   nx.Graph built from the recorded edges.
   """
   sampled_nodes = set()
   sampled_edges = set()
   # Nodes already seen as a neighbor of a sampled node but not sampled yet.
   # When this set is empty the walk cannot discover anything new.
   frontier = set()

   nodes = list(G.nodes)
   if not nodes:
      # Nothing to walk on; random.choice would raise on an empty list.
      return nx.Graph(list(sampled_edges))

   curnode = random.choice(nodes)
   # Strict "<" so that exactly n nodes are sampled (the previous "<=" took n + 1).
   while len(sampled_nodes) < n:
      neighbors = list(G.neighbors(curnode))
      if curnode not in sampled_nodes:
         sampled_nodes.add(curnode)
         frontier.discard(curnode)
         for neighbor in neighbors:
            # Store each undirected edge once, in a canonical (small, large) order.
            sampled_edges.add((min(curnode, neighbor), max(curnode, neighbor)))
            if neighbor not in sampled_nodes:
               frontier.add(neighbor)
         if not frontier:
            # Every reachable node is sampled (this also covers an isolated start node).
            break
      curnode = random.choice(neighbors)
   return nx.Graph(list(sampled_edges))

In [3]:
# Function generating a CCDF, from previous exercises
def ccdf(dd):
   """Turn a degree histogram into its complementary cumulative distribution.

   Parameters
   ----------
   dd : mapping from degree k to the number of nodes having that degree.

   Returns
   -------
   DataFrame with columns "k" and "ccdf", sorted by increasing k, where
   "ccdf" is the fraction of nodes whose degree is >= k.
   """
   histogram = pd.DataFrame(list(dd.items()), columns = ("k", "count"))
   # Walk the degrees from largest to smallest so that the running total at
   # each row counts every node with degree >= k.
   by_k_desc = histogram.sort_values(by = "k", ascending = False)
   nodes_at_least_k = by_k_desc["count"].cumsum()
   by_k_desc["ccdf"] = nodes_at_least_k / by_k_desc["count"].sum()
   return by_k_desc[["k", "ccdf"]].sort_values(by = "k")

In [4]:
# Function performing a simple regression in log-log space
def dd_exponent(degdistr):
   """Estimate the degree-distribution exponent as a slope in log-log space.

   Fits an ordinary least-squares line to log10(ccdf) against log10(k) and
   returns its slope.

   Parameters
   ----------
   degdistr : DataFrame with columns "k" (degree) and "ccdf".

   Returns
   -------
   The slope of the fitted line.
   """
   # log10 is -inf at 0, and a single -inf point turns the whole fit into NaN,
   # so rows with a non-positive degree or ccdf are left out of the regression.
   usable = degdistr[(degdistr["k"] > 0) & (degdistr["ccdf"] > 0)]
   logcdf = np.log10(usable[["k", "ccdf"]])
   # linregress returns (slope, intercept, r, p, stderr); only the slope is needed.
   slope, *_ = linregress(logcdf["k"], logcdf["ccdf"])
   return slope

In [6]:
# Load the full network, build the CCDF of its degree distribution and
# report the exponent fitted on it.
G = nx.read_edgelist("../data/25_1.txt", nodetype = int)
# Iterating the degree view yields (node, degree) pairs; only the degree is counted.
G_ccdf = ccdf(Counter(degree for _, degree in G.degree))
print("Original Exponent: %1.4f" % dd_exponent(G_ccdf))

Original Exponent: -1.6013


In [7]:
# Let's take N_SAMPLES random-walk samples and store their degree exponent in a list
N_SAMPLES = 100      # number of independent random-walk samples
SAMPLE_SIZE = 2000   # value of n handed to rw for every sample
SEED = 42            # arbitrary; fixed so that re-running the cell gives the same exponents

# rw draws from the module-level `random` generator, so seeding it here makes
# this cell reproducible on its own.
random.seed(SEED)

smpl_exponents = []
for _ in range(N_SAMPLES):
   G_smpl = rw(G, SAMPLE_SIZE)
   G_smpl_ccdf = ccdf(Counter(dict(G_smpl.degree).values()))
   smpl_exponents.append(dd_exponent(G_smpl_ccdf))

In [9]:
# Summarise the exponents fitted on the random-walk samples: mean and spread.
smpl_exponents_mean, smpl_exponents_std = np.mean(smpl_exponents), np.std(smpl_exponents)
# The exponent of the sample is different! In the recorded run: ~1.125 vs 1.6 (in magnitude) for the full network.
print("Sample Exponent: %1.4f (+/- %1.4f)" % (smpl_exponents_mean, smpl_exponents_std))

Sample Exponent: -1.1256 (+/- 0.0116)


In [10]:
# Mean and standard deviation of the exponents fitted on the samples.
smpl_exponents_mean = np.asarray(smpl_exponents).mean()
smpl_exponents_std = np.asarray(smpl_exponents).std()
# The exponent of the sample is different! ~1.125 vs 1.6!
print("Sample Exponent: %1.4f (+/- %1.4f)" % (smpl_exponents_mean, smpl_exponents_std))

Sample Exponent: -1.1256 (+/- 0.0116)
