In [1]:
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
import time
import random

In [2]:
# Parsing options handed to nx.read_edgelist for every edge-list file:
# lines starting with `comments` are skipped, fields are split on `delimiter`.
comments, delimiter = '#', ' '
# Prefix (including the trailing slash) prepended to each data file name.
FILE_PATH_PREFIX = './data/'

In [3]:
%%time
lscc = nx.read_edgelist(FILE_PATH_PREFIX + 'gplus_edges_lscc.txt', 
                     comments=comments, delimiter=delimiter, 
                     nodetype=float,create_using=nx.DiGraph())

CPU times: user 31 s, sys: 788 ms, total: 31.8 s
Wall time: 31.8 s


In [4]:
%%time
lwcc = nx.read_edgelist(FILE_PATH_PREFIX + 'gplus_edges_lwcc.txt', 
                     comments=comments, delimiter=delimiter,
                     nodetype=float,create_using=nx.DiGraph())

CPU times: user 44.9 s, sys: 976 ms, total: 45.9 s
Wall time: 45.9 s


In [5]:
def getRandomShortestPathDistribution(ccGraph, numOfSamples, seed=None):
    """Sample shortest-path lengths between randomly chosen node pairs.

    Draws ``numOfSamples`` (source, target) pairs uniformly at random, with
    replacement, from the nodes of ``ccGraph`` and records the shortest-path
    length of every pair for which a path exists. Source and target are drawn
    independently, so a pair may consist of the same node twice.

    Parameters
    ----------
    ccGraph : networkx graph
        Graph to sample from.
    numOfSamples : int
        Number of random pairs to draw.
    seed : optional
        When given, a private ``random.Random(seed)`` drives the sampling so
        the result is reproducible. When omitted, the module-level ``random``
        generator is used.

    Returns
    -------
    numpy.ndarray
        Path lengths of the sampled pairs that are connected. Pairs without a
        path are skipped, so the array can hold fewer than ``numOfSamples``
        entries; it is empty when the graph has no nodes.
    """
    graphnodes = list(ccGraph.nodes())
    if not graphnodes:
        # Nothing to sample from.
        return np.array([])

    rng = random if seed is None else random.Random(seed)
    shortestPaths = []
    for _ in range(numOfSamples):
        source = rng.choice(graphnodes)
        target = rng.choice(graphnodes)
        try:
            shortestPaths.append(
                nx.shortest_path_length(ccGraph, source, target))
        except nx.NetworkXNoPath:
            # Only unreachable pairs are skipped. Any other error (including
            # KeyboardInterrupt) propagates instead of being silently dropped.
            continue
    return np.array(shortestPaths)

In [6]:
def getRandomSourcesSPDistribution(ccGraph, numOfSamples, seed=None):
    """Collect shortest-path lengths from randomly chosen source nodes.

    For each of ``numOfSamples`` sources, drawn uniformly at random with
    replacement from the nodes of ``ccGraph``, computes the shortest-path
    length to every node reachable from that source and pools all lengths,
    leaving out the source's own zero-length entry.

    Parameters
    ----------
    ccGraph : networkx graph
        Graph to sample from.
    numOfSamples : int
        Number of random source nodes to draw.
    seed : optional
        When given, a private ``random.Random(seed)`` drives the sampling so
        the result is reproducible. When omitted, the module-level ``random``
        generator is used.

    Returns
    -------
    numpy.ndarray
        Pooled path lengths over all sampled sources; empty when the graph
        has no nodes.
    """
    graphnodes = list(ccGraph.nodes())
    if not graphnodes:
        # Nothing to sample from.
        return np.array([])

    rng = random if seed is None else random.Random(seed)
    shortestPaths = []
    for _ in range(numOfSamples):
        source = rng.choice(graphnodes)
        lengths = nx.single_source_shortest_path_length(ccGraph, source)
        # Exclude the source by key rather than by position: dropping "the
        # first value" only removes the source's 0 when the mapping happens
        # to be ordered with the source first.
        # extend() grows the list in place instead of copying it every round.
        shortestPaths.extend(
            dist for node, dist in lengths.items() if node != source)
    return np.array(shortestPaths)

In [7]:
def getStatisticsForDistribution(dist):
    """Summarise a sample of path lengths.

    Parameters
    ----------
    dist : array-like
        Sampled path lengths; must contain at least one value.

    Returns
    -------
    tuple
        ``(mean, median, diameter, effective_diameter)`` where the median is
        the 50th percentile, the diameter is the largest sampled value and
        the effective diameter is the 90th percentile of the sample.

    Raises
    ------
    ValueError
        If ``dist`` is empty.
    """
    samples = np.asarray(dist)
    if samples.size == 0:
        raise ValueError('cannot compute statistics of an empty distribution')

    dist_mean = np.mean(samples)
    dist_median = np.percentile(samples, 50)
    # np.max reduces inside NumPy; the builtin max() would walk the array
    # element by element in the interpreter.
    dist_diameter = np.max(samples)
    dist_eff_diameter = np.percentile(samples, 90)
    return dist_mean, dist_median, dist_diameter, dist_eff_diameter

### Random Pairs LSCC Stats

In [9]:
%%time
mean, median, diameter, eff_diameter = getStatisticsForDistribution(
    getRandomShortestPathDistribution(lscc, 100000));
print(mean);
print(median);
print(diameter);
print(eff_diameter);

3.30025
3.0
9
4.0
CPU times: user 47.5 s, sys: 4 ms, total: 47.5 s
Wall time: 47.5 s


### Random Pairs LWCC Stats

In [11]:
%%time
mean, median, diameter, eff_diameter = getStatisticsForDistribution(
    getRandomShortestPathDistribution(lwcc, 100000));
print(mean);
print(median);
print(diameter);
print(eff_diameter);

3.26923076923
3.0
6
4.0
CPU times: user 1min 34s, sys: 64 ms, total: 1min 34s
Wall time: 1min 34s


### Random Sources BFS LSCC Stats

In [9]:
%%time
mean, median, diameter, eff_diameter = getStatisticsForDistribution(
    getRandomSourcesSPDistribution(lscc, 100));
print(mean);
print(median);
print(diameter);
print(eff_diameter);

3.46251654676
3.0
9
4.0
CPU times: user 37.2 s, sys: 44 ms, total: 37.2 s
Wall time: 37.2 s


### Random Sources BFS LWCC Stats

In [10]:
%%time
mean, median, diameter, eff_diameter = getStatisticsForDistribution(
    getRandomSourcesSPDistribution(lwcc, 100));
print(mean);
print(median);
print(diameter);
print(eff_diameter);

3.15322725995
3.0
13
4.0
CPU times: user 16.1 s, sys: 24 ms, total: 16.2 s
Wall time: 16.2 s
