In [1]:
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
import time
import random

In [2]:
# Directory prefix prepended to every edge-list file name that is loaded.
FILE_PATH_PREFIX = './data/'

# Parsing options forwarded to nx.read_edgelist: fields are tab-separated and
# a single space is used as the comment marker.
delimiter = '\t'
comments = ' '

In [5]:
%%time
# Read the 'lscc' edge list (presumably the largest strongly connected
# component -- confirm against the data set) into a directed graph whose node
# ids are parsed as integers.
lscc = nx.read_edgelist(
    FILE_PATH_PREFIX + 'soc_edges_lscc.txt',
    nodetype=int,
    create_using=nx.DiGraph(),
    delimiter=delimiter,
    comments=comments)

CPU times: user 1.53 s, sys: 8 ms, total: 1.54 s
Wall time: 1.54 s


In [6]:
%%time
# Read the 'lwcc' edge list (presumably the largest weakly connected
# component -- confirm against the data set) into a directed graph whose node
# ids are parsed as integers.
lwcc = nx.read_edgelist(
    FILE_PATH_PREFIX + 'soc_edges_lwcc.txt',
    nodetype=int,
    create_using=nx.DiGraph(),
    delimiter=delimiter,
    comments=comments)

CPU times: user 1.61 s, sys: 24 ms, total: 1.64 s
Wall time: 1.63 s


In [10]:
def readEdgeFromFile(filename, comments=' ', delimiter='\t', nodetype=float):
    """Load an edge list as a directed graph plus its undirected counterpart.

    Parameters
    ----------
    filename : str
        File name relative to ``FILE_PATH_PREFIX``.
    comments : str
        Comment marker forwarded to ``nx.read_edgelist``.
    delimiter : str
        Field separator forwarded to ``nx.read_edgelist``.
    nodetype : callable
        Converter applied to each node label. Defaults to ``float`` to keep
        the historical behaviour of this helper; pass ``int`` to get the same
        node ids as the graphs loaded directly with ``nodetype=int``.

    Returns
    -------
    tuple
        ``(G, unG)`` where ``G`` is the ``nx.DiGraph`` read from the file and
        ``unG`` is ``G.to_undirected()``.
    """
    G = nx.read_edgelist(FILE_PATH_PREFIX + filename,
                         comments=comments,
                         delimiter=delimiter,
                         nodetype=nodetype,
                         create_using=nx.DiGraph())
    return G, G.to_undirected()

In [11]:
def getRandomShortestPathDistribution(ccGraph, numOfSamples, seed=None):
    """Sample shortest-path lengths between random (source, target) pairs.

    Each sample draws a source and a target independently and uniformly from
    the nodes of ``ccGraph`` and records ``nx.shortest_path_length`` between
    them. Pairs with no connecting path are skipped, so the result may hold
    fewer than ``numOfSamples`` values. Source and target may coincide, in
    which case a length of 0 is recorded.

    Parameters
    ----------
    ccGraph : networkx graph
        Graph to sample from.
    numOfSamples : int
        Number of node pairs to draw.
    seed : optional
        When given, a private ``random.Random(seed)`` drives the sampling so
        the result is reproducible; otherwise the module-level ``random``
        state is used, as before.

    Returns
    -------
    numpy.ndarray
        The sampled path lengths (empty if the graph has no nodes).
    """
    graphnodes = list(ccGraph.nodes())
    if not graphnodes:
        # Nothing to sample from; an empty result matches the old behaviour.
        return np.array([])

    rng = random if seed is None else random.Random(seed)
    shortestPaths = []
    for _ in range(numOfSamples):
        source = rng.choice(graphnodes)
        target = rng.choice(graphnodes)
        try:
            shortestPaths.append(
                nx.shortest_path_length(ccGraph, source, target))
        except nx.NetworkXNoPath:
            # Expected when the target is unreachable from the source (e.g. a
            # directed graph that is not strongly connected). Any other error
            # is a real problem and is allowed to propagate.
            continue
    return np.array(shortestPaths)

In [12]:
def getRandomSourcesSPDistribution(ccGraph, numOfSamples, seed=None):
    """Collect shortest-path lengths from randomly chosen source nodes.

    For each sample a source is drawn uniformly from the nodes of ``ccGraph``
    and ``nx.single_source_shortest_path_length`` is run from it. The lengths
    to every node it reports, other than the source itself, are appended to
    the result.

    Parameters
    ----------
    ccGraph : networkx graph
        Graph to sample from.
    numOfSamples : int
        Number of source nodes to draw (the same node may be drawn twice).
    seed : optional
        When given, a private ``random.Random(seed)`` drives the sampling so
        the result is reproducible; otherwise the module-level ``random``
        state is used, as before.

    Returns
    -------
    numpy.ndarray
        All collected path lengths (empty if the graph has no nodes).
    """
    graphnodes = list(ccGraph.nodes())
    if not graphnodes:
        # Nothing to sample from; an empty result matches the old behaviour.
        return np.array([])

    rng = random if seed is None else random.Random(seed)
    shortestPaths = []
    for _ in range(numOfSamples):
        source = rng.choice(graphnodes)
        lengths = nx.single_source_shortest_path_length(ccGraph, source)
        # Drop the source's own zero-length entry by node identity rather than
        # by position: slicing off "the first value" is only correct when the
        # mapping happens to list the source first.
        # extend() also avoids re-copying the accumulated list every sample.
        shortestPaths.extend(
            length for node, length in lengths.items() if node != source)
    return np.array(shortestPaths)

In [13]:
def getStatisticsForDistribution(dist):
    """Summarise a sample of path lengths.

    Parameters
    ----------
    dist : array-like
        Non-empty collection of path lengths.

    Returns
    -------
    tuple
        ``(mean, median, diameter, effective_diameter)`` where the median is
        the 50th percentile, the diameter is the largest value present in
        ``dist`` and the effective diameter is the 90th percentile. When
        ``dist`` is only a sample of all paths, the reported diameter is the
        largest sampled length, not necessarily the graph's true diameter.

    Raises
    ------
    ValueError
        If ``dist`` is empty.
    """
    values = np.asarray(dist)
    if values.size == 0:
        raise ValueError('cannot compute statistics of an empty distribution')

    dist_mean = np.mean(values)
    dist_median = np.percentile(values, 50)
    # np.max reduces inside NumPy; the builtin max() would iterate the array
    # one Python object at a time.
    dist_diameter = np.max(values)
    dist_eff_diameter = np.percentile(values, 90)
    return dist_mean, dist_median, dist_diameter, dist_eff_diameter

### Random LSCC Stats

In [43]:
%%time
# Path-length statistics of `lscc`, estimated from 40000 random node pairs.
mean, median, diameter, eff_diameter = getStatisticsForDistribution(
    getRandomShortestPathDistribution(lscc, 40000))
# One value per line: mean, median, diameter, effective diameter.
print('\n'.join(str(stat) for stat in (mean, median, diameter, eff_diameter)))

4.410875
4.0
11
6.0
CPU times: user 2.87 s, sys: 4 ms, total: 2.87 s
Wall time: 2.87 s


### Random LWCC Stats

In [47]:
%%time
# Path-length statistics of `lwcc`, estimated from 40000 random node pairs.
mean, median, diameter, eff_diameter = getStatisticsForDistribution(
    getRandomShortestPathDistribution(lwcc, 40000))
# One value per line: mean, median, diameter, effective diameter.
print('\n'.join(str(stat) for stat in (mean, median, diameter, eff_diameter)))

4.45299145299
4.0
12
6.0
CPU times: user 19.4 s, sys: 0 ns, total: 19.4 s
Wall time: 19.4 s


### Random Sources BFS LSCC

In [26]:
%%time
# Path-length statistics of `lscc`, estimated from 50 random source nodes.
mean, median, diameter, eff_diameter = getStatisticsForDistribution(
    getRandomSourcesSPDistribution(lscc, 50))
# One value per line: mean, median, diameter, effective diameter.
print('\n'.join(str(stat) for stat in (mean, median, diameter, eff_diameter)))

4.41455527279
4.0
11
6.0
CPU times: user 10.6 s, sys: 8 ms, total: 10.7 s
Wall time: 10.6 s


### Random Sources BFS LWCC

In [39]:
%%time
# Path-length statistics of `lwcc`, estimated from 50 random source nodes.
mean, median, diameter, eff_diameter = getStatisticsForDistribution(
    getRandomSourcesSPDistribution(lwcc, 50))
# One value per line: mean, median, diameter, effective diameter.
print('\n'.join(str(stat) for stat in (mean, median, diameter, eff_diameter)))

4.36385241173
4.0
16
6.0
CPU times: user 1.32 s, sys: 4 ms, total: 1.32 s
Wall time: 1.32 s


## EXACT STATISTICS

### The following exact statistics were calculated on force.aalto.fi.
### The script that generated them, exact_statistics.py, is in the same folder as this notebook.

### Exact LWCC Stats

EXACT LWCC Mean: 4.4536130778 

EXACT LWCC Median: 4.0 

EXACT LWCC Diameter: 19

EXACT LWCC Effective Diameter: 6.0

### Exact LSCC Stats

EXACT LSCC Mean: 4.4048

EXACT LSCC Median: 4.0 

EXACT LSCC Diameter: 16

EXACT LSCC Effective Diameter: 6.0