In [None]:
import sys
sys.path.append('..')
import quarantines as Q
import quarantines_mongo as qm
import graph_generators as gg 
import networkx as nx 
import networkit as nk
import pymongo
import pickle
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from prettytable import PrettyTable
from tabulate import tabulate
from scipy import stats
from pprint import pprint
import os 
import random
import experiment_helpers as eh
import utils
sns.set()

In [None]:
data = eh.gather_data()

In [13]:
### FIRST MAKE TABLE FOR SYNTHETIC GRAPH SUMMARY
# Want row to have 
# {Type, Parameter, Average Degree, Clustering, Avg Shortest Path, Powerlaw Exponent}
# ROWS: BA5, BA10, PLC5, PLC10, RW, NN, WS
SYNTH_GRAPHS = {_: eh.parse_data_name(_) for _ in [_ for _ in data if _.endswith('_8')]}
def make_row_floats(G):
    return (utils.avg_deg(G), 
            nx.average_clustering(G), 
            utils.avg_shortest_path(G, samples=10 ** 5),
            utils.get_powerlaw_exponent_cbin(G))
def format_float(float_tup):
    output = [] 
    output.append('%.2f' % float_tup[0])
    output.append('%.3f' % float_tup[1])
    output.append('%.2f' % float_tup[2])
    output.append('%.2f' % float_tup[3])
    return output

In [None]:

SYNTH_FLOATS = {k: make_row_floats(v) for k,v in SYNTH_GRAPHS.items()}

In [16]:
sorted(SYNTH_FLOATS.keys())

['ba_10_8',
 'ba_5_8',
 'nn_0.88_6_8',
 'plc_10_0.25_8',
 'plc_5_0.5_8',
 'rw_0.91_0.94_8',
 'ws_10_0.05_8']

In [28]:
row_strs = []
row_strs.append(['BA', '($m=5$)'] + format_float(SYNTH_FLOATS['ba_5_8']))
row_strs.append(['BA', '($m=10$)'] + format_float(SYNTH_FLOATS['ba_10_8']))
row_strs.append(['NN', '($u=0.88, k=6$)'] + format_float(SYNTH_FLOATS['nn_0.88_6_8']))
row_strs.append(['PLC', '($m=5, p=0.5$)'] + format_float(SYNTH_FLOATS['plc_5_0.5_8']))
row_strs.append(['PLC', '($m=10, p=0.25$)'] + format_float(SYNTH_FLOATS['plc_10_0.25_8']))
row_strs.append(['RW', '($q_e=0.91, q_v=0.94$)'] + format_float(SYNTH_FLOATS['rw_0.91_0.94_8']))
row_strs.append(['WS', '($k=10, p=0.05$)'] + format_float(SYNTH_FLOATS['ws_10_0.05_8']))



In [32]:
HEADERS = ['Net Type', 'Params', 'Deg', 'Cluster', 'Pathlength', 'Powerlaw Exp.']
print(tabulate(row_strs, headers=HEADERS, tablefmt='latex_booktabs').replace('\$', '$'))

\begin{tabular}{llrrrr}
\toprule
 Net Type   & Params                 &   Deg &   Cluster &   Pathlength &   Powerlaw Exp. \\
\midrule
 BA         & ($m=5$)                &  9.99 &     0.007 &         3.66 &            2.94 \\
 BA         & ($m=10$)               & 19.98 &     0.011 &         3.06 &            2.98 \\
 NN         & ($u=0.88, k=6$)        & 26.29 &     0.124 &         3.41 &            2.62 \\
 PLC        & ($m=5, p=0.5$)         &  9.99 &     0.178 &         3.53 &            2.67 \\
 PLC        & ($m=10, p=0.25$)       & 19.96 &     0.059 &         2.97 &            2.76 \\
 RW         & ($q\_e=0.91, q\_v=0.94$) & 19.32 &     0.285 &         3.45 &            2.76 \\
 WS         & ($k=10, p=0.05$)       & 10    &     0.574 &         7.47 &           12.92 \\
\bottomrule
\end{tabular}


In [35]:
### NEXT MAKE TABLE FOR REAL GRAPH SUMMARY
# Want row to have 
# {Name, N, Average Degree, Clustering, Avg Shortest Path, Powerlaw Exponent}
REAL_GRAPHS = {_: eh.parse_data_name(_) for _ in 
                              [_ for _ in data.keys() if _.startswith('arxiv') 
                                                      or _.startswith('fb')
                                                      or _.startswith('deezer')]}

In [37]:
REAL_FLOATS = {k: make_row_floats(v) for k,v in REAL_GRAPHS.items()}

41,773


In [45]:
REAL_ROWS = []
for k, v in sorted(REAL_FLOATS.items(), key=lambda p: p[0]):
    row = [k, f"{len(REAL_GRAPHS[k]) - 2:,}",] + format_float(v)
    REAL_ROWS.append(row)


[['arxiv.AstroPh', '18,770', '21.11', '0.631', '4.19', '2.83'],
 ['arxiv.CondMat', '23,131', '8.08', '0.633', '5.36', '3.11'],
 ['arxiv.GrQc', '5,240', '5.53', '0.530', '6.05', '2.88'],
 ['arxiv.HepPh', '12,006', '19.74', '0.611', '4.67', '2.31'],
 ['arxiv.HepTh', '9,875', '5.26', '0.471', '5.94', '3.28'],
 ['deezer.HR', '54,573', '18.26', '0.136', '4.50', '3.39'],
 ['deezer.HU', '47,538', '9.38', '0.116', '5.34', '3.79'],
 ['deezer.RO', '41,773', '6.02', '0.091', '6.35', '3.70'],
 ['fb.artist', '50,515', '32.44', '0.138', '3.69', '2.64'],
 ['fb.athletes', '13,866', '12.53', '0.276', '4.28', '2.85'],
 ['fb.company', '14,113', '7.41', '0.239', '5.31', '2.97'],
 ['fb.government', '7,057', '25.35', '0.411', '3.78', '2.58'],
 ['fb.new_sites', '27,917', '14.78', '0.295', '4.39', '2.85']]

In [47]:
REAL_HEADERS = ['Name', 'Nodes', 'Deg', 'Cluster', 'Pathlength', 'Powerlaw Exp.']
print(tabulate(REAL_ROWS, headers=HEADERS, tablefmt='latex_booktabs').replace('\$', '$'))

\begin{tabular}{llrrrr}
\toprule
 Net Type      & Params   &   Deg &   Cluster &   Pathlength &   Powerlaw Exp. \\
\midrule
 arxiv.AstroPh & 18,770   & 21.11 &     0.631 &         4.19 &            2.83 \\
 arxiv.CondMat & 23,131   &  8.08 &     0.633 &         5.36 &            3.11 \\
 arxiv.GrQc    & 5,240    &  5.53 &     0.53  &         6.05 &            2.88 \\
 arxiv.HepPh   & 12,006   & 19.74 &     0.611 &         4.67 &            2.31 \\
 arxiv.HepTh   & 9,875    &  5.26 &     0.471 &         5.94 &            3.28 \\
 deezer.HR     & 54,573   & 18.26 &     0.136 &         4.5  &            3.39 \\
 deezer.HU     & 47,538   &  9.38 &     0.116 &         5.34 &            3.79 \\
 deezer.RO     & 41,773   &  6.02 &     0.091 &         6.35 &            3.7  \\
 fb.artist     & 50,515   & 32.44 &     0.138 &         3.69 &            2.64 \\
 fb.athletes   & 13,866   & 12.53 &     0.276 &         4.28 &            2.85 \\
 fb.company    & 14,113   &  7.41 &     0.239 &         