In [1]:
import sys
sys.path.append('..')
import quarantines as Q
import quarantines_mongo as qm
import graph_generators as gg 
import networkx as nx 
import networkit as nk
import pymongo
import pickle
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from prettytable import PrettyTable
from tabulate import tabulate
from scipy import stats
from pprint import pprint
import os 
import random
import experiment_helpers as eh
import utils
sns.set()

In [5]:
# Load the experiment results through the project helper. Later cells iterate
# over this object's keys (dataset names) and pass it to eh.get_minR_graph.
data = eh.gather_data()

dict_keys(['ba10_2Q', 'fb_artist_2Q'])

In [3]:
### FIRST MAKE TABLE FOR SYNTHETIC GRAPH SUMMARY
# Each table row should contain:
# {Type, Parameter, ogQ, bestQ-deg, perc-change, og-sp, bestQ-sp, perc-change}
# ROWS: BA5, BA10, PLC5, PLC10, RW, NN, WS
# Map every dataset name ending in '_0.5' to the result of eh.parse_data_name.
SYNTH_GRAPHS = {
    dataset_name: eh.parse_data_name(dataset_name)
    for dataset_name in data
    if dataset_name.endswith('_0.5')
}

def get_perc_change(x1, x2):
    """Return the change from x1 to x2 expressed as a fraction of x1.

    For example, 0.5 means x2 is 50% larger than x1 and -0.5 means it is
    50% smaller. No guard is applied for x1 == 0; the division runs as-is.
    """
    delta = x2 - x1
    return delta / x1
    
def get_change_summary(data, name):
    """Compare the graph parsed from `name` with its min-R counterpart.

    Args:
        data: results object forwarded to eh.get_minR_graph.
        name: dataset name, used for both eh.parse_data_name and
            eh.get_minR_graph.

    Returns:
        dict with keys 'avg_deg' and 'sp'. Each value is a list
        [original, min-R, fractional change], where the first two entries
        come from utils.avg_deg / utils.avg_shortest_path respectively and
        the third from get_perc_change.
    """
    original = eh.parse_data_name(name)
    min_r = eh.get_minR_graph(data, name)

    # Metrics are evaluated for the original graph first, then the min-R graph.
    deg_before, sp_before = utils.avg_deg(original), utils.avg_shortest_path(original)
    deg_after, sp_after = utils.avg_deg(min_r), utils.avg_shortest_path(min_r)

    summary = {}
    summary['avg_deg'] = [deg_before, deg_after, get_perc_change(deg_before, deg_after)]
    summary['sp'] = [sp_before, sp_after, get_perc_change(sp_before, sp_after)]
    return summary

def make_row_floats(rowdict):
    """Flatten a change summary into one list of formatted table cells.

    `rowdict` must provide the keys 'avg_deg' and 'sp'; each value is passed
    to format_trip. The 'avg_deg' cells come first, followed by the 'sp' cells.
    """
    cells = []
    for metric in ('avg_deg', 'sp'):
        cells.extend(format_trip(rowdict[metric]))
    return cells
    
def format_trip(trip):
    """Render a [before, after, fractional change] triple as three strings.

    The first two entries are formatted with two decimals. The third is
    formatted as a percentage with two decimals and gets a leading '+'
    only when it is strictly positive (zero and negatives get no prefix).
    """
    before_txt = '%.2f' % trip[0]
    after_txt = '%.2f' % trip[1]
    change = trip[2]
    if change > 0:
        sign = '+'
    else:
        sign = ''
    return [before_txt, after_txt, sign + "{:.2%}".format(change)]

def get_row(name):
    """Build the formatted table cells for dataset `name`.

    Uses the notebook-level `data` to compute the change summary, then
    returns the list of strings produced by make_row_floats.
    """
    summary = get_change_summary(data, name)
    return make_row_floats(summary)
    

In [4]:
# Show the dataset names available in `data` (used as keys in the cells below).
data.keys()

dict_keys(['ba10', 'ba5', 'plc10.25', 'plc5.5', 'rw.91.94', 'nn.886', 'ws10.05', 'fb.artist', 'fb.athletes', 'fb.company', 'fb.new_sites', 'fb.government', 'deezer.RO', 'deezer.HR', 'deezer.HU', 'arxiv.AstroPh', 'arxiv.CondMat', 'arxiv.HepPh', 'arxiv.GrQc', 'arxiv.HepTh', 'hiv50R1', 'hiv50R3', 'hiv50R5', 'hs5_10', 'hs15_10', 'hs30_10', 'hs60_10', 'plc_5_0.5_0.03125', 'ba_5_0.25', 'ws_10_0.05_0.125', 'plc_5_0.5_0.25', 'ws_10_0.05_0.03125', 'plc_5_0.5_0.0625', 'ba_5_0.0625', 'ba_10_0.03125', 'ws_10_0.05_0.25', 'ba_5_0.03125', 'ws_10_0.05_2', 'rw_0.91_0.94_0.03125', 'ws_10_0.05_0.0625', 'rw_0.91_0.94_0.125', 'ba_5_0.125', 'ws_10_0.05_1', 'ws_10_0.05_0.5', 'plc_10_0.25_0.03125', 'ba_5_1', 'plc_5_0.5_0.125', 'rw_0.91_0.94_0.0625', 'ba_5_0.5', 'ws_10_0.05_4', 'ba_10_0.0625', 'plc_10_0.25_0.125', 'ws_10_0.05_8', 'ba_5_2', 'nn_0.88_6_0.03125', 'rw_0.91_0.94_0.25', 'ba_5_8', 'plc_5_0.5_2', 'ba_10_0.25', 'plc_5_0.5_0.5', 'ba_10_2', 'ba_5_4', 'plc_5_0.5_8', 'plc_5_0.5_1', 'ba_10_0.125', 'plc_10_0

In [None]:
# One row per synthetic network: [type, LaTeX parameter label] + formatted metrics.
row_strs = [
    [net_type, params] + get_row(dataset_name)
    for net_type, params, dataset_name in (
        ('BA', '($m=5$)', 'ba_5_8'),
        ('BA', '($m=10$)', 'ba_10_8'),
        ('NN', '($u=0.88, k=6$)', 'nn_0.88_6_8'),
        ('PLC', '($m=5, p=0.5$)', 'plc_5_0.5_8'),
        ('PLC', '($m=10, p=0.25$)', 'plc_10_0.25_8'),
        ('RW', '($q_e=0.91, q_v=0.94$)', 'rw_0.91_0.94_8'),
        ('WS', '($k=10, p=0.05$)', 'ws_10_0.05_8'),
    )
]



In [None]:
# Column titles for the synthetic-graph table (two label columns + six metric columns).
HEADERS = ['Net Type', 'Params', 'Deg', 'Deg\'', '% Change', 'S.P.', 'S.P\'', '% Change']
# Undo the backslash-escaping of '$' in the generated LaTeX so the math-mode
# parameter labels survive. The pattern is a raw string because '\$' in a
# plain literal is an invalid escape sequence (warns on current Pythons).
# NOTE(review): tabulate presumably escapes other characters too (e.g. '_' in
# $q_e$) — verify the rendered table.
print(tabulate(row_strs, headers=HEADERS, tablefmt='latex_booktabs').replace(r'\$', '$'))

In [None]:
# Names of the datasets whose keys start with 'arxiv', 'fb' or 'deezer'.
REAL_GRAPHS = [
    dataset_name
    for dataset_name in data.keys()
    if dataset_name.startswith(('arxiv', 'fb', 'deezer'))
]



In [None]:
# Build one table row per dataset in REAL_GRAPHS, in alphabetical order.
REAL_ROWS = []
print(REAL_GRAPHS)
for graph_name in sorted(REAL_GRAPHS):
    # Echo the name before computing, so progress is visible while get_row runs.
    print(graph_name)
    REAL_ROWS.append([graph_name] + get_row(graph_name))

In [None]:
# Column titles for the real-graph table (name column + six metric columns).
HEADERS = ['Net Type', 'Deg', 'Deg\'', '% Change', 'S.P.', 'S.P\'', '% Change']
# Undo the backslash-escaping of '$' in the generated LaTeX. The pattern is a
# raw string because '\$' in a plain literal is an invalid escape sequence.
print(tabulate(REAL_ROWS, headers=HEADERS, tablefmt='latex_booktabs').replace(r'\$', '$'))

In [None]:
    
def get_change_summary(data, name):
    """Summarise how average degree and shortest path differ for `name`.

    NOTE: this re-defines get_change_summary, which an earlier cell of this
    notebook already defines with the same behaviour.

    Returns a dict with keys 'avg_deg' and 'sp'; each value is the list
    [original value, min-R value, fractional change].
    """
    og_graph = eh.parse_data_name(name)
    min_r_graph = eh.get_minR_graph(data, name)

    # Dict literals evaluate in order: original-graph metrics first, then min-R.
    before = {'avg_deg': utils.avg_deg(og_graph),
              'sp': utils.avg_shortest_path(og_graph)}
    after = {'avg_deg': utils.avg_deg(min_r_graph),
             'sp': utils.avg_shortest_path(min_r_graph)}

    return {
        metric: [before[metric], after[metric],
                 get_perc_change(before[metric], after[metric])]
        for metric in ('avg_deg', 'sp')
    }



In [None]:
# Spot-check: display the change summary for the 'ba_10_0.5' dataset.
get_change_summary(data, 'ba_10_0.5')


In [None]:
# One row per synthetic network: [type, LaTeX parameter label] + formatted floats.
# NOTE(review): format_float and SYNTH_FLOATS are not defined in any cell
# visible in this notebook — confirm they exist before running this cell.
row_strs = [
    [net_type, params] + format_float(SYNTH_FLOATS[dataset_name])
    for net_type, params, dataset_name in (
        ('BA', '($m=5$)', 'ba_5_8'),
        ('BA', '($m=10$)', 'ba_10_8'),
        ('NN', '($u=0.88, k=6$)', 'nn_0.88_6_8'),
        ('PLC', '($m=5, p=0.5$)', 'plc_5_0.5_8'),
        ('PLC', '($m=10, p=0.25$)', 'plc_10_0.25_8'),
        ('RW', '($q_e=0.91, q_v=0.94$)', 'rw_0.91_0.94_8'),
        ('WS', '($k=10, p=0.05$)', 'ws_10_0.05_8'),
    )
]



In [None]:
# Column titles for the synthetic-graph property table.
HEADERS = ['Net Type', 'Params', 'Deg', 'Cluster', 'Pathlength', 'Powerlaw Exp.']
# Undo the backslash-escaping of '$' in the generated LaTeX. The pattern is a
# raw string because '\$' in a plain literal is an invalid escape sequence.
print(tabulate(row_strs, headers=HEADERS, tablefmt='latex_booktabs').replace(r'\$', '$'))

In [None]:
### NEXT MAKE TABLE FOR REAL GRAPH SUMMARY
# Each table row should contain:
# {Name, N, Average Degree, Clustering, Avg Shortest Path, Powerlaw Exponent}
# NOTE: this rebinds REAL_GRAPHS, which an earlier cell defines as a plain list
# of names, to a dict mapping each name to the result of eh.parse_data_name.
REAL_GRAPHS = {
    dataset_name: eh.parse_data_name(dataset_name)
    for dataset_name in data.keys()
    if dataset_name.startswith(('arxiv', 'fb', 'deezer'))
}

In [None]:
# Per-dataset row values, keyed by dataset name.
# NOTE(review): make_row_floats as defined earlier in this notebook indexes its
# argument with 'avg_deg' and 'sp', while the values here come straight from
# eh.parse_data_name — this cell appears to expect a different make_row_floats;
# confirm before running.
REAL_FLOATS = {
    graph_name: make_row_floats(graph)
    for graph_name, graph in REAL_GRAPHS.items()
}

In [None]:
# One row per real dataset, ordered alphabetically by name:
# [name, node count with thousands separators] + formatted floats.
REAL_ROWS = []
for graph_name in sorted(REAL_FLOATS):
    # NOTE(review): the "- 2" offset is not explained in this notebook —
    # presumably it discounts two auxiliary entries; confirm.
    node_count = f"{len(REAL_GRAPHS[graph_name]) - 2:,}"
    REAL_ROWS.append([graph_name, node_count] + format_float(REAL_FLOATS[graph_name]))


In [None]:
# Column titles for the real-graph property table.
REAL_HEADERS = ['Name', 'Nodes', 'Deg', 'Cluster', 'Pathlength', 'Powerlaw Exp.']
# Pass REAL_HEADERS rather than the HEADERS left over from the synthetic table,
# so the first two columns are labelled 'Name' / 'Nodes'.
# The replace pattern is a raw string because '\$' in a plain literal is an
# invalid escape sequence.
print(tabulate(REAL_ROWS, headers=REAL_HEADERS, tablefmt='latex_booktabs').replace(r'\$', '$'))