In [1]:
import json
import altair as alt
import pandas as pd
import numpy as np
from altair_saver import save
import math

from os import listdir
from os.path import isfile, isdir, join, splitext

In [2]:
input_directory = "../pareto_volta"
output_drectory = "./"

In [3]:
graphs = [f for f in listdir(input_directory) if isfile(join(input_directory, f))]

In [4]:
reorder_algorithms = ["hub", "rand", "gorder", "RCM", "write_reorder2", "deg"]
# reorder_algorithms = ["hub", "gorder", "RCM", "write_reorder2", "deg"]

In [5]:
pretty_reorder_algo = {'hub': 'hub', 'deg': 'deg', 'RCM': 'RCM',
                        'rand': 'rand', 'write_reorder2': 'BOBA', 'gorder': 'Gorder'}

In [6]:
results = pd.DataFrame(columns=['Dataset', 'Reorder algorithm', 'graph_algorithm', 'Graph algorithm runtime', 'Reorder time'])

In [7]:
graph_names_ = ['coAuthorsCiteseer', 'coAuthorsDBLP', 'delaunay_n22', 'delaunay_n23', 'delaunay_n24'
                , 'great-britain_osm', 'hollywood-2009', 'rgg_n_2_22_s0', 'rgg_n_2_23_s0', 'rgg_n_2_24_s0'
                , 'roadNet-CA', 'road_usa', 'soc-LiveJournal1', 'ljournal-2008', 'arabic-2005', 'soc-orkut',
                'kron_g500-logn21', 'kron_g500-logn20']
is_scale_free_ = [True, True,
                  False, False, False, False,
                  True,
                  False, False, False,
                  False, False, 
                  True, True, True, True, True, True]
is_scale_free_ = [False, False,
                  False, False, False, False,
                  False,
                  False, False, False,
                  False, False, 
                  False, False, False, False, True, True]                  
ignore_graphs_ = ['coAuthorsCiteseer', 'coAuthorsDBLP']

def get_graph_name(graph):
        for g in graph_names_:
                if g in graph or graph in g:
                        return g

def is_ignored_graph_name(graph):
        for g in ignore_graphs_:
                if g in graph or graph in g:
                        return True
        return False


In [8]:
def get_graph_algorithm_name(graph):
        temp = ['tc', 'sssp', 'spmv', 'pr']
        for t in temp:
                if t in graph or graph in t:
                        return t

In [9]:
def get_reorder_algorithms(graph):
    for reorder_algo in reorder_algorithms:
        if graph.lower() in reorder_algo.lower() or reorder_algo.lower() in graph.lower():
            return reorder_algo
    print(graph)

In [10]:
def get_reoder_time(input_file, reorder_algo):
    file = open(input_file)
    loaded_json = json.load(file)
    # print(loaded_json)
    reorder_times = {}
    for graph in graph_names_:
        if is_ignored_graph_name(graph):
            continue
        else:
            reorder_times[graph] = {}
    for row in loaded_json:
        graph_name = get_graph_name(row['graph_name'])
        reoder_algo = pretty_reorder_algo[get_reorder_algorithms(row['graph_name'])]
        # results[graph_name][]
        try:
            reorder_times[graph_name][reoder_algo] = float(row['Reorder_Time'])
        except:
            print("Failed to get: ", graph_name, reoder_algo)
            print(row)    
    return reorder_times

In [11]:
reorder_times = get_reoder_time(input_directory + '/reorder/reorder.json', reorder_algorithms)
# get_reoder_time(input_directory + '/reorder-time/test.json', reorder_algorithms)


Failed to get:  coAuthorsCiteseer Gorder
{'Reorder_Time': 207, 'M': 1628268, 'N': 227320, 'graph_name': 'coAuthorsCiteseer.gorder.mtx'}
Failed to get:  coAuthorsDBLP Gorder
{'Reorder_Time': 360, 'M': 1955352, 'N': 299067, 'graph_name': 'coAuthorsDBLP.gorder.mtx'}
Failed to get:  coAuthorsCiteseer RCM
{'Reorder_Time': 67, 'M': 1628268, 'N': 227320, 'graph_name': 'coAuthorsCiteseer.rcm.mtx'}
Failed to get:  coAuthorsDBLP RCM
{'Reorder_Time': 115, 'M': 1955352, 'N': 299067, 'graph_name': 'coAuthorsDBLP.rcm.mtx'}
Failed to get:  coAuthorsCiteseer deg
{'Reorder_Time': 31, 'M': 1628268, 'N': 227320, 'graph_name': 'coAuthorsCiteseer.deg.mtx'}
Failed to get:  coAuthorsDBLP deg
{'Reorder_Time': 40, 'M': 1955352, 'N': 299067, 'graph_name': 'coAuthorsDBLP.deg.mtx'}
Failed to get:  coAuthorsCiteseer hub
{'Reorder_Time': 32, 'M': 1628268, 'N': 227320, 'graph_name': 'coAuthorsCiteseer.hub.mtx'}
Failed to get:  coAuthorsDBLP hub
{'Reorder_Time': 40, 'M': 1955352, 'N': 299067, 'graph_name': 'coAuthors

In [12]:
random_result_={}
for dataset in graph_names_:
    random_result_[dataset]={}
for graph in graphs:
    file = open(input_directory + '/' + graph)
    try: 
        loaded_json = json.load(file)
        algo_time = float(loaded_json['run-time'])
        for reorder_algo in reorder_algorithms:
            if reorder_algo in graph:
                graph_name = get_graph_name(graph)
                if(is_ignored_graph_name(graph_name)):
                    continue
                reorder_algo_name = pretty_reorder_algo[reorder_algo]
                graph_algo_name = get_graph_algorithm_name(graph)
                reorder_time = float(0.)
                if 'rand' in graph:
                    reorder_time = float(0.)
                    random_result_[graph_name][graph_algo_name] = algo_time
                    # print(random_result_)
                else:
                    try:
                        reorder_time = float(reorder_times[graph_name][reorder_algo_name])
                    except: 
                        print('Failed to find reorder time for:', graph_name, reorder_algo_name)

                # print(graph_name)
                # print(reorder_algo_name)
                if reorder_algo_name == 'hub' and graph_name == 'soc-orkut':
                    print( graph_algo_name,reorder_time, algo_time)
                results.loc[len(results.index)] = [graph_name, reorder_algo_name, graph_algo_name,
                                                    algo_time, reorder_time]
    except:
        print("Failed to open: ", graph)


Failed to open:  reorder_tc_runtime_bench.log
Failed to open:  reorder_pr_runtime_bench.log
spmv 270.0 11.716096019744873
Failed to open:  reorder_sssp_runtime_bench.log
pr 270.0 580.9358520507812
Failed to open:  reorder_spmv_runtime_bench.log
tc 270.0 8899.3197265625
sssp 270.0 40.4745475769043


In [13]:
def make_pareto(algo_name, ext = 'pdf'):
    to_plot = results[results['graph_algorithm'] == algo_name.lower()]
    # to_plot = to_plot[results['reorder_algorithm'] != 'rand']


# , scale=alt.Scale(scheme='Viridis')
    chart = alt.Chart(to_plot).mark_point(filled=True).encode(
        alt.Y('Reorder time', title='Reorder time (ms)', scale=alt.Scale(type='symlog')), 
        alt.X('Graph algorithm runtime', title='Graph algorithm runtime (ms)', scale=alt.Scale(type='log')), 
        color=alt.Color('Reorder algorithm'),
        shape=alt.Shape('Dataset')).properties(
        title=algo_name).configure_point(
        size=100)
    save(chart, algo_name + '.' + ext)
    return chart

In [14]:
from altair.expr import datum
def make_pareto_combo(data, name):
    base = alt.Chart(data).mark_point(filled=True).encode(
        alt.Y('Reorder time', title='Reorder time (ms)', scale=alt.Scale(type='symlog')), 
        alt.X('Graph algorithm runtime', title='Graph algorithm runtime (ms)', scale=alt.Scale(type='log')), 
        color=alt.Color('Reorder algorithm'),
        shape=alt.Shape('Dataset'),
        tooltip=['Dataset', 'Reorder algorithm', 'Graph algorithm runtime', 'Reorder time']).properties()
        # .interactive()

    
    chart = alt.vconcat()   
    row = alt.hconcat()

    for algo in ['TC', 'SSSP']:
        row |= base.transform_filter(datum.graph_algorithm == algo.lower()).properties(title=algo)
    chart &= row

    row = alt.hconcat()
    for algo in ['SpMV', 'PR']:
        row |= base.transform_filter(datum.graph_algorithm == algo.lower()).properties(title=algo)
    chart &= row
    save(chart, name)
    return chart

def make_cutoff_figure(x_max = 6):
    round=0.2
    x_max = math.ceil(x_max / round)

    cutoff = pd.DataFrame({
    'start': [0, 1],
    'stop': [1, x_max * round],
    'color': ['white', 'gray']})

    areas = alt.Chart(
        cutoff.reset_index()
    ).mark_rect(
        opacity=1.0
    ).encode(
        x='start',
        x2='stop',
        y=alt.value(0),
        y2=alt.value(300), 
        color=alt.Color('color', scale=None))
    return areas
def make_cutoff_line():
    return alt.Chart(pd.DataFrame({'x': [1.002]})).mark_rule(strokeWidth=2).encode(x='x')



def make_pareto_combo_normalized(data, name, y_axis_ticks_):
    # y_axis_ticks=[*range(0, 160000, 1000)]
    base = alt.Chart(data).mark_point(filled=True).encode(
        alt.Y('Reorder time', title='Reorder time (ms)', scale=alt.Scale(type='symlog'), axis=alt.Axis(values=y_axis_ticks_)), 
        alt.X('Graph algorithm runtime', title='Graph algorithm runtime normalized to random'), 
        color=alt.Color('Reorder algorithm'),
        shape=alt.Shape('Dataset'),
        tooltip=['Dataset', 'Reorder algorithm', 'Graph algorithm runtime', 'Reorder time']).properties().interactive()

    
    chart = alt.vconcat()   
    row = alt.hconcat()


    for algo in ['TC', 'SSSP']:
        c = base.transform_filter(datum.graph_algorithm == algo.lower()).properties(title=algo)
        # max_x = max(data[data['graph_algorithm'] == algo.lower()]['Graph algorithm runtime'])
        row |= (make_cutoff_line() + c) 
    chart &= row

    row = alt.hconcat()
    for algo in ['SpMV', 'PR']:
        c = base.transform_filter(datum.graph_algorithm == algo.lower()).properties(title=algo)
        max_x = max(data[data['graph_algorithm'] == algo.lower()]['Graph algorithm runtime'])
        row |= (make_cutoff_line() + c) 
    chart &= row


    # chart = chart + areas
    save(chart, name)
    return chart

In [15]:
# normalize and plot
# random_result = results[(results['Reorder algorithm'] == 'rand')].sort_values('Dataset')
reorder_algos_results = results[results['Reorder algorithm'] != 'rand']
reorder_algos_results['Graph algorithm runtime'] = reorder_algos_results.apply(
    # lambda row : row['Graph algorithm runtime'] / random_result_[row['Dataset']][row['graph_algorithm']] , axis = 1)
    lambda row : row['Graph algorithm runtime'] , axis = 1)


y_axis_ticks=[100, 1000, 5000, 10000, 15000, 25000]
regular_data=reorder_algos_results[reorder_algos_results.apply(lambda x:  
            not is_scale_free_[graph_names_.index(x['Dataset'])], axis=1)]
make_pareto_combo_normalized(regular_data, 'uniform_graphs_normalized.pdf', y_axis_ticks)


y_axis_ticks=[100, 2000, 10000, 20000, 40000, 60000, 100000, 160000, 180000, 25000]
scale_free_data=reorder_algos_results[reorder_algos_results.apply(lambda x:  
            is_scale_free_[graph_names_.index(x['Dataset'])], axis=1)]
make_pareto_combo_normalized(scale_free_data, 'scale_free_graphs_normalized.pdf', y_axis_ticks)
# print(reorder_algos_results['Graph algorithm runtime normalized to random'])
# for col in results:
    # print(row)
        # ds = row['Dataset']
    # algo = row['graph_algorithm']


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  reorder_algos_results['Graph algorithm runtime'] = reorder_algos_results.apply(
WARN Can not resolve event source: window
WARN Can not resolve event source: window
WARN Can not resolve event source: window
WARN Can not resolve event source: window
WARN Can not resolve event source: window
WARN Can not resolve event source: window
WARN Can not resolve event source: window
WARN Can not resolve event source: window
WARN Can not resolve event source: window
WARN Can not resolve event source: window
WARN Can not resolve event source: window
WARN Can not resolve event source: window
WARN Can not resolve event source: window
WARN Can not resolve event source: window
WARN Can not resolve event source: window
WARN Can not resolve even

In [16]:

regular_data=results[results.apply(lambda x:  
            not is_scale_free_[graph_names_.index(x['Dataset'])], axis=1)]
make_pareto_combo(regular_data, 'uniform_graphs.pdf')


scale_free_data=results[results.apply(lambda x:  
            is_scale_free_[graph_names_.index(x['Dataset'])], axis=1)]
make_pareto_combo(scale_free_data, 'scale_free_graphs.pdf')


In [17]:
results[(results['Dataset'] == 'kron_g500-logn20') & (results['graph_algorithm'] == 'spmv')].sort_values('Graph algorithm runtime')
# results[('kron'results['Dataset']) & (results['graph_algorithm'] == 'spmv')].sort_values('Graph algorithm runtime')

Unnamed: 0,Dataset,Reorder algorithm,graph_algorithm,Graph algorithm runtime,Reorder time
266,kron_g500-logn20,RCM,spmv,2.43712,2049.0
132,kron_g500-logn20,BOBA,spmv,2.655642,17.0
275,kron_g500-logn20,deg,spmv,2.707149,139.0
148,kron_g500-logn20,Gorder,spmv,2.967654,41705.0
147,kron_g500-logn20,rand,spmv,3.530547,0.0
339,kron_g500-logn20,hub,spmv,4.590285,129.0


In [18]:
results[(results['Reorder algorithm'] == 'Gorder') & (results['graph_algorithm'] == 'tc')].sort_values('Dataset')


Unnamed: 0,Dataset,Reorder algorithm,graph_algorithm,Graph algorithm runtime,Reorder time
234,delaunay_n22,Gorder,tc,2.254541,3284.0
17,delaunay_n23,Gorder,tc,3.693773,6742.0
333,delaunay_n24,Gorder,tc,6.940569,13619.0
224,great-britain_osm,Gorder,tc,1.305702,2835.0
320,hollywood-2009,Gorder,tc,12249.155371,61808.0
69,kron_g500-logn20,Gorder,tc,180308.764063,41705.0
201,kron_g500-logn21,Gorder,tc,469259.334375,153116.0
296,rgg_n_2_22_s0,Gorder,tc,7.150694,6700.0
290,rgg_n_2_23_s0,Gorder,tc,14.628144,14233.0
262,rgg_n_2_24_s0,Gorder,tc,31.328359,29622.0


In [19]:
# make_pareto('SpMV')

In [20]:
y_axis_ticks=*range(0, 160000, 1000)

y_axis_ticks

SyntaxError: can't use starred expression here (2270159168.py, line 1)