In [1]:
import numpy as np
import pandas as pd

import json
import glob
import os
import re

In [2]:
graph_pattern = re.compile(".*Reading TPGR file '(.+)/graph.tpgr'.*")
total_runtime_pattern = re.compile(".*Ordering took ([0-9\\.]+) sec in total\\..*")
io_runtime_pattern = re.compile(".*Writing hierarchy to file took ([0-9\\.]+) sec in total\\..*")
no_io_runtime_pattern = re.compile(".*Ordering took ([0-9\\.]+) sec without writing to file\\..*")
thread_count_pattern = re.compile(".*Preprocessing running with (\d+) threads.*")

def parse_katch_contraction_output(path):
  stats = {}
  with open(path, 'r') as f:
    for line in f:
      match = graph_pattern.match(line)
      if match:
        stats['graph'] = match[1]

      match = total_runtime_pattern.match(line)
      if match:
        stats['running_time_s'] = float(match[1])

      match = io_runtime_pattern.match(line)
      if match:
        stats['io_running_time_s'] = float(match[1])

      match = no_io_runtime_pattern.match(line)
      if match:
        stats['no_io_running_time_s'] = float(match[1])
        
      match = thread_count_pattern.match(line)
      if match:
        stats['num_threads'] = int(match[1])

  return stats

In [3]:
paths = glob.glob(os.environ['TDCCH_EXP_OUTPUT_DIR'] + "/katch/contraction/*.out")
katch_contraction_stats = pd.DataFrame.from_records([parse_katch_contraction_output(path) for path in paths])
katch_contraction_stats.set_index('graph', inplace=True)

In [4]:
graph_pattern = re.compile(".*Reading BTCH file '(.+)/katch/hierarchy.tch'.*")
avg_runtime_pattern = re.compile(".*avg\\. running time \\(including route extraction\\) = ([0-9\\.]+) msec.*")
avg_runtime_pattern_wo_path = re.compile(".*avg\\. running time \\(without route extraction\\)   = ([0-9\\.]+) msec.*")

def parse_katch_query_output(path):
  stats = {}
  with open(path, 'r') as f:
    for line in f:
      match = graph_pattern.match(line)
      if match:
        stats['graph'] = match[1]

      match = avg_runtime_pattern.match(line)
      if match:
        stats['running_time_ms'] = float(match[1])

      match = avg_runtime_pattern_wo_path.match(line)
      if match:
        stats['running_time_wo_path_ms'] = float(match[1])

  return stats

In [5]:
paths = glob.glob(os.environ['TDCCH_EXP_OUTPUT_DIR'] + "/katch/uniform_queries/*.out")
katch_query_stats = pd.DataFrame.from_records([parse_katch_query_output(path) for path in paths])
katch_query_stats.set_index('graph', inplace=True)

In [6]:
runtime_pattern = re.compile(".*Needed (\\d+)musec\\..*")

def parse_tds_contraction_output(path):
  stats = { 'running_time_s': 0.0, 'num_threads': 1 }

  with open(path, 'r') as f:
    for line in f:
      if not 'graph' in stats:
        stats['graph'] = line.strip()
      else:
        match = runtime_pattern.match(line)
        if match:
          stats['running_time_s'] += int(match[1]) / 1000000

  return stats

In [7]:
paths = glob.glob(os.environ['TDCCH_EXP_OUTPUT_DIR'] + "/tds/contraction/*.out")
tds_contraction_stats = pd.DataFrame.from_records([parse_tds_contraction_output(path) for path in paths])
tds_contraction_stats.set_index('graph', inplace=True)

In [8]:
source_node_pattern = re.compile(".*source node : (\\d+).*")
source_time_pattern = re.compile(".*source time \\[ms since midnight\\] : (\\d+).*")
target_node_pattern = re.compile(".*target node : (\\d+).*")
runtime_pattern = re.compile(".*TD-S query running time \\[musec\\] : (\\d+).*")
ground_truth_pattern = re.compile(".*Exact target time \\[ms since midnight\\] : (\\d+).*")
ea_pattern = re.compile(".*TD-S target time \\[ms since midnight\\] : (\\d+).*")

def parse_tds_query_output(path):
  stats = []
  graph = None

  with open(path, 'r') as f:
    for line in f:
      if not graph:
        graph = line.strip()
      else:
        match = source_node_pattern.match(line)
        if match:
          stats.append({ 'graph': graph, 'from': int(match[1]) })

        match = source_time_pattern.match(line)
        if match:
          stats[-1]['departure_time'] = int(match[1]) / 1000

        match = target_node_pattern.match(line)
        if match:
          stats[-1]['to'] = int(match[1])

        match = runtime_pattern.match(line)
        if match:
          stats[-1]['running_time_ms'] = int(match[1]) / 1000

        match = ground_truth_pattern.match(line)
        if match:
          stats[-1]['ground_truth'] = int(match[1]) / 1000

        match = ea_pattern.match(line)
        if match:
          stats[-1]['earliest_arrival'] = int(match[1]) / 1000

  return stats

In [9]:
paths = glob.glob(os.environ['TDCCH_EXP_OUTPUT_DIR'] + "/tds/uniform_queries/*.out")
tds_queries = pd.DataFrame.from_records([run for path in paths for run in parse_tds_query_output(path)])

In [10]:
tds_queries['rel_error'] = (tds_queries['earliest_arrival'] - tds_queries['ground_truth']) / (tds_queries['ground_truth'] - tds_queries['departure_time']) * 100

In [11]:
paths = glob.glob(os.environ['TDCCH_EXP_OUTPUT_DIR'] + "/queries_all_graphs/*.json")
data = [json.load(open(path)) for path in paths]

tdcch_queries = pd.DataFrame.from_records([{ 
    'graph': run['args'][1],
    **algo } 
    for run in data for algo in run["algo_runs"] if algo['algo'] == "Floating TDCCH Query"])

In [12]:
paths = glob.glob(os.environ['TDCCH_EXP_OUTPUT_DIR'] + "/customization_all_graphs/*.json")
data = [json.load(open(path)) for path in paths]

tdcch_customization_stats = pd.DataFrame.from_records([{ 
    'graph': run['args'][1],
    'num_threads': run['num_threads'], 
    'customization_running_time_s': algo['main']['running_time_ms'] / 1000,
    **algo } 
    for run in data for algo in run["algo_runs"] if algo['algo'] == "Floating TDCCH Customization"])

In [13]:
runtime_pattern = re.compile(".*running time : (\\d+)musec.*")

def parse_flowcutter_partition_output(path):
  stats = { 'running_time_s': 0.0 }

  with open(path, 'r') as f:
    for line in f:
      if not 'graph' in stats:
        stats['graph'] = line.strip()
      else:
        match = runtime_pattern.match(line)
        if match:
          stats['running_time_s'] += int(match[1]) / 1000000

  return stats

paths = glob.glob(os.environ['TDCCH_EXP_OUTPUT_DIR'] + "/partition/*.out")
tdcch_partition_stats = pd.DataFrame.from_records([parse_flowcutter_partition_output(path) for path in paths])

In [14]:
paths = glob.glob(os.environ['TDCCH_EXP_OUTPUT_DIR'] + "/contraction/*.json")
data = [json.load(open(path)) for path in paths]

tdcch_contraction_stats = pd.DataFrame.from_records([{ 
    'graph': run['args'][1],
    'running_time_s': sum([algo['running_time_ms'] / 1000 for algo in run["algo_runs"] if algo['algo'] == "CCH Contraction"]) } 
    for run in data])

In [15]:
def dir_size(start_path = '.'):
    total_size = 0
    for dirpath, dirnames, filenames in os.walk(start_path):
        for f in filenames:
            fp = os.path.join(dirpath, f)
            total_size += os.path.getsize(fp)
    return total_size

In [16]:
katch_prepro_space = pd.Series({ graph[:len(os.environ['TDCCH_GRAPH_DIR'])] + '/' + graph[len(os.environ['TDCCH_GRAPH_DIR']):]: dir_size(graph + 'katch') for graph in glob.glob(os.environ['TDCCH_GRAPH_DIR'] + "/*/day/*/") }) / 1000000000
tds_prepro_space = pd.Series({ graph[:len(os.environ['TDCCH_GRAPH_DIR'])] + '/' + graph[len(os.environ['TDCCH_GRAPH_DIR']):]: dir_size(graph + 'tds') for graph in glob.glob(os.environ['TDCCH_GRAPH_DIR'] + "/*/day/*/") }) / 1000000000
tdcch_prepro_space = pd.Series({ graph[:len(os.environ['TDCCH_GRAPH_DIR'])] + '/' + graph[len(os.environ['TDCCH_GRAPH_DIR']):]: dir_size(graph + 'cch') + dir_size(graph + 'customized') for graph in glob.glob(os.environ['TDCCH_GRAPH_DIR'] + "/*/day/*/") }) / 1000000000

In [29]:
query_time = 'Avg. query running time [ms]'
unpack_time = 'Avg. path unpacking running time [ms]'
prepro_time = 'Preprocessing running time [s]'
space = 'Preprocessing data [GB]'
error = 'Avg. rel. error [%]'
customization_time = 'Customization running time [s]'
threads = '#threads'

the_table = pd.DataFrame.from_records([])
the_table['KaTCH', prepro_time] = katch_contraction_stats['running_time_s']
the_table['KaTCH', threads] = katch_contraction_stats['num_threads']
the_table['KaTCH', space] = katch_prepro_space
the_table['KaTCH', query_time] = katch_query_stats['running_time_wo_path_ms'].fillna("OOM")
the_table['KaTCH', unpack_time] = (katch_query_stats['running_time_ms'] - katch_query_stats['running_time_wo_path_ms']).fillna("OOM")
the_table['TD-S', prepro_time] = tds_contraction_stats['running_time_s']
the_table['TD-S', threads] = tds_contraction_stats['num_threads']
the_table['TD-S', space] = tds_prepro_space
the_table['TD-S', query_time] = tds_queries.groupby('graph')['running_time_ms'].mean()
the_table['TD-S', error] = tds_queries.groupby('graph')['rel_error'].mean()
the_table['TD-CCH', prepro_time] = tdcch_partition_stats.groupby('graph')['running_time_s'].mean() + tdcch_contraction_stats.groupby('graph')['running_time_s'].mean()
the_table['TD-CCH', space] = tdcch_prepro_space
the_table['TD-CCH', query_time] = tdcch_queries.groupby('graph')['running_time_ms'].mean()
the_table['TD-CCH', unpack_time] = tdcch_queries.groupby('graph')['unpacking_running_time_ms'].mean()
the_table['TD-CCH', customization_time] = tdcch_customization_stats.groupby('graph')['customization_running_time_s'].mean()
the_table['TD-CCH', threads] = tdcch_customization_stats.groupby('graph')['num_threads'].max()
the_table.columns = pd.MultiIndex.from_tuples(the_table.columns)

rel_work_graph = os.environ['TDCCH_GRAPH_DIR'] + '/de/day/dido/'
the_table.loc[rel_work_graph, ('TDCALT', prepro_time)] = 540
the_table.loc[rel_work_graph, ('TDCALT', space)] = 0.2344
the_table.loc[rel_work_graph, ('TDCALT', query_time)] = 5.36
the_table.loc[rel_work_graph, ('TDCALT', threads)] = 1

the_table.loc[rel_work_graph, ('TDCALT-K1.15', prepro_time)] = 540
the_table.loc[rel_work_graph, ('TDCALT-K1.15', space)] = 0.2344
the_table.loc[rel_work_graph, ('TDCALT-K1.15', query_time)] = 1.87
the_table.loc[rel_work_graph, ('TDCALT-K1.15', error)] = 0.05
the_table.loc[rel_work_graph, ('TDCALT-K1.15', threads)] = 1

the_table.loc[rel_work_graph, ('eco L-SHARC', prepro_time)] = 4680
the_table.loc[rel_work_graph, ('eco L-SHARC', space)] = 1.026672
the_table.loc[rel_work_graph, ('eco L-SHARC', query_time)] = 6.31
the_table.loc[rel_work_graph, ('eco L-SHARC', threads)] = 1

the_table.loc[rel_work_graph, ('heu SHARC', prepro_time)] = 12360
the_table.loc[rel_work_graph, ('heu SHARC', space)] = 0.642256
the_table.loc[rel_work_graph, ('heu SHARC', query_time)] = 0.69
the_table.loc[rel_work_graph, ('heu SHARC', error)] = 'n/r'
the_table.loc[rel_work_graph, ('heu SHARC', threads)] = 1

the_table.loc[rel_work_graph, ('TCH', prepro_time)] = 378
the_table.loc[rel_work_graph, ('TCH', space)] = 4.66456
the_table.loc[rel_work_graph, ('TCH', query_time)] = 0.75
the_table.loc[rel_work_graph, ('TCH', threads)] = 8

the_table.loc[rel_work_graph, ('ATCH (1.0)', prepro_time)] = 378
the_table.loc[rel_work_graph, ('ATCH (1.0)', space)] = 1.120432
the_table.loc[rel_work_graph, ('ATCH (1.0)', query_time)] = 1.24
the_table.loc[rel_work_graph, ('ATCH (1.0)', threads)] = 8

the_table.loc[rel_work_graph, ('inex. TCH (0.1)', prepro_time)] = 378
the_table.loc[rel_work_graph, ('inex. TCH (0.1)', space)] = 1.340768
the_table.loc[rel_work_graph, ('inex. TCH (0.1)', query_time)] = 0.7
the_table.loc[rel_work_graph, ('inex. TCH (0.1)', error)] = 0.02
the_table.loc[rel_work_graph, ('inex. TCH (0.1)', threads)] = 8

the_table.loc[rel_work_graph, ('inex. TCH (1.0)', prepro_time)] = 378
the_table.loc[rel_work_graph, ('inex. TCH (1.0)', space)] = 1.003232
the_table.loc[rel_work_graph, ('inex. TCH (1.0)', query_time)] = 0.69
the_table.loc[rel_work_graph, ('inex. TCH (1.0)', error)] = 0.27
the_table.loc[rel_work_graph, ('inex. TCH (1.0)', threads)] = 8

the_table.loc[rel_work_graph, ('TDCRP (0.1)', prepro_time)] = 273
the_table.loc[rel_work_graph, ('TDCRP (0.1)', customization_time)] = 16
the_table.loc[rel_work_graph, ('TDCRP (0.1)', space)] = 0.778208
the_table.loc[rel_work_graph, ('TDCRP (0.1)', query_time)] = 1.92
the_table.loc[rel_work_graph, ('TDCRP (0.1)', unpack_time)] = 'n/i'
the_table.loc[rel_work_graph, ('TDCRP (0.1)', error)] = 0.05
the_table.loc[rel_work_graph, ('TDCRP (0.1)', threads)] = 16

the_table.loc[rel_work_graph, ('TDCRP (1.0)', prepro_time)] = 273
the_table.loc[rel_work_graph, ('TDCRP (1.0)', customization_time)] = 8
the_table.loc[rel_work_graph, ('TDCRP (1.0)', space)] = 0.360976
the_table.loc[rel_work_graph, ('TDCRP (1.0)', query_time)] = 1.66
the_table.loc[rel_work_graph, ('TDCRP (1.0)', unpack_time)] = 'n/i'
the_table.loc[rel_work_graph, ('TDCRP (1.0)', error)] = 0.68
the_table.loc[rel_work_graph, ('TDCRP (1.0)', threads)] = 16

the_table.loc[rel_work_graph, ('FLAT', prepro_time)] = 158760
the_table.loc[rel_work_graph, ('FLAT', space)] = 54.625875
the_table.loc[rel_work_graph, ('FLAT', query_time)] = 1.269
the_table.loc[rel_work_graph, ('FLAT', unpack_time)] = 'n/i'
the_table.loc[rel_work_graph, ('FLAT', error)] = 0.01534
the_table.loc[rel_work_graph, ('FLAT', threads)] = 6

the_table.loc[rel_work_graph, ('CFLAT', prepro_time)] = 104220
the_table.loc[rel_work_graph, ('CFLAT', space)] = 34.630256
the_table.loc[rel_work_graph, ('CFLAT', query_time)] = 0.585
the_table.loc[rel_work_graph, ('CFLAT', unpack_time)] = 'n/r'
the_table.loc[rel_work_graph, ('CFLAT', error)] = 0.0079
the_table.loc[rel_work_graph, ('CFLAT', threads)] = 6

the_table = the_table.stack(0) \
    .loc[[os.environ['TDCCH_GRAPH_DIR'] + graph for graph in ['/de/day/dido/', '/ptv17-de-car/day/di/', '/ptv17-eur-car/day/di/']]] \
    .rename(index={ os.environ['TDCCH_GRAPH_DIR'] + old: new for (old, new) in [('/de/day/dido/', 'Ger06'), ('/ptv17-de-car/day/di/', 'Ger17'), ('/ptv17-eur-car/day/di/', 'Eur17')] }) \
    .reindex(['TDCALT', 'TDCALT-K1.15', 'eco L-SHARC', 'heu SHARC', 'KaTCH', 'TCH', 'ATCH (1.0)', 'inex. TCH (0.1)', 'inex. TCH (1.0)', 'TDCRP (0.1)', 'TDCRP (1.0)', 'FLAT', 'CFLAT', 'TD-S', 'TD-CCH'], level=1) \
    .reindex(['Ger06', 'Ger17', 'Eur17'], level=0)[[threads, prepro_time, customization_time, space, query_time, unpack_time, error]]

the_table[threads] = the_table[threads].astype(int)
the_table[prepro_time] = the_table[prepro_time].round().astype(int)
the_table[customization_time] = the_table[customization_time].dropna().round().astype(int)
the_table[space] = the_table[space].round(2)
query_column = pd.to_numeric(the_table[query_time], errors='coerce').dropna().round(3)
the_table.loc[query_column.index, query_time] = query_column
unpack_column = pd.to_numeric(the_table[unpack_time], errors='coerce').dropna().round(3)
the_table.loc[unpack_column.index, unpack_time] = unpack_column
error_column = pd.to_numeric(the_table[error], errors='coerce').dropna().round(4)
the_table.loc[error_column.index, error] = error_column

the_table[customization_time].fillna('-', inplace=True)
the_table[error].fillna('-', inplace=True)
the_table[unpack_time].fillna('n/r', inplace=True)

the_table

Unnamed: 0_level_0,Unnamed: 1_level_0,#threads,Preprocessing running time [s],Customization running time [s],Preprocessing data [GB],Avg. query running time [ms],Avg. path unpacking running time [ms],Avg. rel. error [%]
graph,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Ger06,TDCALT,1,540,-,0.23,5.36,n/r,-
Ger06,TDCALT-K1.15,1,540,-,0.23,1.87,n/r,0.05
Ger06,eco L-SHARC,1,4680,-,1.03,6.31,n/r,-
Ger06,heu SHARC,1,12360,-,0.64,0.69,n/r,n/r
Ger06,KaTCH,16,170,-,4.66,0.431,0.197,-
Ger06,TCH,8,378,-,4.66,0.75,n/r,-
Ger06,ATCH (1.0),8,378,-,1.12,1.24,n/r,-
Ger06,inex. TCH (0.1),8,378,-,1.34,0.7,n/r,0.02
Ger06,inex. TCH (1.0),8,378,-,1.0,0.69,n/r,0.27
Ger06,TDCRP (0.1),16,273,16,0.78,1.92,n/i,0.05


In [31]:
relevant = ['Ger06', 'Ger17', 'Eur17']

def gen_mapper(col):
    def bold_min(val):
        print(val)
        if pd.to_numeric(val, errors='coerce') == pd.to_numeric(col, errors='coerce').dropna().min():
           return "\\textbf{{{}}}".format(val)
        else:
           return "{}".format(val)
    return bold_min

for graph in relevant:
  for col in [prepro_time, customization_time, space, query_time, unpack_time]:
    the_table.loc[[graph], col] = the_table.loc[[graph], col].map(gen_mapper(the_table.loc[[graph], col]))
    
the_table

540
540
4680
12360
170
378
378
378
378
273
273
158760
104220
547
\textbf{120}
-
-
-
-
-
-
-
-
-
16.0
\textbf{8.0}
-
-
-
20.0
\textbf{0.23}
\textbf{0.23}
1.03
0.64
4.66
4.66
1.12
1.34
1.0
0.78
0.36
54.63
34.63
3.61
1.07
5.36
1.87
6.31
0.69
\textbf{0.431}
0.75
1.24
0.7
0.69
1.92
1.66
1.269
0.585
1.672
0.537
n/r
n/r
n/r
n/r
0.197
n/r
n/r
n/r
n/r
n/i
n/i
n/i
n/r
n/r
\textbf{0.148}
874
617
\textbf{134}
-
-
\textbf{95.0}
42.81
5.28
\textbf{1.51}
\textbf{0.712}
2.28
1.559
0.67
n/r
\textbf{0.272}
3089
3368
\textbf{836}
-
-
\textbf{541.0}
146.97
18.84
\textbf{5.48}
OOM
4.027
\textbf{3.985}
OOM
n/r
\textbf{0.749}


Unnamed: 0_level_0,Unnamed: 1_level_0,#threads,Preprocessing running time [s],Customization running time [s],Preprocessing data [GB],Avg. query running time [ms],Avg. path unpacking running time [ms],Avg. rel. error [%]
graph,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Ger06,TDCALT,1,540,-,\textbf{0.23},5.36,n/r,-
Ger06,TDCALT-K1.15,1,540,-,\textbf{0.23},1.87,n/r,0.05
Ger06,eco L-SHARC,1,4680,-,1.03,6.31,n/r,-
Ger06,heu SHARC,1,12360,-,0.64,0.69,n/r,n/r
Ger06,KaTCH,16,\textbf{170},-,4.66,\textbf{0.431},\textbf{0.197},-
Ger06,TCH,8,378,-,4.66,0.75,n/r,-
Ger06,ATCH (1.0),8,378,-,1.12,1.24,n/r,-
Ger06,inex. TCH (0.1),8,378,-,1.34,0.7,n/r,0.02
Ger06,inex. TCH (1.0),8,378,-,1.0,0.69,n/r,0.27
Ger06,TDCRP (0.1),16,273,\textbf{16.0},0.78,1.92,n/i,0.05


In [20]:
pd.to_numeric(the_table.loc[['Ger06'], customization_time], errors='coerce').dropna().min()

8.0