In [1]:
import os
import pandas as pd

In [2]:
def get_problems_from_dirs():
    """Return the names of the problem directories in the current working directory.

    An entry counts as a problem when its name contains no dot and it is an
    actual directory. The directory check keeps extension-less regular files
    (for example ``Makefile`` or ``LICENSE``) from being reported as problems,
    while the dot check still skips hidden folders such as
    ``.ipynb_checkpoints``.

    Returns:
        list[str]: Matching directory names, sorted alphabetically so the
        result does not depend on the arbitrary order of ``os.listdir``.
    """
    return sorted(
        entry
        for entry in os.listdir('.')
        if '.' not in entry and os.path.isdir(entry)
    )

# Scan of the working directory, kept under its own name so it can be
# inspected without being clobbered by the curated list below.
discovered_problems = get_problems_from_dirs()

# Curated benchmark set used by the rest of the notebook. Later cells pick a
# problem from this list by position, so the order matters.
available_problems = ["easy", "bays29", "berlin52", "eil101", "xqf131"]

In [3]:
def get_value_from_file(file, string):
    """Look up the text that follows a marker in a file.

    The file is read line by line. For the first line containing ``string``,
    everything after the first occurrence of ``string`` on that line is
    returned with surrounding whitespace removed.

    Args:
        file: Path of the text file to scan.
        string: Marker to search for.

    Returns:
        The stripped remainder of the first matching line, or ``None`` when no
        line contains the marker.
    """
    with open(file, 'r') as handle:
        for text in handle:
            start = text.find(string)
            if start == -1:
                # Marker is not on this line; keep scanning.
                continue
            return text[start + len(string):].strip()
    return None

# Spot-check the lookup on a single problem (index 3 of the curated list).
problem_name = available_problems[3]
# NOTE: despite its name, this variable points at the reference `.tour` file.
tsp_file_path = f"{problem_name}/{problem_name}.tour"
# The marker has to match the file text exactly, including the space before
# the colon; 'COMMENT: Tour length' (without that space) never matched and the
# lookup silently returned None.
get_value_from_file(tsp_file_path, 'COMMENT : Tour length')

In [4]:
# One entry per statistic: (file suffix appended to "<problem>/<problem>",
# marker text to search for in that file, key to store the value under).
# A list (rather than a set) keeps the iteration order fixed, so the resulting
# dict keys come out in the same order on every run.
stats_tuples = [
    ('.tsp', 'DIMENSION :', 'dim'),
    ('.tour', 'COMMENT : Tour length', 'optimal_tour_length'),
    ('_solution.tour', 'COMMENT : Tour length: ', 'tour_length'),
    ('_solution.tour', 'COMMENT : Time elapsed: ', 'time_elapsed'),
]

def get_problem_stats(problem_name, file_key_map):
    """Gather the raw statistics of one problem into a dict.

    Args:
        problem_name: Name of the problem; files are looked up under
            ``<problem_name>/<problem_name><suffix>``.
        file_key_map: Iterable of ``(suffix, marker, key)`` triples. For each
            triple the marker is searched in the corresponding file and the
            value found is stored under ``key``.

    Returns:
        dict: ``'problem_name'`` plus one entry per triple. Values are whatever
        ``get_value_from_file`` returns for that file and marker.
    """
    collected = {'problem_name': problem_name}
    collected.update({
        key: get_value_from_file(f"{problem_name}/{problem_name}{suffix}", marker)
        for suffix, marker, key in file_key_map
    })
    return collected

# Spot-check: gather the stats for the currently selected `problem_name` only.
get_problem_stats(problem_name, stats_tuples)

{'problem_name': 'eil101',
 'time_elapsed': '375.34 [s]',
 'optimal_tour_length': '629',
 'dim': '101',
 'tour_length': '911.0381'}

In [5]:
# One raw (string-valued) stats record per problem. The comprehension keeps its
# loop variable local, so the `problem_name` selected earlier is not overwritten.
problem_stats = [
    get_problem_stats(name, stats_tuples) for name in available_problems
]

# Column order of the final summary table.
summary_columns = [
    'dim', 'optimal_tour_length', 'tour_length', 'tour_length_ratio', 'time_elapsed'
]

df = (
    pd.DataFrame(problem_stats)
    .set_index('problem_name')
    .assign(
        dim=lambda d: d['dim'].astype(int),
        optimal_tour_length=lambda d: d['optimal_tour_length'].astype(float),
        tour_length=lambda d: d['tour_length'].astype(float).round(2),
        # regex=False: '[s]' is the literal unit suffix. Read as a regular
        # expression it would be a character class that strips every 's'
        # and leaves '[]' behind, which then fails the float conversion.
        time_elapsed=lambda d: (
            d['time_elapsed']
            .str.replace('[s]', '', regex=False)
            .str.strip()
            .astype(float)
        ),
        # Ratio is computed from the already rounded tour length.
        tour_length_ratio=lambda d: (
            d['tour_length'] / d['optimal_tour_length']
        ).round(2),
    )
    .loc[:, summary_columns]
)

df

Unnamed: 0_level_0,dim,optimal_tour_length,tour_length,tour_length_ratio,time_elapsed
problem_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
easy,6,12.17,12.17,1.0,2.55
bays29,29,2020.0,2344.0,1.16,40.67
berlin52,52,7542.0,10191.4,1.35,54.64
eil101,101,629.0,911.04,1.45,375.34
xqf131,131,564.0,2046.89,3.63,197.33
