> Notebook from [https://github.com/squillero/python-accelerated](https://github.com/squillero/python-accelerated)  
> Copyright © 2021 [Giovanni Squillero](https://github.com/squillero). 
> Free for personal or classroom use; see [LICENSE.md](https://github.com/squillero/python-accelerated/blob/master/LICENSE.md) for details.  

In [1]:
import logging
from pprint import pprint
# Root logger: show INFO and above, formatted as "[HH:MM:SS] LEVEL: message".
logging.basicConfig(
    level=logging.INFO,
    format='[%(asctime)s] %(levelname)s: %(message)s',
    datefmt='%H:%M:%S',
)

import csv
import pickle
from collections import namedtuple
import os
import statistics

In [2]:
# Location of the benchmark CSV: <current working directory>/data_files/benchmarks.csv
file_name = os.path.join(os.getcwd(), os.path.join('data_files', 'benchmarks.csv'))

In [3]:
# Read the whole file into memory as a list of raw lines (each line keeps its
# trailing newline). If the file cannot be opened or read, the problem is
# logged and `data` stays an empty list.
data = []
try:
    with open(file_name) as csv_file:
        data = list(csv_file)
except OSError as problem:
    logging.error(f"Yeuch: {problem}")

In [5]:
# The first line of the file is the header: split it on ';' to see the column names
data[0].split(sep=';')

['GRAPH 0',
 'GRAPH 0 SIZE',
 'GRAPH 1',
 'GRAPH 1 SIZE',
 'ALGORITHM',
 'SOLUTION SIZE',
 'QUALITY MEASURE',
 'QUALITY SCORE',
 'WALL CLOCK TIME',
 'CPU TIME USED',
 'TIMEOUT',
 'PARALLEL',
 'RANDOMIZED',
 'ALPHA',
 'SOLUTIONS',
 'SOLUTION OUTPUT\n']

In [6]:
# Record type with one field per CSV column. The field names are derived from
# the header row: surrounding whitespace (including the final newline) is
# stripped and inner spaces become underscores, so 'GRAPH 0 SIZE' turns into
# GRAPH_0_SIZE.
#
# Hand-written alternative, kept for reference (note that it spells GRAPH0 /
# GRAPH1 without the underscore that the derived names have):
# Record = namedtuple('Record', 'GRAPH0 GRAPH0_SIZE GRAPH1 GRAPH1_SIZE ALGORITHM SOLUTION_SIZE ' +
#                              'QUALITY_MEASURE QUALITY_SCORE WALL_CLOCK_TIME CPU_TIME_USED TIMEOUT ' + 
#                              'PARALLEL RANDOMIZED ALPHA SOLUTIONS SOLUTION_OUTPUT')
Record = namedtuple(
    'Record',
    [column.strip().replace(' ', '_') for column in data[0].split(';')],
)

In [7]:
# Turn every data line (everything after the header) into a Record.
records = list()
for d in data[1:]:
    # A malformed line (e.g. a blank one, which splits into a single empty
    # field) makes the Record constructor raise TypeError because the number
    # of fields does not match. try/except handles every such case at once,
    # which is better than testing for `d == '\n'` explicitly.
    try:
        # rstrip('\n') instead of d[:-1]: a final line without a trailing
        # newline must not lose its last real character.
        records.append(Record(*d.rstrip('\n').split(';')))
    except TypeError:
        # Only the field-count mismatch is caught; a bare `except:` would
        # also swallow KeyboardInterrupt and genuine programming errors.
        logging.warning(f"Can't parse: {repr(d)}")



In [8]:
# Arithmetic mean of CPU_TIME_USED over all parsed records
# (the fields are strings, hence the conversion to float).
sum(map(float, (r.CPU_TIME_USED for r in records))) / len(records)

575.5679499250001

In [9]:
# Problem 2: median of 'QUALITY_SCORE' when 'QUALITY_MEASURE' is 'linear-priority'

In [17]:
# Time the median of QUALITY_SCORE over the 'linear-priority' records, using the
# record list repeated 10 times as input. The two lines differ only in what is
# handed to statistics.median: a list comprehension (first) versus a generator
# expression (second).
%timeit statistics.median([float(r.QUALITY_SCORE) for r in records * 10 if r.QUALITY_MEASURE == 'linear-priority'])
%timeit statistics.median(float(r.QUALITY_SCORE) for r in records * 10 if r.QUALITY_MEASURE == 'linear-priority')

88.9 µs ± 4.29 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
94.6 µs ± 3.99 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


## USING CSV

In [25]:
# Path of the benchmark CSV inside the 'data_files' folder of the current working directory.
file_name = os.path.join(os.path.join(os.getcwd(), 'data_files'), 'benchmarks.csv')

In [6]:
# Load the benchmark file with the csv module: the result is a list with one
# dict per data row, keyed by the column names found in the header row.
data = list()
try:
    # newline='' is what the csv module asks for when it is given a file
    # object, so that newlines embedded in quoted fields are handled by the
    # reader itself.
    with open(file_name, newline='') as csv_file:
        # Let the Sniffer guess the dialect (delimiter, quoting, ...) from the
        # first 2 KiB, then rewind so that the header row is parsed as well.
        dialect = csv.Sniffer().sniff(csv_file.read(2**11))
        logging.warning(dialect)
        csv_file.seek(0)
        data = list(csv.DictReader(csv_file, dialect=dialect))
except (OSError, UnicodeError, csv.Error) as problem:
    # Missing/unreadable file, undecodable bytes, or a CSV that cannot be
    # sniffed/parsed: report it instead of failing silently. `data` is left
    # as the empty list in that case.
    logging.error(f"Yeuch: {problem}")



In [7]:
# Show the first parsed row: a dict mapping each CSV column name to its (string) value
data[0]

{'GRAPH 0': 'graphs/foie_gras/binary/120core_200classes_g0.txt',
 'GRAPH 0 SIZE': '238',
 'GRAPH 1': 'graphs/foie_gras/binary/120core_200classes_g1.txt',
 'GRAPH 1 SIZE': '240',
 'ALGORITHM': 'ALIKE',
 'SOLUTION SIZE': '11',
 'QUALITY MEASURE': 'linear-priority',
 'QUALITY SCORE': '20011',
 'WALL CLOCK TIME': '100.305271707',
 'CPU TIME USED': '555.459108',
 'TIMEOUT': 'YES',
 'PARALLEL': 'YES',
 'RANDOMIZED': 'YES',
 'ALPHA': '0.25',
 'SOLUTIONS': '3',
 'SOLUTION OUTPUT': '[(size: 11, quality: 11): { g0-n16 g1-n203} { g0-n20 g1-n23} { g0-n52 g1-n155} { g0-n62 g1-n76} { g0-n97 g1-n144} { g0-n120 g1-n191} { g0-n172 g1-n29} { g0-n184 g1-n204} { g0-n219 g1-n139} { g0-n231 g1-n197} { g0-n236 g1-n33} ] [(size: 11, quality: 11): { g0-n20 g1-n23} { g0-n52 g1-n155} { g0-n62 g1-n76} { g0-n97 g1-n144} { g0-n120 g1-n191} { g0-n172 g1-n29} { g0-n184 g1-n204} { g0-n218 g1-n82} { g0-n219 g1-n139} { g0-n231 g1-n197} { g0-n236 g1-n33} ] [(size: 11, quality: 11): { g0-n20 g1-n23} { g0-n52 g1-n155} { g0