# Statistics of the RUNS

In [1]:
import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
def _elapsed_to_seconds(text):
    """Convert an elapsed-time field ("[h:]m:s" or plain seconds) to seconds."""
    seconds = 0.0
    # Fold left to right so that every extra leading field (minutes, hours)
    # is scaled by another factor of 60.
    for field in text.strip().split(":"):
        seconds = seconds * 60 + float(field)
    return seconds


def _first_group(pattern, text, what):
    """Return group 1 of the first match of `pattern` in `text`.

    Raises ValueError (naming `what`) when the pattern is not found.
    """
    match = pattern.search(text)
    if match is None:
        raise ValueError("cannot find {} in:\n{}".format(what, text))
    return match.group(1)


def get_formatted_data(output_name, error_name, launches, kind='parallel'):
    """
    Parse the output and error files of a sequence of runs.

    output_name: name of the output file of the runs;
    error_name: name of the error file of the runs;
    launches: a list. Every element is the number of cores involved in a run,
              i.e. launches = [1, 1, 2, 3, 5] means that the output and error
              files contain the result of a run with one core, followed by
              another run with one core, followed by a run with two cores,
              followed by a run with three cores, followed by a run with
              five cores;
    kind: 'parallel' or 'serial'; selects the pattern used to find the
          master wall time ("master processor : ..." or "walltime : ...").

    For every launch, launch + 1 non-blank lines are consumed from the output
    file and two lines from the error file.

    It returns a list of dictionaries, one for each launch. The information
    provided in each dictionary is the following:
    - P: the number of cores;
    - trials: the number of trials of the process (Niter);
    - estimate: the estimate of pi resulting from the process;
    - elapsed_t: the elapsed time in seconds as measured by /usr/bin/time
                 (both m:ss and h:mm:ss are understood);
    - procs_t: a list of the wall times of each processor. The 0th element is
               the wall time of the master node;
    - CPU: the usage of CPU (normalized: 100% is 1.00).

    Raises ValueError if `kind` is invalid or an expected field is missing.
    """
    if kind == 'parallel':
        master_t_re = re.compile(r"master processor : (.*) ")
    elif kind == 'serial':
        master_t_re = re.compile(r"walltime : (.*)")
    else:
        # Explicit raise instead of `assert`, which is stripped under -O.
        raise ValueError(
            "kind must be 'serial' or 'parallel', got {!r}".format(kind))

    trials_re = re.compile(r" # of trials = (.*) ,")
    estimate_re = re.compile(r"estimate of pi is (.*) ")
    elapsed_t_re = re.compile(r"system (.*)elapsed ")
    cpu_re = re.compile(r"elapsed (.*)%CPU ")
    proc_res = {}  # per-processor patterns, compiled once and reused

    data = []
    with open(output_name, "r") as output, open(error_name, "r") as error:
        for launch in launches:
            # Collect the launch + 1 non-blank output lines of this run.
            o_lines = []
            while len(o_lines) <= launch:
                o_line = output.readline()
                if not o_line:
                    break  # end of file: nothing more to collect
                if o_line != '\n':
                    o_lines.append(o_line)
            o_chunk = "".join(o_lines)

            trials = int(_first_group(trials_re, o_chunk, "number of trials"))
            estimate = float(_first_group(estimate_re, o_chunk, "pi estimate"))
            procs_t = [float(_first_group(master_t_re, o_chunk,
                                          "master wall time"))]
            for proc in range(1, launch):
                if proc not in proc_res:
                    proc_res[proc] = re.compile(
                        r"on processor {} : (.*) ".format(proc))
                procs_t.append(float(_first_group(
                    proc_res[proc], o_chunk,
                    "wall time of processor {}".format(proc))))

            # Two error-file lines are consumed per run.
            e_chunk = error.readline() + error.readline()
            elapsed_t = _elapsed_to_seconds(
                _first_group(elapsed_t_re, e_chunk, "elapsed time"))
            cpu = float(_first_group(cpu_re, e_chunk, "CPU usage")) / 100

            data.append({"P": launch,
                         "trials": trials,
                         "estimate": estimate,
                         "elapsed_t": elapsed_t,
                         "procs_t": procs_t,
                         "CPU": cpu
                         })
    return data

## SERIAL extraction

In [3]:
# Three single-core runs per experiment; the experiment numbers are the
# two-digit suffixes of the file names.
serial_launches = [1] * 3
serial_exps = [8, 9, 10, 11]

serial_basename = "serial/serial-"
serial_output_extension = ".output"
serial_error_extension = ".error"

# experiment number -> (output file, error file, launches)
serial_arguments = {
    exp: (
        "{}{:02d}{}".format(serial_basename, exp, serial_output_extension),
        "{}{:02d}{}".format(serial_basename, exp, serial_error_extension),
        serial_launches,
    )
    for exp in serial_exps
}
# experiment number -> list of parsed runs
serial_results = {
    exp: get_formatted_data(out_path, err_path, launches, kind='serial')
    for exp, (out_path, err_path, launches) in serial_arguments.items()
}

## STRONG extraction

In [4]:
# Core counts 1, 4, 8, ..., 48, each one repeated three times in a row.
strong_launches = [cores for cores in [1, *range(4, 49, 4)] for _ in range(3)]
strong_exps = [8, 9, 10, 11]

strong_basename = "strong/parallel-"
strong_output_extension = ".output"
strong_error_extension = ".error"

# experiment number -> (output file, error file, launches)
strong_arguments = {
    exp: (
        "{}{:02d}{}".format(strong_basename, exp, strong_output_extension),
        "{}{:02d}{}".format(strong_basename, exp, strong_error_extension),
        strong_launches,
    )
    for exp in strong_exps
}
# experiment number -> list of parsed runs
strong_results = {
    exp: get_formatted_data(out_path, err_path, launches, kind='parallel')
    for exp, (out_path, err_path, launches) in strong_arguments.items()
}

## WEAK extraction

In [5]:
# The whole sequence of core counts is repeated three times
# (4, 8, ..., 48, 4, 8, ..., 48, ...), not each count three times in a row.
weak_launches_1 = list(range(4, 49, 4)) * 3
weak_exps_1 = [8, 9, 10]
weak_launches_2 = [12, 24, 48] * 3
weak_exps_2 = [11]

weak_basename = "weak/w_parallel-"
weak_output_extension = ".output"
weak_error_extension = ".error"

# experiment number -> (output file, error file, launches); the first group
# of experiments uses weak_launches_1, the second group weak_launches_2.
weak_arguments = {
    exp: (
        "{}{:02d}{}".format(weak_basename, exp, weak_output_extension),
        "{}{:02d}{}".format(weak_basename, exp, weak_error_extension),
        launches,
    )
    for exps, launches in ((weak_exps_1, weak_launches_1),
                           (weak_exps_2, weak_launches_2))
    for exp in exps
}
# experiment number -> list of parsed runs
weak_results = {
    exp: get_formatted_data(out_path, err_path, launches, kind='parallel')
    for exp, (out_path, err_path, launches) in weak_arguments.items()
}

## Some Sanity Checks

In [36]:
# For every strong-scaling launch, record the relative spread of the
# per-processor wall times: (slowest - fastest) / mean.
strong_dev_p = []
for runs in strong_results.values():
    for run in runs:
        times = np.asarray(run['procs_t'])
        spread = (np.max(times) - np.min(times)) / np.mean(times)
        strong_dev_p.append({'P': run['P'], 'trials': run['trials'], 'dev_p': spread})

In [37]:
# Show the per-launch relative spread (max - min) / mean of the processor wall times.
strong_dev_p

[{'P': 1, 'trials': 100000000, 'dev_p': 0.0},
 {'P': 1, 'trials': 100000000, 'dev_p': 0.0},
 {'P': 1, 'trials': 100000000, 'dev_p': 0.0},
 {'P': 4, 'trials': 100000000, 'dev_p': 0.0007172259537795924},
 {'P': 4, 'trials': 100000000, 'dev_p': 0.0006803592960423562},
 {'P': 4, 'trials': 100000000, 'dev_p': 0.0038881726886659355},
 {'P': 8, 'trials': 100000000, 'dev_p': 0.0025837109065373473},
 {'P': 8, 'trials': 100000000, 'dev_p': 0.002540850472120587},
 {'P': 8, 'trials': 100000000, 'dev_p': 0.0020932401975485818},
 {'P': 12, 'trials': 99999996, 'dev_p': 0.04039074566537952},
 {'P': 12, 'trials': 99999996, 'dev_p': 0.02253106720064463},
 {'P': 12, 'trials': 99999996, 'dev_p': 0.002133391588432467},
 {'P': 16, 'trials': 100000000, 'dev_p': 0.040385633551864836},
 {'P': 16, 'trials': 100000000, 'dev_p': 0.01570456377569949},
 {'P': 16, 'trials': 100000000, 'dev_p': 0.0012744555331606524},
 {'P': 20, 'trials': 100000000, 'dev_p': 0.053435052567623986},
 {'P': 20, 'trials': 100000000, 'dev

In [43]:
# For every weak-scaling launch, record the relative spread of the
# per-processor wall times: (slowest - fastest) / mean.
weak_dev_p = []
for runs in weak_results.values():
    for run in runs:
        times = np.asarray(run['procs_t'])
        spread = (np.max(times) - np.min(times)) / np.mean(times)
        weak_dev_p.append({'P': run['P'], 'trials': run['trials'], 'dev_p': spread})

In [39]:
# Show the per-launch relative spread (max - min) / mean of the processor wall times.
weak_dev_p

[{'P': 4, 'trials': 400000000, 'dev_p': 0.00016485794254748608},
 {'P': 8, 'trials': 800000000, 'dev_p': 0.0059068640095943075},
 {'P': 12, 'trials': 1200000000, 'dev_p': 0.0014436958975898731},
 {'P': 16, 'trials': 1600000000, 'dev_p': 0.006521834536222626},
 {'P': 20, 'trials': 2000000000, 'dev_p': 0.00697234547923162},
 {'P': 24, 'trials': 2400000000, 'dev_p': 0.010291425595403947},
 {'P': 28, 'trials': 2800000000, 'dev_p': 0.05759188563266893},
 {'P': 32, 'trials': 3200000000, 'dev_p': 0.015164633252799218},
 {'P': 36, 'trials': 3600000000, 'dev_p': 0.0268793849187522},
 {'P': 40, 'trials': 4000000000, 'dev_p': 0.015306468197068289},
 {'P': 44, 'trials': 4400000000, 'dev_p': 0.05524071818635234},
 {'P': 48, 'trials': 4800000000, 'dev_p': 0.06530113746397712},
 {'P': 4, 'trials': 400000000, 'dev_p': 0.006492163983658044},
 {'P': 8, 'trials': 800000000, 'dev_p': 0.004383516825340456},
 {'P': 12, 'trials': 1200000000, 'dev_p': 0.0011293424732646857},
 {'P': 16, 'trials': 1600000000, '

In [42]:
# Largest relative spread of per-processor wall times over all weak-scaling launches.
max(entry['dev_p'] for entry in weak_dev_p)

0.09634714444728269