# Imports


In [1]:
import numpy as np
import os
import matplotlib.pyplot as plt

# Access functions

In [2]:
def retrieve_time_seq_pth(path):
    """
    This function retrieves the results of a certain pthread computation.

    Input:
     - path: inner path to the folder containing a result (consider to address a path/job.out)

    Output:
     - A numpy array of 3 unsigned integers (matched patterns and the 2 checksums),
       or
     - [-1, -1, -1] meaning an absence of attemps
       or
     - [-2, -2, -2] meaning a non-finished run due to interruption
       or
     - [-3, -3, -3] meaning an aborted run
    """
    # Checking that the computation was at least attempted
    if "job.out" and "job.err" in os.listdir(path):
        f = open(path + "/job.err", "r")
        # Checking that the computation had no errors
        if (len(f.readlines()) > 0):
            return np.array([-3, -3, -3])

        f = open(path + "/job.out", "r")

        # Checking that the computation has gone through correctly
        lines = f.readlines()
        if len(lines) > 0:
            result = lines[2].split()[1:]
            result[0] = result[0][:-1]
            result[1] = result[1][:-1]
            return np.array(result, dtype=np.uint64)
        else:
            return np.array([-2, -2, -2])
        
    return np.array([-1, -1, -1])


In [3]:
def retrieve_time_mpi(path):
    """
    This function retrieves the results of a certain mpi computation.

    Input:
     - path: inner path to the folder containing a result (consider to address a path/job.out)

    Output:
     - A numpy array of 3 unsigned integers (matched patterns and the 2 checksums),
       or
     - [-1, -1, -1] meaning an absence of attemps
       or
     - [-2, -2, -2] meaning a non-finished run due to interruption
       or
     - [-3, -3, -3] meaning an aborted run
    """
    # Checking that the computation was at least attempted
    if "job.out.0" and "job.err.0" in os.listdir(path):
        f = open(path + "/job.out.0", "r")
        # Checking that the computation had no errors
        try:
          lines = f.readlines()
          result = lines[2].split()[1:]
          result[0] = result[0][:-1]
          result[1] = result[1][:-1]
          return np.array(result, dtype=np.uint64)
        except:
          return np.array([-3, -3, -3])
    
    # Interrupted runs only produce the log file
    elif "job.log" in os.listdir(path):
       return np.array([-2, -2, -2])
    
    return np.array([-1, -1, -1])


In [4]:
# Test

retrieve_time_mpi("results/logs/mpi/nodes_2/seq_length_20/patterns_20/mean_path_length_4/2")

array([-2, -2, -2])

In [5]:
def retrieve_settings(path):
    """
    This function retrieves all the settings for a certain measurement.

    Input:
     - path: inner path to the folder containing a result (consider to address a path/job.out)

    Output: 
     - A numpy array of settings in format np.int64
    """
    options = path.split("/")
    settings = np.array([], dtype=np.int64)
    for set in options:
        if not str.isalpha(set.split("_")[-1]):
            settings = np.append(settings, np.int64(set.split("_")[-1]))
            
    return settings

In [6]:
# Test

retrieve_settings("results/logs/sequential/seq_length_25/patterns_15/mean_path_length_4")
[os.path.isdir("results/logs/sequential" + "/" + x) for x in os.listdir("results/logs/sequential")]

[True, False, True, True]

In [7]:
# These indexes allow us to rapidly swap the number of parameter in consideration during generation.

sequential_idx = {"seq_length": 0, "patterns": 1, "mean_path_length": 2, "test_n": 3}
pthreads_idx = {"threads": 0, "seq_length": 1, "patterns": 2, "mean_path_length": 3, "test_n": 4}
mpi_idx = {"nodes": 0, "seq_length": 1, "patterns": 2, "mean_path_length": 3, "test_n": 4}

# Dataset creation

In [8]:
def create_dataset(path, dataset, retrieval_f):
    """
    This function recursively explores the results folder and gathers data about the measurements.

    Input:
     - path: the path of the dataset
     - dataset: whatever numpy array of np.uint with the same dimension as the measurement variables

     Output: 
      - The dataset as a numpy array of np.uint. The shape of the array is (measurements, variables),
        where the last 3 variables are the results of the computation
    """
    new_dataset = np.copy(dataset)
    if not os.path.isdir(path):
        return dataset
    elif not np.any([os.path.isdir(path + "/" + x) for x in os.listdir(path)]):
        results = retrieval_f(path)
        settings = retrieve_settings(path)
        row = np.append(settings, results.astype(np.int64)).reshape(1, -1)
        return np.append(new_dataset, row, axis=0)
    else:
        for dir in os.listdir(path):
            new_dataset = create_dataset(path + "/" + dir, new_dataset, retrieval_f)

    return new_dataset

In [9]:
# Testing

sequential = create_dataset("results/logs/sequential", np.zeros(shape=(1, len(sequential_idx) + 3), dtype=np.int64), retrieval_f=retrieve_time_seq_pth)[1:,]
create_dataset("results/logs/mpi", np.zeros(shape=(1, len(mpi_idx) + 3), dtype=np.int64), retrieval_f=retrieve_time_mpi)[1:,]

array([[      4,      15,      15, ...,   32776,   42103,   45435],
       [      4,      15,      15, ...,   32776,   42103,   45435],
       [      4,      15,      15, ...,   32776,   42103,   45435],
       ...,
       [      8,      10,      20, ..., 1048576,   20039,    6014],
       [      8,      10,      20, ..., 1048576,   20039,    6014],
       [      8,      10,      20, ..., 1048576,   20039,    6014]])

## Data retrieval


In [10]:
pthreads = create_dataset("results/logs/pthreads", np.zeros(shape=(1, len(pthreads_idx) + 3), dtype=np.int64), retrieval_f=retrieve_time_seq_pth)[1:,]
sequential = create_dataset("results/logs/sequential", np.zeros(shape=(1, len(sequential_idx) + 3), dtype=np.int64), retrieval_f=retrieve_time_seq_pth)[1:, :]
mpi = create_dataset("results/logs/mpi", np.zeros(shape=(1, len(mpi_idx) + 3), dtype=np.int64), retrieval_f=retrieve_time_mpi)[1:,]

In [11]:
sequential[np.lexsort(np.transpose(sequential)[::-1])]

array([[   10,    10,     4, ...,  1147,   368, 39054],
       [   10,    10,     4, ...,  1147,   368, 39054],
       [   10,    10,     4, ...,  1147,   368, 39054],
       ...,
       [   20,    25,    20, ...,    -2,    -2,    -2],
       [   20,    25,    20, ...,    -2,    -2,    -2],
       [   20,    25,    20, ...,    -2,    -2,    -2]])

# Checking correctness of computation

In [12]:
def correctness(path, method_idx, retrieval_f):
    """
    This function checks the correctness of a computation and tests it against sequential's results.

    Input:
     - path: path to the results
    
    Output:
     - self-described
    """
    # Creating the results datasets
    sequential = create_dataset("results/logs/sequential", np.zeros(shape=(1, len(sequential_idx) + 3), dtype=np.int64), retrieve_time_seq_pth)[1:,]
    sequential = sequential[np.lexsort(np.transpose(sequential)[::-1])]
    dataset = create_dataset(path, np.zeros(shape=(1, len(method_idx) + 3), dtype=np.int64), retrieval_f)[1:,]
    dataset = dataset[np.lexsort(np.transpose(dataset)[::-1])]
    total_tests_sequential = sequential.shape[0]
    total_tests_dataset = dataset.shape[0]

    # First check: runs with errors
    errors = dataset[np.where(dataset[:, -3] == -3)]
    if errors.shape[0] == 0:
        print("1. No errors")
    else:
        print(f"1. There where {errors.shape[0]} errors:")
        print(errors[:, :-3])

    # Second checks: runs non - terminated (where sequential terminated)
    errors = np.unique(dataset[np.where(dataset[:, -3] == -2)][:, 1:-3], axis=0)
    seq_errors = np.unique(sequential[np.where(sequential[:, -3] == -2)][:, :-3], axis=0)
    if errors.shape[0] == 0 or np.array_equal(errors, seq_errors):
        print(f"2. All tests terminated on sequential terminated on the given method")
    else:
        print(f"2. {errors.shape[0]} parallel tests did not terminate:")
        print(errors)
        print(f"2. While {seq_errors.shape[0]} sequential tests did not terminate:")
        print(seq_errors)

    # Third check: non - run tests
    non_run = dataset[np.where(dataset[:, -3] == -1)][:, :-3]
    if non_run.shape[0] == 0:
        print(f"3. All tests where run")
    else:
        print(f"3. {non_run.shape[0]} tests have not been run:")
        print(non_run)

    # Fourth check: correctness
    dataset_runs = dataset[np.where(dataset[:, -3] >= 0)]
    #dataset_runs = np.unique(dataset_runs[:, 1:], axis=0)
    for i in range(dataset_runs.shape[0]):
        flag = False
        for j in range(sequential.shape[0]):
            # The same setup has been found
            if np.array_equal(dataset_runs[i, 1:-3], sequential[j, :-3]):
                # The sequential run did not finish, thus it makes no sense to count it as an error
                if np.array_equal(sequential[j, -3:], np.array([-2, -2, -2], dtype=np.int64)):
                    flag = True

                # The sequential did finish, so let's compare the actual values
                if np.array_equal(dataset_runs[i, -3:], sequential[j, -3:]):
                    flag = True
        if not flag:
            print(f"4. Configuration {np.array2string(dataset_runs[i, :-3])} has wrong results")
    print("4. No other errors were found")

In [16]:
correctness("results/logs/mpi_exp", mpi_idx, retrieve_time_mpi)

1. No errors
2. 31 parallel tests did not terminate:
[[20 10  4 10]
 [20 15  4  1]
 [20 15  4  2]
 [20 15  4  3]
 [20 15  4  4]
 [20 15  4  5]
 [20 15  4  6]
 [20 15  4  7]
 [20 15  4  8]
 [20 15  4  9]
 [20 15  4 10]
 [20 20  4  1]
 [20 20  4  2]
 [20 20  4  3]
 [20 20  4  4]
 [20 20  4  5]
 [20 20  4  6]
 [20 20  4  7]
 [20 20  4  8]
 [20 20  4  9]
 [20 20  4 10]
 [20 25  4  1]
 [20 25  4  2]
 [20 25  4  3]
 [20 25  4  4]
 [20 25  4  5]
 [20 25  4  6]
 [20 25  4  7]
 [20 25  4  8]
 [20 25  4  9]
 [20 25  4 10]]
2. While 320 sequential tests did not terminate:
[[10 25  4  1]
 [10 25  4  2]
 [10 25  4  3]
 ...
 [20 25 20  8]
 [20 25 20  9]
 [20 25 20 10]]
3. 1680 tests have not been run:
[[ 2 10 10  4  1]
 [ 2 10 10  4  2]
 [ 2 10 10  4  3]
 ...
 [ 8 20 25 20  8]
 [ 8 20 25 20  9]
 [ 8 20 25 20 10]]
4. No other errors were found
