# Interpret the results obtained from each of the experiments

The results from the experiments involve interpreting and plotting several quantities
- CPU Utilization of the slaves during the experiments
- IO Wait of the slaves during the experiments
- Accuracy of the final model after 10 epochs
- Response time of the model after 10 epochs

(Possible other result to see)
- Throughput of the model

In [34]:
import math
import numpy as np
import pandas as pd
from collections import defaultdict
import glob
import os
import pickle

# For the ANOVA test
import researchpy as rp
import statsmodels.api as sm
from statsmodels.formula.api import ols


## Extract from the log files the Accuracy and the Response Time

The files are in the folder from the master server, and we get a final line in the files with
the total wall clock time and the final accuracy

The folder structure taken into consideration is the following, from the root folder for the set
of experiments in question (2k or full) we get 4 folders, 1 for the master and 1 for each slave,
and inside these folders we get 1 folder for each of the replications. Inside we should find the
pickle files and the log files in case of the master



In [45]:
# Read the folder os the master and search for the accuracy to build the dictionary
# Right now for testing it just has one replication

# This is the path we should change to either analyze the 2k or the full factorial
path_root_master = './experiments/master/'

# get the log files
logs = glob.glob(os.path.join(path_root_master, '*.log'))

# Extract the accuracy and the time
results = defaultdict(dict)
# Just take the final valuie for the ANOVA test
res_final = defaultdict(dict)


print(f'Extracting the data from {len(logs)} files')
for log in logs:
    acc = []
    with open(log, 'r') as f:
        for line in f:
            if 'Top1Accuracy is Accuracy' in line:
                _accuracy = line.strip().split(']')[-1].split(',')[-1].split(':')[-1].replace(')', '')
                acc.append(float(_accuracy.strip()))
                if len(acc) == 10:
                    # Then save the time here
                    time = float(line.strip().split(']')[-2].split('[')[-1].split(' ')[-1].replace('s', ''))

    # Add the results to the dictionary
    variables = log.split('-')[1:]
    name = '-'.join(variables).replace('.log', '')
    results[name]['accuracy'] = acc
    results[name]['time'] = time

    # Fill in the anova dictionary too
    res_final[name]['time'] = time
    res_final[name]['accuracy'] = acc[-1]

df = pd.DataFrame(results)

# We should get the last accuracy to run ANOVA but to plot this is better
df


Extracting the data from 8 files


Unnamed: 0,cpu1-batch64-njobs1,cpu8-batch64-njobs1,cpu1-batch512-njobs1,cpu8-batch512-njobs1,cpu1-batch64-njobs5,cpu8-batch64-njobs5,cpu1-batch512-njobs5,cpu8-batch512-njobs5
accuracy,"[0.8599, 0.903, 0.9259, 0.9361, 0.9424, 0.9481...","[0.8637, 0.9091, 0.9246, 0.9361, 0.9429, 0.948...","[0.5593, 0.676, 0.7608, 0.7994, 0.821, 0.8393,...","[0.4959, 0.6507, 0.7305, 0.7918, 0.8258, 0.849...","[0.8599, 0.8986, 0.9168, 0.9307, 0.9397, 0.946...","[0.888, 0.9217, 0.9337, 0.9402, 0.9459, 0.9498...","[0.4623, 0.6467, 0.7415, 0.7884, 0.8222, 0.846...","[0.57, 0.68, 0.7516, 0.8102, 0.8397, 0.8587, 0..."
time,494.061,508.238,178.125,89.6031,552.378,905.821,187.054,98.4803


# Extract the CPU and IO wait from the pickle files

In this case we need the slave folders (we will check that the folder contains the word slave to analyze the files)
and we will extract the pickle files from where we will get the CPU load and the IO wait percentage

There is a thing to take in mind. The experiment runs for a default of 5 minutes, so if the experiment lasted
less than 300 seconds, which we can get from the results dictionary from before, we should trim the vectors taking
into account that each measurement is taken every 2 seconds so it just comprises that experiment and doesn't
introduce noise from the next

In [37]:


root_folder_slaves = './experiments/'

dirs = os.listdir(root_folder_slaves)
slave_dirs = [f for f in dirs if 'slave' in f]


loads = defaultdict(dict)
# The shape of the results is
# name of the file:
#     - slave 1:
#         - cpu: list
#         - iowait: list
#     - slave 2:
#         ...

# Do the same in all the folders
for folder in slave_dirs:
    print('Processing files for the', folder)
    # get just the pickle files
    files = glob.glob(os.path.join(root_folder_slaves, folder, '*.pkl'))

    # Extract the dictionary from each file
    for f in files:

        # Get the name of the experiment to index the other dict
        variables = f.split('-')[1:]
        exp_name = '-'.join(variables).replace('.pkl', '')
        # print(exp_name)

        with open(f, 'rb') as pickle_file:
            metrics = pickle.load(pickle_file)

        # Now we need to check to just get the metrics concerning the experiment
        # And not the following. We leave a margin of 10 seconds or 5 list positions just in case
        # to not cut too tightly
        exp_length = results[exp_name]['time']
        if exp_length < 300:
            max_list_length = math.ceil(exp_length/2) + 5
            print(f'Exp length was {exp_length}, so max list positions are {max_list_length}')
            if max_list_length < len(metrics['cpu']):
                loads[exp_name][folder] = dict()
                loads[exp_name][folder]['cpu'] = metrics['cpu'][:max_list_length]
                loads[exp_name][folder]['iowait'] = metrics['iowait'][:max_list_length]
                continue
        loads[exp_name][folder] = metrics


Processing files for the slave1
Exp length was 178.124531698, so max list positions are 95
Exp length was 89.60311073, so max list positions are 50
Exp length was 187.05435665, so max list positions are 99
Exp length was 98.48028244, so max list positions are 55
Processing files for the slave2
Exp length was 178.124531698, so max list positions are 95
Exp length was 89.60311073, so max list positions are 50
Exp length was 187.05435665, so max list positions are 99
Exp length was 98.48028244, so max list positions are 55
Processing files for the slave3
Exp length was 178.124531698, so max list positions are 95
Exp length was 89.60311073, so max list positions are 50
Exp length was 187.05435665, so max list positions are 99
Exp length was 98.48028244, so max list positions are 55


## Plot the loads for each of the slaves for each of the experiments

In some experiments some of the slaves have zero load cause they are not used, in those cases
we should filter out the lists whose average is too close to zero. In other cases, we can just average the
load among all of the slaves.

In [43]:
results.items()


dict_items([('cpu1-batch64-njobs1', {'accuracy': [0.8599, 0.903, 0.9259, 0.9361, 0.9424, 0.9481, 0.9511, 0.9548, 0.9572, 0.9588], 'time': 494.060797193}), ('cpu8-batch64-njobs1', {'accuracy': [0.8637, 0.9091, 0.9246, 0.9361, 0.9429, 0.9484, 0.9524, 0.955, 0.9578, 0.9601], 'time': 508.23787919}), ('cpu1-batch512-njobs1', {'accuracy': [0.5593, 0.676, 0.7608, 0.7994, 0.821, 0.8393, 0.8569, 0.8681, 0.8772, 0.8843], 'time': 178.124531698}), ('cpu8-batch512-njobs1', {'accuracy': [0.4959, 0.6507, 0.7305, 0.7918, 0.8258, 0.8494, 0.864, 0.8735, 0.884, 0.8903], 'time': 89.60311073}), ('cpu1-batch64-njobs5', {'accuracy': [0.8599, 0.8986, 0.9168, 0.9307, 0.9397, 0.9466, 0.9517, 0.9561, 0.9584, 0.9612, 0.9648], 'time': 552.377511198}), ('cpu8-batch64-njobs5', {'accuracy': [0.888, 0.9217, 0.9337, 0.9402, 0.9459, 0.9498, 0.9537, 0.9564, 0.9577, 0.9596], 'time': 905.821335593}), ('cpu1-batch512-njobs5', {'accuracy': [0.4623, 0.6467, 0.7415, 0.7884, 0.8222, 0.8468, 0.8634, 0.8751, 0.8839, 0.8908], 'tim

## Run the ANOVA test with all of the results


In [46]:



# Get the last accuracy instead of the list for the accuracy of each of the experiments

df = pd.DataFrame(res_final)

df

Unnamed: 0,cpu1-batch64-njobs1,cpu8-batch64-njobs1,cpu1-batch512-njobs1,cpu8-batch512-njobs1,cpu1-batch64-njobs5,cpu8-batch64-njobs5,cpu1-batch512-njobs5,cpu8-batch512-njobs5
time,494.060797,508.237879,178.124532,89.603111,552.377511,905.821336,187.054357,98.480282
accuracy,0.9588,0.9601,0.8843,0.8903,0.9648,0.9596,0.8908,0.8934
