**Copyright © 2018 University of Stirling**

# Processing Raw Output from Experiments on 2 Objectives

The experiments in jMetal are configured to output the Pareto fronts from each run (30 in our experiments) to different subfolders, saved as `FUN.tsv` and `VAR.tsv` files. The re-evaluation data is saved in the same folders as `FUN-reevaled.tsv`. We refer to these files as the raw output from the optimisation.

Minimal processing is done in this script. Each data point from the raw output is parsed and stored as a row in a Pandas dataframe object. There is one dataframe per experiment (4 experiments) where an experiment refers to a configuration of the optimiser.

Each dataframe is saved in the Pickle file format (`pkl`) to the specified output directory. This script is only used to process the raw output and is not needed if the data are already provided in the Pickle file format.

The accompanying script `plotting-two-objectives.ipynb` loads these dataframes from the output directory to plot the results.

In [0]:
from __future__ import absolute_import, division, print_function

import numpy as np
import pandas as pd
import os

Here we configure the relative path and filename information for the data to be processed.

In [0]:
# Template for the folder that holds the raw files of a single run;
# {experiment} and {runid} are substituted when the data is loaded.
DATA_PATH = "/content/{experiment}.{runid}/"
FUNC_FILE_NAME = "FUN.tsv"
VAR_FILE_NAME = "VAR.tsv"
REEVAL_FILE_NAME = "FUN-reevaled.tsv"

# When False, the re-evaluation data is ignored and only the output of the
# original runs of the experiment is used. Note that plotting of re-evaluation
# data may be turned off independently in the plotting script even if it is
# included here.
USING_REEVAL = True

# Number of times the experiment is run.
# Run ids 0 to NUM_RUNS-1 replace {runid} in the data path.
NUM_RUNS = 30

# Number of variables.
NUM_VARIABLES = 10

# Separator between the values on one line of the raw files.
COL_SEPERATOR = ' '

# Output folder (will be created if it does not exist).
OUTPUT_PATH = "/content/antibiotic"

# One entry per experiment; each entry yields one Pandas dataframe that is
# written to the file named under "save-to".
# The key replaces {experiment} in the data path.
EXPERIMENTS = {
    "condor2d/results.totalantibiotic.constrained": {
        "save-to": "totalantibiotic-constrained.pkl",
        "objectives": ("failurerate", "totalantibiotic"),
    },
    "condor2d/results.totalantibiotic.unconstrained": {
        "save-to": "totalantibiotic-unconstrained.pkl",
        "objectives": ("failurerate", "totalantibiotic"),
    },
    "condor2d/results.maximumconcentration.constrained": {
        "save-to": "maximumconcentration-constrained.pkl",
        "objectives": ("failurerate", "maximumconcentration"),
    },
    "condor2d/results.maximumconcentration.unconstrained": {
        "save-to": "maximumconcentration-unconstrained.pkl",
        "objectives": ("failurerate", "maximumconcentration"),
    },
}

The following helper functions are used for processing the raw output from experiments.

In [0]:
# Converts one line of the raw FUN file into a list of float64 values,
# e.g. '0.0 150.0 \n' becomes [0.0, 150.0]
def parse_function_file_line(fun_line):
    # Trim the line ending and surrounding spaces first so that splitting on
    # the column separator does not leave an empty trailing token.
    tokens = fun_line.strip("\n\r ").split(COL_SEPERATOR)
    return list(map(np.float64, tokens))

# Converts one line of the raw VAR file into a list of int32 values,
# e.g. '58 36 18 29 9 0 0 0 0 0 \n' becomes [58 36 18 29 9 0 0 0 0 0]
def parse_variable_file_line(var_line):
    values = []
    # Line ending and surrounding spaces are removed before splitting.
    for token in var_line.strip("\n\r ").split(COL_SEPERATOR):
        values.append(np.int32(token))
    return values

# Converts one line of the raw FUN-reevaled file into a single float64,
# e.g. '0.011360000000000037 \n' becomes 0.011360000000000037
def parse_reevaluation_file_line(reeval_line):
    # Only the line ending and surrounding spaces are removed; the rest of the
    # line must be one number.
    cleaned = reeval_line.strip("\n\r ")
    return np.float64(cleaned)

This section loads the raw data and stores all data points in Pandas dataframes.

In [0]:
for experiment in EXPERIMENTS:

    # The objective names are used as column headings in the dataframe.
    objectives = EXPERIMENTS[experiment]["objectives"]

    experiment_path = DATA_PATH.replace("{experiment}", experiment)

    # Column layout of one row, paired with the dtype each column is stored
    # as: the run id, the variables x0..x(NUM_VARIABLES-1), the objectives
    # and, when enabled, the re-evaluated failure rate.
    column_dtypes = [('runid', np.int32)]
    column_dtypes += [('x' + str(i), np.int32) for i in range(NUM_VARIABLES)]
    column_dtypes += [(objective, np.float64) for objective in objectives]
    if USING_REEVAL:
        column_dtypes.append(("failurerate_reeval", np.float64))
    columns = [name for name, _ in column_dtypes]

    # Rows are gathered in a plain list and turned into a dataframe once at
    # the end; growing a dataframe one row at a time re-allocates it on every
    # insertion.
    rows = []

    # Loops over each run of this experiment (0 to NUM_RUNS-1)
    for runid in range(NUM_RUNS):

        # Using the helper functions above, processes the raw output from the
        # tsv files of this run into rows. zip stops at the shortest file.
        run_path = experiment_path.replace("{runid}", str(runid))
        run_rows = []
        if USING_REEVAL:
            with open(run_path + FUNC_FILE_NAME) as fun_file, \
                 open(run_path + VAR_FILE_NAME) as var_file, \
                 open(run_path + REEVAL_FILE_NAME) as reeval_file:
                for fun_line, var_line, reeval_line in zip(
                        fun_file, var_file, reeval_file):
                    func_values = parse_function_file_line(fun_line)
                    var_values = parse_variable_file_line(var_line)
                    reeval_value = parse_reevaluation_file_line(reeval_line)
                    run_rows.append(
                        [runid] + var_values + func_values + [reeval_value])
        else:
            with open(run_path + FUNC_FILE_NAME) as fun_file, \
                 open(run_path + VAR_FILE_NAME) as var_file:
                for fun_line, var_line in zip(fun_file, var_file):
                    func_values = parse_function_file_line(fun_line)
                    var_values = parse_variable_file_line(var_line)
                    run_rows.append([runid] + var_values + func_values)

        # A row whose width differs from the column layout would misalign the
        # values, so it is rejected here rather than padded or truncated.
        for row in run_rows:
            if len(row) != len(columns):
                raise ValueError(
                    'Run {0} of experiment "{1}" has a row with {2} values; '
                    'expected {3}'.format(
                        runid, experiment, len(row), len(columns)))
        rows.extend(run_rows)

    # Builds the dataframe in one step and applies the column dtypes
    # explicitly so they do not depend on type inference.
    df = pd.DataFrame(rows, columns=columns).astype(dict(column_dtypes))

    # stores the data frame
    EXPERIMENTS[experiment]['dataframe'] = df

    # prints the number of solutions in the experiment
    print('Loaded {0} candidate solutions from experiment "{1}"'.format(
        len(df), experiment))

The data is saved in pickle format to the designated output directory:

In [0]:
# Creates the output folder when it is missing. makedirs also creates any
# missing parent folders, which a plain mkdir would fail on.
if not os.path.isdir(OUTPUT_PATH):
    os.makedirs(OUTPUT_PATH)

# Writes the dataframe of every experiment to its own pickle file inside the
# output folder, using the file name configured under "save-to".
for experiment in EXPERIMENTS:
    save_to = EXPERIMENTS[experiment]["save-to"]
    df = EXPERIMENTS[experiment]['dataframe']
    file_name = "{0}/{1}".format(OUTPUT_PATH, save_to)
    df.to_pickle(file_name)
    print('Saved pickle file "{0}"'.format(file_name))