**Copyright © 2018 University of Stirling**

# Processing Raw Output from Experiments on 3 Objectives (Continued)

In [0]:
from __future__ import absolute_import, division, print_function

from itertools import permutations

import numpy as np
import pandas as pd
from tqdm import tqdm

Here we configure the relative path and filename information for the data to be processed.

In [0]:
# Directory that holds the input pickle files.
DATA_PATH = "/content/antibiotic"

# Chooses which failure-rate column name is used throughout the notebook.
USING_REEVAL = True
if USING_REEVAL:
    FAILURE_RATE_COL = "failurerate_reeval"
else:
    FAILURE_RATE_COL = "failurerate"

# Experiment description: display title, input files (one per series) and the
# dataframe columns that are treated as objectives.
EXPERIMENT = dict(
    title="Total Antibiotic vs Failure Rate",
    series=("constrained.pkl", "unconstrained.pkl"),
    objectives=(
        FAILURE_RATE_COL,
        "totalantibiotic",
        "maximumconcentration",
        "actual_len",
    ),
)

# Failure-rate ceiling, in the range [0.0, 1.0].
CONSTRAINED_LIMIT = 0.01

# Per-series failure-rate ceilings, matched by position with
# EXPERIMENT["series"]; None means that series is not filtered.
# NOTE(review): as written, "constrained.pkl" gets None and
# "unconstrained.pkl" gets CONSTRAINED_LIMIT -- confirm this order is intended.
MAX_FAILURE_RATES = (None, CONSTRAINED_LIMIT)

This section loads the Pandas dataframes from the Pickle (`pkl`) files.

The first few rows of the objective columns of each dataframe are printed; other columns are omitted.

In [0]:
print(EXPERIMENT['title'])

objectives = list(EXPERIMENT['objectives'])

# One slot per series, filled in the order the files are listed.
dataframes = [None] * len(EXPERIMENT['series'])

for index, series in enumerate(EXPERIMENT['series']):

    load_from = DATA_PATH + "/" + series
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    df = pd.read_pickle(load_from)

    # Extra column holding the failure rate multiplied by 100.
    df[FAILURE_RATE_COL + "_x100"] = df[FAILURE_RATE_COL] * 100.0

    dataframes[index] = df

    # Preview only the objective columns; the remaining columns are omitted.
    print(df[objectives].head(5))
    print("...")

print()

In [0]:
# Scatter-plot every ordered pair of objective columns for each loaded series.
# Each figure is titled with its series file name so plots can be told apart.
objectives = EXPERIMENT["objectives"]
for index, series in enumerate(EXPERIMENT['series']):
    df = dataframes[index]
    for x, y in permutations(objectives, 2):
        df.plot.scatter(x=x, y=y,
                        title="{0} (raw): {1} vs {2}".format(series, y, x))

Due to sampling error, some candidate solutions produced by the constrained optimisation may have a failure rate above the constraint. Here we remove these rows from the dataframes held in memory.

In [0]:
# Drop rows whose failure rate exceeds the ceiling configured for that series.
# MAX_FAILURE_RATES is matched to `dataframes` by position; a ceiling of None
# leaves the dataframe untouched.
for index, df in enumerate(dataframes):
    max_failure_rate = MAX_FAILURE_RATES[index]
    if max_failure_rate is None:
        continue
    within_limit = df[FAILURE_RATE_COL] <= max_failure_rate
    dataframes[index] = df[within_limit]

In [0]:
# Scatter-plot every ordered pair of objective columns for each series after
# the failure-rate filter. Each figure is titled with its series file name.
objectives = EXPERIMENT["objectives"]
for index, series in enumerate(EXPERIMENT['series']):
    df = dataframes[index]
    for x, y in permutations(objectives, 2):
        df.plot.scatter(x=x, y=y,
                        title="{0} (filtered): {1} vs {2}".format(series, y, x))

Since each experiment was run multiple times, we have multiple Pareto fronts for each experiment. These functions are used to find a new Pareto front from the combined data.

In [0]:
def dominates_or_equal(a, b, objectives):
    """Return True if ``a`` dominates ``b``, or ``a`` equals ``b``.

    Smaller values are treated as better: the result is False as soon as
    ``b`` is strictly smaller than ``a`` for any key in ``objectives``, and
    True otherwise (including when ``objectives`` is empty).

    ``a`` and ``b`` may be any objects indexable by the objective keys.
    """
    return all(not (b[key] < a[key]) for key in objectives)

def find_pareto_front(df, objectives, desc=None):
    """Find the Pareto front of ``df`` with respect to ``objectives``.

    Rows are compared with ``dominates_or_equal``, so smaller objective values
    are better. If two or more rows are equal in every objective, only the
    first one encountered is kept.

    Parameters:
        df: dataframe holding one candidate solution per row.
        objectives: names of the columns to compare.
        desc: optional label for the progress bar.

    Returns:
        A single dataframe containing only the Pareto-optimal rows, in their
        original order, with their original index labels and column dtypes.
        Dominated rows are discarded and are not returned.
    """
    # Front members are keyed by row *position* rather than index label, so
    # that rows sharing an index label cannot overwrite one another.
    front = {}
    rows = tqdm(df.iterrows(), total=len(df), desc=desc)
    for position, (_, candidate) in enumerate(rows):
        beaten = []
        for member_position, member in front.items():
            if dominates_or_equal(member, candidate, objectives):
                # The candidate is no better than an existing member: drop it.
                break
            if dominates_or_equal(candidate, member, objectives):
                # Deletion is deferred; the dict cannot change while iterated.
                beaten.append(member_position)
        else:
            # No existing member dominates the candidate.
            front[position] = candidate
        for member_position in beaten:
            del front[member_position]
    # Select all surviving rows at once instead of growing a frame row by row.
    return df.iloc[list(front)]

Here we find the combined Pareto front for each experiment and store it in a new list of dataframes.
 
 This operation may take a few minutes.

In [0]:
objectives = list(EXPERIMENT['objectives'])

# One combined Pareto front per series, in the same order as `dataframes`.
combined_dataframes = [None] * len(EXPERIMENT['series'])

for index, df in enumerate(dataframes):

    progress_label = "Combining dataframe {0} of {1}".format(index+1, len(dataframes))

    combined_dataframes[index] = find_pareto_front(df, objectives,
                                                   desc=progress_label)

    new_len = len(combined_dataframes[index])

    print(" {0} pareto optimal points found".format(new_len))

In [0]:
# Scatter-plot every ordered pair of objective columns for each combined
# Pareto front. Each figure is titled with its series file name.
objectives = EXPERIMENT["objectives"]
for index, series in enumerate(EXPERIMENT['series']):
    df = combined_dataframes[index]
    for x, y in permutations(objectives, 2):
        df.plot.scatter(x=x, y=y,
                        title="{0} (combined): {1} vs {2}".format(series, y, x))

In [0]:
# Write each combined Pareto front next to its input file, with "-combined"
# inserted before the ".pkl" extension.
for index, series in enumerate(EXPERIMENT['series']):

    # Only a trailing ".pkl" on the file name is rewritten, so a ".pkl"
    # elsewhere in the path is left alone. A name without that suffix still
    # gets a distinct output name, so the input file is never overwritten.
    if series.endswith('.pkl'):
        combined_name = series[:-len('.pkl')] + '-combined.pkl'
    else:
        combined_name = series + '-combined.pkl'

    save_to = DATA_PATH + "/" + combined_name

    df = combined_dataframes[index]

    df.to_pickle(save_to)

    print('Saved pickle file "{0}"'.format(save_to))