# Results of simulation of different fairness policies

These experiments use accumulated deficits to try to ensure that applications always receive their computed allocation of GPU time, even as new jobs arrive and old jobs finish. A deficit tracks the difference between the GPU time an application should have received and the GPU time it actually received. Jobs are allocated to GPUs in a round-based fashion, with all GPUs instructed to run jobs for a fixed interval of time in each round.

# Import statements

In [None]:
# Imports for plotting.
from matplotlib import pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.lines as mlines
import matplotlib.patches as mpatches
from matplotlib.ticker import MultipleLocator
from pylab import *
import seaborn as sns
from matplotlib import rc
# Global matplotlib/seaborn styling shared by every figure in this notebook.
# Typeset figure text with LaTeX (matplotlib's `text.usetex` setting).
rc('text',
   usetex=True)
sns.set_style('ticks')
# Font settings, passed to seaborn as a second style dictionary below.
font = {
    'font.family':'Times New Roman',
    'font.weight': 200,
    'font.size': 10,
}
sns.set_style(font)
# Custom colour cycle.
# NOTE(review): '#8EBA42' appears twice (3rd and 7th entries) -- confirm
# whether the repetition is intentional.
flatui = ['#002A5E', '#FD151B', '#8EBA42', '#348ABD', '#988ED5', '#BDB76B', '#8EBA42', '#FFB5B8']
sns.set_palette(flatui)
# Extra rc overrides (line/marker sizes, math fonts) applied together with
# the seaborn "paper" context.
paper_rc = {
    'lines.linewidth': 3,
    'lines.markersize': 10,
    'mathtext.fontset': 'custom',
    'mathtext.rm': 'Times New Roman',
    'mathtext.bf': 'Times New Roman:bold',
}
sns.set_context("paper", font_scale=2,  rc=paper_rc)
# Snapshot of the active palette; not referenced again in the visible cells.
current_palette = sns.color_palette()

In [None]:
# Other imports.
import os
import pickle
import re

In [None]:
# When True, the plotting cells read previously pickled plot data from
# fairness_with_priorities/data.pickle instead of parsing logfiles; when
# False, the logfiles are parsed and the plot data is written to that file.
LOAD_FROM_PICKLED_DATA = False

In [None]:
# Exactly one of the two handles is opened: the pickle cache is read when
# LOAD_FROM_PICKLED_DATA is set, and (re)written otherwise. The other handle
# stays None so later cells can tell which mode is active.
reading_f, writing_f = None, None
if not LOAD_FROM_PICKLED_DATA:
    writing_f = open("fairness_with_priorities/data.pickle", 'wb')
else:
    reading_f = open("fairness_with_priorities/data.pickle", 'rb')

# Get logfile paths

In [None]:
def get_logfile_paths_helper(directory_name):
    """Return the path of every file found anywhere below directory_name.

    The tree is traversed with os.walk; each result is the containing
    directory joined with the file name, in traversal order.
    """
    collected = []
    for parent, _subdirs, names in os.walk(directory_name):
        for name in names:
            collected.append(os.path.join(parent, name))
    return collected

def get_logfile_paths(directory_name):
    """Find simulation logfiles under directory_name and parse their settings.

    Only paths matching
    ``.../v100=<int>.p100=<int>.k80=<int>/<policy>/seed=<int>/lambda=<float>.log``
    are kept; every other file is skipped.

    Args:
        directory_name: Root directory to search recursively.

    Returns:
        A list of ``(v100s, p100s, k80s, policy, seed, lambda, logfile_path)``
        tuples, in the order the paths were discovered.
    """
    # Both halves of the pattern must be raw strings: in a plain literal,
    # `\d` and `\.` are invalid escape sequences that newer Python versions
    # warn about (and will eventually reject).
    pattern = re.compile(
        r'.*v100=(\d+)\.p100=(\d+)\.k80=(\d+)/(.*)/seed=(\d+)/'
        r'lambda=(\d+\.\d+)\.log')
    logfile_paths = []
    for logfile_path in get_logfile_paths_helper(directory_name):
        m = pattern.match(logfile_path)
        if m is None:
            continue
        v100s = int(m.group(1))
        p100s = int(m.group(2))
        k80s = int(m.group(3))
        policy = m.group(4)
        seed = int(m.group(5))
        lam = float(m.group(6))
        logfile_paths.append((v100s, p100s, k80s, policy, seed,
                              lam, logfile_path))
    return logfile_paths

In [None]:
# Legend text for each scheduling-policy identifier.
labels = {
    "fifo": "FIFO",
    "fifo_perf": "FIFO+perf",
    "fifo_packed": "FIFO+perf+packed",
    "max_min_fairness": "LAS",
    "max_min_fairness_perf": "LAS+perf",
    "max_min_fairness_packed": "LAS+perf+packed",
}

# Plotting functions

In [None]:
def prune(logfile_paths, v100s, p100s, k80s, policy, seed=None):
    """Select the logfile entries for one cluster configuration and policy.

    Each entry of logfile_paths is indexed as
    (v100s, p100s, k80s, policy, seed, lambda, path).

    Returns a sorted list of (lambda, path, seed) tuples when seed is None;
    otherwise only entries with that seed are kept and the result is a
    sorted list of (lambda, path) tuples.
    """
    def same_config(entry):
        return (entry[0] == v100s and entry[1] == p100s and
                entry[2] == k80s and entry[3] == policy)

    selected = []
    for entry in logfile_paths:
        if not same_config(entry):
            continue
        if seed is None:
            selected.append((entry[5], entry[6], entry[4]))
        elif entry[4] == seed:
            selected.append((entry[5], entry[6]))
    selected.sort()
    return selected

In [None]:
def plot_metric_vs_inverse_lambda(logfile_paths,
                                  v100s, p100s, k80s,
                                  policies, metric_fns,
                                  metric_fn_labels,
                                  metric_label,
                                  xmin=0,
                                  xmax=None,
                                  ymin=0,
                                  ymax=None,
                                  output_filename=None):
    """Plot one or more logfile metrics against input job rate.

    One line is drawn per (policy, metric) pair; seaborn aggregates the
    points that share an input job rate (e.g. different seeds) and shows
    their standard deviation.

    The plot data comes from one of two places, chosen by the module-level
    handles `reading_f` / `writing_f`:
      * `reading_f` set: the next pickled record is loaded from it. Records
        are consumed sequentially, so calls must be made in the same order
        as when the file was written.
      * otherwise: the data is computed from the logfiles and appended to
        `writing_f` as one pickled record.

    Args:
        logfile_paths: Entries as returned by get_logfile_paths().
        v100s, p100s, k80s: Cluster configuration to select.
        policies: Policy identifiers (keys of `labels`) to plot.
        metric_fns: Callables taking a logfile path and returning a number.
        metric_fn_labels: Legend suffix for each entry of metric_fns.
        metric_label: Y-axis label.
        xmin, xmax, ymin, ymax: Axis limits (None lets matplotlib choose).
        output_filename: If given, the figure is also saved to this PDF.
    """
    plt.figure(figsize=(8, 3.5))
    ax = plt.subplot2grid((1, 1), (0, 0), colspan=1)

    if reading_f is not None:
        # NOTE(review): unpickling can execute arbitrary code; only load
        # data files produced by this notebook.
        data = pickle.load(reading_f)
    else:
        data = {"input_job_rate": [], "metric": [], "seed": [],
                "policy": []}
        for policy in policies:
            # prune() sorts by ascending lambda; reversing yields ascending
            # input job rate.
            relevant_logfile_paths = list(reversed(prune(
                logfile_paths, v100s, p100s, k80s, policy)))
            # These do not depend on the metric, so build them once.
            # Presumably lambda is the mean inter-arrival time in seconds,
            # making 3600 / lambda a rate in jobs per hour -- confirm.
            input_job_rates = [3600.0 / x[0] for x in relevant_logfile_paths]
            seeds = [x[2] for x in relevant_logfile_paths]
            for metric_fn_label, metric_fn in zip(metric_fn_labels, metric_fns):
                metrics = [metric_fn(x[1]) for x in relevant_logfile_paths]
                # Use a dedicated name for the legend label rather than
                # rebinding the `policies` parameter being iterated over.
                series_label = labels[policy] + " (%s)" % metric_fn_label

                data["input_job_rate"] += input_job_rates
                data["metric"] += metrics
                data["seed"] += seeds
                data["policy"] += [series_label] * len(metrics)
        pickle.dump(data, writing_f)

    sns.lineplot(x='input_job_rate', y='metric', style='policy',
                 hue='policy',
                 data=data, ci='sd',
                 markers=True)

    ax.set_xlabel("Input job rate (jobs/hr)")
    ax.set_ylabel(metric_label)
    ax.set_xlim([xmin, xmax])
    ax.set_ylim([ymin, ymax])
    sns.despine()

    plt.legend()

    if output_filename is not None:
        with PdfPages(output_filename) as pdf:
            pdf.savefig(bbox_inches='tight')

    plt.show()

In [None]:
def average_jct_low_priority_fn(logfile_path, min_job_id=None,
                                max_job_id=None):
    """Return the average low-priority job completion time, in hours.

    Looks near the end of the logfile for the summary line
    ``Average job completion time (low priority): <seconds> seconds``.

    Args:
        logfile_path: Path to a simulation logfile.
        min_job_id: Unused; accepted so existing call sites keep working.
        max_job_id: Unused; accepted so existing call sites keep working.

    Returns:
        The reported number of seconds divided by 3600, or None if the
        summary line is not among the last five newline-separated pieces
        of the file's tail.
    """
    tail_bytes = 8192
    with open(logfile_path, 'rb') as f:
        # Only the tail is needed. Clamp the offset so that logfiles
        # shorter than the tail window do not make seek() fail with a
        # negative absolute position.
        f.seek(0, os.SEEK_END)
        f.seek(max(0, f.tell() - tail_bytes))
        # The window may begin in the middle of a multi-byte character;
        # substitute instead of raising, since only the summary lines at
        # the very end matter.
        text = f.read().decode('utf-8', errors='replace')
    for line in text.split('\n')[-5:]:
        m = re.match(r'Average job completion time \(low priority\): (\d+\.\d+) seconds', line)
        if m is not None:
            return float(m.group(1)) / 3600
    return None

In [None]:
def average_jct_high_priority_fn(logfile_path, min_job_id=None,
                                 max_job_id=None):
    """Return the average high-priority job completion time, in hours.

    Looks near the end of the logfile for the summary line
    ``Average job completion time (high priority): <seconds> seconds``.

    Args:
        logfile_path: Path to a simulation logfile.
        min_job_id: Unused; accepted so existing call sites keep working.
        max_job_id: Unused; accepted so existing call sites keep working.

    Returns:
        The reported number of seconds divided by 3600, or None if the
        summary line is not among the last five newline-separated pieces
        of the file's tail.
    """
    tail_bytes = 8192
    with open(logfile_path, 'rb') as f:
        # Only the tail is needed. Clamp the offset so that logfiles
        # shorter than the tail window do not make seek() fail with a
        # negative absolute position.
        f.seek(0, os.SEEK_END)
        f.seek(max(0, f.tell() - tail_bytes))
        # The window may begin in the middle of a multi-byte character;
        # substitute instead of raising, since only the summary lines at
        # the very end matter.
        text = f.read().decode('utf-8', errors='replace')
    for line in text.split('\n')[-5:]:
        m = re.match(r'Average job completion time \(high priority\): (\d+\.\d+) seconds', line)
        if m is not None:
            return float(m.group(1)) / 3600
    return None

# Plot average JCT versus input job rate for single-GPU traces

In [None]:
# Parsed (v100s, p100s, k80s, policy, seed, lambda, path) entries for the
# single-GPU trace logs, sorted as tuples.
# NOTE(review): the log directory is a hard-coded absolute path; adjust it
# when running on another machine.
logfile_paths = sorted(get_logfile_paths(
    "/lfs/1/deepak/gpusched/scheduler/logs/single_gpu_trace_fairness_priorities_final/"))

In [None]:
# Single-GPU traces, 64 V100s: low- vs. high-priority average JCT for the
# LAS and LAS+perf policies.
# NOTE(review): min_job_id/max_job_id are forwarded to the metric functions,
# but the average_jct_*_priority_fn definitions in this notebook never read
# them; presumably the summary line in each log already covers the intended
# job window -- confirm.
print("V100s=64, P100s=0, K80s=0; Jobs 4000-5000")
plot_metric_vs_inverse_lambda(
    logfile_paths,
    v100s=64, p100s=0, k80s=0,
    policies=["max_min_fairness", "max_min_fairness_perf"],
    metric_fns=[lambda x: average_jct_low_priority_fn(
                    x, min_job_id=4000, max_job_id=5000),
                lambda x: average_jct_high_priority_fn(
                    x, min_job_id=4000, max_job_id=5000)],
    metric_fn_labels=["low", "high"],
    metric_label="Average JCT\n(hours)",
    xmax=3.1,
    ymax=300.0,
    output_filename="fairness_with_priorities/single_gpu_mmf_v100=64_p100=0_k80=0.pdf"
)

In [None]:
# Single-GPU traces, 36 V100s + 36 P100s + 36 K80s: low- vs. high-priority
# average JCT for the LAS and LAS+perf policies.
# NOTE(review): min_job_id/max_job_id are forwarded to the metric functions,
# but the average_jct_*_priority_fn definitions in this notebook never read
# them; presumably the summary line in each log already covers the intended
# job window -- confirm.
print("V100s=36, P100s=36, K80s=36; Jobs 4000-5000")
plot_metric_vs_inverse_lambda(
    logfile_paths,
    v100s=36, p100s=36, k80s=36,
    policies=["max_min_fairness", "max_min_fairness_perf"],
    metric_fns=[lambda x: average_jct_low_priority_fn(
                    x, min_job_id=4000, max_job_id=5000),
                lambda x: average_jct_high_priority_fn(
                    x, min_job_id=4000, max_job_id=5000)],
    metric_fn_labels=["low", "high"],
    metric_label="Average JCT\n(hours)",
    xmax=3.1,
    ymax=300.0,
    output_filename="fairness_with_priorities/single_gpu_mmf_v100=36_p100=36_k80=36.pdf"
)

In [None]:
# Single-GPU traces, 48 V100s + 32 P100s + 16 K80s: low- vs. high-priority
# average JCT for the LAS and LAS+perf policies.
# NOTE(review): min_job_id/max_job_id are forwarded to the metric functions,
# but the average_jct_*_priority_fn definitions in this notebook never read
# them; presumably the summary line in each log already covers the intended
# job window -- confirm.
print("V100s=48, P100s=32, K80s=16; Jobs 4000-5000")
plot_metric_vs_inverse_lambda(
    logfile_paths,
    v100s=48, p100s=32, k80s=16,
    policies=["max_min_fairness", "max_min_fairness_perf"],
    metric_fns=[lambda x: average_jct_low_priority_fn(
                    x, min_job_id=4000, max_job_id=5000),
                lambda x: average_jct_high_priority_fn(
                    x, min_job_id=4000, max_job_id=5000)],
    metric_fn_labels=["low", "high"],
    metric_label="Average JCT\n(hours)",
    xmax=3.1,
    ymax=300.0,
    output_filename="fairness_with_priorities/single_gpu_mmf_v100=48_p100=32_k80=16.pdf"
)

# Plot average JCT versus input job rate for multi-GPU traces

In [None]:
# Parsed (v100s, p100s, k80s, policy, seed, lambda, path) entries for the
# multi-GPU trace logs, sorted as tuples.
# NOTE(review): the log directory is a hard-coded absolute path; adjust it
# when running on another machine.
multigpu_logfile_paths = sorted(get_logfile_paths(
    "/lfs/1/deepak/gpusched/scheduler/logs/multi_gpu_trace_fairness_priorities_final_v2/"))

In [None]:
# Multi-GPU traces, 64 V100s: low- vs. high-priority average JCT for the
# LAS and LAS+perf policies.
# NOTE(review): min_job_id/max_job_id are forwarded to the metric functions,
# but the average_jct_*_priority_fn definitions in this notebook never read
# them; presumably the summary line in each log already covers the intended
# job window -- confirm.
print("V100s=64, P100s=0, K80s=0; Jobs 4000-5000")
plot_metric_vs_inverse_lambda(
    multigpu_logfile_paths,
    v100s=64, p100s=0, k80s=0,
    policies=["max_min_fairness", "max_min_fairness_perf"],
    metric_fns=[lambda x: average_jct_low_priority_fn(
                    x, min_job_id=4000, max_job_id=5000),
                lambda x: average_jct_high_priority_fn(
                    x, min_job_id=4000, max_job_id=5000)],
    metric_fn_labels=["low", "high"],
    metric_label="Average JCT\n(hours)",
    xmax=1.6,
    ymax=300.0,
    output_filename="fairness_with_priorities/multi_gpu_mmf_v100=64_p100=0_k80=0.pdf"
)

In [None]:
# Multi-GPU traces, 36 V100s + 36 P100s + 36 K80s: low- vs. high-priority
# average JCT for the LAS and LAS+perf policies.
# NOTE(review): min_job_id/max_job_id are forwarded to the metric functions,
# but the average_jct_*_priority_fn definitions in this notebook never read
# them; presumably the summary line in each log already covers the intended
# job window -- confirm.
print("V100s=36, P100s=36, K80s=36; Jobs 4000-5000")
plot_metric_vs_inverse_lambda(
    multigpu_logfile_paths,
    v100s=36, p100s=36, k80s=36,
    policies=["max_min_fairness", "max_min_fairness_perf"],
    metric_fns=[lambda x: average_jct_low_priority_fn(
                    x, min_job_id=4000, max_job_id=5000),
                lambda x: average_jct_high_priority_fn(
                    x, min_job_id=4000, max_job_id=5000)],
    metric_fn_labels=["low", "high"],
    metric_label="Average JCT\n(hours)",
    xmax=1.6,
    ymax=300.0,
    output_filename="fairness_with_priorities/multi_gpu_mmf_v100=36_p100=36_k80=36.pdf"
)

In [None]:
# Multi-GPU traces, 48 V100s + 32 P100s + 16 K80s: low- vs. high-priority
# average JCT for the LAS and LAS+perf policies.
# NOTE(review): min_job_id/max_job_id are forwarded to the metric functions,
# but the average_jct_*_priority_fn definitions in this notebook never read
# them; presumably the summary line in each log already covers the intended
# job window -- confirm.
print("V100s=48, P100s=32, K80s=16; Jobs 4000-5000")
plot_metric_vs_inverse_lambda(
    multigpu_logfile_paths,
    v100s=48, p100s=32, k80s=16,
    policies=["max_min_fairness", "max_min_fairness_perf"],
    metric_fns=[lambda x: average_jct_low_priority_fn(
                    x, min_job_id=4000, max_job_id=5000),
                lambda x: average_jct_high_priority_fn(
                    x, min_job_id=4000, max_job_id=5000)],
    metric_fn_labels=["low", "high"],
    metric_label="Average JCT\n(hours)",
    xmax=1.6,
    ymax=300.0,
    output_filename="fairness_with_priorities/multi_gpu_mmf_v100=48_p100=32_k80=16.pdf"
)

In [None]:
# Close whichever pickle file handle was opened earlier in the notebook so
# that any written plot data is flushed to disk.
if reading_f is not None:
    reading_f.close()
if writing_f is not None:
    writing_f.close()