In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from IPython.display import Markdown, display

from sklearn.linear_model import Ridge
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline

from experiment_utils import Experiment, ExperimentFilter, Plotting
from experiment_utils.roco2_python import Kernel

In [None]:
def printmd(string):
    """Render ``string`` as Markdown in the notebook's cell output."""
    rendered = Markdown(string)
    display(rendered)

In [None]:
# Narrow all known experiments down to those named 'rapl-accuracy', then let
# ExperimentFilter.get_latest choose one of them (presumably the most recent
# run -- confirm against experiment_utils).
experiments = Experiment.get_experiments()
matches_experiment_name = ExperimentFilter.by_experiment_name('rapl-accuracy')
experiments = [candidate for candidate in experiments if matches_experiment_name(candidate)]
experiment = ExperimentFilter.get_latest(experiments)
# Bare expression so the selected experiment is shown as the cell output.
experiment

In [None]:
# Read the comma-separated results table stored alongside the selected experiment.
results_path = experiment.path / 'results.csv'
data = pd.read_csv(results_path, sep=',')

In [None]:
# Show the loaded results table as this cell's output.
data

In [None]:
# @arg rapl_metric The filter of the RAPL metric that will be plotted
# @arg rapl_label The label of the RAPL metric
# @arg plot_name The name of the plot
# @arg disable_fit Set to true to disable adding the fit to the plot
# @arg fit_kernel_filter Function to select which kernels are used for the fit
# @arg degree Polynomial degree of the fit (defaults to 2, i.e. quadratic)
# @arg offset_in_percent Offset, in percent, of the two extra curves drawn next to the fit (defaults to 1)
def plot(rapl_metric: str, rapl_label: str, plot_name: str, disable_fit: bool, fit_kernel_filter,
         degree: int = 2, offset_in_percent: float = 1):
    """Scatter a RAPL metric against the reference measurement, one series per kernel.

    The x value of every row is the sum of all columns of the module-level
    ``data`` frame whose name matches the regex ``'metricq'``; the y value is
    the sum of all columns whose name matches the regex ``rapl_metric``.
    Unless ``disable_fit`` is set, a polynomial Ridge regression of the given
    ``degree`` is fitted to the rows selected by ``data.loc[fit_kernel_filter]``
    and drawn together with two offset curves.

    Side effects:
      * adds (or overwrites) the columns ``kernel``, ``power_x`` and
        ``power_y`` on the module-level ``data`` frame,
      * changes the global matplotlib ``rcParams`` for figure size and fonts,
      * emits Markdown summaries through ``printmd``,
      * hands ``'<plot_name>.pdf'`` and the module-level ``experiment`` to
        ``Plotting.savefig`` and then shows the figure.

    Raises:
        ValueError: if the fit is enabled but ``fit_kernel_filter`` selects
            no rows.
    """
    power_patterns = {
        'metricq': 'Reference Measurement [W]',
        'perf-freq': 'Average CPU frequency [GHz]',
        'threads': 'Number of threads'
    }

    pattern_x = 'metricq'
    pattern_y = rapl_metric

    printmd("### Number of experiments: {}".format(len(data)))

    label_x = power_patterns[pattern_x]
    label_y = rapl_label
    power_x_columns = data.filter(regex=pattern_x)
    power_y_columns = data.filter(regex=pattern_y)
    printmd("### Using X-{} power metrics: {} [{}]" \
            .format(label_x, pattern_x, ', '.join(power_x_columns.columns.values)))
    printmd("### Using Y-{} power metrics: {} [{}]" \
            .format(label_y, pattern_y, ', '.join(power_y_columns.columns.values)))

    # NOTE: these assignments modify the shared module-level frame in place.
    # They are kept because other cells may rely on the added columns.
    data['kernel'] = data.experiment.apply(Kernel)

    # Row-wise sum over every matching column.
    data['power_x'] = power_x_columns.sum(axis=1)
    data['power_y'] = power_y_columns.sum(axis=1)

    printmd('### Kernels:')
    printmd('\n'.join([' * {}'.format(x) for x in data.kernel.unique()]))

    printmd('### Frequencies:')
    printmd('\n'.join([' * {}'.format(x) for x in data['perf-freq'].unique()]))

    printmd('### Thread counts:')
    printmd('\n'.join([' * {}'.format(x) for x in data.threads.unique()]))

    names = {
        1: 'linear',
        2: 'quadratic',
        3: 'cubic'
    }
    # Fall back to a generic name for degrees without a common one.
    model_name = names.get(degree, 'degree-{}'.format(degree))

    # Only fit when the fit is actually drawn. This is done before any
    # plotting so that a failing fit does not leave a half-drawn figure behind.
    model = None
    if not disable_fit:
        # Passing a callable to .loc makes pandas call it with the whole frame.
        model_data = data.loc[fit_kernel_filter]
        if model_data.empty:
            raise ValueError('fit_kernel_filter selected no rows to fit the {} model on'.format(model_name))
        xses = model_data.power_x.to_numpy().reshape(-1, 1)
        yses = model_data.power_y.to_numpy().reshape(-1, 1)
        model = make_pipeline(PolynomialFeatures(degree), Ridge())
        model.fit(xses, yses)

    plt.rcParams['figure.figsize'] = 10,6
    plt.rcParams['font.size'] = 13
    plt.rcParams['legend.fontsize'] = plt.rcParams['font.size']

    for kernel, k_group in data.groupby('kernel'):
        plt.plot(k_group.power_x, k_group.power_y, color=kernel.color, marker=kernel.marker, ls='', label=kernel.name)
    if model is not None:
        # Evaluate the fit across the x range spanned by the scattered points.
        x_min, x_max = plt.gca().get_xlim()
        x_plot = np.linspace(x_min, x_max, 1000)
        fit_y_vals = model.predict(x_plot.reshape(-1, 1)).ravel()
        # Both offset curves reuse the fitted y values and scale the
        # x coordinates instead.
        # NOTE(review): because x (not y) is scaled, the "- N%" curve lies
        # above the fit wherever the fit is increasing -- confirm that this
        # matches the intended meaning of the legend labels.
        x_plot_minus_percent = x_plot * (1 - offset_in_percent / 100)
        x_plot_plus_percent = x_plot * (1 + offset_in_percent / 100)
        plt.plot(x_plot_minus_percent, fit_y_vals, ls=':', color='black', label='{} fit - {}%'.format(model_name, offset_in_percent))
        plt.plot(x_plot, fit_y_vals, ls=':', color='black', label='{} fit'.format(model_name))
        plt.plot(x_plot_plus_percent, fit_y_vals, ls=':', color='black', label='{} fit + {}%'.format(model_name, offset_in_percent))
    plt.xlabel(label_x)
    plt.ylabel(label_y)
    # plot.ideal(x_max, ls='-', color='gray', label='identity')

    # plt.gca().set_xlim(xmin=200)
    # plt.gca().set_ylim(ymin=750)
    plt.legend(loc='best', numpoints=1, fontsize=plt.rcParams['font.size'], ncol=2)

    Plotting.savefig(experiment, f'{plot_name}.pdf', annotations_x_offset=0.05, annotations_y_offset=0.0125, annotations_y_spacing=0.025)

    plt.show()

In [None]:
def true_filter(_row):
    """Select everything: return the complete index of the object passed in.

    `plot` hands this callable to `data.loc[...]`, so despite the parameter
    name it receives the whole frame and the returned index labels select
    every row.
    """
    all_labels = _row.index
    return all_labels

def no_memory_filter(row):
    """Return a mask that is true where the kernel is one of a fixed set.

    The accepted kernels are idle, busywait, compute, addpd, mulpd and sqrt.
    `plot` hands this callable to `data.loc[...]`, so despite the parameter
    name it receives the whole frame and `row['kernel']` is the full
    kernel column.
    """
    kernel = row['kernel']
    accepted = (Kernel.idle, Kernel.busywait, Kernel.compute, Kernel.addpd, Kernel.mulpd, Kernel.sqrt)
    # OR the per-kernel comparisons together from left to right.
    mask = kernel == accepted[0]
    for candidate in accepted[1:]:
        mask = mask | (kernel == candidate)
    return mask

In [None]:
# Package + DRAM. The trailing '/' in the regex is important: without it the
# pattern would also match the aggregate 'sysfs-powercap-rapl' metric, which
# would then be summed together with its individual submetrics.
plot(
    rapl_metric='sysfs-powercap-rapl/',
    rapl_label='RAPL Package + DRAM [W]',
    plot_name='rapl-accuracy',
    disable_fit=False,
    fit_kernel_filter=true_filter,
)
# Package only; the fit uses just the kernels accepted by no_memory_filter.
plot(
    rapl_metric='sysfs-powercap-rapl/package-[0-9]+$',
    rapl_label='RAPL Package [W]',
    plot_name='rapl-accuracy-package',
    disable_fit=False,
    fit_kernel_filter=no_memory_filter,
)
# DRAM only; no fit is drawn for this plot.
plot(
    rapl_metric='sysfs-powercap-rapl/package-[0-9]+-dram',
    rapl_label='RAPL DRAM [W]',
    plot_name='rapl-accuracy-dram',
    disable_fit=True,
    fit_kernel_filter=true_filter,
)