In [None]:
import pandas as pd
import numpy as np
import json

In [None]:
# Input location: the file name of the exported query result and the folder
# (with trailing slash) it lives in. The two are joined by plain string
# concatenation when the file is opened.
filename = "stressng_kepler_query.json"
data_folder = "data/"

In [None]:
# Load the exported query result. The context manager guarantees the file
# handle is closed even if reading or JSON decoding fails, and the encoding
# is pinned to UTF-8 so the result does not depend on the platform default.
with open(data_folder + filename, encoding="utf-8") as file:
    data = json.load(file)

In [None]:
# How to flatten each supported metric: which series labels to keep (in
# order) and the column names of the resulting frame. Metrics that are not
# listed here are ignored. Note that for kepler_node_core_joules_total the
# 'package' label is stored in the column named 'cpu'.
METRIC_SPECS = {
    'kepler_container_joules_total': (
        ('container_id', 'instance'),
        ['container_id', 'node', 'time', 'value'],
    ),
    'node_cpu_seconds_total': (
        ('instance', 'cpu'),
        ['node', 'cpu', 'time', 'value'],
    ),
    'kepler_node_core_joules_total': (
        ('instance', 'package'),
        ['node', 'cpu', 'time', 'value'],
    ),
}

# Maps metric name -> DataFrame with one row per (series, sample).
result = {}

for metric, series_list in data.items():
    spec = METRIC_SPECS.get(metric)
    if spec is None:
        continue
    label_keys, columns = spec

    rows = []
    for series in series_list:
        labels = series['metric']
        # Only non-idle CPU modes are kept for node_cpu_seconds_total.
        if metric == 'node_cpu_seconds_total' and labels['mode'] == "idle":
            continue
        label_values = [labels[key] for key in label_keys]
        # Each sample is indexed as [time, value].
        rows.extend(label_values + [sample[0], sample[1]] for sample in series['values'])

    # Build the frame once from all rows. Unlike concatenating per-series
    # frames, this also works when a metric has no usable series: the result
    # is an empty frame with the expected columns instead of a ValueError.
    frame = pd.DataFrame(rows, columns=columns)
    frame['value'] = frame['value'].astype(float)
    result[metric] = frame

In [None]:
# Inspect the parsed container joules samples
# (columns: container_id, node, time, value).
result['kepler_container_joules_total']

In [None]:
# Inspect the parsed non-idle CPU seconds samples
# (columns: node, cpu, time, value).
result['node_cpu_seconds_total']

In [None]:
# Sum the container joules of all containers per node and timestamp,
# returning 'node' and 'time' as ordinary columns next to the summed 'value'.
energy = result['kepler_container_joules_total']
energy_agg = (
    energy
    .groupby(['node', 'time'])['value']
    .sum()
    .reset_index()
)

In [None]:
# Show the container joules summed per (node, time).
energy_agg

In [None]:
# Sum the non-idle CPU seconds of all parsed series per node and timestamp,
# returning 'node' and 'time' as ordinary columns next to the summed 'value'.
cpu_util = result['node_cpu_seconds_total']
cpu_util_agg = (
    cpu_util
    .groupby(['node', 'time'])['value']
    .sum()
    .reset_index()
)

In [None]:
# Show the non-idle CPU seconds summed per (node, time).
cpu_util_agg

In [None]:
# Sum the node core joules of all parsed series per node and timestamp.
# The name node_power is first bound to the raw frame and then rebound to
# the aggregate, with 'node' and 'time' as ordinary columns.
node_power = result['kepler_node_core_joules_total']
node_power = (
    node_power
    .groupby(['node', 'time'])['value']
    .sum()
    .reset_index()
)

In [None]:
# cpu_util_agg carries the 'instance' label of node_cpu_seconds_total, while
# the two Kepler frames carry the 'instance' label of the Kepler metrics.
# Stamping both Kepler frames with one fixed label gives the later merges on
# ['time', 'node'] a common key.
# NOTE(review): this presumably assumes a single-node capture; with several
# nodes every row would collapse onto the same label -- confirm.
for kepler_frame in (node_power, energy_agg):
    kepler_frame['node'] = '172.19.0.2:9100'

In [None]:
# Show the node core joules summed per (node, time), with the 'node' column
# now holding the fixed label assigned in the previous cell.
node_power

In [None]:
# Join CPU seconds with container joules on (time, node). Both inputs have a
# 'value' column, so the merge yields 'value_x' / 'value_y', which are
# renamed to 'util' and 'energy'. Rows are then ordered by time.
final = (
    pd.merge(cpu_util_agg, energy_agg, on=['time', 'node'])
    .rename(columns={'value_x': 'util', 'value_y': 'energy'})
    .sort_values('time')
)

# Attach the node core joules as the 'power' column.
final = (
    pd.merge(final, node_power, on=['time', 'node'])
    .rename(columns={'value': 'power'})
)

In [None]:
# Show the merged frame: one row per (time, node) with the 'util', 'energy'
# and 'power' columns.
final

In [None]:
import model

In [None]:
# Build one utilisation -> power model per node label found in `final`.
# NOTE(review): a single UtilisationPowerModel instance is shared by all
# nodes; whether get_model returns an independent object per call cannot be
# seen from here -- confirm.
m = model.UtilisationPowerModel()
node_power_models = {}
for node in final['node'].unique():
    # Work on an explicit copy so the column assignments below modify an
    # independent frame instead of a slice of `final`
    # (avoids SettingWithCopyWarning).
    node_df = final[final['node'] == node].copy()

    # Replace each reading by its difference to the previous row. The first
    # row has no predecessor, so its NaN deltas are filled with 0.
    delta_cols = ['util', 'energy', 'power']
    node_df[delta_cols] = node_df[delta_cols].diff()
    node_df = node_df.fillna(0)

    # Keep only rows whose utilisation actually changed; this also drops the
    # zero-filled first row.
    node_df = node_df[node_df['util'] != 0]
    if node_df.empty:
        # Nothing to normalise or fit; taking the maximum of an empty column
        # would otherwise fail with an unhelpful error.
        print(f"Skipping {node}: no samples with a non-zero utilisation delta")
        continue

    # Scale utilisation by the largest observed delta for this node.
    max_util = node_df['util'].max()
    node_df = node_df.assign(util=node_df['util'] / max_util)
    node_power_models[node] = m.get_model(node_df[['util', 'power']])

In [None]:
# Show what model.get_model returned for each node, keyed by node label.
node_power_models