In [3]:

!pip install urllib3 scikit-learn numpy pandas xgboost matplotlib seaborn scipy


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.1.2[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3.11 -m pip install --upgrade pip[0m


In [6]:
import urllib.request
import json
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_percentage_error


cpu_time_url = "https://raw.githubusercontent.com/sustainable-computing-io/kepler-metal-ci/refs/heads/main/docs/validation/2024-09-25/validator-v0.7.11-244-g99b1f94a/artifacts/vm-kepler_process_bpf_cpu_time_ms_total--absolute.json"
# joules_url = "https://raw.githubusercontent.com/sustainable-computing-io/kepler-metal-ci/refs/heads/main/docs/validation/2024-09-25/validator-v0.7.11-244-g99b1f94a/artifacts/vm-kepler_node_core_joules_total--absolute.json"
joules_url = "https://raw.githubusercontent.com/sustainable-computing-io/kepler-metal-ci/refs/heads/main/docs/validation/2024-09-25/validator-v0.7.11-244-g99b1f94a/artifacts/metal-kepler_vm_package_joules_total--absolute.json"

with urllib.request.urlopen(cpu_time_url) as url:
    cpu_data = json.load(url)

with urllib.request.urlopen(joules_url) as url:
    joules_data = json.load(url)

cpu_timestamps = cpu_data['timestamps']
cpu_values = cpu_data['values']

joules_timestamps = joules_data['timestamps']
joules_values = joules_data['values']

cpu_df = pd.DataFrame({
    'timestamps': cpu_timestamps,
    'cpu_values': cpu_values
})

joules_df = pd.DataFrame({
    'timestamps': joules_timestamps,
    'joules_values': joules_values
})

merged_df = pd.merge(cpu_df, joules_df, on='timestamps')

X = merged_df['cpu_values'].values.reshape(-1, 1)
y = merged_df['joules_values'].values

# split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

import xgboost as xgb
def print_result(model_name, y_test, y_pred):
    r2 = r2_score(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    mape = mean_absolute_percentage_error(y_test, y_pred)
    print(f'{model_name}: r2 = {r2}, mse = {mse}, mape = {mape}')

# XGBoost model
model = xgb.XGBRegressor(objective="reg:linear", random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print_result('XGB', y_test, y_pred)

# Linear Regression model
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print_result('LR', y_test, y_pred)

# Curve fitting model
from scipy.optimize import curve_fit

def logistic_model(x, a, b, c):
    return c / (1 + np.exp(-(x - b) / a))

popt, _ = curve_fit(logistic_model, X_train.flatten(), y_train)
y_pred = logistic_model(X_test.flatten(), *popt)
print_result('CurveFit', y_test, y_pred)


XGB: r2 = 0.9292401707454121, mse = 40.663559610788525, mape = 0.15093738883437527
LR: r2 = 0.9051502169892285, mse = 54.50733624090517, mape = 0.34299149711478477
CurveFit: r2 = -0.06262941603439653, mse = 610.6613746568701, mape = 1.6650724546708782


  popt, _ = curve_fit(logistic_model, X_train.flatten(), y_train)
