# In [2]:
import requests
import pandas as pd
import matplotlib.pyplot as plt
from bs4 import BeautifulSoup
import json
import datetime
from sklearn.metrics import mean_squared_error
import numpy as np

# ------------------ EDIT ONLY HERE ------------------------
# Repository branch whose validation reports are read; 'main' is the alternative.
branch = 'modelServer'
#branch = 'main'
# Report date in YYYY-MM-DD form. Replace with anything from 2024-07-26.
date = '2024-07-30'
# ----------------------------------------------------------

# URL template of the GitHub tree page for one validation date;
# the {branch} and {date} placeholders are filled in with str.format.
base_url = (
    'https://github.com/sustainable-computing-io/kepler-metal-ci'
    '/tree/{branch}/docs/validation/{date}/'
)

# Series label -> name of the JSON metric file downloaded for that series.
# NOTE(review): 'vm' points at the node_* file and 'metal' at the vm_* file;
# this looks crossed but may be intentional -- confirm with the report layout.
json_files = dict(
    vm='kepler_node_package_joules_total--dynamic.json',
    metal='kepler_vm_package_joules_total--dynamic.json',
)

# Python Code to replace MAPE
def percentage_error(actual, predicted):
    """Return the element-wise relative error of *predicted* against *actual*.

    Where ``actual`` is non-zero the result is
    ``(actual - predicted) / actual``.  Where ``actual`` is zero that ratio
    is undefined, so the prediction is scaled by the mean of all ``actual``
    values instead: ``predicted / mean(actual)``.

    Args:
        actual: Array-like of reference values (any shape).
        predicted: Array-like of predictions with the same shape as
            ``actual``.

    Returns:
        A float ``numpy.ndarray`` with the shape of ``actual``.  If every
        ``actual`` value is zero the fallback divides by a zero mean, so
        the affected entries are ``inf``/``nan``.
    """
    actual = np.asarray(actual, dtype=float)
    predicted = np.asarray(predicted, dtype=float)

    nonzero = actual != 0
    res = np.empty(actual.shape)
    res[nonzero] = (actual[nonzero] - predicted[nonzero]) / actual[nonzero]

    # The mean is only needed (and only computed, once) when some reference
    # value is zero; this also avoids taking the mean of an empty array.
    if not nonzero.all():
        res[~nonzero] = predicted[~nonzero] / np.mean(actual)
    return res

def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error of *y_pred*, in percent.

    Both inputs are converted to NumPy arrays, the element-wise relative
    error is obtained from ``percentage_error`` (which substitutes the mean
    of the reference values as denominator wherever a reference value is
    zero), and the mean of its absolute values is scaled by 100.

    Args:
        y_true: Array-like of reference values.
        y_pred: Array-like of predicted values, aligned with ``y_true``.

    Returns:
        The mean absolute percentage error as a float percentage.
    """
    truth = np.asarray(y_true)
    forecast = np.asarray(y_pred)
    abs_errors = np.abs(percentage_error(truth, forecast))
    return np.mean(abs_errors) * 100

# Seconds to wait on any single HTTP request before giving up, so a stalled
# connection cannot hang the notebook indefinitely.
REQUEST_TIMEOUT = 30


def _http_get(url):
    """GET *url* with a timeout; return the response, or None if the request itself failed."""
    try:
        return requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as err:
        print(f"Request to {url} failed: {err}")
        return None


def _list_report_folders(page_html):
    """Return the directory names listed in a GitHub tree page, or None if the listing is absent.

    The page carries its file listing as JSON inside a
    ``<script type="application/json" data-target="react-app.embeddedData">``
    tag; directories are the items whose ``contentType`` is ``'directory'``.
    """
    soup = BeautifulSoup(page_html, 'html.parser')
    script_tag = soup.find('script', {'type': "application/json", 'data-target': "react-app.embeddedData"})
    # .string is None when the tag is empty or has several children.
    if script_tag is None or not script_tag.string:
        return None
    json_data = json.loads(script_tag.string)
    items = json_data.get('payload', {}).get('tree', {}).get('items', [])
    return [item['name'] for item in items if item.get('contentType') == 'directory']


def _fetch_series(json_url):
    """Download one metric JSON file as a Timestamp/Watts DataFrame, or None if it could not be fetched."""
    reply = _http_get(json_url)
    if reply is None or reply.status_code != 200:
        return None
    data = reply.json()
    # fromtimestamp() without tz yields naive datetimes in the local timezone.
    # NOTE(review): assumes 'timestamps' holds epoch seconds -- confirm.
    timestamps = [datetime.datetime.fromtimestamp(ts) for ts in data['timestamps']]
    return pd.DataFrame({'Timestamp': timestamps, 'Watts': data['values']})


def _plot_report(folder_name, df_metal, df_vm):
    """Plot the metal and VM series on twin y-axes, annotated with their MSE and MAPE."""
    # Align both series on identical timestamps before comparing them.
    df_merged = pd.merge(df_metal, df_vm, on='Timestamp', how='inner', suffixes=('_metal', '_vm'))

    if df_merged.empty:
        # mean_squared_error rejects empty input, so report instead of crashing.
        print(f"No overlapping timestamps in folder {folder_name}; MSE/MAPE not computed")
        textstr = 'MSE: n/a\nMAPE: n/a'
    else:
        mse = mean_squared_error(df_merged['Watts_metal'], df_merged['Watts_vm'])
        mape = mean_absolute_percentage_error(df_merged['Watts_metal'], df_merged['Watts_vm'])
        textstr = f'MSE: {mse:.2f}\nMAPE: {mape:.2f}%'

    fig, ax1 = plt.subplots(figsize=(14, 7))

    # Metal data on the left y-axis (ax1)
    ax1.plot(df_metal['Timestamp'], df_metal['Watts'], marker='x', color='#024abf', label='metal data')
    ax1.set_xlabel('Timestamp')
    ax1.set_ylabel('Metal [Watts]', color='#024abf')
    ax1.tick_params(axis='y')

    # VM data on a second y-axis (ax2) sharing the same x-axis
    ax2 = ax1.twinx()
    ax2.plot(df_vm['Timestamp'], df_vm['Watts'], marker='o', color='#ff742e', label='vm data')
    ax2.set_ylabel('VM [Watts]', color='#ff742e')
    ax2.tick_params(axis='y')

    # Title and grid.  twinx() hides the twin's x-axis, so the tick-label
    # rotation and the vertical grid lines must be set on ax1 to be visible.
    ax2.set_title(f'Kepler Metal & VM / {folder_name} / Branch: {branch} / Date: {date}')
    ax1.tick_params(axis='x', labelrotation=45)
    ax1.grid(True, axis='x')
    ax2.grid(True)

    # Combine legends from both axes into a single box
    lines, labels = ax1.get_legend_handles_labels()
    lines2, labels2 = ax2.get_legend_handles_labels()
    ax2.legend(lines + lines2, labels + labels2, loc='upper left')

    ax1.text(0.98, 0.97, textstr, transform=ax1.transAxes, fontsize=14,
             verticalalignment='top', horizontalalignment='right',
             bbox=dict(facecolor='white', alpha=0.5))

    plt.show()


# Fetch the HTML content for the given date
date_url = base_url.format(branch=branch, date=date)
response = _http_get(date_url)

if response is not None and response.status_code == 200:
    folder_names = _list_report_folders(response.text)
    if folder_names is None:
        print(f"No embedded directory listing found for date {date}")
        folder_names = []

    # Process each folder (report ID)
    for folder_name in folder_names:
        # Base URL for the current folder's JSON files (using raw.githubusercontent.com)
        folder_url = f'https://raw.githubusercontent.com/sustainable-computing-io/kepler-metal-ci/{branch}/docs/validation/{date}/{folder_name}/'

        # Series label ('metal' / 'vm') -> DataFrame, for the files that were fetched
        frames = {}
        for data_type, json_file in json_files.items():
            frame = _fetch_series(folder_url + json_file)
            if frame is None:
                print(f"Failed to fetch data for {json_file} in folder {folder_name}")
            else:
                frames[data_type] = frame

        # Plot only if both metal and VM data were fetched successfully
        if 'metal' in frames and 'vm' in frames:
            _plot_report(folder_name, frames['metal'], frames['vm'])

else:
    print(f"Failed to fetch data for date {date}")

# Recorded cell output: ModuleNotFoundError: No module named 'requests'