Import necessary packages

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns

Load the Data

In [None]:
# Scala (Akka actor) ping-pong run: idle-power capture and benchmark power-metrics capture
scala_idle_data = pd.read_csv(
    '../data/20250309091438_ping_pong_akka_actor_benchmark_idle_power.csv'
)
scala_bench_data = pd.read_csv(
    '../data/20250309091449_ping_pong_akka_actor_benchmark_power_metrics.csv'
)

# Erlang ping-pong run: idle-power capture and benchmark power-metrics capture
erlang_idle_data = pd.read_csv(
    '../data/20250309091714_ping_pong_benchmark_idle_power.csv'
)
erlang_bench_data = pd.read_csv(
    '../data/20250309091725_ping_pong_benchmark_power_metrics.csv'
)

Preview the Data

In [None]:
# Heading and (rows, columns) on separate lines, then the first rows as the cell output
print("Scala Idle Data Sample:", scala_idle_data.shape, sep="\n")
scala_idle_data.head(5)

In [None]:
# Heading and (rows, columns) on separate lines, then the first rows as the cell output
print("Scala Benchmark Data Sample:", scala_bench_data.shape, sep="\n")
scala_bench_data.head(5)

In [None]:
# Heading and (rows, columns) on separate lines, then the first rows as the cell output
print("Erlang Idle Data Sample:", erlang_idle_data.shape, sep="\n")
erlang_idle_data.head(5)

In [None]:
# Heading and (rows, columns) on separate lines, then the first rows as the cell output
print("Erlang Benchmark Data Sample:", erlang_bench_data.shape, sep="\n")
erlang_bench_data.head(5)

Cleaning the Data


In [None]:
def clean_data(df):
    """
    Return a cleaned copy of a raw power-metrics frame; the input is not modified.

    The following columns are (re)written on the copy:

    * ``Timestamp`` - parsed to datetime from the first
      ``Www Mmm D HH:MM:SS YYYY`` pattern found in the raw string
      (``NaT`` when no such pattern is present or it cannot be parsed).
    * ``Time Elapsed (ms)`` - the number preceding ``ms elapsed)`` in the raw
      ``Timestamp`` string, with or without a fractional part
      (``NaN`` when absent).
    * ``CPU Temp(C)`` - converted to float. Text values have their first
      number extracted; a column that is already numeric is simply cast.
    * One ``... Energy(J)`` column per power column, computed as
      ``power * elapsed_ms / 1000``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Timestamp`` (strings), ``CPU Temp(C)`` and the four
        power columns listed in ``power_columns`` below.

    Returns
    -------
    pandas.DataFrame
        The cleaned copy.

    Raises
    ------
    KeyError
        If any of the required columns is missing.
    """
    df_copy = df.copy()

    # Keep a handle on the raw text: 'Timestamp' is overwritten below but the
    # elapsed time still has to be pulled out of the original string.
    raw_timestamp = df['Timestamp']

    # Convert Timestamp to datetime format (extract date-time pattern)
    df_copy['Timestamp'] = pd.to_datetime(
        raw_timestamp.str.extract(
            r'(\w{3} \w{3} \d{1,2} \d{2}:\d{2}:\d{2} \d{4})', expand=False
        ),
        errors='coerce'
    )

    # Extract elapsed time in milliseconds. The fractional part is optional so
    # that a value such as "(500ms elapsed)" is not silently turned into NaN.
    df_copy['Time Elapsed (ms)'] = pd.to_numeric(
        raw_timestamp.str.extract(r'\((\d+(?:\.\d+)?)ms elapsed\)', expand=False),
        errors='coerce'
    )

    # Extract numeric temperature values. read_csv may already have parsed the
    # column as numbers, in which case the .str accessor would raise.
    raw_temp = df['CPU Temp(C)']
    if pd.api.types.is_numeric_dtype(raw_temp):
        df_copy['CPU Temp(C)'] = raw_temp.astype(float)
    else:
        # expand=False yields a Series rather than a one-column DataFrame
        df_copy['CPU Temp(C)'] = raw_temp.str.extract(
            r'(\d+\.\d+|\d+)', expand=False
        ).astype(float)

    # Calculate Energy columns (Power * Time Elapsed)
    power_columns = ["CPU Core Power(W)", "GT Power(W)", "DRAM Power(W)", "(CPUs+GT+SA) Power(W)"]
    elapsed_seconds = df_copy['Time Elapsed (ms)'] / 1000  # Convert ms to seconds
    for column in power_columns:
        energy_column = column.replace('Power(W)', 'Energy(J)')
        df_copy[energy_column] = df_copy[column] * elapsed_seconds

    return df_copy

In [None]:
# Run the same cleaning step over each of the four raw frames
(
    scala_idle_data_clean,
    scala_bench_data_clean,
    erlang_idle_data_clean,
    erlang_bench_data_clean,
) = map(
    clean_data,
    (scala_idle_data, scala_bench_data, erlang_idle_data, erlang_bench_data),
)

Verify Cleaned Data

In [None]:
# Heading and (rows, columns) on separate lines, then the first rows as the cell output
print("Scala Idle Cleaned Data Sample:", scala_idle_data_clean.shape, sep="\n")
scala_idle_data_clean.head(5)


In [None]:
# Heading and (rows, columns) on separate lines, then the first rows as the cell output
print("Scala Benchmark Cleaned Data Sample:", scala_bench_data_clean.shape, sep="\n")
scala_bench_data_clean.head(5)


In [None]:
# Heading and (rows, columns) on separate lines, then the first rows as the cell output
print("Erlang Idle Cleaned Data Sample:", erlang_idle_data_clean.shape, sep="\n")
erlang_idle_data_clean.head(5)


In [None]:
# Heading and (rows, columns) on separate lines, then the first rows as the cell output
print("Erlang Benchmark Cleaned Data Sample:", erlang_bench_data_clean.shape, sep="\n")
erlang_bench_data_clean.head(5)

Handling Outliers


In [None]:
def remove_outliers(df, columns, iqr_multiplier=1.5):
    """
    Removes outliers from the specified columns using the IQR method.

    For every column in ``columns`` the fences
    ``[Q1 - iqr_multiplier * IQR, Q3 + iqr_multiplier * IQR]`` are computed
    once, on the frame as it was passed in. A row is kept only when it lies
    inside the fences of all listed columns, so the result does not depend on
    the order in which the columns are given.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is not modified.
    columns : iterable of str
        Numeric columns to screen for outliers.
    iqr_multiplier : float, default 1.5
        Width of the fences in units of the inter-quartile range.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` restricted to the rows that passed every fence,
        with the original index labels. Rows holding NaN in any screened
        column are dropped, because NaN fails both fence comparisons.

    Raises
    ------
    KeyError
        If a name in ``columns`` is not a column of ``df``.
    """
    # De-duplicate while preserving order; duplicated labels would break the
    # label alignment between the fences and the frame columns below.
    columns = list(dict.fromkeys(columns))
    if not columns:
        return df.copy()

    values = df[columns]
    q1 = values.quantile(0.25)
    q3 = values.quantile(0.75)
    iqr = q3 - q1

    lower_bound = q1 - iqr_multiplier * iqr
    upper_bound = q3 + iqr_multiplier * iqr

    # Fences are Series indexed by column name; axis='columns' aligns them
    # against the frame's columns.
    within_bounds = (
        values.ge(lower_bound, axis='columns') & values.le(upper_bound, axis='columns')
    ).all(axis=1)

    return df.loc[within_bounds].copy()

In [None]:
# Metrics that are screened for outliers in every frame
columns_of_interest = [
    "CPU Core Power(W)",
    "GT Power(W)",
    "DRAM Power(W)",
    "(CPUs+GT+SA) Power(W)",
    "Avg Num Cores Active",
    "CPU Temp(C)",
]

# Scala frames (idle, benchmark)
scala_idle_data_clean_outlier, scala_bench_data_clean_outlier = (
    remove_outliers(frame, columns_of_interest)
    for frame in (scala_idle_data_clean, scala_bench_data_clean)
)

# Erlang frames (idle, benchmark)
erlang_idle_data_clean_outlier, erlang_bench_data_clean_outlier = (
    remove_outliers(frame, columns_of_interest)
    for frame in (erlang_idle_data_clean, erlang_bench_data_clean)
)

In [None]:
# Heading and (rows, columns) on separate lines, then the first rows as the cell output
print("Scala Idle Cleaned Data After Outlier Removal: ", scala_idle_data_clean_outlier.shape, sep="\n")
scala_idle_data_clean_outlier.head(5)


In [None]:
# Heading and (rows, columns) on separate lines, then the first rows as the cell output
print("Scala Benchmark Cleaned Data After Outlier Removal: ", scala_bench_data_clean_outlier.shape, sep="\n")
scala_bench_data_clean_outlier.head(5)

In [None]:
# Heading and (rows, columns) on separate lines, then the first rows as the cell output
print("Erlang Idle Cleaned Data After Outlier Removal: ", erlang_idle_data_clean_outlier.shape, sep="\n")
erlang_idle_data_clean_outlier.head(5)

In [None]:
# Heading and (rows, columns) on separate lines, then the first rows as the cell output
print("Erlang Benchmark Cleaned Data After Outlier Removal: ", erlang_bench_data_clean_outlier.shape, sep="\n")
erlang_bench_data_clean_outlier.head(5)

Visualization

In [None]:
# def plot_graphs(idle_df, bench_df, title_prefix):
#     """
#     Creates separate figures for each metric, plotting idle (left) and benchmark (right).
#     """
#     columns_to_plot = [
#         "CPU Core Power(W)", "GT Power(W)", "DRAM Power(W)", 
#         "(CPUs+GT+SA) Power(W)", "Avg Num Cores Active", "CPU Temp(C)"
#     ]
    
#     for column in columns_to_plot:
#         fig, axes = plt.subplots(1, 2, figsize=(12, 5))  # 1 row, 2 columns

#         # Idle (Left Side)
#         axes[0].plot(idle_df["Timestamp"], idle_df[column], color='blue', linestyle='--', label="Idle")
#         axes[0].set_title(f"Idle - {column}", fontsize=12)
#         axes[0].set_xlabel("Time", fontsize=10)
#         axes[0].set_ylabel(column)
#         axes[0].legend()
#         axes[0].grid(True)

#         # Format x-axis for better readability
#         axes[0].xaxis.set_major_formatter(mdates.DateFormatter('%H:%M:%S'))
#         axes[0].tick_params(axis='x', rotation=45)
          
#         # Benchmark (Right Side)
#         axes[1].plot(bench_df["Timestamp"], bench_df[column], color='red', linestyle='-', label="Benchmark")
#         axes[1].set_title(f"Benchmark - {column}")
#         axes[1].set_xlabel("Time")
#         axes[1].set_ylabel(column)
#         axes[1].legend()
#         axes[1].grid(True)

#         # Format x-axis for better readability
#         axes[1].xaxis.set_major_formatter(mdates.DateFormatter('%H:%M:%S'))
#         axes[1].tick_params(axis='x', rotation=45)

#         # Main figure title
#         fig.suptitle(f"{title_prefix} - {column}", fontsize=14)
        
#         plt.tight_layout(rect=[0, 0, 1, 0.95])  # Adjust layout to fit title
#         plt.show()
#         plt.close(fig)


def plot_graphs(idle_df, bench_df, title_prefix, columns_to_plot=None):
    """
    Draw one figure per metric with the idle run on the left and the
    benchmark run on the right, both plotted against ``Timestamp``.

    Parameters
    ----------
    idle_df : pandas.DataFrame
        Idle-run samples; needs a ``Timestamp`` column plus every plotted metric.
    bench_df : pandas.DataFrame
        Benchmark-run samples with the same columns.
    title_prefix : str
        Prefix of each figure title; the metric name is appended to it.
    columns_to_plot : list of str, optional
        Metrics to plot. Defaults to the four power columns,
        ``Avg Num Cores Active`` and ``CPU Temp(C)``.

    Returns
    -------
    None
        Figures are shown with ``plt.show()`` and then closed.

    Notes
    -----
    Calls ``sns.set_theme``, which changes the global plotting style for
    the rest of the session.
    """
    if columns_to_plot is None:
        columns_to_plot = [
            "CPU Core Power(W)", "GT Power(W)", "DRAM Power(W)",
            "(CPUs+GT+SA) Power(W)", "Avg Num Cores Active", "CPU Temp(C)"
        ]

    # Set Seaborn theme
    sns.set_theme(style="whitegrid", palette="muted")

    # (data, legend/title label, line styling) for the left and right panels
    panels = (
        (idle_df, "Idle", {"color": "blue", "linestyle": "--"}),
        (bench_df, "Benchmark", {"color": "red"}),
    )

    for column in columns_to_plot:
        # Create side-by-side subplots
        fig, axes = plt.subplots(1, 2, figsize=(12, 5))  # 1 row, 2 columns

        for ax, (frame, label, line_style) in zip(axes, panels):
            sns.lineplot(
                data=frame, x="Timestamp", y=column, label=label,
                ax=ax, errorbar=None, **line_style
            )
            ax.set_title(f"{label} - {column}", fontsize=12)
            ax.set_xlabel("Time", fontsize=10)
            ax.set_ylabel(column, fontsize=10)
            ax.legend()
            ax.grid(True)

            # Format x-axis for better readability; a fresh formatter per axis
            # because a formatter instance is bound to the axis it is set on.
            ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M:%S'))
            ax.tick_params(axis='x', rotation=45)

        # Main figure title
        fig.suptitle(f"{title_prefix} - {column}", fontsize=14)

        # Leave room at the top for the suptitle
        fig.tight_layout(rect=[0, 0, 1, 0.95])
        plt.show()
        # Release the figure so repeated calls do not accumulate open figures
        plt.close(fig)


In [None]:
# Idle vs. benchmark figures for the Scala run
plot_graphs(
    idle_df=scala_idle_data_clean_outlier,
    bench_df=scala_bench_data_clean_outlier,
    title_prefix="Scala Benchmark - Idle vs Active",
)

# Idle vs. benchmark figures for the Erlang run
plot_graphs(
    idle_df=erlang_idle_data_clean_outlier,
    bench_df=erlang_bench_data_clean_outlier,
    title_prefix="Erlang Benchmark - Idle vs Active",
)

Compute Average Power Consumption

In [None]:
# Columns whose means are compared between the idle and benchmark runs
metrics_columns = [
    "CPU Core Power(W)",
    "GT Power(W)",
    "DRAM Power(W)",
    "(CPUs+GT+SA) Power(W)",
    "CPU Temp(C)",
    "Avg Num Cores Active",
]

In [None]:
def calculate_average_benchmark_metrics(df, metrics_columns):
    """
    Return the column-wise mean of the selected columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the samples.
    metrics_columns : list of str
        Columns to average.

    Returns
    -------
    pandas.Series
        One mean per requested column, indexed by column name.
    """
    return df[metrics_columns].mean()

# Per-metric means of the idle and benchmark runs (outlier-filtered frames)
scala_idle_avgs, scala_bench_avgs = (
    calculate_average_benchmark_metrics(frame, metrics_columns)
    for frame in (scala_idle_data_clean_outlier, scala_bench_data_clean_outlier)
)
erlang_idle_avgs, erlang_bench_avgs = (
    calculate_average_benchmark_metrics(frame, metrics_columns)
    for frame in (erlang_idle_data_clean_outlier, erlang_bench_data_clean_outlier)
)

# Benchmark minus idle, with negative differences floored at zero
scala_delta_avgs = scala_bench_avgs - scala_idle_avgs
erlang_delta_avgs = erlang_bench_avgs - erlang_idle_avgs
scala_net_avgs = np.maximum(scala_delta_avgs, 0)
erlang_net_avgs = np.maximum(erlang_delta_avgs, 0)

print("Net Average Metrics Comparison:\n")
print("Scala Benchmark:")
print(scala_net_avgs, "\n")
print("Erlang Benchmark:")
print(erlang_net_avgs)

Compute Energy Consumption

In [None]:
# Energy columns whose means are compared between the idle and benchmark runs
energy_columns = [
    "CPU Core Energy(J)",
    "GT Energy(J)",
    "DRAM Energy(J)",
    "(CPUs+GT+SA) Energy(J)",
]

In [None]:
def calculate_net_energy(df, energy_columns):
    """
    Return the mean of each energy column of a single frame.

    Despite its name, this function performs no idle-versus-benchmark
    subtraction: it only averages the given columns of ``df``. Any "net"
    value has to be formed by the caller from two such results.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the energy samples.
    energy_columns : list of str
        Columns to average.

    Returns
    -------
    pandas.Series
        One mean per requested column, indexed by column name.
    """
    return df[energy_columns].mean()

# Per-column mean energy of the idle and benchmark runs (outlier-filtered frames)
scala_idle_avg_energy, scala_bench_avg_energy = (
    calculate_average_benchmark_metrics(frame, energy_columns)
    for frame in (scala_idle_data_clean_outlier, scala_bench_data_clean_outlier)
)
erlang_idle_avg_energy, erlang_bench_avg_energy = (
    calculate_average_benchmark_metrics(frame, energy_columns)
    for frame in (erlang_idle_data_clean_outlier, erlang_bench_data_clean_outlier)
)

# Benchmark minus idle, with negative differences floored at zero
scala_delta_energy = scala_bench_avg_energy - scala_idle_avg_energy
erlang_delta_energy = erlang_bench_avg_energy - erlang_idle_avg_energy
scala_net_avg_energy = np.maximum(scala_delta_energy, 0)
erlang_net_avg_energy = np.maximum(erlang_delta_energy, 0)

print("Net Average Energy (Joules) Comparison:\n")
print("Scala Benchmark:")
print(scala_net_avg_energy, "\n")
print("Erlang Benchmark:")
print(erlang_net_avg_energy)
