# Experiments analysis

In [None]:
import msgpack
import edge_sim_py
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
import matplotlib.ticker as mtick

## Base functions

### Latency

In [None]:
def get_latency_data():
    """Summarize the user latency of every simulation listed in ``logs``.

    For each ``(algorithm, dataset)`` pair in the module-level ``logs`` list, the
    ``User.msgpack`` log stored under ``base_dir`` (for the module-level ``seed``)
    is loaded and the mean, median and 95th percentile of its "Delays" column
    are computed.

    Returns:
        pd.DataFrame: One row per entry of ``logs`` with the columns "dataset"
        (the ``(algorithm, dataset)`` pair), "mean", "median" and "p95".
    """
    user_data = []

    for log in logs:
        user_msgpack_file = f"{base_dir}/algorithm={log[0]};dataset={log[1]};seed={seed}/User.msgpack"

        # The context manager closes the file even when decoding fails
        with open(user_msgpack_file, "rb") as user_file:
            user_df = pd.DataFrame(msgpack.load(user_file))

        delays = user_df["Delays"]

        user_data.append(
            {
                "dataset": log,
                "mean": delays.mean(),
                "median": delays.median(),
                "p95": delays.quantile(0.95),
            }
        )

    return pd.DataFrame(user_data)

### Provisioning Time

In [None]:
def get_provisioning_time_data(step):
    """Summarize the migration durations logged at a given time step.

    For each ``(algorithm, dataset)`` pair in the module-level ``logs`` list, the
    ``Service.msgpack`` log stored under ``base_dir`` (for the module-level
    ``seed``) is loaded and restricted to the rows whose "Time Step" equals
    ``step``. The mean, median and 95th percentile are then computed for both
    "Average Migration Duration" (total) and
    "Average Migration Without Using Cache Duration" (w/o cache).

    Args:
        step (int): Value of the "Time Step" column whose rows are summarized.

    Returns:
        pd.DataFrame: One row per entry of ``logs`` with the columns "dataset",
        "mean (total)", "mean (w/o cache)", "median (total)",
        "median (w/o cache)", "p95 (total)" and "p95 (w/o cache)".
    """
    service_data = []

    for log in logs:
        service_msgpack_file = f"{base_dir}/algorithm={log[0]};dataset={log[1]};seed={seed}/Service.msgpack"

        # The context manager closes the file even when decoding fails
        with open(service_msgpack_file, "rb") as service_file:
            service_df = pd.DataFrame(msgpack.load(service_file))

        migrations_at_step = service_df[service_df["Time Step"] == step]
        total = migrations_at_step["Average Migration Duration"]
        without_cache = migrations_at_step["Average Migration Without Using Cache Duration"]

        service_data.append(
            {
                "dataset": log,
                "mean (total)": total.mean(),
                "mean (w/o cache)": without_cache.mean(),
                "median (total)": total.median(),
                "median (w/o cache)": without_cache.median(),
                "p95 (total)": total.quantile(0.95),
                "p95 (w/o cache)": without_cache.quantile(0.95),
            }
        )

    return pd.DataFrame(service_data)

### Service Reallocations

In [None]:
def get_reallocations_data(step):
    """Count the finished service migrations logged at a given time step.

    For each ``(algorithm, dataset)`` pair in the module-level ``logs`` list, the
    ``Service.msgpack`` log stored under ``base_dir`` (for the module-level
    ``seed``) is loaded and restricted to the rows whose "Time Step" equals
    ``step``. The "Number of Finished Migrations" and
    "Number of Finished Migrations Without Using Cache" columns are summed over
    those rows.

    Args:
        step (int): Value of the "Time Step" column whose rows are summed.

    Returns:
        pd.DataFrame: One row per entry of ``logs`` with the columns "dataset",
        "total" and "w/o cache".
    """
    service_data = []

    for log in logs:
        service_msgpack_file = f"{base_dir}/algorithm={log[0]};dataset={log[1]};seed={seed}/Service.msgpack"

        # The context manager closes the file even when decoding fails
        with open(service_msgpack_file, "rb") as service_file:
            service_df = pd.DataFrame(msgpack.load(service_file))

        migrations_at_step = service_df[service_df["Time Step"] == step]

        service_data.append(
            {
                "dataset": log,
                "total": migrations_at_step["Number of Finished Migrations"].sum(),
                "w/o cache": migrations_at_step["Number of Finished Migrations Without Using Cache"].sum(),
            }
        )

    return pd.DataFrame(service_data)

### Registry Usage

In [None]:
def get_registry_usage_data():
    """Summarize how much of the time each registry spent provisioning.

    For each ``(algorithm, dataset)`` pair in the module-level ``logs`` list, the
    ``ContainerRegistry.msgpack`` log stored under ``base_dir`` (for the
    module-level ``seed``) is loaded. The "Provisioning" and "Not Provisioning"
    columns are summed per "Object", and the provisioning share of each object is
    ``Provisioning / (Provisioning + Not Provisioning)``. When the dataset name is
    "p2p", only the rows whose "P2P" column is true are taken into account.

    Returns:
        pd.DataFrame: One row per entry of ``logs`` with the columns "dataset",
        "mean", "median" and "p95" of the per-object provisioning share.
    """
    usage_columns = ["Object", "Provisioning", "Not Provisioning"]
    registry_data = []

    for log in logs:
        registry_msgpack_file = f"{base_dir}/algorithm={log[0]};dataset={log[1]};seed={seed}/ContainerRegistry.msgpack"

        # The context manager closes the file even when decoding fails
        with open(registry_msgpack_file, "rb") as registry_file:
            registry_df = pd.DataFrame(msgpack.load(registry_file))

        if log[1] == "p2p":
            # Element-wise comparison with True, exactly like `column == True`
            registry_df = registry_df[registry_df["P2P"].eq(True)]
        registry_filtered_data = registry_df[usage_columns]

        registry_data_grouped = registry_filtered_data.groupby("Object").sum()
        total_steps = registry_data_grouped.sum(axis=1)
        provisioning_percentage = registry_data_grouped["Provisioning"] / total_steps

        registry_data.append(
            {
                "dataset": log,
                "mean": provisioning_percentage.mean(),
                "median": provisioning_percentage.median(),
                "p95": provisioning_percentage.quantile(0.95),
            }
        )

    return pd.DataFrame(registry_data)

### Number of Registries

In [None]:
def get_number_of_registries_data():
    """Count the registry rows logged at every time step of each simulation.

    For each ``(algorithm, dataset)`` pair in the module-level ``logs`` list, the
    ``ContainerRegistry.msgpack`` log stored under ``base_dir`` (for the
    module-level ``seed``) is loaded and the number of "Object" entries is
    counted per "Time Step", ignoring time steps that are not greater than 0.

    Returns:
        pd.DataFrame: Indexed by "Time Step", with one column per entry of
        ``logs``. Column names are ``labels[index].capitalize()``; note that
        ``str.capitalize`` lower-cases everything after the first character.
    """
    registry_data = {}

    for index, log in enumerate(logs):
        registry_msgpack_file = f"{base_dir}/algorithm={log[0]};dataset={log[1]};seed={seed}/ContainerRegistry.msgpack"

        # The context manager closes the file even when decoding fails
        with open(registry_msgpack_file, "rb") as registry_file:
            registry_df = pd.DataFrame(msgpack.load(registry_file))

        after_start = registry_df[registry_df["Time Step"] > 0]
        registries_per_timestep = after_start.groupby(["Time Step"]).count()["Object"]

        registry_data[labels[index].capitalize()] = registries_per_timestep

    return pd.concat(registry_data, axis=1)

### Server Utilization

In [None]:
def normalize_cpu_and_memory(cpu, memory) -> float:
    """Combine a CPU value and a memory value into their geometric mean.

    Args:
        cpu (float): CPU value.
        memory (float): Memory value.

    Returns:
        float: Square root of ``cpu * memory``.
    """
    product = cpu * memory
    return product ** 0.5

def get_server_utilization_data():
    """Compute the mean normalized server utilization per time step.

    For each ``(algorithm, dataset)`` pair in the module-level ``logs`` list, the
    ``EdgeServer.msgpack`` log stored under ``base_dir`` (for the module-level
    ``seed``) is loaded. The utilization of every row is
    ``normalize_cpu_and_memory(CPU Demand, RAM Demand)`` divided by
    ``normalize_cpu_and_memory(CPU, RAM)``; rows are then averaged per
    "Time Step", ignoring time steps that are not greater than 0.

    Returns:
        pd.DataFrame: Indexed by "Time Step", with one column per entry of
        ``logs``. Column names are ``labels[index].capitalize()``.
    """
    server_data = {}

    for index, log in enumerate(logs):
        server_msgpack_file = f"{base_dir}/algorithm={log[0]};dataset={log[1]};seed={seed}/EdgeServer.msgpack"

        # The context manager closes the file even when decoding fails
        with open(server_msgpack_file, "rb") as server_file:
            server_df = pd.DataFrame(msgpack.load(server_file))

        # Explicit copy so that adding a column below does not write to a slice
        # of the loaded frame (avoids pandas' chained-assignment warning)
        server_df = server_df[["Object", "CPU", "RAM", "CPU Demand", "RAM Demand", "Time Step"]].copy()

        # Whole-column arithmetic instead of a row-by-row `apply`
        demand = normalize_cpu_and_memory(server_df["CPU Demand"], server_df["RAM Demand"])
        capacity = normalize_cpu_and_memory(server_df["CPU"], server_df["RAM"])
        server_df["Normalized Utilization"] = demand / capacity

        server_per_timestep = server_df[server_df["Time Step"] > 0].groupby(["Time Step"])["Normalized Utilization"].mean()

        server_data[labels[index].capitalize()] = server_per_timestep

    return pd.concat(server_data, axis=1)

### Plot Data

In [None]:
def calculate_x(x, width, i, n):
    """Return the centre of bar ``i`` within a group of ``n`` bars centred on ``x``.

    Args:
        x: Centre of the whole group (a scalar or an array of centres).
        width: Width of a single bar.
        i: Zero-based position of the bar inside the group.
        n: Number of bars in the group.

    Returns:
        The x coordinate(s) of the centre of bar ``i``.
    """
    shift_from_group_centre = width * (i - n / 2)
    half_bar = width / 2
    return x + shift_from_group_centre + half_bar


def plot_data_with_grouped_bar(
    keys: list,
    data: pd.DataFrame,
    xlabel: str,
    ylabel: str,
    bbox_to_anchor: tuple,
    yscale: str = "linear",
    yticks: list = None,
):
    """Show a grouped bar chart with one group per row of ``data``.

    Every entry of ``keys`` becomes one bar per group, drawn with its own hatch
    pattern. The x tick labels are taken from the module-level ``labels`` list.

    Args:
        keys (list): Column names of ``data`` to plot, one bar per column.
        data (pd.DataFrame): Values to plot; each row is one group of bars.
        xlabel (str): Label of the x axis.
        ylabel (str): Label of the y axis. The value "% of Steps Active" switches
            the y axis to a percent formatter.
        bbox_to_anchor (tuple): Anchor of the legend box.
        yscale (str): Scale of the y axis, e.g. "linear" or "log".
        yticks (list): Tick positions applied when ``yscale`` is "log". When
            omitted, the tick positions chosen by matplotlib are kept.
    """
    _, ax = plt.subplots(figsize=(10, 6))

    group_width = 0.8
    n_keys = len(keys)
    # No bars are drawn for an empty `keys`, so the fallback width is never used
    bar_width = group_width / n_keys if n_keys else group_width

    hatches = ["", "/", "|", "\\", "x", "o"]

    x = np.arange(len(data))

    for position, key in enumerate(keys):
        ax.bar(
            x=calculate_x(x, bar_width, position, n_keys),
            height=data[key],
            width=bar_width,
            label=key.title(),
            # Cycle through the patterns instead of running out after six keys
            hatch=hatches[position % len(hatches)],
            color="#aaaaaa",
            edgecolor="black",
            linewidth=2,
            error_kw={"elinewidth": 2, "capthick": 2, "capsize": 5}
        )

    # Setting labels and ticks
    ax.legend(fontsize=20, bbox_to_anchor=bbox_to_anchor, loc="upper center", ncol=2)
    ax.set_xlabel(xlabel, fontsize=24, fontweight="bold", labelpad=10)
    ax.set_ylabel(ylabel, fontsize=24, fontweight="bold", labelpad=10, loc="center")
    ax.set_xticks(x, labels)
    ax.tick_params(axis="x", labelsize=24)
    ax.tick_params(axis="y", labelsize=24)
    ax.set_yscale(yscale)
    if yscale == "log":
        if yticks is not None:
            ax.set_yticks(yticks)
        # Plain numbers instead of powers of ten on the log axis
        ax.get_yaxis().set_major_formatter(mtick.ScalarFormatter())
    if ylabel == "% of Steps Active":
        ax.get_yaxis().set_major_formatter(mtick.PercentFormatter(1.0, decimals=0))

    # Rendering figure
    plt.tight_layout()
    plt.show()

In [None]:
def plot_data_over_time(
    labels: list,
    data: pd.DataFrame,
    xlabel: str,
    ylabel: str,
    bbox_to_anchor,
):
    """Show ``data`` as a line chart.

    Args:
        labels (list): Legend entries.
        data (pd.DataFrame): Values handed to ``Axes.plot``.
        xlabel (str): Label of the x axis.
        ylabel (str): Label of the y axis.
        bbox_to_anchor: Anchor of the legend box.
    """
    _, axes = plt.subplots(figsize=(10, 6))

    # Lines
    axes.plot(data, linewidth=2)

    # Axis labels and ticks
    label_style = {"fontsize": 24, "fontweight": "bold", "labelpad": 10}
    axes.set_xlabel(xlabel, **label_style)
    axes.set_ylabel(ylabel, loc="center", **label_style)
    for axis_name in ("x", "y"):
        axes.tick_params(axis=axis_name, labelsize=24)

    # Legend
    axes.legend(labels, fontsize=20, bbox_to_anchor=bbox_to_anchor, loc="upper center", ncol=3)

    # Rendering figure
    plt.tight_layout()
    plt.show()

## Results

In [None]:
# Root directory that holds one sub-directory per simulation run
base_dir = "logs"

# Seed component of the run directory names
seed = "1"

# Time step handed to the provisioning-time and reallocation summaries
last_step = 3600

# (algorithm, dataset) pairs, each identifying one run directory
logs = [("central", "central"), ("community", "community"), ("p2p", "p2p"), ("dynamic", "p2p")]

# Display names, in the same order as `logs`
labels = ["Central", "Community", "P2P", "Dynamic"]

### User Latency

In [None]:
# Latency statistics (mean, median, p95) for every run listed in `logs`
latency_df = get_latency_data()

In [None]:
# Bare expression: rendered as the cell output when run in a notebook
latency_df

In [None]:
# Grouped bars of the latency statistics, one group per strategy
plot_data_with_grouped_bar(
    keys=["mean", "median", "p95"],
    data=latency_df,
    xlabel="Registry Provisioning Strategy",
    ylabel="Latency",
    bbox_to_anchor=(0.5, 1.2),
)

### Provisioning Time

In [None]:
# Migration duration statistics taken from the rows logged at `last_step`
provisioning_time_df = get_provisioning_time_data(last_step)

In [None]:
# Bare expression: rendered as the cell output when run in a notebook
provisioning_time_df

In [None]:
# Grouped bars of the provisioning-time statistics, with and without cache
plot_data_with_grouped_bar(
    keys=[
        "mean (total)",
        "mean (w/o cache)",
        "median (total)",
        "median (w/o cache)",
        "p95 (total)",
        "p95 (w/o cache)",
    ],
    data=provisioning_time_df,
    xlabel="Registry Provisioning Strategy",
    ylabel="Provisioning Time",
    bbox_to_anchor=(0.5, 1.2),
)

### Service Reallocations

In [None]:
# Finished-migration counts taken from the rows logged at `last_step`
reallocations_df = get_reallocations_data(last_step)

In [None]:
# Bare expression: rendered as the cell output when run in a notebook
reallocations_df

In [None]:
# Grouped bars of the reallocation counts on a logarithmic y axis
plot_data_with_grouped_bar(
    keys=["total", "w/o cache"],
    data=reallocations_df,
    xlabel="Registry Provisioning Strategy",
    ylabel="Service Reallocations",
    bbox_to_anchor=(0.5, 1.35),
    yscale="log",
    yticks=[100, 1000, 10000],
)

### Registry Usage

In [None]:
# Per-registry provisioning share statistics for every run listed in `logs`
registry_usage_df = get_registry_usage_data()

In [None]:
# Bare expression: rendered as the cell output when run in a notebook
registry_usage_df

In [None]:
# Grouped bars of the registry usage; this y label selects the percent formatter
plot_data_with_grouped_bar(
    keys=["mean", "median", "p95"],
    data=registry_usage_df,
    xlabel="Registry Provisioning Strategy",
    ylabel="% of Steps Active",
    bbox_to_anchor=(0.5, 1.2),
)

### Number of Registries

In [None]:
# Registry count per time step, one column per run listed in `logs`
number_of_registries_df = get_number_of_registries_data()

In [None]:
# Line chart of the registry count over the time steps
plot_data_over_time(
    labels=labels,
    data=number_of_registries_df,
    xlabel="Time Steps",
    ylabel="Number of Registries",
    bbox_to_anchor=(0.5, 1.2),
)

### Server Utilization

In [None]:
# Mean normalized server utilization per time step, one column per run
server_utilization_df = get_server_utilization_data()

In [None]:
# Line chart of the server utilization over the time steps
plot_data_over_time(
    labels=labels,
    data=server_utilization_df,
    xlabel="Time Steps",
    ylabel="Server Utilization",
    bbox_to_anchor=(0.5, 1.2),
)