# Analysis of Edge Computing Metrics
Here, we analyze the first two of our four research questions.

In [None]:
import numpy as np
import pandas as pd

# Imports
import utils
import os
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import seaborn as sns

from pathlib import Path

# Absolute directory used as the root for the data/ folder below. The bare
# notebook file name is resolved against the kernel's current working
# directory, so this is the notebook's folder only if the kernel was started there.
notebook_path = os.path.split(os.path.realpath("00_edge_computing_benchmark_analysis.ipynb"))[0]

In [None]:
# Seaborn configuration
# Alternative font: Linux Libertine
#
# All theme options are passed in ONE call. sns.set() is an alias of
# sns.set_theme(), and every call re-applies the defaults for the arguments it
# is not given; spreading the options over several calls therefore silently
# discards the style, palette and font chosen in the earlier call.
sns.set_theme(
    context="paper",
    style="whitegrid",
    palette="colorblind",
    font="Times New Roman",
    font_scale=1,  # the value that was effective after the last of the former calls
    rc={"figure.figsize": (14, 3)},
)

In [None]:
# Helper function to expand the experiment name
def expand_experiment_name(df: pd.DataFrame) -> pd.DataFrame:
    """Split the run name stored in ``df["name"]`` into one column per field.

    The name is split on ``"_"`` into 21 tokens. Sixteen of them become data
    columns (``date``, ``time``, ``experiment``, ..., ``client_id``); the five
    keyword tokens of the name ("rounds", "clients", "dropout", "prec",
    "client") carry no information and are dropped. A ``timestamp`` column is
    parsed from ``date`` and ``time``.

    Args:
        df: Frame with a string column ``name`` following the pattern shown in
            the example below.

    Returns:
        The same ``df`` object. The frame is modified in place, so callers may
        use either the return value or the argument they passed in.

    Raises:
        ValueError: If the split does not yield exactly 21 columns, or if
            ``date``/``time`` do not match ``%Y-%m-%d %H:%M``.
    """
    # 2023-05-31_17:38_flbench_experiment-plan-baseline_shakespeare_lstm_None_local_1_rounds_1_clients_0_dropout_nodp_0_prec_16_172.24.33.9_client_1
    col_names = ["date", "time", "experiment", "inventory", "dataset", "model", "strategy", "data_dist",
                 "training_rounds", "pl1", "clients", "pl2", "dropout", "pl3", "dp", "noise_multiplier", "pl4",
                 "precision", "ip_addr", "pl5", "client_id"]
    # Positions of the fixed keyword tokens; all of them are removed again,
    # including the last one in front of the client id.
    placeholders = ["pl1", "pl2", "pl3", "pl4", "pl5"]

    df[col_names] = df["name"].str.split("_", expand=True)
    # inplace on purpose: the caller's frame object itself must be updated.
    df.drop(columns=placeholders, inplace=True)
    df["timestamp"] = pd.to_datetime(df["date"] + " " + df["time"], format="%Y-%m-%d %H:%M")
    return df

In [None]:
# Data download / load from disk
wandb_entity = "..."                   # <-- Add your credentials for W&B here.
wandb_project = "..."                  # <-- Add your credentials for W&B here.
wandb_run_filter_keywords = ["..."]    # <-- Filter for whatever keyword you are looking for.
log_file_name = "wandb_baseline_logs.csv"

log_file_path = f"{notebook_path}/data/{log_file_name}"
if Path(log_file_path).exists():
    # The logs have been downloaded before: fetch them from disk.
    df = pd.read_csv(log_file_path, index_col=0)
    df = expand_experiment_name(df)
else:
    # No local copy yet: download the runs, store the raw frame, then expand
    # the run names and store the frame again under the same file name.
    df = utils.download_data_from_wandb(entity=wandb_entity, project=wandb_project, keywords=wandb_run_filter_keywords)
    utils.write_df_to_disk(df, filename=log_file_name)
    df = expand_experiment_name(df)
    utils.write_df_to_disk(df, filename=log_file_name)

In [None]:
# We replace IP addresses with device type names
print(df["ip_addr"].unique())
ip_to_device = {
    "172.24.32.1": "3raspi",
    "172.24.32.2": "3raspi",
    "172.24.32.3": "3raspi",
    "172.24.32.52": "2jnano",
    "172.24.32.53": "2jnano",
    "172.24.32.54": "2jnano",
    "172.24.33.72": "1vm",
    "172.24.33.74": "1vm",
    "172.24.33.82": "1vm",
    "172.24.33.9": "0gpu",
}
# The mapped column is assigned directly. Calling .replace(..., inplace=True)
# on df["..."] acts on a temporary under pandas Copy-on-Write and would leave
# df untouched; assigning also keeps the cell re-runnable (no second "device"
# column appears). Addresses missing from the mapping are kept as they are.
df["device"] = df["ip_addr"].replace(ip_to_device)

# For VMs we use SelfWatts estimate of a 4 CPU (x86) VM in the same system as ours.
# Source (Fig .4): https://inria.hal.science/hal-03173410/document
df.loc[df["device"] == "1vm", "power/wattage"] = 50_000 # mW
print(df["device"].unique())

# RQ 1: Microbenchmark for embedded devices


In [None]:
# Per-batch timings of the five training steps; the numeric prefixes of the
# short names fix the sort order used in the table and the stacked bars.
step_columns = {
    "timing/train/batch_load_time_s": "0batch_load",
    "timing/train/forward_time": "1forward",
    "timing/train/loss_calc_time_s": "2loss",
    "timing/train/backward_s": "3back",
    "timing/train/optimizer_s": "4optim",
}
subset = df[[*step_columns, "device", "dataset", "model"]].copy(deep=True)
subset = subset.rename(columns=step_columns)
# We need this cleaning as on the Jnano's the first batch takes 170x longer than all other batches to load, which is an issue with the way how pytorch interacts with the custom CUDA on the Nanos.
# .copy() so that the column assignments below write into an independent frame.
subset = subset.loc[subset["0batch_load"] < subset["0batch_load"].quantile(.997)].copy()
subset["total_step_time"] = subset["0batch_load"] + subset["1forward"] + subset["2loss"] + subset["3back"] + subset["4optim"]

subset["model"] = subset["model"].replace({"cnn": "0cnn", "lstm": "1lstm", "densenet": "3densenet", "resnet": "2resnet"})
# aggfunc is given by name: passing NumPy callables such as np.mean is deprecated in pandas 2.x.
pvt = subset.pivot_table(index=["dataset", "model", "device"], values=["0batch_load", "1forward", "2loss", "3back", "4optim", "total_step_time"], aggfunc="mean")
pvt = pvt.round(2)

fig, ax1 = plt.subplots(1)
pvt.plot(kind="bar", stacked=True, xlabel="", ylabel="Time (in s)", ax=ax1, y=["0batch_load", "1forward", "2loss", "3back", "4optim"])
ax1.legend(["Batch Loading", "Forward Step", "Loss Calculation", "Backward Step", "Optimizer Step"])
utils.format_xaxis(ax1=ax1, ymax=8)
# Annotate the last (topmost) segment of every stack.
ax1.bar_label(ax1.containers[-1], size=12)

# y-axis range, leaving room above ymax for the bar labels
ax1.set_ylim(0, 8.5)

utils.write_figure_to_disk(plt=plt, file_name="microbenchmark", chapter_name="evaluations")

In [None]:
# Speedup table
# Every device type serves as basis in turn. For each (dataset, model, basis)
# we report, per training step and target device, the ratio target / basis of
# the mean step time and of its standard deviation.
step_names = ["0batch_load", "1forward", "2loss", "3back", "4optim", "total_step_time"]
device_names = ["0gpu", "1vm", "2jnano", "3raspi"]

# Aggregations are given by name: NumPy callables as aggfunc are deprecated in pandas 2.x.
speed_pvt = subset.pivot_table(index=["dataset", "model", "device"], values=step_names, aggfunc=["mean", "std"])
speed_pvt = speed_pvt.reset_index()

speed_data_raw = []
for dataset in ["blond", "mnist", "shakespeare"]:
    for model in ["0cnn", "1lstm", "2resnet", "3densenet"]:
        in_group = (speed_pvt["dataset"] == dataset) & (speed_pvt["model"] == model)
        for basis in device_names:
            basis_set = speed_pvt.loc[in_group & (speed_pvt["device"] == basis)]
            if basis_set.empty:
                continue

            for target in device_names:
                target_set = speed_pvt.loc[in_group & (speed_pvt["device"] == target)]
                if target_set.empty:
                    # This device has no runs for the combination; without the
                    # guard the [0] lookups below raise IndexError. The missing
                    # cells are filled after the pivot.
                    continue

                for step in step_names:
                    speed_data_raw.append({
                        "Dataset": dataset,
                        "Model": model,
                        "basis": basis,
                        "target": target,
                        "Step": step,
                        "mean": target_set[("mean", step)].tolist()[0] / basis_set[("mean", step)].tolist()[0],
                        "std": target_set[("std", step)].tolist()[0] / basis_set[("std", step)].tolist()[0],
                    })

speed_info = pd.DataFrame(speed_data_raw)
pvt_speed = speed_info.pivot_table(index=["Dataset", "Model", "basis"], columns=["Step", "target"], values=["mean", "std"], aggfunc="mean")
# Cells without a value are reported as a ratio of 1.
pvt_speed = pvt_speed.fillna(1)

# Turn every "mean" cell into the string "<mean>±<std>"; the "std" columns stay
# numeric and are not part of the exported table.
for val_type, step, device in list(pvt_speed.columns):
    if val_type != "mean":
        continue

    pvt_speed[(val_type, step, device)] = pvt_speed[(val_type, step, device)].round(decimals=2).astype(str) + "±" + pvt_speed[("std", step, device)].round(decimals=1).astype(str)

# Plain Styler without highlighting or custom formatters: the cells are already
# formatted strings, so only the LaTeX export is needed.
latex_styler = pvt_speed["mean"].style
print(latex_styler.to_latex(column_format="lll|rrrr|rrrr|rrrr|rrrr|rrrr|rrrr", position="", position_float="centering", hrules=True, label="tab:speedup-comparison", caption="Speedup comparison across device types, datasets, and ML models.", multirow_align="t", multicol_align="r"))

# RQ 2: Energy & Cost Efficiency

In [None]:
# Download system metrics for GPU power consumption
sys_file_name = "wandb_system_logs_gpu_v2.csv"
sys_ids = df.loc[df["device"] == "0gpu", "id"].unique().tolist()

if not Path(f"{notebook_path}/data/{sys_file_name}").exists():
    # No local copy yet: download the system metrics of the GPU runs.
    sys_metrics = utils.download_system_metrics(run_ids=sys_ids, entity=wandb_entity, project=wandb_project)
    # The result is bound to sys_metrics, NOT to df: df holds the experiment
    # logs and is still read by the cells further down.
    sys_metrics = expand_experiment_name(sys_metrics)
    utils.write_df_to_disk(sys_metrics, filename=sys_file_name)
else:
    # The metrics have been downloaded before (stored with the name columns
    # already expanded): fetch them from disk.
    sys_metrics = pd.read_csv(f"{notebook_path}/data/{sys_file_name}", index_col=0)

ip_to_device = {
    "172.24.32.1": "3raspi",
    "172.24.32.2": "3raspi",
    "172.24.32.3": "3raspi",
    "172.24.32.52": "2jnano",
    "172.24.32.53": "2jnano",
    "172.24.32.54": "2jnano",
    "172.24.33.72": "1vm",
    "172.24.33.74": "1vm",
    "172.24.33.82": "1vm",
    "172.24.33.9": "0gpu",
}
# Assign the mapped column directly; an inplace replace on sys_metrics["..."]
# acts on a temporary under pandas Copy-on-Write and would change nothing.
sys_metrics["device"] = sys_metrics["ip_addr"].replace(ip_to_device)
sys_metrics["model"] = sys_metrics["model"].replace({"cnn": "0cnn", "lstm": "1lstm", "densenet": "2densenet", "resnet": "3resnet"})
sys_metrics = sys_metrics.set_index(["dataset", "model", "device"])

In [None]:
# Here we consider the training samples only.
sample_sizes = {
    "blond": {"batch_size": 128, "total_samples": 13164},
    "mnist": {"batch_size": 32, "total_samples": 270000},
    # We cut the shakespeare experiment short for runtime reasons. Results are scaled to full trainset size of 0.8 * 3,380,926
    "shakespeare": {"batch_size": 32, "total_samples": 8000} # Only the GPU trained on the full dataset.
}

# Per-batch timings of the five training steps (seconds).
timing_columns = [
    "timing/train/batch_load_time_s",
    "timing/train/forward_time",
    "timing/train/loss_calc_time_s",
    "timing/train/backward_s",
    "timing/train/optimizer_s",
]

e_subset = df[["id", "dataset", "model", "device", *timing_columns, "power/wattage"]].copy(deep=True)
e_subset["model"] = e_subset["model"].replace({"cnn": "0cnn", "lstm": "1lstm", "densenet": "2densenet", "resnet": "3resnet"})
e_subset = e_subset.set_index(["dataset", "model", "device"])

# Mean power draw per (dataset, model, device) in mW: the experiment logs
# provide "power/wattage", the W&B system metrics provide the GPU power in W.
gpu_power = sys_metrics.groupby(sys_metrics.index)["system.gpu.0.powerWatts"].mean() * 1000
wattage_df = e_subset.groupby(e_subset.index)["power/wattage"].mean()
# np.fmax ignores NaN on either side. The builtin max returns its first
# argument whenever that one is NaN, which would discard the GPU reading for
# groups without a logged wattage.
wattage_df = wattage_df.combine(gpu_power, func=np.fmax)

# Same outlier removal as in the microbenchmark: drop the slowest 0.3 % of batch loads.
e_subset = e_subset.loc[e_subset["timing/train/batch_load_time_s"] < e_subset["timing/train/batch_load_time_s"].quantile(.997)]
# Aggregations are given by name: NumPy callables as aggfunc are deprecated in pandas 2.x.
pvt2 = e_subset.pivot_table(index=e_subset.index, values=["power/wattage", *timing_columns], aggfunc=["mean", "std"])
# Rebuild a proper (dataset, model, device) MultiIndex from the row labels.
pvt2.index = pd.MultiIndex.from_tuples(pvt2.index.tolist())

# The mean wattage comes from the unfiltered logs combined with the GPU system metrics.
pvt2[("mean", "power/wattage")] = wattage_df

# Per-row dataset constants, aligned with the rows of pvt2.
row_datasets = pvt2.index.get_level_values(0)
total_samples = np.array([sample_sizes[name]["total_samples"] for name in row_datasets], dtype=float)
batches_per_run = np.array([sample_sizes[name]["total_samples"] / sample_sizes[name]["batch_size"] for name in row_datasets])

# NOTE(review): every "std" column of a derived metric below is obtained by
# applying the formula of the mean to the std columns. That is not a propagated
# standard deviation — confirm before reporting these values.
for stat in ("mean", "std"):
    # Time of one training step = sum of its five parts; scaled to all batches of a run.
    pvt2[(stat, "timing/training_time")] = sum(pvt2[(stat, column)] for column in timing_columns) * batches_per_run
for stat in ("mean", "std"):
    pvt2[(stat, "throughput_sps")] = total_samples / pvt2[(stat, "timing/training_time")]

# The per-step timing columns are no longer needed. The result has to be
# assigned: DataFrame.drop returns a new frame and leaves the original untouched.
pvt2 = pvt2.drop([(stat, column) for stat in ("mean", "std") for column in timing_columns], axis=1)
# Convert milliwatt to watt
pvt2[("mean", "power/wattage")] = pvt2[("mean", "power/wattage")] / 1000
pvt2[("std", "power/wattage")] = pvt2[("std", "power/wattage")] / 1000
# Get total energy consumption for experiment
pvt2[("mean", "power/total_energy_wh")] = pvt2[("mean", "power/wattage")] * (pvt2[("mean", "timing/training_time")] / (60 * 60))
pvt2[("std", "power/total_energy_wh")] = pvt2[("std", "power/wattage")] * (pvt2[("std", "timing/training_time")] / (60 * 60))

# We interpolate the training performance based on 10k samples from the logs.
# NOTE(review): sample_sizes uses 8000 samples for shakespeare while this factor
# divides by 10000 — confirm which sample count the extrapolation should use.
shakespeare_scale = (3_380_926*0.8) / 10000
for device in ["1vm", "2jnano", "3raspi"]:
    row = ("shakespeare", "1lstm", device)
    for stat in ("mean", "std"):
        pvt2.loc[row, (stat, "power/total_energy_wh")] = pvt2.loc[row, (stat, "power/total_energy_wh")] * shakespeare_scale

# Create sample efficiency metric
pvt2[("mean", "sample_efficiency")] = pvt2[("mean", "throughput_sps")] / pvt2[("mean", "power/wattage")]
pvt2[("std", "sample_efficiency")] = pvt2[("std", "throughput_sps")] / pvt2[("std", "power/wattage")]
pvt2 = pvt2.round(0)
print(pvt2)

In [None]:
# Sample efficiency chart
# LaTeX rendering is switched on BEFORE the figure is created: matplotlib text
# objects take their usetex setting from rcParams when they are constructed, so
# enabling it afterwards only affects text created later and makes the first
# run on a fresh kernel differ from a re-run. The setting stays active for all
# figures created after this cell.
plt.rcParams['text.usetex'] = True
fig, ax1 = plt.subplots(1)
# NOTE(review): yerr receives the whole "std" sub-frame; presumably only
# ("std", "sample_efficiency") is meant — confirm that the error bars are drawn
# as intended.
pvt2[("mean", "sample_efficiency")].plot(kind="bar", stacked=False, xlabel="", ylabel="$\\eta_e = \\frac{SPS}{W}$", ax=ax1, yerr=pvt2["std"])
utils.format_xaxis(ax1=ax1, ymax=112)
# Annotate the bars of the last container added to the axes.
ax1.bar_label(ax1.containers[-1], size=14)
utils.write_figure_to_disk(plt=plt, file_name="sample_efficiency", chapter_name="evaluations")

In [None]:
# Done with Edge Evaluations