In [1]:
# Federated Training Analysis
# Visualizing training performance, energy usage, and hardware effects

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Lift pandas' column display limit so wide DataFrames are shown in full.
pd.options.display.max_columns = None


In [None]:
# Read the recorded metrics CSV into a DataFrame and preview the first rows.
metrics_csv = Path("results/metrics.csv")
df = pd.read_csv(metrics_csv)
df.head()


In [None]:
# Map clients to hardware profiles (adjust if your mapping differs)
HW_MAP = {
    0: "gpu", 8: "gpu",
    1: "cpu-medium", 2: "cpu-medium", 7: "cpu-medium",
    3: "cpu-slow", 4: "cpu-slow", 5: "cpu-slow", 6: "cpu-slow", 9: "cpu-slow",
}

# Series.map yields NaN for any client_id that is not a key of HW_MAP, and
# rows with a NaN "hardware" value are silently left out of groupby results
# and categorical plots. Report such IDs and give them an explicit label so
# no rows vanish from later analysis unnoticed.
hw_labels = df["client_id"].map(HW_MAP)
unmapped_ids = df.loc[hw_labels.isna(), "client_id"].drop_duplicates().tolist()
if unmapped_ids:
    print(f"WARNING: client_id(s) missing from HW_MAP, labelled 'unknown': {unmapped_ids}")

# Re-running this cell is safe: the column is recomputed from client_id each time.
df["hardware"] = hw_labels.fillna("unknown")
df.head()


In [None]:
# Mean train_loss over all rows belonging to each round, plotted as one curve.
loss_by_round = df.groupby("round")["train_loss"].mean()

fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(loss_by_round.index, loss_by_round.values, marker="o")
ax.set_title("Average Train Loss vs Round")
ax.set_xlabel("Round")
ax.set_ylabel("Train Loss")
ax.grid(True)
plt.show()


In [None]:
# Mean energy_joules per hardware label, sorted ascending so the bars run
# from the lowest to the highest average.
energy_hw = df.groupby("hardware")["energy_joules"].mean().sort_values()

fig, ax = plt.subplots(figsize=(6, 4))
sns.barplot(x=energy_hw.index, y=energy_hw.values, ax=ax)
ax.set_title("Average Energy Consumption by Hardware Type")
ax.set_ylabel("Energy (Joules)")
plt.show()


In [None]:
# One box per hardware label showing the spread of train_time_sec values.
fig, ax = plt.subplots(figsize=(8, 5))
sns.boxplot(data=df, x="hardware", y="train_time_sec", ax=ax)
ax.set_title("Training Time Distribution per Hardware Profile")
ax.set_ylabel("Train Time (seconds)")
plt.show()


In [None]:
# Scatter each row's energy_joules against its train_loss, coloured by the
# hardware label. The y-axis is train_loss (no accuracy column is plotted),
# so the title names loss rather than accuracy to match the axis label.
fig, ax = plt.subplots(figsize=(6, 5))
sns.scatterplot(data=df, x="energy_joules", y="train_loss", hue="hardware", ax=ax)
ax.set_title("Energy vs Train Loss Tradeoff")
ax.set_xlabel("Energy (J)")
ax.set_ylabel("Train Loss")
plt.show()
