In [1]:
import pandas as pd
import numpy as np

from pathlib  import Path

In [2]:
# Experiment layout: the loading cell reads each experiment's CSV files from a
# directory named "execution_<n>", numbered from 1 to NUM_EXPERIMENTS inclusive.
# Change NUM_EXPERIMENTS here if the number of runs changes.
NUM_EXPERIMENTS = 25
EXPERIMENT_DIR_TEMPLATE = "execution_{}"

directories = [
    EXPERIMENT_DIR_TEMPLATE.format(number)
    for number in range(1, NUM_EXPERIMENTS + 1)
]

In [3]:
# Column layouts of the two per-experiment CSV files. The names are passed to
# read_csv via `names=`, so pandas treats every line of the file as data
# (the files are presumably written without a header row -- TODO confirm).
TRAINING_CONFIG_COLUMNS = [
    "server_round",
    "shuffle",
    "batch_size",
    "initial_epoch",
    "epochs",
    "steps_per_epoch",
    "validation_split",
    "validation_batch_size",
]

METRICS_RESULTS_COLUMNS = [
    "server_round",
    "total_num_clients",
    "total_num_examples",
    "accuracy",
    "loss",
    "training_time",
    "val_accuracy",
    "val_loss",
    "starting_time",
    "ending_time",
    "evaluation_total_num_clients",
    "evaluation_total_num_examples",
    "evaluation_accuracy",
    "evaluation_loss",
    "evaluation_time",
]

# Columns removed from the merged frame before it is collected.
DISCARDED_COLUMNS = [
    "shuffle",
    "starting_time",
    "ending_time",
    "steps_per_epoch",
    "initial_epoch",
    "validation_batch_size",
]


def _load_experiment(experiment_dir, experiment_number):
    """Load one experiment directory into a single merged DataFrame.

    Reads ``training_config.csv`` and ``metrics_results.csv`` from
    ``experiment_dir``, joins them on ``server_round`` (default inner merge),
    drops ``DISCARDED_COLUMNS`` and prepends an ``experiment`` column holding
    ``experiment_number``.
    """
    base_path = Path(experiment_dir)

    training_config = pd.read_csv(
        base_path / "training_config.csv", names=TRAINING_CONFIG_COLUMNS
    )
    metrics_results = pd.read_csv(
        base_path / "metrics_results.csv", names=METRICS_RESULTS_COLUMNS
    )

    experiment_frame = metrics_results.merge(
        training_config, on="server_round"
    ).drop(columns=DISCARDED_COLUMNS)
    experiment_frame.insert(0, "experiment", experiment_number)
    return experiment_frame


# Experiments are numbered from 1 in the order the directories are listed.
dfs = [
    _load_experiment(experiment_dir, experiment_number)
    for experiment_number, experiment_dir in enumerate(directories, start=1)
]

# Stack all experiments; each frame keeps its own row labels, so index values
# repeat across experiments in the result.
df_merged = pd.concat(dfs)

In [4]:
# Write the combined table for all experiments to disk. The index is written
# too (to_csv default); its labels repeat across experiments because the
# per-experiment frames were concatenated without resetting the index.
df_merged.to_csv('results_merged.csv')

In [5]:
# Start the per-experiment summary table: one row per experiment holding its
# batch_size and epochs, indexed by the experiment number.
#
# The rows are keyed on the "experiment" column rather than de-duplicating the
# (batch_size, epochs) pairs and relabelling them positionally. That way the
# table stays correct when two experiments share a configuration or when the
# number of experiments changes, and later index-aligned assignments of
# groupby("experiment") results land on the right rows.
# first() takes the first non-null value of each column within an experiment.
df_top_results = (
    df_merged
    .groupby("experiment")[["batch_size", "epochs"]]
    .first()
    .rename_axis(None)  # keep the index unnamed so the CSV header is unchanged
)

In [6]:
# Per-experiment summary statistics, computed in a single grouped aggregation.
# "last" is the last non-null value of the column within each experiment, in
# the row order of df_merged.
experiment_summary = df_merged.groupby(by="experiment").agg(
    max_accuracy=("evaluation_accuracy", "max"),
    final_accuracy=("evaluation_accuracy", "last"),
    min_loss=("evaluation_loss", "min"),
    final_loss=("evaluation_loss", "last"),
    max_training_time=("training_time", "max"),
    min_training_time=("training_time", "min"),
    mean_training_time=("training_time", "mean"),
)

# Attach each statistic as a column; assignment aligns on the index, so the
# experiment number must match df_top_results' index labels.
for metric_name in experiment_summary.columns:
    df_top_results[metric_name] = experiment_summary[metric_name]

In [7]:
# Write the per-experiment summary table to disk; the index of df_top_results
# is written as the first CSV column (to_csv default).
df_top_results.to_csv("metrics_by_experiment.csv")