In [40]:
import json
import os
from glob import glob
import pandas as pd
import re
import scipy.stats as st
import numpy as np

In [41]:
def read_experiment_stats(regex, stats_dir="./stats"):
    """Aggregate per-run JSON stats files into per-experiment summary metrics.

    Every ``*.json`` file directly inside ``stats_dir`` whose base name matches
    ``regex`` (tested with ``re.search``) is read. The experiment name is the
    file name without ``.json`` and without its final ``_``-separated token;
    files that share an experiment name are aggregated together as runs of
    that experiment.

    Each file must contain a JSON object with a ``"stats"`` entry holding the
    sequences ``"acc"``, ``"cflops"`` and ``"comm_cost"``.

    Parameters
    ----------
    regex : str or compiled pattern
        Pattern searched for in each file's base name.
    stats_dir : str, optional
        Directory scanned for ``*.json`` files. Defaults to ``"./stats"``.

    Returns
    -------
    table : pandas.DataFrame
        One row per experiment, sorted by experiment name, with the columns
        ``index`` (experiment name), ``acc_avg``, ``acc_std``, ``cflop_avg``,
        ``comm_avg`` and ``num_runs``.
    exp_dict : dict
        ``{experiment_name: {metric_name: value}}`` holding the same metrics.

    Raises
    ------
    KeyError
        If a matching file lacks ``"stats"`` or one of the expected entries.
    """
    pattern = re.compile(regex)
    # Sorted so the aggregation order (and therefore the key order of the
    # returned dict) does not depend on the filesystem's listing order.
    files = sorted(
        path
        for path in glob(os.path.join(stats_dir, "*.json"))
        if pattern.search(os.path.basename(path)) is not None
    )

    # Per-experiment lists holding one value per run.
    runs = {}
    for path in files:
        file_stem = os.path.basename(path).replace(".json", "")
        # Drop the trailing "_<token>" (the per-run suffix) to get the experiment name.
        exp_name = "_".join(file_stem.split("_")[:-1])

        # Context manager guarantees the handle is closed even if parsing fails.
        with open(path, "r") as fh:
            stats = json.load(fh)["stats"]

        comm_total = sum(stats['comm_cost'])
        if "scaffold" in exp_name:
            # Communication of "scaffold" experiments is counted twice.
            # NOTE(review): presumably accounts for extra per-round traffic
            # of that method -- confirm against the training code.
            comm_total = comm_total * 2

        per_exp = runs.setdefault(
            exp_name, {'acc_list': [], 'cflop_list': [], 'comm_list': []}
        )
        # Best accuracy of the run, scaled by 100.
        per_exp['acc_list'].append(max(stats['acc']) * 100)
        per_exp['cflop_list'].append(sum(stats['cflops']))
        per_exp['comm_list'].append(comm_total)

    exp_dict = {
        name: {
            'acc_avg': np.mean(per_exp['acc_list']),
            # np.std defaults to ddof=0, i.e. the population standard deviation.
            'acc_std': np.std(per_exp['acc_list']),
            'cflop_avg': np.mean(per_exp['cflop_list']),
            'comm_avg': np.mean(per_exp['comm_list']),
            'num_runs': len(per_exp['acc_list']),
        }
        for name, per_exp in runs.items()
    }

    # Experiments become rows; the experiment name ends up in the "index" column.
    table = pd.DataFrame(exp_dict).T.reset_index().sort_values("index")
    return table, exp_dict

def compute_acc_per_comm_per_flop(metric_dict):
    """Return, per experiment, accuracy divided by (client FLOPs * communication).

    Parameters
    ----------
    metric_dict : dict
        Maps an experiment name to a dict providing the entries ``'acc_avg'``,
        ``'cflop_avg'`` and ``'comm_avg'``.

    Returns
    -------
    dict
        Maps each experiment name to
        ``acc_avg / (cflop_avg * comm_avg)``.
    """
    return {
        name: metrics['acc_avg'] / (metrics['cflop_avg'] * metrics['comm_avg'])
        for name, metrics in metric_dict.items()
    }


In [42]:
# Aggregate every stats file (the pattern '.*' matches any file name) and
# show the resulting per-experiment summary table as the cell output.
table, metric_dict = read_experiment_stats(r'.*')
table

Unnamed: 0,index,acc_avg,acc_std,cflop_avg,comm_avg,num_runs
3,non_iid_50_v1_fedavg_5,84.207746,0.190906,17.132878,2.3923,5.0
1,non_iid_50_v1_fednova_5,36.872116,0.013273,17.132878,2.3923,5.0
6,non_iid_50_v1_fedprox_5,85.086006,0.294974,17.132878,2.3923,5.0
5,non_iid_50_v1_scaffold_5,84.727077,0.167372,17.132878,4.7846,5.0
0,split_cifar10_fedavg_5,91.606667,0.487921,11.77308,2.394304,5.0
2,split_cifar10_fednova_5,70.603333,0.654845,11.77308,2.394304,5.0
4,split_cifar10_fedprox_5,92.546667,0.476142,11.77308,2.394304,5.0
7,split_cifar10_scaffold_5,91.396667,0.469917,11.77308,4.788608,5.0
