In [1]:
import h5py
import numpy as np
import pandas as pd 
import os
import numpy as np
import re
import csv
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.inset_locator import inset_axes


In [2]:
def convert_csv_to_txt(input_file,output_file):
   
    with open(input_file, 'r') as csv_file, open(output_file, 'w') as space_delimited_file:
        csv_reader = csv.reader(csv_file)
        for row in csv_reader:
            space_delimited_file.write(' '.join(row) + '\n')

    print(f'CSV file "{input_file}" converted to space-delimited file "{output_file}"')


In [5]:
def read_file(file):
    hf = h5py.File(file, 'r')
    attributes = []
    for key in hf.keys():
        attributes.append(key)
    
    return attributes, hf


In [6]:
def get_data(hf,attributes):
    data = []
    pm = []
    acc_pm = []
    loss_pm = []
    loss_gm = []
    for i in range(len(attributes)):
        ai = hf.get(attributes[i])
        ai = np.array(ai)
        data.append(ai)
    
    return data


In [7]:
def print_stats(description, mean_std, file):
    file.write(f"{description} (mean/std) : ({mean_std[0]} / {mean_std[1]})\n")

In [11]:
def average_result(path,directory_name, algorithm, avg_file, target):
    
    dir_list = os.listdir(path)

    i=0
    train_loss, train_accuracy, test_loss, test_accuracy = [], [], [], []
    per_train_loss, per_train_accuracy, per_test_loss, per_test_accuracy = [], [], [], []
    per_precision, per_recall, per_f1 = [], [], []
    precision, recall, f1 = [], [], []

    if algorithm in [ "Local", "Fedavg", "Fesem", "Fedprox"]:
        for file_name in dir_list:
            
            
            if file_name.endswith(".h5"):
                print(file_name)
                attributes, hf = read_file(path+file_name)

                data = get_data(hf,attributes)
                id=0
                for key in hf.keys():
                    attributes.append(key)
                    # print("id [",id,"] :", key)
                    id+=1

                gtsl = hf.get('global_test_loss')
                gtrl = hf.get('global_train_loss')
                gtsa = hf.get('global_test_accuracy')
                gtra = hf.get('global_train_accuracy')
                gp = hf.get('global_precision')
                gr = hf.get('global_recall')
                gf1 = hf.get('global_f1score')

                test_loss.append(np.array(gtsl).tolist())
                train_loss.append(np.array(gtrl).tolist())
                test_accuracy.append(np.array(gtsa).tolist())
                train_accuracy.append(np.array(gtra).tolist())
                precision.append(np.array(gp).tolist())
                recall.append(np.array(gr).tolist())
                f1.append(np.array(gf1).tolist())


                
            
        avg_train_loss = np.array(train_loss)
        avg_test_loss = np.array(test_loss)
        avg_train_accuracy = np.array(train_accuracy)
        avg_test_accuracy = np.array(test_accuracy)
        avg_precision = np.array(precision)
        avg_recall = np.array(recall)
        avg_f1 = np.array(f1)


        # print(avg_test_accuracy)
        
        gtrl_mean = np.mean(avg_train_loss, axis=0)
        gtra_mean = np.mean(avg_train_accuracy, axis=0)
        gtsl_mean = np.mean(avg_test_loss, axis=0)
        gtsa_mean = np.mean(avg_test_accuracy, axis=0)

        gtrl_std = np.std(avg_train_loss, axis=0)
        gtra_std = np.std(avg_train_accuracy, axis=0)
        gtsl_std = np.std(avg_test_loss, axis=0)
        gtsa_std = np.std(avg_test_accuracy, axis=0)

        gp_mean = np.mean(avg_precision, axis=0)
        gr_mean = np.mean(avg_recall, axis=0)
        gf1_mean = np.mean(avg_f1, axis=0)

        gp_std = np.std(avg_precision, axis=0)
        gr_std = np.std(avg_recall, axis=0)
        gf1_std = np.std(avg_f1, axis=0)



        gtrl_mean_std = np.column_stack((gtrl_mean, gtrl_std))
        gtra_mean_std = np.column_stack((gtra_mean, gtra_std))
        gtsl_mean_std = np.column_stack((gtsl_mean, gtsl_std))
        gtsa_mean_std = np.column_stack((gtsa_mean, gtsa_std))

        gp_mean_std = np.column_stack((gp_mean, gp_std))
        gr_mean_std = np.column_stack((gr_mean, gr_std))
        gf1_mean_std = np.column_stack((gf1_mean, gf1_std))


        training_loss_mean_std = gtrl_mean_std[gtrl_mean_std[:,0].argmin()]
        training_acc_mean_std = gtra_mean_std[gtra_mean_std[:,0].argmax()]
        val_loss_mean_std = gtsl_mean_std[gtsl_mean_std[:,0].argmin()]
        val_acc_mean_std = gtsa_mean_std[gtsa_mean_std[:,0].argmax()]
        precision_mean_std = gp_mean_std[gp_mean_std[:,0].argmax()]
        recall_mean_std = gr_mean_std[gr_mean_std[:,0].argmax()]
        f1_mean_std = gf1_mean_std[gf1_mean_std[:,0].argmax()]


        
        with h5py.File(directory_name  + '{}.h5'.format(avg_file), 'w') as hf:
            hf.create_dataset('avg_training_loss', data=gtrl_mean)
            hf.create_dataset('avg_training_accuracy', data=gtra_mean)
            hf.create_dataset('avg_test_loss', data=gtsl_mean)
            hf.create_dataset('avg_test_accuracy', data=gtsa_mean)
            hf.create_dataset('avg_precision', data=gp_mean)
            hf.create_dataset('avg_recall', data=gr_mean)
            hf.create_dataset('avg_f1', data=gf1_mean)

            hf.close



        print("+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-")
        print("Algorithm :",algorithm)
        print("Global training loss (mean/std) : (",training_loss_mean_std[0],"/",training_loss_mean_std[1],")")
        print("Global training accuracy (mean/std) : (",training_acc_mean_std[0],"/",training_acc_mean_std[1],")")
        print("Global test loss (mean/std) : (", val_loss_mean_std[0],"/", val_loss_mean_std[1],")")
        print("Global test accuracy (mean/std) : (",val_acc_mean_std[0],"/",val_acc_mean_std[1],")")
        print("+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-\n")  
        print(f"Global Precision (mean/std) : ({precision_mean_std[0]} / {precision_mean_std[1]})")
        print(f"Global Recall (mean/std) : ({recall_mean_std[0]} / {recall_mean_std[1]})")
        print(f"Global F1Score (mean/std) : ({f1_mean_std[0]} / {f1_mean_std[1]})")
        print("+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-\n")  


        file_name = 'performance_' + algorithm + '_' + str(target) + '.txt'
        
    
        with open(directory_name + '/' + file_name, 'w') as file:
            file.write("+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-\n")
            file.write(f"algorithm : {algorithm} : target {target}")
            print_stats("Global training loss", training_loss_mean_std, file)
            print_stats("Global training accuracy", training_acc_mean_std, file)
            print_stats("Global test loss", val_loss_mean_std, file)
            print_stats("Global test accuracy", val_acc_mean_std, file)
            
            file.write("+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-\n\n")
            print_stats("Global Precision", precision_mean_std, file)
            print_stats("Global Recall", recall_mean_std, file)
            print_stats("Global F1Score", f1_mean_std, file)
            file.write("+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-\n\n")
       
    
    if algorithm in ['pFedme', 'Fedmem', 'demlearn', 'h-sgd']:
        for file_name in dir_list:
            if file_name.endswith(".h5"):
                print(file_name)
                attributes, hf = read_file(path+file_name)

                data = get_data(hf,attributes)
                id=0
                for key in hf.keys():
                    attributes.append(key)
                    # print("id [",id,"] :", key)
                    id+=1

                gtsl = hf.get('global_test_loss')
                gtrl = hf.get('global_train_loss')
                gtsa = hf.get('global_test_accuracy')
                gtra = hf.get('global_train_accuracy')

                ptsl = hf.get('per_test_loss')
                ptrl = hf.get('per_train_loss')
                ptsa = hf.get('per_test_accuracy')
                ptra = hf.get('per_train_accuracy')

                gp = hf.get('global_precision')
                gr = hf.get('global_recall')
                gf1 = hf.get('global_f1score')

                pp = hf.get('per_precision')
                pr = hf.get('per_recall')
                pf1 = hf.get('per_f1score')
            
                test_loss.append(np.array(gtsl).tolist())
                train_loss.append(np.array(gtrl).tolist())
                test_accuracy.append(np.array(gtsa).tolist())
                train_accuracy.append(np.array(gtra).tolist())

                per_test_loss.append(np.array(ptsl).tolist())
                per_train_loss.append(np.array(ptrl).tolist())
                per_test_accuracy.append(np.array(ptsa).tolist())
                per_train_accuracy.append(np.array(ptra).tolist())

                precision.append(np.array(gp).tolist())
                recall.append(np.array(gr).tolist())
                f1.append(np.array(gf1).tolist())

                per_precision.append(np.array(pp).tolist())
                per_recall.append(np.array(pr).tolist())
                per_f1.append(np.array(pf1).tolist())

                
        avg_train_loss = np.array(train_loss)
        avg_test_loss = np.array(test_loss)
        avg_train_accuracy = np.array(train_accuracy)
        avg_test_accuracy = np.array(test_accuracy)


        avg_per_train_loss = np.array(per_train_loss)
        avg_per_test_loss = np.array(per_test_loss)
        avg_per_train_accuracy = np.array(per_train_accuracy)
        avg_per_test_accuracy = np.array(per_test_accuracy)

        avg_precision = np.array(precision)
        avg_recall = np.array(recall)
        avg_f1 = np.array(f1)

        avg_per_precision = np.array(per_precision)
        avg_per_recall = np.array(per_recall)
        avg_per_f1 = np.array(per_f1)


        gtrl_mean = np.mean(avg_train_loss, axis=0)
        gtra_mean = np.mean(avg_train_accuracy, axis=0)
        gtsl_mean = np.mean(avg_test_loss, axis=0)
        gtsa_mean = np.mean(avg_test_accuracy, axis=0)

        ptrl_mean = np.mean(avg_per_train_loss, axis=0)
        ptra_mean = np.mean(avg_per_train_accuracy, axis=0)
        ptsl_mean = np.mean(avg_per_test_loss, axis=0)
        ptsa_mean = np.mean(avg_per_test_accuracy, axis=0)

        gp_mean = np.mean(avg_precision, axis=0)
        gr_mean = np.mean(avg_recall, axis=0)
        gf1_mean = np.mean(avg_f1, axis=0)

        pp_mean = np.mean(avg_per_precision, axis=0)
        pr_mean = np.mean(avg_per_recall, axis=0)
        pf1_mean = np.mean(avg_per_f1, axis=0)
        





        gtrl_std = np.std(avg_train_loss, axis=0)
        gtra_std = np.std(avg_train_accuracy, axis=0)
        gtsl_std = np.std(avg_test_loss, axis=0)
        gtsa_std = np.std(avg_test_accuracy, axis=0)

        ptrl_std = np.std(avg_per_train_loss, axis=0)
        ptra_std = np.std(avg_per_train_accuracy, axis=0)
        ptsl_std = np.std(avg_per_test_loss, axis=0)
        ptsa_std = np.std(avg_per_test_accuracy, axis=0)

        gp_std = np.std(avg_precision, axis=0)
        gr_std = np.std(avg_recall, axis=0)
        gf1_std = np.std(avg_f1, axis=0)

        pp_std = np.std(avg_per_precision, axis=0)
        pr_std = np.std(avg_per_recall, axis=0)
        pf1_std = np.std(avg_per_f1, axis=0)
        


        gtrl_mean_std = np.column_stack((gtrl_mean, gtrl_std))
        gtra_mean_std = np.column_stack((gtra_mean, gtra_std))
        gtsl_mean_std = np.column_stack((gtsl_mean, gtsl_std))
        gtsa_mean_std = np.column_stack((gtsa_mean, gtsa_std))

        ptrl_mean_std = np.column_stack((ptrl_mean, ptrl_std))
        ptra_mean_std = np.column_stack((ptra_mean, ptra_std))
        ptsl_mean_std = np.column_stack((ptsl_mean, ptsl_std))
        ptsa_mean_std = np.column_stack((ptsa_mean, ptsa_std))

        gp_mean_std = np.column_stack((gp_mean, gp_std))
        gr_mean_std = np.column_stack((gr_mean, gr_std))
        gf1_mean_std = np.column_stack((gf1_mean, gf1_std))

        pp_mean_std = np.column_stack((pp_mean, pp_std))
        pr_mean_std = np.column_stack((pr_mean, pr_std))
        pf1_mean_std = np.column_stack((pf1_mean, pf1_std))

        training_loss_mean_std = gtrl_mean_std[gtrl_mean_std[:,0].argmin()]
        training_acc_mean_std = gtra_mean_std[gtra_mean_std[:,0].argmax()]
        val_loss_mean_std = gtsl_mean_std[gtsl_mean_std[:,0].argmin()]
        val_acc_mean_std = gtsa_mean_std[gtsa_mean_std[:,0].argmax()]

        per_training_loss_mean_std = ptrl_mean_std[gtrl_mean_std[:,0].argmin()]
        per_training_acc_mean_std = ptra_mean_std[gtra_mean_std[:,0].argmax()]
        per_val_loss_mean_std = ptsl_mean_std[gtsl_mean_std[:,0].argmin()]
        per_val_acc_mean_std = ptsa_mean_std[gtsa_mean_std[:,0].argmax()]

        precision_mean_std = gp_mean_std[gp_mean_std[:,0].argmax()]
        recall_mean_std = gr_mean_std[gr_mean_std[:,0].argmax()]
        f1_mean_std = gf1_mean_std[gf1_mean_std[:,0].argmax()]

        per_precision_mean_std = pp_mean_std[pp_mean_std[:,0].argmax()]
        per_recall_mean_std = pr_mean_std[pr_mean_std[:,0].argmax()]
        per_f1_mean_std = pf1_mean_std[pf1_mean_std[:,0].argmax()]
        
        

        with h5py.File(directory_name  + '{}.h5'.format(avg_file), 'w') as hf:
            hf.create_dataset('avg_training_loss', data=gtrl_mean)
            hf.create_dataset('avg_training_accuracy', data=gtra_mean)
            hf.create_dataset('avg_test_loss', data=gtsl_mean)
            hf.create_dataset('avg_test_accuracy', data=gtsa_mean)
            
            hf.create_dataset('avg_per_training_loss', data=ptrl_mean)
            hf.create_dataset('avg_per_training_accuracy', data=ptra_mean)
            hf.create_dataset('avg_per_test_loss', data=ptsl_mean)
            hf.create_dataset('avg_per_test_accuracy', data=ptsa_mean)

            hf.create_dataset('avg_precision', data=gp_mean)
            hf.create_dataset('avg_recall', data=gr_mean)
            hf.create_dataset('avg_f1', data=gf1_mean)

            hf.create_dataset('avg_per_precision', data=pp_mean)
            hf.create_dataset('avg_per_recall', data=pr_mean)
            hf.create_dataset('avg_per_f1', data=pf1_mean)

            hf.close



        print("+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-")
        print("")
        print(f"Global training loss (mean/std) : ({training_loss_mean_std[0]} / {training_loss_mean_std[1]})")
        print(f"Global training accuracy (mean/std) : ({training_acc_mean_std[0]} / {training_acc_mean_std[1]})")
        print(f"Global test loss (mean/std) : ({val_loss_mean_std[0]} / {val_loss_mean_std[1]})")
        print(f"Global test accuracy (mean/std) : ({val_acc_mean_std[0]} / {val_acc_mean_std[1]})")
        print("+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-\n")
        print(f"Personalized training loss (mean/std) : ({per_training_loss_mean_std[0]} / {per_training_loss_mean_std[1]})")
        print(f"Personalized training accuracy (mean/std) : ({per_training_acc_mean_std[0]} / {per_training_acc_mean_std[1]})")
        print(f"Personalized test loss (mean/std) : ({per_val_loss_mean_std[0]} / {per_val_loss_mean_std[1]})")
        print(f"Personalized test accuracy (mean/std) : ({per_val_acc_mean_std[0]} / {per_val_acc_mean_std[1]})")  
        print("+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-\n")  
        print(f"Global Precision (mean/std) : ({precision_mean_std[0]} / {precision_mean_std[1]})")
        print(f"Global Recall (mean/std) : ({recall_mean_std[0]} / {recall_mean_std[1]})")
        print(f"Global F1Score (mean/std) : ({f1_mean_std[0]} / {f1_mean_std[1]})")
        print("+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-\n")  
        print(f"Personalized Precision (mean/std) : ({per_precision_mean_std[0]} / {per_precision_mean_std[1]})")
        print(f"Personalized Recall (mean/std) : ({per_recall_mean_std[0]} / {per_recall_mean_std[1]})")
        print(f"Personalized F1Score (mean/std) : ({per_f1_mean_std[0]} / {per_f1_mean_std[1]})")
        print("+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-\n") 

        file_name = 'performance_' + algorithm + '_' + str(target) + '.txt'
        # Open a file to write
        with open(directory_name + '/' + file_name, 'w') as file:
            file.write("+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-\n")
            file.write(f"algorithm : {algorithm} : target {target}")
            print_stats("Global training loss", training_loss_mean_std, file)
            print_stats("Global training accuracy", training_acc_mean_std, file)
            print_stats("Global test loss", val_loss_mean_std, file)
            print_stats("Global test accuracy", val_acc_mean_std, file)
            file.write("+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-\n\n")
            print_stats("Personalized training loss", per_training_loss_mean_std, file)
            print_stats("Personalized training accuracy", per_training_acc_mean_std, file)
            print_stats("Personalized test loss", per_val_loss_mean_std, file)
            print_stats("Personalized test accuracy", per_val_acc_mean_std, file)
            file.write("+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-\n\n")
            print_stats("Global Precision", precision_mean_std, file)
            print_stats("Global Recall", recall_mean_std, file)
            print_stats("Global F1Score", f1_mean_std, file)
            file.write("+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-\n\n")
            print_stats("Personalized Precision", per_precision_mean_std, file)
            print_stats("Personalized Recall", per_recall_mean_std, file)
            print_stats("Personalized F1Score", per_f1_mean_std, file)
            file.write("+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-\n\n") 



In [12]:
path = "/proj/sourasb-220503/FedMEM/results/ResNet50TL/FeSEM/10/40.0/h5/"
directory_name = "/proj/sourasb-220503/FedMEM/results/convergence/"
average_result(path, directory_name, 'Fesem', 'Fesem_10',10)

_exp_no_2_GR_50_BS_64.h5
_exp_no_1_GR_50_BS_64.h5
_exp_no_3_GR_50_BS_64.h5
_exp_no_0_GR_50_BS_64.h5
_exp_no_4_GR_50_BS_64.h5


ValueError: attempt to get argmin of an empty sequence