## This code reads client-wise evaluation results and generates a summary of them. Attacker-node results are excluded for a fair comparison.

In [1]:
import os

import yaml

## experiment folder that holds both the evaluation log and the run configuration
eval_folder_path = "exp/FedAvg_sasrec_on_sr_data_lr0.001_lstep1/CE_ML-1M_rand_aug_1e-1"
evaluation_log_path = os.path.join(eval_folder_path, "eval_results.log")
config_path = os.path.join(eval_folder_path, "config.yaml")


## read the yaml config; its "attack" section gives the attack method and the attacker node ids
with open(config_path, "r") as config_file:
    config = yaml.safe_load(config_file)

attack_section = config["attack"]
attack_method = attack_section["attack_method"]
attacker_id = attack_section["attacker_id"]

print(f"attack_method: {attack_method}")
print(f"{len(attacker_id)} of attackers : {attacker_id}")


attack_method: 
0 of attackers : []


In [2]:
## read evaluation log
## we only require every client's Result_raw for each round
import ast
from collections import defaultdict

## each client line in the log is a dict literal shaped like:
##     {'Role': 'Client #*', 'Round': *, 'Results_raw': {...}}

## parse the log file line by line
raw_client_results = []
with open(evaluation_log_path, "r") as f:
    for line in f:
        if "Client #" in line:
            ## read line as dict; ast.literal_eval accepts only Python literals,
            ## so a log line cannot execute code the way eval() would allow
            result_dict = ast.literal_eval(line.strip())
            raw_client_results.append(result_dict)

print(f"Number of clients: {len(raw_client_results)}")
if not raw_client_results:
    ## fail with an explicit message instead of an IndexError on the example below
    raise ValueError(f"no 'Client #' result lines found in {evaluation_log_path}")
print(f"Example of client result: {raw_client_results[0]}")

## bucket every benign client's Results_raw under its round;
## clients whose id appears in attacker_id are left out
print(f"total raw results counts are : {len(raw_client_results)}")
round_results = defaultdict(list)
for client_result in raw_client_results:
    current_round = client_result['Round']
    ## 'Role' looks like 'Client #<id>'; the part after '#' is the numeric client id
    role_number = client_result['Role'].split("#")[1]
    client_id = int(role_number)
    if client_id not in attacker_id:
        round_results[current_round].append(client_result["Results_raw"])

## the first round seen supplies the benign-client count and the metric names
first_key = list(round_results)[0]
benign_results = round_results[first_key]

print(f"Number of benign client results: {len(benign_results)}")

eval_keys = list(benign_results[0])
## For every round, over the benign clients only:
## 1. Average all metrics
## 2. Population std of all metrics (divide by N, then square root)
## 3. Min/Max of all metrics


## the loop variable is `round_id`, not `round`, so the builtin round() stays usable afterwards
for round_id, client_results in round_results.items():
    print(f"Round {round_id}")
    num_clients = len(client_results)

    ## 1. Average all metrics
    avg_metrics = {key: 0.0 for key in eval_keys}
    for client_result in client_results:
        for key in eval_keys:
            avg_metrics[key] += client_result[key]
    for key in eval_keys:
        avg_metrics[key] /= num_clients
    print(f"Averaged metrics: {avg_metrics}")

    ## 2. std of all metrics
    ## accumulate squared deviations from the mean first ...
    std_metrics = {key: 0.0 for key in eval_keys}
    for client_result in client_results:
        for key in eval_keys:
            std_metrics[key] += (client_result[key] - avg_metrics[key]) ** 2
    ## ... then turn the sum into a population standard deviation: sqrt(sum / N)
    for key in eval_keys:
        std_metrics[key] = (std_metrics[key] / num_clients) ** 0.5

    print(f"Standard deviation of metrics: {std_metrics}")

    ## 3. Min/Max of all metrics
    min_metrics = {key: float('inf') for key in eval_keys}
    max_metrics = {key: float('-inf') for key in eval_keys}
    for client_result in client_results:
        for key in eval_keys:
            min_metrics[key] = min(min_metrics[key], client_result[key])
            max_metrics[key] = max(max_metrics[key], client_result[key])
    print(f"Min metrics: {min_metrics}")
    print(f"Max metrics: {max_metrics}")

    print("\n\n")



Number of clients: 6040
Example of client result: {'Role': 'Client #1', 'Round': 4000, 'Results_raw': {'val_loss': 7.307229995727539, 'val_total': 1, 'val_avg_loss': 7.307229995727539, 'val_recall_10': 0.0, 'val_recall_20': 0.0, 'val_ndcg_10': 0.0, 'val_ndcg_20': 0.0, 'test_loss': 9.138725280761719, 'test_total': 1, 'test_avg_loss': 9.138725280761719, 'test_recall_10': 0.0, 'test_recall_20': 0.0, 'test_ndcg_10': 0.0, 'test_ndcg_20': 0.0}}
total raw results counts are : 6040
Number of benign client results: 6040
Round 4000
Averaged metrics: {'val_loss': 8.093041948589269, 'val_total': 1.0, 'val_avg_loss': 8.093041948589269, 'val_recall_10': 0.041721854304635764, 'val_recall_20': 0.06556291390728476, 'val_ndcg_10': 0.017070472640114905, 'val_ndcg_20': 0.022953054839313425, 'test_loss': 8.085947570184999, 'test_total': 1.0, 'test_avg_loss': 8.085947570184999, 'test_recall_10': 0.043874172185430466, 'test_recall_20': 0.06771523178807946, 'test_ndcg_10': 0.01762662407084806, 'test_ndcg_20':