## This code reads client-wise evaluation results and generates a summary of them. Evaluation results from attacker nodes are excluded for a fair comparison.

In [9]:
import os

import yaml

## Folder holding the client-wise evaluation artifacts of a single experiment run.
eval_folder_path = "exp/krum_sasrec_on_sr_data_lr0.001_lstep60/random_attack-250-client_wise_evaluation"
evaluation_log_path, config_path = (
    os.path.join(eval_folder_path, artifact_name)
    for artifact_name in ("eval_results.log", "config.yaml")
)

## Read the run's YAML config to determine the attack method and the attacker node ids.
with open(config_path, "r") as config_file:
    config = yaml.safe_load(config_file)

attack_settings = config["attack"]
attack_method = attack_settings["attack_method"]
attacker_id = attack_settings["attacker_id"]

print(f"attack_method: {attack_method}")
print(f"{len(attacker_id)} of attackers : {attacker_id}")


attack_method: sr_targeted_random_sasrec
250 of attackers : [6041, 6042, 6043, 6044, 6045, 6046, 6047, 6048, 6049, 6050, 6051, 6052, 6053, 6054, 6055, 6056, 6057, 6058, 6059, 6060, 6061, 6062, 6063, 6064, 6065, 6066, 6067, 6068, 6069, 6070, 6071, 6072, 6073, 6074, 6075, 6076, 6077, 6078, 6079, 6080, 6081, 6082, 6083, 6084, 6085, 6086, 6087, 6088, 6089, 6090, 6091, 6092, 6093, 6094, 6095, 6096, 6097, 6098, 6099, 6100, 6101, 6102, 6103, 6104, 6105, 6106, 6107, 6108, 6109, 6110, 6111, 6112, 6113, 6114, 6115, 6116, 6117, 6118, 6119, 6120, 6121, 6122, 6123, 6124, 6125, 6126, 6127, 6128, 6129, 6130, 6131, 6132, 6133, 6134, 6135, 6136, 6137, 6138, 6139, 6140, 6141, 6142, 6143, 6144, 6145, 6146, 6147, 6148, 6149, 6150, 6151, 6152, 6153, 6154, 6155, 6156, 6157, 6158, 6159, 6160, 6161, 6162, 6163, 6164, 6165, 6166, 6167, 6168, 6169, 6170, 6171, 6172, 6173, 6174, 6175, 6176, 6177, 6178, 6179, 6180, 6181, 6182, 6183, 6184, 6185, 6186, 6187, 6188, 6189, 6190, 6191, 6192, 6193, 6194, 6195, 6196, 619

In [10]:
## Load the evaluation log.
## Only the per-round Results_raw of each client is needed; a relevant log line looks like
##     {'Role': 'Client #<id>', 'Round': <round>, 'Results_raw': {...}}
from collections import defaultdict

## Scan the log line by line and keep every line that reports a client result.
raw_client_results = []
with open(evaluation_log_path, "r") as log_file:
    for log_line in log_file:
        if "Client #" not in log_line:
            continue
        # NOTE(review): eval() executes the log line as Python code, so this must only be
        # run on logs from a trusted source. If the lines are always plain dict literals,
        # ast.literal_eval would be a safer drop-in -- confirm before switching.
        raw_client_results.append(eval(log_line))

print(f"Number of clients: {len(raw_client_results)}")
print(f"Example of client result: {raw_client_results[0]}")

## Group every benign client's Results_raw by round.
## Records whose client id belongs to an attacker are skipped, so the summary
## below is computed over benign clients only.

# A set gives O(1) membership tests instead of scanning the attacker list for
# every record; a missing/empty attacker list simply means nothing is excluded.
attacker_id_set = set(attacker_id or [])

round_results = defaultdict(list)
print(f"total raw results counts are : {len(raw_client_results)}")
for client_result in raw_client_results:
    current_round = client_result['Round']
    # 'Role' has the form 'Client #<id>'; the numeric id follows the '#'.
    client_id = int(client_result['Role'].split("#")[1])
    if client_id in attacker_id_set:
        continue
    round_results[current_round].append(client_result["Results_raw"])

# Fail with an explicit message instead of an opaque IndexError further down
# when the log held no benign client results at all.
if not round_results:
    raise ValueError("No benign client results were found in the evaluation log.")

## The first round seen is used as the reference for the count and the metric names.
first_key = next(iter(round_results))

print(f"Number of benign client results: {len(round_results[first_key])}")

## Metric names are taken from the first benign result of the reference round.
eval_keys = list(round_results[first_key][0].keys())
## 1. Average all metrics
## 2. std of all metrics
## 3. Min/Max of all metrics


def _summarize_round_metrics(client_results, eval_keys):
    """Compute per-metric summary statistics over one round's client results.

    Args:
        client_results: non-empty list of dicts mapping metric name -> numeric value.
        eval_keys: metric names to summarize; each must be present in every dict.

    Returns:
        A tuple ``(avg_metrics, std_metrics, min_metrics, max_metrics)`` of dicts
        keyed by metric name. The standard deviation is the population standard
        deviation (divides by N, not N - 1).
    """
    n_clients = len(client_results)
    avg_metrics, std_metrics, min_metrics, max_metrics = {}, {}, {}, {}
    for key in eval_keys:
        values = [client_result[key] for client_result in client_results]
        mean = sum(values) / n_clients
        # Mean of squared deviations, then square root: the original code stopped
        # at the raw sum of squares and reported that as the standard deviation.
        variance = sum((value - mean) ** 2 for value in values) / n_clients
        avg_metrics[key] = mean
        std_metrics[key] = variance ** 0.5
        min_metrics[key] = min(values)
        max_metrics[key] = max(values)
    return avg_metrics, std_metrics, min_metrics, max_metrics


# The loop variable is named round_id so the builtin round() is not shadowed
# for the rest of the kernel session.
for round_id, client_results in round_results.items():
    print(f"Round {round_id}")
    avg_metrics, std_metrics, min_metrics, max_metrics = _summarize_round_metrics(
        client_results, eval_keys
    )
    ## 1. Average all metrics
    print(f"Averaged metrics: {avg_metrics}")
    ## 2. std of all metrics
    print(f"Standard deviation of metrics: {std_metrics}")
    ## 3. Min/Max of all metrics
    print(f"Min metrics: {min_metrics}")
    print(f"Max metrics: {max_metrics}")

    print("\n\n")



Number of clients: 6290
Example of client result: {'Role': 'Client #1', 'Round': 201, 'Results_raw': {'val_avg_loss': 8.436644554138184, 'val_loss': 8.436644554138184, 'val_total': 1, 'val_poison_recall_10': 0.0, 'val_poison_recall_20': 0.0, 'val_poison_recall_50': 0.0, 'val_poison_ndcg_10': 0.0, 'val_poison_ndcg_20': 0.0, 'val_poison_ndcg_50': 0.0, 'val_recall_10': 0.0, 'val_recall_20': 0.0, 'val_ndcg_10': 0.0, 'val_ndcg_20': 0.0, 'test_avg_loss': 8.336087226867676, 'test_loss': 8.336087226867676, 'test_total': 1, 'test_poison_recall_10': 0.0, 'test_poison_recall_20': 1.0, 'test_poison_recall_50': 1.0, 'test_poison_ndcg_10': 0.0, 'test_poison_ndcg_20': 0.27023815442731974, 'test_poison_ndcg_50': 0.27023815442731974, 'test_recall_10': 0.0, 'test_recall_20': 0.0, 'test_ndcg_10': 0.0, 'test_ndcg_20': 0.0}}
total raw results counts are : 6290
Number of benign client results: 6040
Round 201
Averaged metrics: {'val_avg_loss': 8.577538700900133, 'val_loss': 8.577538700900133, 'val_total': 1.