In [None]:
import pandas as pd
import numpy as np
import re

# Load the best-accuracy log (path is relative to the working directory)
df = pd.read_csv(filepath_or_buffer='logs/best_accuracies.csv')

# Training command whose runs should be selected from the log
command = "/home/dachille/.conda/envs/splitfed/bin/python train_fl.py --dataset cifar10 cifar100 --partition subsample --alg fedprox --n_parties 2 --optimizer adam --lr 0.001 --model resnet-18 --comm_round 50 --epochs 5 --batch-size 64 --device cuda:1 --alpha 0.1 0.5 1.0 1e200 --seed 1 12 123 --mu 0.01"

# Extract every "--flag value [value ...]" group from the command.
#   (?<!\S)--      a flag must start a token, so "--" inside a path is ignored
#   ([\w-]+)       flag name, hyphens allowed (e.g. "batch-size"), kept as written
#   (?!--)\S+      each value is a whole whitespace-delimited token, so
#                  "resnet-18" and "cuda:1" are captured intact; the run of
#                  values stops at the next token that begins with "--"
# Flags given without any value are skipped.
params = re.findall(r'(?<!\S)--([\w-]+)\s+((?!--)\S+(?:\s+(?!--)\S+)*)', command)

# Map each flag name to its list of value strings; a repeated flag keeps
# only its last occurrence.
params_dict = {k: v.split() for k, v in params}

# Filter the DataFrame based on the extracted parameters.
#
# Each flag is matched against a CSV column of the same name when one exists.
# Otherwise, for the settings listed below, it is matched against the dict
# stored as text in the 'Hyperparameters' column (and the extracted value is
# added to the frame as a new column). Flags that fit neither case are ignored.

# Converters from the command-line string to the type compared against the
# value read out of 'Hyperparameters'.
_hyperparameter_casts = {
    'alpha': float,
    'seed': int,
    'dataset': str,
    'partition': str,
    'alg': str,
    'model': str,
    'optimizer': str,
}

for param, values in params_dict.items():
    if param in df.columns:
        wanted = values
        if pd.api.types.is_numeric_dtype(df[param]):
            # read_csv infers numeric dtypes and isin() never equates the
            # string '0.001' with the number 0.001, so compare as numbers.
            # Unparseable values become NaN and are dropped so they cannot
            # accidentally match missing cells.
            wanted = pd.to_numeric(
                pd.Series(values, dtype=object), errors='coerce'
            ).dropna()
        df = df[df[param].isin(wanted)]
    elif param in _hyperparameter_casts:
        cast = _hyperparameter_casts[param]
        # NOTE(security): eval() executes whatever text the CSV contains.
        # Only run this on log files you produced yourself; if every stored
        # value is a plain Python literal, ast.literal_eval is a safer choice.
        extracted = df['Hyperparameters'].apply(lambda x: eval(x)[param])
        # assign() builds a new frame, so the column is never written into a
        # filtered view of an earlier frame (avoids SettingWithCopyWarning).
        df = df.assign(**{param: extracted})
        df = df[df[param].isin([cast(v) for v in values])]

# Drop rows with missing values (a NaN in any column removes the row)
df = df.dropna()

# Summarise the rows that survived filtering.
# The four accuracy columns reported, in output order.
accuracy_columns = [
    'Best Local Accuracy',
    'Best Local Accuracy Top-5',
    'Best Global Accuracy',
    'Best Global Accuracy Top-5',
]

if df.empty:
    print("No matching logs found for the given command.")
else:
    print("Filtered DataFrame:")
    print(df)

    # Mean of each accuracy column over all remaining rows
    (
        avg_local_accuracy,
        avg_local_accuracy_top5,
        avg_global_accuracy,
        avg_global_accuracy_top5,
    ) = (df[column].mean() for column in accuracy_columns)

    print("\nAverage Accuracies:")
    print("Local Accuracy:", avg_local_accuracy)
    print("Local Accuracy Top-5:", avg_local_accuracy_top5)
    print("Global Accuracy:", avg_global_accuracy)
    print("Global Accuracy Top-5:", avg_global_accuracy_top5)

    # Same means, broken down by the 'Client ID' column
    mean_per_column = {column: 'mean' for column in accuracy_columns}
    client_accuracies = df.groupby('Client ID').agg(mean_per_column)

    print("\nAverage Accuracies per Client:")
    print(client_accuracies)