# Metrics Visualization for Different Numbers of Epochs

In [1]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from win32com.server.localserver import serve

from analysis_util import load_and_extract_metrics

In [2]:
# Set Seaborn style and the default figure size.
# The size is passed through rcParams instead of calling plt.figure(), which
# would open an empty, never-used figure in this cell.
sns.set_theme(style="whitegrid", rc={"figure.figsize": (16, 10)})

<Figure size 1600x1000 with 0 Axes>

<Figure size 1600x1000 with 0 Axes>

## Data Preparation

In [3]:
# Map each simulated epoch count to the metrics file of that run.
paths = {
    n: f'SimulationOutputs/num_epochs_{n}/metrics.json'
    for n in (1, 2, 3, 4, 5, 10)
}

In [4]:
# Load the extracted metrics of every run, keyed by its number of epochs.
num_epochs_metrics = {
    epochs: load_and_extract_metrics(metrics_path)
    for epochs, metrics_path in paths.items()
}

In [5]:
# Prepare data for plotting
def prepare_metrics_dataframe(num_epochs_metrics, metric_type):
    """Flatten per-round metric sequences into a long-format DataFrame.

    Args:
        num_epochs_metrics: Mapping from number of epochs to a metrics
            mapping; ``metrics[metric_type]`` must be an iterable holding
            one value per round.
        metric_type: Key selecting which per-round sequence to flatten.

    Returns:
        A DataFrame with columns ``'Num Epochs'``, ``'Round'`` (0-based
        position in the sequence) and ``'Accuracy'`` (the value at that
        position). The columns are present even when there is no data, so
        column-based filtering downstream does not raise ``KeyError``.

    Raises:
        KeyError: If a metrics mapping has no ``metric_type`` entry.
    """
    columns = ['Num Epochs', 'Round', 'Accuracy']
    data = []
    for num_epochs, metrics in num_epochs_metrics.items():
        for round_num, accuracy in enumerate(metrics[metric_type]):
            data.append({
                'Num Epochs': num_epochs,
                'Round': round_num,
                'Accuracy': accuracy
            })
    # Explicit columns keep the schema stable for empty input.
    return pd.DataFrame(data, columns=columns)

In [None]:
# Long-format per-round tables for the two metric series.
server_df = prepare_metrics_dataframe(num_epochs_metrics, 'server_metrics')
client_df = prepare_metrics_dataframe(num_epochs_metrics, 'aggregated_metrics')

# Shift rounds by 1
# NOTE(review): presumably the aggregated series starts one round after the
# server series -- confirm against the simulation output.
client_df['Round'] += 1

# Best value of each series per run, one row per epoch setting.
best_metrics = {
    'Num Epochs': list(num_epochs_metrics),
    'Server Best Metrics': [
        run['best_server_metric'] for run in num_epochs_metrics.values()
    ],
    'Aggregated Best Metrics': [
        run['best_aggregated_metric'] for run in num_epochs_metrics.values()
    ],
}

best_metrics_df = pd.DataFrame(data=best_metrics)

## First Visualization: Accuracy per Round Comparison

In [None]:
# One stacked subplot per epoch setting. The grid size and figure height are
# derived from the number of loaded runs rather than assuming exactly six,
# so adding or removing entries in `paths` does not break the layout.
epoch_settings = sorted(num_epochs_metrics)
n_plots = len(epoch_settings)

# 5 inches of height per subplot (15x30 for six runs); keep a non-zero
# height even if no runs were loaded.
plt.figure(figsize=(15, 5 * max(n_plots, 1)))

for j, num_epochs in enumerate(epoch_settings):
    plt.subplot(n_plots, 1, j + 1)

    # Plot centralized accuracy
    centralized_subset = server_df[server_df['Num Epochs'] == num_epochs]
    sns.lineplot(data=centralized_subset, x='Round', y='Accuracy',
                 marker='o', color='blue', label='Centralized')

    # Plot distributed accuracy
    distributed_subset = client_df[client_df['Num Epochs'] == num_epochs]
    sns.lineplot(data=distributed_subset, x='Round', y='Accuracy',
                 marker='o', color='red', label='Distributed')

    plt.title(f'{num_epochs} Epochs - Accuracy Comparison')
    plt.xlabel('Round')
    plt.ylabel('Accuracy')
    plt.legend()

plt.tight_layout()
plt.show()

## Best Metrics as a Function of the Number of Epochs

In [None]:
# Side-by-side panels: best server metric (left) and best aggregated metric
# (right), each plotted against the number of epochs.
plt.figure(figsize=(20, 10))

panels = (
    ('Server Best Metrics', 'Best Server Metrics vs Number of Epochs'),
    ('Aggregated Best Metrics', 'Best Aggregated Metrics vs Number of Epochs'),
)
for position, (column, panel_title) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    sns.lineplot(data=best_metrics_df, x='Num Epochs', y=column, marker='o')
    plt.title(panel_title)
    plt.xlabel('Number of Epochs')
    plt.ylabel('Best Accuracy')

plt.tight_layout()
plt.show()