# Comparative Analysis of Number of Partitions

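This notebook compares the aggregated and server accuracy metrics of simulation runs that use different numbers of partitions. It plots the per-round curves for each run, overlays them for direct comparison, and plots the best score against the number of partitions. Every figure is saved as a PNG file.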

In [4]:
# Import required libraries
import os
import json
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

from analysis_util import load_and_extract_metrics

In [5]:
def _save_and_close(fig, filename):
    """Apply a tight layout, write ``fig`` to ``filename``, and always release the figure."""
    try:
        fig.tight_layout()
        fig.savefig(filename)
    finally:
        # Close explicitly: pyplot keeps every figure alive until it is closed,
        # and an unclosed figure is also echoed as cell output in a notebook.
        plt.close(fig)


def analyze_num_partitions(base_path, partition_values):
    """
    Plot and save metric comparisons across different partition values.

    For every value ``k`` in ``partition_values`` the metrics are read from
    ``<base_path>/num_partitions_<k>/metrics.json`` via
    ``load_and_extract_metrics``. The loaded object is indexed with the keys
    ``'aggregated_metrics'``, ``'server_metrics'``, ``'best_aggregated_metric'``
    and ``'best_server_metric'``, and a ``'num_partitions'`` entry is added to it.

    Three PNG files are written to the current working directory:

    * ``num_partitions_subplot_metrics.png`` - one row per partition value with
      the aggregated (left) and server (right) curves.
    * ``num_partitions_metrics_comparison.png`` - all aggregated curves overlaid
      in one panel and all server curves in another.
    * ``num_partitions_best_metrics.png`` - best aggregated / server score
      plotted against the number of partitions.

    Args:
        base_path (str): Base directory containing the per-partition result folders
        partition_values (list): List of partition values to analyze

    Raises:
        ValueError: If ``partition_values`` is empty.

    Note:
        Calling this function sets the global seaborn theme to ``whitegrid``.
        Errors raised by ``load_and_extract_metrics`` (e.g. for a missing file)
        are not caught here and propagate to the caller.
    """
    partition_values = list(partition_values)
    if not partition_values:
        raise ValueError("partition_values must contain at least one partition value")

    # Set up seaborn style
    sns.set_theme(style="whitegrid")

    # Load everything before creating any figure, so a bad or missing file
    # cannot leave a half-drawn figure open.
    all_metrics = []
    for partitions in partition_values:
        file_path = os.path.join(base_path, f'num_partitions_{partitions}', 'metrics.json')
        metrics = load_and_extract_metrics(file_path)
        metrics['num_partitions'] = partitions
        all_metrics.append(metrics)

    # 1. Per-partition subplots: aggregated (left column) and server (right column).
    # The grid grows with the input instead of being fixed at 7 rows; the height
    # per row is chosen so that 7 partition values still give a 20x25 figure.
    n_rows = len(all_metrics)
    fig, axes = plt.subplots(n_rows, 2, figsize=(20, n_rows * 25 / 7), squeeze=False)
    try:
        for (ax_aggregated, ax_server), metrics in zip(axes, all_metrics):
            partitions = metrics['num_partitions']

            ax_aggregated.plot(metrics['aggregated_metrics'], label=f'Partitions: {partitions}')
            ax_aggregated.set_title(f'Aggregated Metrics (Partitions: {partitions})')
            ax_aggregated.set_xlabel('Rounds')
            ax_aggregated.set_ylabel('Accuracy')
            ax_aggregated.legend()

            ax_server.plot(metrics['server_metrics'], label=f'Partitions: {partitions}', color='orange')
            ax_server.set_title(f'Server Metrics (Partitions: {partitions})')
            ax_server.set_xlabel('Rounds')
            ax_server.set_ylabel('Accuracy')
            ax_server.legend()
    except Exception:
        plt.close(fig)
        raise
    _save_and_close(fig, 'num_partitions_subplot_metrics.png')

    # 2. Comparison plots: every partition value overlaid in a single panel per metric.
    fig, (ax_aggregated, ax_server) = plt.subplots(1, 2, figsize=(15, 10))
    try:
        for metrics in all_metrics:
            label = f'{metrics["num_partitions"]} Partitions'
            ax_aggregated.plot(metrics['aggregated_metrics'], label=label)
            ax_server.plot(metrics['server_metrics'], label=label)

        ax_aggregated.set_title('Aggregated Metrics Comparison')
        ax_server.set_title('Server Metrics Comparison')
        for ax in (ax_aggregated, ax_server):
            ax.set_xlabel('Rounds')
            ax.set_ylabel('Accuracy')
            ax.legend()
    except Exception:
        plt.close(fig)
        raise
    _save_and_close(fig, 'num_partitions_metrics_comparison.png')

    # 3. Best metric scores vs number of partitions.
    best_aggregated = [metrics['best_aggregated_metric'] for metrics in all_metrics]
    best_server = [metrics['best_server_metric'] for metrics in all_metrics]

    fig, (ax_aggregated, ax_server) = plt.subplots(1, 2, figsize=(15, 10))
    try:
        ax_aggregated.plot(partition_values, best_aggregated, marker='o')
        ax_aggregated.set_title('Best Aggregated Metric Score')

        ax_server.plot(partition_values, best_server, marker='o', color='orange')
        ax_server.set_title('Best Server Metric Score')

        for ax in (ax_aggregated, ax_server):
            ax.set_xlabel('Number of Partitions')
            ax.set_ylabel('Best Accuracy')
            # Tick exactly at the analysed partition counts rather than at auto-chosen positions.
            ax.set_xticks(partition_values)
    except Exception:
        plt.close(fig)
        raise
    _save_and_close(fig, 'num_partitions_best_metrics.png')

## Run Analysis

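Set the base path to your JSON results directory and specify the partition values. For each partition value `k`, the analysis expects a file at `<base_path>/num_partitions_<k>/metrics.json`.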

In [6]:
# Directory that holds one `num_partitions_<k>/metrics.json` result per partition value
base_path = './SimulationOutputs/'
# Partition counts whose runs are compared
partition_values = [1, 2, 3, 4, 5, 10, 20]

# Build the three comparison figures and save them as PNG files
analyze_num_partitions(base_path=base_path, partition_values=partition_values)

<Figure size 2000x1500 with 0 Axes>

## Output

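Running the notebook writes three images to the current working directory:
1. `num_partitions_subplot_metrics.png`: a grid of per-partition subplots, one row per partition value (7x2 for the values above), with the aggregated metrics on the left and the server metrics on the right
2. `num_partitions_metrics_comparison.png`: the aggregated and server metric curves of all partition values overlaid for direct comparison
3. `num_partitions_best_metrics.png`: best aggregated and best server metric scores vs number of partitions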