# Adaptive Federated Learning for Agricultural IoT - Analysis

This notebook provides a comprehensive analysis of the federated learning experiments.
It loads the results of each experiment and compares their performance.

In [None]:
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import glob

# Set style
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

%matplotlib inline

## 1. Load Experiment Results

In [None]:
def _read_json_or_empty(path):
    """Return the parsed JSON content of ``path``, or ``{}`` if the file does not exist."""
    if not path.exists():
        return {}
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)


def load_experiment_results(results_dir='./results'):
    """Load all experiment results from the results directory.

    Every ``<name>_strategy.json`` file in ``results_dir`` defines one
    experiment called ``<name>``. The optional siblings
    ``<name>_history.json`` and ``<name>_config.json`` are loaded when present.

    Args:
        results_dir: Directory that contains the result files.

    Returns:
        A dict mapping experiment name to
        ``{'strategy': ..., 'history': ..., 'config': ...}``. ``history`` and
        ``config`` are ``{}`` when their file is missing. Experiments whose
        files cannot be read or parsed are reported on stdout and skipped.
        Entries are inserted in sorted file-path order.
    """
    suffix = '_strategy.json'
    results = {}
    results_path = Path(results_dir)

    # glob returns matches in arbitrary, filesystem-dependent order; sort so
    # the experiment order (and every table/plot built from it) is reproducible.
    strategy_files = sorted(glob.glob(str(results_path / ('*' + suffix))))

    for strategy_file in strategy_files:
        strategy_path = Path(strategy_file)

        # Strip only the trailing suffix. A plain str.replace would also remove
        # '_strategy' occurring inside the experiment name itself.
        exp_name = strategy_path.name[:-len(suffix)]

        try:
            with open(strategy_path, 'r', encoding='utf-8') as f:
                strategy_data = json.load(f)

            # Build sibling paths from the file name only, so directory
            # components are never rewritten.
            history_data = _read_json_or_empty(strategy_path.with_name(exp_name + '_history.json'))
            config_data = _read_json_or_empty(strategy_path.with_name(exp_name + '_config.json'))
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(f"Error loading {strategy_file}: {e}")
            continue

        results[exp_name] = {
            'strategy': strategy_data,
            'history': history_data,
            'config': config_data
        }
        print(f"Loaded experiment: {exp_name}")

    return results

# Load every experiment from the default results directory and list what was found
results = load_experiment_results()
print(f"\nLoaded {len(results)} experiments")
for name in results:
    print(f"  - {name}")

## 2. Performance Comparison

In [None]:
def extract_performance_metrics(results):
    """Build a one-row-per-experiment table of headline metrics.

    Metrics are read from ``data['strategy']['training_history']``:
    ``global_accuracy``, ``global_loss``, ``client_accuracies`` and
    ``communication_rounds``. Any metric whose source list is missing or
    empty is reported as 0.

    Note that 'Convergence Rounds' is simply the number of recorded
    global-accuracy entries, and it is also the fallback for
    'Communication Rounds'.

    Returns:
        A ``pandas.DataFrame`` with the columns Experiment, Final Accuracy,
        Max Accuracy, Final Loss, Min Loss, Convergence Rounds, Accuracy Std,
        Accuracy Variance and Communication Rounds.
    """
    rows = []

    for exp_name, data in results.items():
        history = data.get('strategy', {}).get('training_history', {})

        accs = history.get('global_accuracy', [])
        losses = history.get('global_loss', [])
        per_round_client_accs = history.get('client_accuracies', [])

        # Global accuracy summary
        final_accuracy = accs[-1] if accs else 0
        max_accuracy = max(accs) if accs else 0
        convergence_round = len(accs) if accs else 0

        # Global loss summary
        final_loss = losses[-1] if losses else 0
        min_loss = min(losses) if losses else 0

        # Spread of the last round's per-client accuracies (fairness metric)
        last_round_accs = per_round_client_accs[-1] if per_round_client_accs else []
        if last_round_accs:
            acc_std = np.std(last_round_accs)
            acc_variance = np.var(last_round_accs)
        else:
            acc_std = acc_variance = 0

        rows.append({
            'Experiment': exp_name,
            'Final Accuracy': final_accuracy,
            'Max Accuracy': max_accuracy,
            'Final Loss': final_loss,
            'Min Loss': min_loss,
            'Convergence Rounds': convergence_round,
            'Accuracy Std': acc_std,
            'Accuracy Variance': acc_variance,
            # Communication efficiency
            'Communication Rounds': history.get('communication_rounds', convergence_round),
        })

    return pd.DataFrame(rows)

# Build the notebook-level metrics table (read later by other cells) and show it
if not results:
    print("No results found. Please run experiments first.")
else:
    metrics_df = extract_performance_metrics(results)
    print("Performance Comparison:")
    print(metrics_df.round(4))

## 3. Visualization of Results

In [None]:
def _get_training_history(data):
    """Return the ``training_history`` dict stored under ``data['strategy']`` (``{}`` if absent)."""
    return data.get('strategy', {}).get('training_history', {})


def _plot_per_round_series(ax, results, key, marker, title, ylabel, per_round=None):
    """Draw one line per experiment for the per-round list stored under ``key``.

    ``per_round`` optionally maps each round's entry to the plotted value.
    """
    for exp_name, data in results.items():
        series = _get_training_history(data).get(key, [])
        if not series:
            continue
        values = [per_round(entry) for entry in series] if per_round else series
        rounds = list(range(1, len(series) + 1))
        ax.plot(rounds, values, marker=marker, linewidth=2, label=exp_name.replace('_', ' '))

    ax.set_title(title)
    ax.set_xlabel('Round')
    ax.set_ylabel(ylabel)
    # Only request a legend when something was drawn; legend() on an axis
    # without labelled artists just emits a warning.
    handles, _ = ax.get_legend_handles_labels()
    if handles:
        ax.legend()
    ax.grid(True, alpha=0.3)


def _plot_summary_bars(ax, summary, column, title, ylabel, color):
    """Draw one bar per experiment for ``column`` of the ``summary`` table."""
    summary.plot(x='Experiment', y=column, kind='bar', ax=ax, legend=False, color=color)
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.tick_params(axis='x', rotation=45)


def plot_experiment_comparison(results):
    """Create a 2x3 figure comparing all experiments in ``results``.

    Panels: global accuracy per round, global loss per round, final accuracy,
    per-round standard deviation of client accuracies, communication rounds,
    and the first round reaching 90% of each experiment's maximum accuracy.

    Every panel is derived from the ``results`` argument. The two bar charts
    no longer depend on a notebook-level ``metrics_df`` variable, so they
    cannot be stale or missing. Prints a message and returns when ``results``
    is empty.
    """
    if not results:
        print("No results to plot")
        return

    fig, axes = plt.subplots(2, 3, figsize=(20, 12))
    fig.suptitle('Federated Learning Experiment Comparison', fontsize=16, fontweight='bold')

    # Per-experiment summary for the bar charts, using the same defaults as
    # the metrics table: 0 when there is no accuracy history, and the number
    # of recorded rounds when 'communication_rounds' is absent.
    summary_rows = []
    for exp_name, data in results.items():
        history = _get_training_history(data)
        accuracies = history.get('global_accuracy', [])
        recorded_rounds = len(accuracies) if accuracies else 0
        summary_rows.append({
            'Experiment': exp_name,
            'Final Accuracy': accuracies[-1] if accuracies else 0,
            'Communication Rounds': history.get('communication_rounds', recorded_rounds),
        })
    summary = pd.DataFrame(summary_rows)

    # Plot 1: Global Accuracy Over Rounds
    _plot_per_round_series(axes[0, 0], results, 'global_accuracy', 'o',
                           'Global Accuracy vs Rounds', 'Accuracy')

    # Plot 2: Global Loss Over Rounds
    _plot_per_round_series(axes[0, 1], results, 'global_loss', 's',
                           'Global Loss vs Rounds', 'Loss')

    # Plot 3: Final Accuracy Comparison
    _plot_summary_bars(axes[0, 2], summary, 'Final Accuracy',
                       'Final Accuracy Comparison', 'Accuracy', 'skyblue')

    # Plot 4: Accuracy Variance (Fairness) - std of client accuracies per round
    _plot_per_round_series(axes[1, 0], results, 'client_accuracies', '^',
                           'Client Accuracy Variance vs Rounds', 'Accuracy Std Dev',
                           per_round=lambda accs: np.std(accs) if accs else 0)

    # Plot 5: Communication Efficiency
    _plot_summary_bars(axes[1, 1], summary, 'Communication Rounds',
                       'Communication Rounds', 'Rounds', 'lightcoral')

    # Plot 6: Convergence Speed
    ax6 = axes[1, 2]
    convergence_data = []
    exp_names = []
    for exp_name, data in results.items():
        accuracies = _get_training_history(data).get('global_accuracy', [])
        if not accuracies:
            continue
        # First (0-based) round whose accuracy reaches 90% of the maximum
        target_acc = 0.9 * max(accuracies)
        first_hit = next((i for i, acc in enumerate(accuracies) if acc >= target_acc), len(accuracies))
        convergence_data.append(first_hit + 1)  # report as a 1-based round number
        exp_names.append(exp_name.replace('_', ' '))

    if convergence_data:
        ax6.bar(exp_names, convergence_data, color='lightgreen')
        ax6.set_title('Convergence Speed (90% of Max Accuracy)')
        ax6.set_ylabel('Rounds to Convergence')
        ax6.tick_params(axis='x', rotation=45)

    plt.tight_layout()
    plt.show()

# Render the 2x3 comparison figure for the loaded experiments
plot_experiment_comparison(results)

## 4. Statistical Analysis

In [None]:
def statistical_analysis(results):
    """Print a statistical summary of the experiment metrics.

    Reads the notebook-level ``metrics_df`` table, so the metrics cell must
    have run first. ``results`` is only used to detect that experiments were
    loaded.

    Printed sections: descriptive statistics, the best experiment by final
    accuracy / convergence rounds / accuracy std, relative accuracy change
    versus the first experiment whose name contains 'baseline' or 'fedavg',
    and a correlation matrix of the main numeric metrics.
    """
    # An empty table would make idxmax()/idxmin() fail, so treat it as "no data".
    if not results or 'metrics_df' not in globals() or metrics_df.empty:
        print("No data available for statistical analysis")
        return

    print("=== Statistical Analysis ===\n")

    # Summary statistics
    print("Summary Statistics:")
    print(metrics_df.describe().round(4))
    print()

    # Best performing experiment
    best_accuracy = metrics_df.loc[metrics_df['Final Accuracy'].idxmax()]
    print(f"Best Final Accuracy: {best_accuracy['Experiment']} ({best_accuracy['Final Accuracy']:.4f})")

    best_convergence = metrics_df.loc[metrics_df['Convergence Rounds'].idxmin()]
    print(f"Fastest Convergence: {best_convergence['Experiment']} ({best_convergence['Convergence Rounds']} rounds)")

    most_fair = metrics_df.loc[metrics_df['Accuracy Std'].idxmin()]
    print(f"Most Fair (lowest std): {most_fair['Experiment']} ({most_fair['Accuracy Std']:.4f})")
    print()

    # Improvement analysis: the first experiment that looks like a baseline
    baseline_exp = next(
        (exp for exp in metrics_df['Experiment']
         if 'baseline' in exp.lower() or 'fedavg' in exp.lower()),
        None,
    )

    if baseline_exp:
        baseline_acc = metrics_df[metrics_df['Experiment'] == baseline_exp]['Final Accuracy'].iloc[0]
        print(f"Improvements over baseline ({baseline_exp}): {baseline_acc:.4f}")

        if baseline_acc == 0:
            # Dividing by a zero baseline would print inf%/nan% for every row.
            print("  Baseline accuracy is 0; relative improvement is undefined")
        else:
            for _, row in metrics_df.iterrows():
                if row['Experiment'] != baseline_exp:
                    improvement = (row['Final Accuracy'] - baseline_acc) / baseline_acc * 100
                    print(f"  {row['Experiment']}: {improvement:+.2f}%")
        print()

    # Correlation analysis
    print("Correlation Analysis:")
    corr_cols = ['Final Accuracy', 'Convergence Rounds', 'Accuracy Std', 'Communication Rounds']
    correlation_matrix = metrics_df[corr_cols].corr()
    print(correlation_matrix.round(3))

# Print the statistical summary (reads the notebook-level metrics_df, so run the metrics cell first)
statistical_analysis(results)

## 5. Adaptive Participation Analysis

In [None]:
def analyze_adaptive_participation(results):
    """Print participation statistics for adaptive-participation experiments.

    Only experiments whose name contains 'adaptive' or 'combined'
    (case-insensitive) are considered. For each one, the values of
    ``training_history['participation_counts']`` are summarized as mean, std,
    min, max and a min/max fairness ratio.
    """
    adaptive_results = {name: data for name, data in results.items()
                        if 'adaptive' in name.lower() or 'combined' in name.lower()}

    if not adaptive_results:
        print("No adaptive participation experiments found")
        return

    print("=== Adaptive Participation Analysis ===\n")

    for exp_name, data in adaptive_results.items():
        print(f"Experiment: {exp_name}")

        training_history = data.get('strategy', {}).get('training_history', {})
        participation_counts = training_history.get('participation_counts', {})

        if participation_counts:
            counts = list(participation_counts.values())
            lowest, highest = min(counts), max(counts)

            print(f"  Client participation distribution:")
            print(f"    Mean: {np.mean(counts):.2f}")
            print(f"    Std: {np.std(counts):.2f}")
            print(f"    Min: {lowest}, Max: {highest}")
            if highest != 0:
                print(f"    Fairness ratio (min/max): {lowest / highest:.3f}")
            else:
                # Every count is 0, so the ratio would be a division by zero.
                print("    Fairness ratio (min/max): n/a (max participation count is 0)")

        print()

# Summarize client participation for experiments named with 'adaptive' or 'combined'
analyze_adaptive_participation(results)

## 6. Clustering Analysis

In [None]:
def _round_sort_key(round_id):
    """Sort key that orders integer-like round identifiers numerically."""
    try:
        return (0, int(round_id), '')
    except (TypeError, ValueError):
        # Non-numeric identifiers sort after numeric ones, alphabetically.
        return (1, 0, str(round_id))


def _cluster_stability(cluster_assignments, rounds):
    """Return the share of client assignments unchanged between consecutive rounds."""
    changes = 0
    total_assignments = 0

    for prev_round, curr_round in zip(rounds, rounds[1:]):
        prev_assignments = cluster_assignments[prev_round]
        curr_assignments = cluster_assignments[curr_round]

        # Only clients present in both rounds can be compared.
        for client_id, prev_cluster in prev_assignments.items():
            if client_id in curr_assignments:
                total_assignments += 1
                if prev_cluster != curr_assignments[client_id]:
                    changes += 1

    return 1 - (changes / total_assignments) if total_assignments > 0 else 1


def analyze_clustering(results):
    """Print clustering and personalization statistics.

    Only experiments whose name contains 'cluster', 'personalization' or
    'combined' (case-insensitive) are considered. For each one this reports,
    when the data is present in ``training_history``:

    * cluster stability across consecutive rounds and the size distribution
      of the final round, from ``cluster_assignments`` (used here as a mapping
      ``round -> {client_id: cluster_id}``);
    * the range and std of the last entry of ``client_accuracies``.

    Rounds are ordered numerically. Keys loaded from JSON are strings, and a
    plain ``sorted()`` would place "10" before "2".
    """
    clustering_results = {
        name: data for name, data in results.items()
        if any(tag in name.lower() for tag in ('cluster', 'personalization', 'combined'))
    }

    if not clustering_results:
        print("No clustering experiments found")
        return

    print("=== Clustering and Personalization Analysis ===\n")

    for exp_name, data in clustering_results.items():
        print(f"Experiment: {exp_name}")

        training_history = data.get('strategy', {}).get('training_history', {})
        cluster_assignments = training_history.get('cluster_assignments', {})

        if cluster_assignments:
            rounds = sorted(cluster_assignments, key=_round_sort_key)

            # Analyze cluster stability (needs at least two rounds to compare)
            if len(rounds) > 1:
                stability = _cluster_stability(cluster_assignments, rounds)
                print(f"  Cluster stability: {stability:.3f}")

            # Analyze final cluster distribution
            final_assignments = cluster_assignments[rounds[-1]]
            cluster_sizes = {}
            for cluster_id in final_assignments.values():
                cluster_sizes[cluster_id] = cluster_sizes.get(cluster_id, 0) + 1

            print(f"  Final cluster sizes: {dict(sorted(cluster_sizes.items()))}")

            # Calculate cluster balance
            sizes = list(cluster_sizes.values())
            if sizes:
                balance = min(sizes) / max(sizes)
                print(f"  Cluster balance (min/max): {balance:.3f}")

        # Analyze personalization benefit
        client_accuracies = training_history.get('client_accuracies', [])
        if client_accuracies:
            final_client_accs = client_accuracies[-1]
            if final_client_accs:
                print(f"  Client accuracy range: [{min(final_client_accs):.4f}, {max(final_client_accs):.4f}]")
                print(f"  Client accuracy std: {np.std(final_client_accs):.4f}")

        print()

# Report cluster stability and size balance for experiments named with 'cluster', 'personalization' or 'combined'
analyze_clustering(results)

## 7. Recommendations and Conclusions

In [None]:
def generate_recommendations(results):
    """Print deployment recommendations derived from the metrics table.

    Uses the notebook-level ``metrics_df``. If it is not defined, or
    ``results`` is empty, a notice is printed and nothing else happens. The
    output names the experiments with the highest final accuracy, the lowest
    accuracy std and the fewest convergence rounds, followed by a fixed list
    of practical recommendations and implementation guidelines.
    """
    if not (results and 'metrics_df' in globals()):
        print("No data available for recommendations")
        return

    print("=== Recommendations for Agricultural IoT Deployment ===\n")

    # Pick the winning row for each criterion
    best_overall = metrics_df.loc[metrics_df['Final Accuracy'].idxmax()]
    most_fair = metrics_df.loc[metrics_df['Accuracy Std'].idxmin()]
    fastest_convergence = metrics_df.loc[metrics_df['Convergence Rounds'].idxmin()]

    ranked_sections = (
        (
            f"1. **Best Overall Performance**: {best_overall['Experiment']}",
            f"   - Achieves highest final accuracy: {best_overall['Final Accuracy']:.4f}",
            f"   - Convergence time: {best_overall['Convergence Rounds']} rounds",
        ),
        (
            f"2. **Most Fair Distribution**: {most_fair['Experiment']}",
            f"   - Lowest accuracy variance: {most_fair['Accuracy Std']:.4f}",
            f"   - Good for ensuring all farms benefit equally",
        ),
        (
            f"3. **Fastest Convergence**: {fastest_convergence['Experiment']}",
            f"   - Converges in: {fastest_convergence['Convergence Rounds']} rounds",
            f"   - Best for time-critical deployments",
        ),
    )
    for section in ranked_sections:
        for line in section:
            print(line)
        print()

    # Practical recommendations
    print("**Practical Deployment Recommendations:**")
    print("")

    # The first section only has advice when the top experiment is a combined one
    large_scale_advice = ()
    if 'combined' in best_overall['Experiment'].lower():
        large_scale_advice = (
            "   - Use combined adaptive participation + clustering approach",
            "   - Provides best balance of accuracy and fairness",
        )

    practical_sections = (
        ("1. **For Large-Scale Agricultural Networks:**", large_scale_advice),
        ("2. **For Resource-Constrained Environments:**", (
            "   - Implement adaptive client selection to handle unreliable devices",
            "   - Use compression techniques for communication efficiency",
        )),
        ("3. **For Heterogeneous Farm Conditions:**", (
            "   - Deploy clustered personalization for regional adaptations",
            "   - Group farms by climate, soil type, and crop patterns",
        )),
        ("4. **For Data Privacy Concerns:**", (
            "   - Federated learning keeps sensitive farm data local",
            "   - Additional differential privacy can be added if needed",
        )),
    )
    for heading, bullets in practical_sections:
        print(heading)
        for bullet in bullets:
            print(bullet)
        print("")

    guidelines = (
        "**Key Implementation Guidelines:**",
        "- Start with 3-5 clusters for regional personalization",
        "- Use adaptive participation with 60-70% client sampling",
        "- Implement FedProx (μ=0.01) for handling device heterogeneity",
        "- Plan for 25-40 communication rounds for convergence",
        "- Monitor client participation fairness and adjust weights accordingly",
    )
    for line in guidelines:
        print(line)

# Print deployment recommendations (reads the notebook-level metrics_df, so run the metrics cell first)
generate_recommendations(results)

## 8. Export Results for Report

In [None]:
def export_results_summary(results, output_file='experiment_summary.csv'):
    """Write a per-experiment summary table to CSV and return it.

    The table is built from the notebook-level ``metrics_df`` plus three
    derived columns:

    * 'Accuracy Improvement %' - relative change in final accuracy versus the
      first experiment whose name contains 'baseline' or 'fedavg'; 0.0 when
      no such experiment exists or its accuracy is falsy.
    * 'Fairness Score' - ``1 / (1 + Accuracy Std)``; higher is better.
    * 'Efficiency Score' - ``Max Accuracy / Convergence Rounds``.

    Args:
        results: Not read by this function; kept for a uniform call signature.
        output_file: Destination CSV path.

    Returns:
        The summary ``DataFrame`` rounded to 4 decimals, or ``None`` when
        ``metrics_df`` is not defined.
    """
    if 'metrics_df' not in globals():
        print("No metrics available to export")
        return

    summary_df = metrics_df.copy()

    # Final accuracy of the first baseline-like experiment, if there is one
    baseline_acc = next(
        (
            accuracy
            for exp_name, accuracy in zip(summary_df['Experiment'], summary_df['Final Accuracy'])
            if 'baseline' in exp_name.lower() or 'fedavg' in exp_name.lower()
        ),
        None,
    )

    # Derived metrics
    summary_df['Fairness Score'] = 1 / (1 + summary_df['Accuracy Std'])  # Higher is better
    summary_df['Efficiency Score'] = summary_df['Max Accuracy'] / summary_df['Convergence Rounds']
    if baseline_acc:
        relative_gain = (summary_df['Final Accuracy'] - baseline_acc) / baseline_acc * 100
        summary_df['Accuracy Improvement %'] = relative_gain.round(2)
    else:
        summary_df['Accuracy Improvement %'] = 0.0

    # Keep and order only the columns wanted in the report
    report_columns = [
        'Experiment', 'Final Accuracy', 'Max Accuracy', 'Accuracy Improvement %',
        'Final Loss', 'Convergence Rounds', 'Accuracy Std', 'Fairness Score',
        'Efficiency Score', 'Communication Rounds'
    ]
    summary_df = summary_df[report_columns].round(4)

    summary_df.to_csv(output_file, index=False)
    print(f"Results summary exported to {output_file}")

    print("\nExperiment Summary:")
    print(summary_df)

    return summary_df

# Write the summary CSV, but only when experiments were loaded
if not results:
    print("No results to export")
else:
    summary = export_results_summary(results)