# PhysioNet EEG: Generate Paper-Ready Results

This notebook generates publication-ready tables and figures from the experimental results.

**Expected Runtime**: 5-10 minutes

**Input**: 
- `results/summary_all_models.csv` (from notebook 01)
- `results/channel_selection_results.csv` (from notebook 02)
- `results/retention_analysis.csv` (from notebook 02)

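**Output** (tables and summaries are written to `results/`, figures to `figures/`): 
- `results/table_ii_model_comparison.tex` - LaTeX table for model comparison
- `results/table_iii_retention.tex` - LaTeX table for retention analysis
- `figures/figure_model_comparison.pdf` (and `.png`) - Bar chart comparing all models
- `figures/figure_retention_curves.pdf` (and `.png`) - Line plot for retention analysis
- `figures/figure_channel_selection_comparison.pdf` (and `.png`) - Line plots comparing channel selection methods
- `results/paper_summary.json` - Summary statistics for paper
- `results/final_ranking.csv` - Final model ranking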

## Setup

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import json

# --- Directories -----------------------------------------------------------
# RESULTS_DIR is where the input CSVs are read from (and where the tables and
# summary are later written); FIGURES_DIR receives the rendered figures and is
# created here if it does not exist yet.
RESULTS_DIR = './results'
FIGURES_DIR = './figures'
os.makedirs(FIGURES_DIR, exist_ok=True)

# --- Plot appearance -------------------------------------------------------
# The style sheet is applied first; the palette and the rcParams overrides
# below are layered on top of it.
plt.style.use('seaborn-v0_8-paper')
sns.set_palette("husl")
plt.rcParams.update({
    'figure.dpi': 300,
    'font.size': 10,
    'font.family': 'serif',
})

## Load Results

In [None]:
def _read_results_csv(csv_name):
    """Load one CSV file located in RESULTS_DIR into a DataFrame."""
    return pd.read_csv(os.path.join(RESULTS_DIR, csv_name))


# The three result tables produced by the earlier notebooks.
model_results = _read_results_csv('summary_all_models.csv')
channel_selection_results = _read_results_csv('channel_selection_results.csv')
retention_results = _read_results_csv('retention_analysis.csv')

# Report each table's (rows, columns) as a quick sanity check.
print("Results loaded successfully!")
for shape_report in (
    f"\nModel results shape: {model_results.shape}",
    f"Channel selection results shape: {channel_selection_results.shape}",
    f"Retention results shape: {retention_results.shape}",
):
    print(shape_report)

## Table II: Model Comparison

Generate a LaTeX table comparing all baseline models:

In [None]:
def _format_accuracy(row):
    """Render one row's mean and std accuracy as a 'mean ± std' percentage string."""
    return f"{row['mean_accuracy']*100:.2f} ± {row['std_accuracy']*100:.2f}"


# Order the models from highest to lowest mean accuracy and number them
# 1..N in that order.
model_results_sorted = (
    model_results
    .sort_values('mean_accuracy', ascending=False)
    .reset_index(drop=True)
    .assign(rank=lambda ranked: range(1, len(ranked) + 1))
)

# Display-ready accuracy column (percent, two decimals).
model_results_sorted['accuracy_str'] = model_results_sorted.apply(_format_accuracy, axis=1)

print("Model Comparison Table:")
print(model_results_sorted[['rank', 'model', 'accuracy_str']])

In [None]:
# Generate the LaTeX source for Table II (model comparison).
# The rule commands (\toprule, \midrule, \bottomrule) come from the LaTeX
# `booktabs` package, which the including document must load.
#
# The header and footer are raw strings, so the "\\" at the end of the heading
# row is written to the file as the two-character LaTeX row terminator.
table_header = r"""
\begin{table}[htbp]
\centering
\caption{Comparison of baseline methods on PhysioNet Motor Imagery dataset}
\label{tab:model_comparison}
\begin{tabular}{clc}
\toprule
\textbf{Rank} & \textbf{Method} & \textbf{Accuracy (\%)} \\
\midrule
"""

table_footer = r"""\bottomrule
\end{tabular}
\end{table}
"""

table_rows = []
for _, row in model_results_sorted.iterrows():
    method_cell = str(row['model'])
    # Use math-mode \pm instead of a raw "±" so the table compiles regardless
    # of the document's input-encoding setup.
    accuracy_cell = row['accuracy_str'].replace('±', r'$\pm$')

    # Bold the winner
    if row['rank'] == 1:
        method_cell = rf"\textbf{{{method_cell}}}"
        accuracy_cell = rf"\textbf{{{accuracy_cell}}}"

    # Raw f-string: the trailing "\\" is the LaTeX end-of-row marker.
    table_rows.append(rf"{row['rank']} & {method_cell} & {accuracy_cell} \\")

# One table row per line, and no blank line before \bottomrule: a blank line
# inside a tabular starts a new paragraph and makes the rule command fail.
latex_table = table_header + "".join(f"{line}\n" for line in table_rows) + table_footer

# Save LaTeX table
table_path = os.path.join(RESULTS_DIR, 'table_ii_model_comparison.tex')
with open(table_path, 'w', encoding='utf-8') as f:
    f.write(latex_table)

print(f"\nLaTeX table saved to: {table_path}")
print("\nTable preview:")
print(latex_table)

## Table III: Channel Selection and Retention Analysis

In [None]:
def _accuracy_labels(frame):
    """Return one 'mean ± std' percentage string per row of `frame`.

    Reads the `mean_accuracy` and `std_accuracy` columns (fractions), scales
    them to percent and formats them with two decimals. Returns an empty list
    for an empty frame.
    """
    return [
        f"{mean*100:.2f} ± {std*100:.2f}"
        for mean, std in zip(frame['mean_accuracy'], frame['std_accuracy'])
    ]


# Create retention table for different k values: only these channel counts
# are kept for display.
retention_k_display = [10, 20, 30, 40, 50, 64]
retention_display = retention_results[retention_results['k'].isin(retention_k_display)].copy()
retention_display['accuracy_str'] = _accuracy_labels(retention_display)

# Also add best channel selection results: for every (model, method) pair keep
# the row with the highest mean accuracy.
# The row labels are looked up with groupby(...).idxmax() and fetched with
# .loc rather than groupby(...).apply(...), because apply's handling of the
# grouping columns is deprecated in recent pandas and would drop the
# 'model'/'method' columns that are printed below.
scored_results = channel_selection_results.dropna(subset=['mean_accuracy'])
best_row_labels = (
    scored_results
    .groupby(['model', 'method'])['mean_accuracy']
    .idxmax()
)
cs_summary = channel_selection_results.loc[best_row_labels].reset_index(drop=True)
cs_summary['accuracy_str'] = _accuracy_labels(cs_summary)

print("Best Channel Selection Results:")
print(cs_summary[['model', 'method', 'k', 'accuracy_str']])

In [None]:
# Generate the LaTeX source for Table III (retention analysis).
# The rule commands (\toprule, \midrule, \bottomrule) come from the LaTeX
# `booktabs` package, which the including document must load.
#
# The header and footer are raw strings, so the "\\" at the end of the heading
# row is written to the file as the two-character LaTeX row terminator.
retention_header = r"""
\begin{table}[htbp]
\centering
\caption{Performance retention with channel selection using Gate Selection method}
\label{tab:retention_analysis}
\begin{tabular}{cc}
\toprule
\textbf{Channels (k)} & \textbf{Accuracy (\%)} \\
\midrule
"""

retention_footer = r"""\bottomrule
\end{tabular}
\end{table}
"""

retention_rows = []
for _, row in retention_display.iterrows():
    # Use math-mode \pm instead of a raw "±" so the table compiles regardless
    # of the document's input-encoding setup.
    accuracy_cell = row['accuracy_str'].replace('±', r'$\pm$')
    # int(...) keeps the channel count free of a trailing ".0" should the
    # column have been read as float. Raw f-string: the trailing "\\" is the
    # LaTeX end-of-row marker.
    retention_rows.append(rf"{int(row['k'])} & {accuracy_cell} \\")

# One table row per line, and no blank line before \bottomrule: a blank line
# inside a tabular starts a new paragraph and makes the rule command fail.
latex_retention = (
    retention_header
    + "".join(f"{line}\n" for line in retention_rows)
    + retention_footer
)

# Save LaTeX table
retention_table_path = os.path.join(RESULTS_DIR, 'table_iii_retention.tex')
with open(retention_table_path, 'w', encoding='utf-8') as f:
    f.write(latex_retention)

print(f"\nRetention table saved to: {retention_table_path}")

## Figure 1: Model Comparison Bar Chart

In [None]:
# Figure 1: one bar per model, ordered best-first, with the std as error bar.
fig, ax = plt.subplots(figsize=(10, 6))

accuracy_pct = model_results_sorted['mean_accuracy'] * 100
std_pct = model_results_sorted['std_accuracy'] * 100
x_pos = np.arange(len(model_results_sorted))

bars = ax.bar(
    x_pos,
    accuracy_pct,
    yerr=std_pct,
    capsize=5,
    alpha=0.8,
    edgecolor='black',
    linewidth=1.5,
)

# model_results_sorted is ordered by descending accuracy, so the first bar is
# the top-ranked model; recolour it and make it fully opaque.
top_bar = bars[0]
top_bar.set_color('#FF6B6B')
top_bar.set_alpha(1.0)

# Axis labels, title, tick labels, grid and the fixed y-range.
axis_label_font = dict(fontsize=12, fontweight='bold')
ax.set_xlabel('Method', **axis_label_font)
ax.set_ylabel('Accuracy (%)', **axis_label_font)
ax.set_title(
    'Comparison of Baseline Methods on PhysioNet Motor Imagery Dataset',
    fontsize=14, fontweight='bold', pad=20,
)
ax.set_xticks(x_pos)
ax.set_xticklabels(model_results_sorted['model'], rotation=45, ha='right')
ax.grid(axis='y', alpha=0.3, linestyle='--')
ax.set_ylim([75, 90])

# Write each model's accuracy slightly above the top of its bar.
for bar, row in zip(bars, model_results_sorted.itertuples()):
    label_x = bar.get_x() + bar.get_width()/2.
    label_y = bar.get_height() + 0.5
    ax.text(
        label_x, label_y,
        f'{row.mean_accuracy*100:.2f}',
        ha='center', va='bottom', fontsize=9, fontweight='bold',
    )

plt.tight_layout()

# Save the figure as both PDF and PNG.
fig_path = os.path.join(FIGURES_DIR, 'figure_model_comparison.pdf')
png_path = os.path.join(FIGURES_DIR, 'figure_model_comparison.png')
plt.savefig(fig_path, format='pdf', bbox_inches='tight', dpi=300)
plt.savefig(png_path, format='png', bbox_inches='tight', dpi=300)

print(f"\nModel comparison figure saved to: {fig_path}")
plt.show()

## Figure 2: Retention Analysis Curves

In [None]:
# Figure 2: accuracy as a function of the number of retained channels.
fig, ax = plt.subplots(figsize=(10, 6))

# Sort by k so the line and the shaded band are drawn left to right even if
# the CSV rows are not stored in order of k.
retention_curve = retention_results.sort_values('k')
curve_k = retention_curve['k']
curve_mean = retention_curve['mean_accuracy']
curve_std = retention_curve['std_accuracy']

# Plot retention curve
ax.plot(curve_k, curve_mean * 100,
        marker='o', linewidth=2, markersize=8, label='Adaptive-Gating-EEG-ARNN (Gate Selection)',
        color='#4ECDC4')

# Shaded band of +/- one standard deviation around the mean
ax.fill_between(curve_k,
                (curve_mean - curve_std) * 100,
                (curve_mean + curve_std) * 100,
                alpha=0.2, color='#4ECDC4')

# Add baseline (64 channels): the full-channel accuracy of the same model,
# taken from the model comparison results. Fail with an explicit message if
# that model is not present, instead of an opaque IndexError.
baseline_rows = model_results_sorted[model_results_sorted['model'] == 'Adaptive-Gating-EEG-ARNN']
if baseline_rows.empty:
    raise ValueError(
        "Model 'Adaptive-Gating-EEG-ARNN' not found in the model results; "
        f"available models: {sorted(model_results_sorted['model'].astype(str))}"
    )
baseline_acc = baseline_rows['mean_accuracy'].values[0]
ax.axhline(y=baseline_acc * 100, color='red', linestyle='--', linewidth=2,
           label=f'Full 64 channels ({baseline_acc*100:.2f}%)', alpha=0.7)

# Labels and formatting
ax.set_xlabel('Number of Channels (k)', fontsize=12, fontweight='bold')
ax.set_ylabel('Accuracy (%)', fontsize=12, fontweight='bold')
ax.set_title('Performance Retention with Channel Selection',
             fontsize=14, fontweight='bold', pad=20)
ax.grid(True, alpha=0.3, linestyle='--')
ax.legend(fontsize=10, loc='lower right')
ax.set_xlim([0, 70])
ax.set_ylim([75, 90])

# Annotate key points. A k without a measurement is skipped (with a notice)
# rather than aborting the whole figure.
key_k_values = [10, 20, 30]
for k in key_k_values:
    rows_for_k = retention_results[retention_results['k'] == k]
    if rows_for_k.empty:
        print(f"Note: no retention result for k={k}; annotation skipped")
        continue
    row = rows_for_k.iloc[0]
    # The label box is placed 3 percentage points below the data point.
    ax.annotate(f'k={k}\n{row["mean_accuracy"]*100:.2f}%',
                xy=(k, row['mean_accuracy']*100),
                xytext=(k, row['mean_accuracy']*100 - 3),
                ha='center', fontsize=8,
                bbox=dict(boxstyle='round,pad=0.3', facecolor='yellow', alpha=0.3))

plt.tight_layout()

# Save figure as both PDF and PNG
retention_fig_path = os.path.join(FIGURES_DIR, 'figure_retention_curves.pdf')
plt.savefig(retention_fig_path, format='pdf', bbox_inches='tight', dpi=300)
plt.savefig(os.path.join(FIGURES_DIR, 'figure_retention_curves.png'),
            format='png', bbox_inches='tight', dpi=300)

print(f"\nRetention curve figure saved to: {retention_fig_path}")
plt.show()

## Figure 3: Channel Selection Method Comparison

In [None]:
# Figure 3: channel selection methods compared on a single model.
# Keep only the rows belonging to Adaptive-Gating-EEG-ARNN.
is_adaptive_gating = channel_selection_results['model'] == 'Adaptive-Gating-EEG-ARNN'
adaptive_gating_cs = channel_selection_results[is_adaptive_gating]

fig, ax = plt.subplots(figsize=(10, 6))

# One curve per selection method, each with a shaded band of +/- one std.
for method in ('edge', 'aggregation', 'gate'):
    method_data = adaptive_gating_cs[adaptive_gating_cs['method'] == method]
    k_values = method_data['k']
    mean_acc = method_data['mean_accuracy']
    std_acc = method_data['std_accuracy']

    ax.plot(
        k_values, mean_acc * 100,
        marker='o', linewidth=2, markersize=6, label=method.upper(),
    )
    ax.fill_between(
        k_values,
        (mean_acc - std_acc) * 100,
        (mean_acc + std_acc) * 100,
        alpha=0.15,
    )

# Axis labels, title, grid and legend.
axis_label_font = dict(fontsize=12, fontweight='bold')
ax.set_xlabel('Number of Channels (k)', **axis_label_font)
ax.set_ylabel('Accuracy (%)', **axis_label_font)
ax.set_title(
    'Comparison of Channel Selection Methods\n(Adaptive-Gating-EEG-ARNN)',
    fontsize=14, fontweight='bold', pad=20,
)
ax.grid(True, alpha=0.3, linestyle='--')
ax.legend(fontsize=10, loc='lower right', title='Selection Method')

plt.tight_layout()

# Save the figure as both PDF and PNG.
cs_fig_path = os.path.join(FIGURES_DIR, 'figure_channel_selection_comparison.pdf')
cs_png_path = os.path.join(FIGURES_DIR, 'figure_channel_selection_comparison.png')
plt.savefig(cs_fig_path, format='pdf', bbox_inches='tight', dpi=300)
plt.savefig(cs_png_path, format='png', bbox_inches='tight', dpi=300)

print(f"\nChannel selection comparison figure saved to: {cs_fig_path}")
plt.show()

## Summary Statistics for Paper

In [None]:
def _accuracy_fields(row):
    """Return the accuracy entries for one model row.

    Produces the raw mean/std as floats plus their percentage renderings as
    two-decimal strings.
    """
    return {
        'accuracy': float(row['mean_accuracy']),
        'std': float(row['std_accuracy']),
        'accuracy_pct': f"{row['mean_accuracy']*100:.2f}",
        'std_pct': f"{row['std_accuracy']*100:.2f}",
    }


# model_results_sorted is ordered best-first, so row 0 is the winner.
top_row = model_results_sorted.iloc[0]

# Best channel selection method: the cs_summary row with the highest accuracy.
best_cs = cs_summary.loc[cs_summary['mean_accuracy'].idxmax()]

# Retention analysis: smallest k whose accuracy reaches 90% of the baseline.
# NOTE(review): the baseline here is the top-ranked model's accuracy, while the
# retention figure looks up the 'Adaptive-Gating-EEG-ARNN' row by name —
# confirm the two are meant to coincide.
baseline_acc = top_row['mean_accuracy']
target_acc = baseline_acc * 0.9  # 90% retention

retention_90pct = retention_results[retention_results['mean_accuracy'] >= target_acc]
channels_for_90pct = int(retention_90pct['k'].min()) if len(retention_90pct) > 0 else None

# Accuracy at exactly k = 30, when such a row exists.
acc_30 = retention_results[retention_results['k'] == 30]
accuracy_at_30 = f"{acc_30.iloc[0]['mean_accuracy']*100:.2f}" if len(acc_30) > 0 else None

# Assemble the summary; entries stay None when the corresponding result is absent.
summary = {
    'winner': {'model': top_row['model'], **_accuracy_fields(top_row)},
    'all_models': {
        row['model']: {'rank': int(row['rank']), **_accuracy_fields(row)}
        for _, row in model_results_sorted.iterrows()
    },
    'channel_selection': {
        'best_method': best_cs['method'],
        'best_k': int(best_cs['k']),
        'best_accuracy': f"{best_cs['mean_accuracy']*100:.2f}",
    },
    'retention': {
        'channels_for_90pct_retention': channels_for_90pct,
        'accuracy_with_30_channels': accuracy_at_30,
    },
}

# Write the summary to disk and echo it.
summary_path = os.path.join(RESULTS_DIR, 'paper_summary.json')
with open(summary_path, 'w') as f:
    json.dump(summary, f, indent=2)

print("\nPaper Summary:")
print(json.dumps(summary, indent=2))
print(f"\nSummary saved to: {summary_path}")

## Final Ranking Table

In [None]:
# Final ranking for easy reference: the ranking columns plus percentage
# versions of the mean and std accuracy.
final_ranking = (
    model_results_sorted[['rank', 'model', 'mean_accuracy', 'std_accuracy']]
    .assign(
        accuracy_pct=lambda ranking: ranking['mean_accuracy'] * 100,
        std_pct=lambda ranking: ranking['std_accuracy'] * 100,
    )
)

# Write the ranking as CSV without the row index.
ranking_path = os.path.join(RESULTS_DIR, 'final_ranking.csv')
final_ranking.to_csv(ranking_path, index=False)

print("\nFinal Model Ranking:")
print(final_ranking[['rank', 'model', 'accuracy_pct', 'std_pct']])
print(f"\nRanking saved to: {ranking_path}")

## Verification

Verify that all outputs are ready for the paper:

In [None]:
# Check all required files.
# Paths are built from RESULTS_DIR / FIGURES_DIR (the same constants used when
# the files were written) so the check keeps pointing at the right place if
# those directories are reconfigured.
required_files = [
    (os.path.join(RESULTS_DIR, 'table_ii_model_comparison.tex'), 'LaTeX Table II - Model Comparison'),
    (os.path.join(RESULTS_DIR, 'table_iii_retention.tex'), 'LaTeX Table III - Retention Analysis'),
    (os.path.join(FIGURES_DIR, 'figure_model_comparison.pdf'), 'Figure 1 - Model Comparison'),
    (os.path.join(FIGURES_DIR, 'figure_retention_curves.pdf'), 'Figure 2 - Retention Curves'),
    (os.path.join(FIGURES_DIR, 'figure_channel_selection_comparison.pdf'), 'Figure 3 - Channel Selection Methods'),
    (os.path.join(RESULTS_DIR, 'paper_summary.json'), 'Paper Summary Statistics'),
    (os.path.join(RESULTS_DIR, 'final_ranking.csv'), 'Final Model Ranking'),
]

print("\n" + "="*60)
print("VERIFICATION: Paper-Ready Outputs")
print("="*60 + "\n")

# Report every expected file with its size, or flag it as missing.
all_present = True
for filepath, description in required_files:
    if os.path.exists(filepath):
        file_size = os.path.getsize(filepath)
        print(f"[OK] {description}")
        print(f"     Path: {filepath}")
        print(f"     Size: {file_size:,} bytes\n")
    else:
        print(f"[MISSING] {description}")
        print(f"          Expected: {filepath}\n")
        all_present = False

# Overall verdict banner.
if all_present:
    print("\n" + "="*60)
    print("SUCCESS: All paper outputs generated successfully!")
    print("="*60)
else:
    print("\n" + "="*60)
    print("WARNING: Some outputs are missing!")
    print("="*60)

## Key Findings Summary

In [None]:
# Print a human-readable digest of the values collected in `summary`.
print("\n" + "="*60)
print("KEY FINDINGS FOR PAPER")
print("="*60 + "\n")

winner = summary['winner']
print(f"1. BEST MODEL: {winner['model']}")
print(f"   Accuracy: {winner['accuracy_pct']}% ± {winner['std_pct']}%\n")

channel_selection = summary['channel_selection']
print("2. CHANNEL SELECTION:")
print(f"   Best Method: {channel_selection['best_method'].upper()}")
print(f"   Optimal k: {channel_selection['best_k']}")
print(f"   Accuracy: {channel_selection['best_accuracy']}%\n")

# Either retention entry may be None when the corresponding result was not
# found; print only the lines that have a value so "None%" never appears.
retention = summary['retention']
channels_90pct = retention['channels_for_90pct_retention']
accuracy_30 = retention['accuracy_with_30_channels']
if channels_90pct is not None or accuracy_30 is not None:
    print("3. RETENTION:")
    if channels_90pct is not None:
        print(f"   90% retention achieved with: {channels_90pct} channels")
    if accuracy_30 is not None:
        print(f"   Accuracy with 30 channels: {accuracy_30}%")
        # Share of channels removed when going from 64 down to 30.
        reduction = (1 - 30/64) * 100
        print(f"   Channel reduction: {reduction:.1f}%")
    print()

print("4. MODEL RANKING:")
for i, row in enumerate(model_results_sorted.itertuples(), 1):
    print(f"   {i}. {row.model}: {row.mean_accuracy*100:.2f}%")

print("\n" + "="*60)
print("All results ready for paper submission!")
print("="*60)