# Enhanced Model Testing Summary with Degree-Based Error Analysis

This notebook extends the original model testing summary (Notebook 5) with comprehensive
degree-based error analysis for better understanding of model performance patterns.

## Enhanced Features

- **Degree-stratified performance metrics**: Analyze errors by node degree combinations
- **Bias-variance decomposition**: Understand error sources across degree ranges
- **Enhanced visualizations**: Heatmaps and plots by degree categories
- **Scalable framework**: Optimized for both small graphs and HPC deployment

## Workflow

1. Load and aggregate model performance data
2. Apply degree-based analysis to each edge type
3. Generate comprehensive degree-stratified metrics
4. Create enhanced visualizations
5. Provide degree-aware model recommendations

In [1]:
# Papermill parameters (optional)
edge_types = None  # None = use small graphs for testing, or provide list
small_graph_mode = True  # Set to False for full HPC analysis
max_edges_small = 10000  # Maximum edges for small graph classification

In [2]:
import sys
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import json
import scipy.sparse as sp
from typing import Dict, List, Tuple
import warnings
warnings.filterwarnings('ignore')

# Setup paths
repo_dir = Path.cwd().parent
src_dir = repo_dir / 'src'
data_dir = repo_dir / 'data'
results_dir = repo_dir / 'results' / 'model_comparison'
summary_dir = repo_dir / 'results' / 'model_comparison_summary_with_degree'
degree_analysis_dir = repo_dir / 'results' / 'degree_analysis_enhanced'

summary_dir.mkdir(parents=True, exist_ok=True)
degree_analysis_dir.mkdir(parents=True, exist_ok=True)

sys.path.append(str(src_dir))

# Import modules
from degree_analysis import DegreeAnalyzer, identify_small_graphs, run_degree_analysis_pipeline

print(f"Repository directory: {repo_dir}")
print(f"Results directory: {results_dir}")
print(f"Summary output directory: {summary_dir}")
print(f"Degree analysis output directory: {degree_analysis_dir}")
print(f"Small graph mode: {small_graph_mode}")

# Set plot style
sns.set_style('whitegrid')
plt.rcParams['figure.dpi'] = 100

Repository directory: /projects/lgillenwater@xsede.org/repositories/Context-Aware-Path-Probability
Results directory: /projects/lgillenwater@xsede.org/repositories/Context-Aware-Path-Probability/results/model_comparison
Summary output directory: /projects/lgillenwater@xsede.org/repositories/Context-Aware-Path-Probability/results/model_comparison_summary_with_degree
Degree analysis output directory: /projects/lgillenwater@xsede.org/repositories/Context-Aware-Path-Probability/results/degree_analysis_enhanced
Small graph mode: True


## 1. Identify Target Edge Types

In [3]:
# Determine which edge types to analyze
if edge_types is None:
    if small_graph_mode:
        # Use small graphs for local testing
        small_graphs = identify_small_graphs(data_dir, max_edges=max_edges_small)
        edge_types = [g['edge_type'] for g in small_graphs[:5]]  # Top 5 smallest
        print(f"Small graph mode: analyzing {len(edge_types)} smallest edge types")
        
        # Display selected graphs
        small_df = pd.DataFrame(small_graphs[:5])
        small_df['density_pct'] = small_df['density'] * 100
        print("\nSelected edge types:")
        print(small_df[['edge_type', 'n_edges', 'shape', 'density_pct']].to_string(index=False))
    else:
        # Use all available edge types
        DEFAULT_EDGE_TYPES = [
            "AdG", "AeG", "AuG", "CbG", "CcSE", "CdG", "CpD", "CrC", "CtD", "CuG",
            "DaG", "DdG", "DlA", "DpS", "DrD", "DuG", "GcG", "GiG", "GpBP", "GpCC",
            "GpMF", "GpPW", "Gr>G", "PCiC"
        ]
        edge_types = DEFAULT_EDGE_TYPES
        print(f"Full analysis mode: analyzing {len(edge_types)} edge types")

print(f"\nTarget edge types: {edge_types}")

Small graph mode: analyzing 5 smallest edge types

Selected edge types:
edge_type  n_edges       shape  density_pct
      CpD      390 (1552, 137)     0.183422
      CtD      755 (1552, 137)     0.355087
     PCiC     1029 (345, 1552)     0.192178
      DrD     1086  (137, 137)     5.786137
      DpS     3357  (137, 438)     5.594441

Target edge types: ['CpD', 'CtD', 'PCiC', 'DrD', 'DpS']


## 2. Load Original Model Performance Data

In [4]:
def load_edge_type_results(edge_type: str) -> Dict:
    """Load all result files for a given edge type."""
    edge_results_dir = results_dir / f"{edge_type}_results"
    
    if not edge_results_dir.exists():
        return None
    
    results = {'edge_type': edge_type}
    
    # Load model comparison metrics
    comparison_file = edge_results_dir / 'model_comparison.csv'
    if comparison_file.exists():
        results['model_comparison'] = pd.read_csv(comparison_file)
    
    # Load analytical comparison
    analytical_file = edge_results_dir / 'models_vs_analytical_comparison.csv'
    if analytical_file.exists():
        results['analytical_comparison'] = pd.read_csv(analytical_file)
    
    # Load empirical comparison
    empirical_file = edge_results_dir / 'test_vs_empirical_comparison.csv'
    if empirical_file.exists():
        results['empirical_comparison'] = pd.read_csv(empirical_file)
    
    # Load analytical vs empirical comparison
    analytical_empirical_file = edge_results_dir / 'analytical_vs_empirical_comparison.csv'
    if analytical_empirical_file.exists():
        results['analytical_vs_empirical'] = pd.read_csv(analytical_empirical_file)
        
    return results

# Load all results
all_results = {}
successful_loads = 0
failed_loads = []

for edge_type in edge_types:
    result = load_edge_type_results(edge_type)
    if result is not None:
        all_results[edge_type] = result
        successful_loads += 1
    else:
        failed_loads.append(edge_type)

print(f"Successfully loaded results for {successful_loads} edge types")
if failed_loads:
    print(f"Failed to load results for {len(failed_loads)} edge types: {failed_loads}")

# Update edge_types to only include successful loads
edge_types = list(all_results.keys())
print(f"\nProceeding with {len(edge_types)} edge types: {edge_types}")

Successfully loaded results for 5 edge types

Proceeding with 5 edge types: ['CpD', 'CtD', 'PCiC', 'DrD', 'DpS']


## 3. Run Degree-Based Analysis Pipeline

In [5]:
# Run degree analysis for each edge type
degree_analysis_results = {}
successful_degree_analyses = []
failed_degree_analyses = []

print(f"Running degree-based analysis for {len(edge_types)} edge types...\n")

for i, edge_type in enumerate(edge_types):
    print(f"[{i+1}/{len(edge_types)}] Analyzing {edge_type}...")
    
    try:
        file_paths = run_degree_analysis_pipeline(
            edge_type=edge_type,
            data_dir=data_dir,
            results_dir=results_dir,
            output_dir=degree_analysis_dir,
            small_graph_mode=small_graph_mode
        )
        
        if file_paths:
            degree_analysis_results[edge_type] = file_paths
            successful_degree_analyses.append(edge_type)
            print(f"  ✓ Success - Generated files for {len(file_paths)} models")
        else:
            failed_degree_analyses.append(edge_type)
            print(f"  ✗ Failed - No output generated")
    
    except Exception as e:
        failed_degree_analyses.append(edge_type)
        print(f"  ✗ Error: {e}")

print(f"\n{'='*60}")
print(f"DEGREE ANALYSIS SUMMARY")
print(f"{'='*60}")
print(f"Successful: {len(successful_degree_analyses)} - {successful_degree_analyses}")
print(f"Failed: {len(failed_degree_analyses)} - {failed_degree_analyses}")
print(f"Success rate: {len(successful_degree_analyses)/len(edge_types)*100:.1f}%")

Running degree-based analysis for 5 edge types...

[1/5] Analyzing CpD...
  ✗ Error: 'Model'
[2/5] Analyzing CtD...
  ✗ Error: 'Model'
[3/5] Analyzing PCiC...
  ✗ Error: 'Model'
[4/5] Analyzing DrD...
  ✗ Error: 'Model'
[5/5] Analyzing DpS...
  ✗ Error: 'Model'

DEGREE ANALYSIS SUMMARY
Successful: 0 - []
Failed: 5 - ['CpD', 'CtD', 'PCiC', 'DrD', 'DpS']
Success rate: 0.0%


## 4. Aggregate Degree-Based Metrics

In [6]:
# Aggregate degree-based metrics across all edge types and models
degree_metrics_list = []

for edge_type in successful_degree_analyses:
    # Find all degree metrics files for this edge type
    metrics_files = list(degree_analysis_dir.glob(f'{edge_type}_*_degree_metrics.csv'))
    
    for metrics_file in metrics_files:
        # Extract model name from filename
        filename_parts = metrics_file.stem.split('_')
        model_name = ' '.join(filename_parts[1:-2])  # Remove edge_type and 'degree_metrics'
        
        try:
            metrics_df = pd.read_csv(metrics_file)
            metrics_df['edge_type'] = edge_type
            metrics_df['model'] = model_name
            degree_metrics_list.append(metrics_df)
        except Exception as e:
            print(f"Error loading {metrics_file}: {e}")

if degree_metrics_list:
    degree_metrics_df = pd.concat(degree_metrics_list, ignore_index=True)
    print(f"Aggregated degree metrics: {len(degree_metrics_df)} records")
    print(f"Edge types: {degree_metrics_df['edge_type'].nunique()}")
    print(f"Models: {degree_metrics_df['model'].unique().tolist()}")
    print(f"Degree combinations: {degree_metrics_df['degree_combination'].nunique()}")
    
    # Display sample
    print("\nSample degree metrics:")
    display_cols = ['edge_type', 'model', 'degree_combination', 'n_samples', 'mae', 'correlation']
    print(degree_metrics_df[display_cols].head(10).to_string(index=False))
else:
    degree_metrics_df = pd.DataFrame()
    print("No degree metrics data available")

No degree metrics data available


## 5. Enhanced Visualizations

In [7]:
if not degree_metrics_df.empty:
    # 1. Error by degree combination across all edge types
    fig, axes = plt.subplots(2, 2, figsize=(18, 14))
    
    metrics_to_plot = ['mae', 'rmse', 'correlation', 'bias']
    
    for idx, metric in enumerate(metrics_to_plot):
        ax = axes[idx // 2, idx % 2]
        
        # Average metric by degree combination and model
        pivot_data = degree_metrics_df.groupby(['degree_combination', 'model'])[metric].mean().unstack(fill_value=np.nan)
        
        # Create grouped bar plot
        pivot_data.plot(kind='bar', ax=ax, width=0.8)
        
        ax.set_title(f'Average {metric.upper()} by Degree Combination', fontsize=14, fontweight='bold')
        ax.set_xlabel('Degree Combination', fontsize=12)
        ax.set_ylabel(f'Mean {metric.upper()}', fontsize=12)
        ax.legend(title='Model', bbox_to_anchor=(1.05, 1), loc='upper left')
        ax.grid(axis='y', alpha=0.3)
        ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')
    
    plt.tight_layout()
    plt.savefig(summary_dir / 'degree_based_performance_overview.png', dpi=300, bbox_inches='tight')
    plt.show()
    print("Saved degree-based performance overview")

In [8]:
if not degree_metrics_df.empty:
    # 2. Model comparison heatmap by degree combination
    fig, axes = plt.subplots(1, 2, figsize=(20, 8))
    
    # MAE heatmap
    mae_pivot = degree_metrics_df.groupby(['model', 'degree_combination'])['mae'].mean().unstack(fill_value=np.nan)
    sns.heatmap(mae_pivot, annot=True, fmt='.4f', cmap='Reds', ax=axes[0],
                cbar_kws={'label': 'Mean Absolute Error'})
    axes[0].set_title('MAE by Model and Degree Combination', fontsize=14, fontweight='bold')
    axes[0].set_xlabel('Degree Combination', fontsize=12)
    axes[0].set_ylabel('Model', fontsize=12)
    
    # Correlation heatmap
    corr_pivot = degree_metrics_df.groupby(['model', 'degree_combination'])['correlation'].mean().unstack(fill_value=np.nan)
    sns.heatmap(corr_pivot, annot=True, fmt='.4f', cmap='Blues', ax=axes[1],
                cbar_kws={'label': 'Correlation'})
    axes[1].set_title('Correlation by Model and Degree Combination', fontsize=14, fontweight='bold')
    axes[1].set_xlabel('Degree Combination', fontsize=12)
    axes[1].set_ylabel('Model', fontsize=12)
    
    plt.tight_layout()
    plt.savefig(summary_dir / 'model_degree_heatmaps.png', dpi=300, bbox_inches='tight')
    plt.show()
    print("Saved model-degree heatmaps")

In [9]:
if not degree_metrics_df.empty:
    # 3. Sample size distribution by degree combination
    fig, axes = plt.subplots(1, 2, figsize=(16, 6))
    
    # Box plot of sample sizes
    degree_metrics_df.boxplot(column='n_samples', by='degree_combination', ax=axes[0])
    axes[0].set_title('Sample Size Distribution by Degree Combination', fontsize=14, fontweight='bold')
    axes[0].set_xlabel('Degree Combination', fontsize=12)
    axes[0].set_ylabel('Sample Size', fontsize=12)
    axes[0].set_xticklabels(axes[0].get_xticklabels(), rotation=45, ha='right')
    plt.suptitle('')  # Remove automatic title
    
    # Average sample size by degree combination
    avg_samples = degree_metrics_df.groupby('degree_combination')['n_samples'].mean().sort_values(ascending=False)
    avg_samples.plot(kind='bar', ax=axes[1], color='skyblue', edgecolor='black')
    axes[1].set_title('Average Sample Size by Degree Combination', fontsize=14, fontweight='bold')
    axes[1].set_xlabel('Degree Combination', fontsize=12)
    axes[1].set_ylabel('Average Sample Size', fontsize=12)
    axes[1].grid(axis='y', alpha=0.3)
    axes[1].set_xticklabels(axes[1].get_xticklabels(), rotation=45, ha='right')
    
    # Add value labels
    for i, v in enumerate(avg_samples.values):
        axes[1].text(i, v + v*0.02, f'{int(v)}', ha='center', va='bottom', fontweight='bold')
    
    plt.tight_layout()
    plt.savefig(summary_dir / 'sample_size_by_degree.png', dpi=300, bbox_inches='tight')
    plt.show()
    print("Saved sample size analysis")

## 6. Enhanced Model Recommendations

In [10]:
# Generate degree-aware model recommendations
if not degree_metrics_df.empty:
    print(f"\n{'='*80}")
    print(f"ENHANCED MODEL RECOMMENDATIONS WITH DEGREE ANALYSIS")
    print(f"{'='*80}")
    
    # 1. Best model by degree combination (correlation)
    print(f"\n1. BEST MODEL BY DEGREE COMBINATION (Correlation):")
    best_by_degree = degree_metrics_df.groupby('degree_combination')['correlation'].agg(['mean', 'idxmax'])
    for degree_combo in best_by_degree.index:
        best_idx = best_by_degree.loc[degree_combo, 'idxmax']
        best_model = degree_metrics_df.loc[best_idx, 'model']
        best_corr = best_by_degree.loc[degree_combo, 'mean']
        print(f"   {degree_combo}: {best_model} (avg correlation: {best_corr:.4f})")
    
    # 2. Most robust model across degree combinations (lowest std)
    print(f"\n2. MOST ROBUST MODELS (Lowest correlation std across degree combinations):")
    model_stability = degree_metrics_df.groupby('model')['correlation'].agg(['mean', 'std']).sort_values('std')
    print(f"   Most stable: {model_stability.index[0]} (std: {model_stability.iloc[0]['std']:.4f}, mean: {model_stability.iloc[0]['mean']:.4f})")
    print(f"   Least stable: {model_stability.index[-1]} (std: {model_stability.iloc[-1]['std']:.4f}, mean: {model_stability.iloc[-1]['mean']:.4f})")
    
    # 3. Best models for high vs low degree nodes
    print(f"\n3. RECOMMENDATIONS BY DEGREE CATEGORY:")
    
    # Categorize degree combinations
    degree_metrics_df['is_high_degree'] = degree_metrics_df['degree_combination'].str.contains('High|Hub')
    
    high_degree_best = degree_metrics_df[degree_metrics_df['is_high_degree']].groupby('model')['correlation'].mean().idxmax()
    low_degree_best = degree_metrics_df[~degree_metrics_df['is_high_degree']].groupby('model')['correlation'].mean().idxmax()
    
    high_degree_corr = degree_metrics_df[degree_metrics_df['is_high_degree']].groupby('model')['correlation'].mean().max()
    low_degree_corr = degree_metrics_df[~degree_metrics_df['is_high_degree']].groupby('model')['correlation'].mean().max()
    
    print(f"   High-degree nodes: {high_degree_best} (avg correlation: {high_degree_corr:.4f})")
    print(f"   Low-degree nodes: {low_degree_best} (avg correlation: {low_degree_corr:.4f})")
    
    # 4. Error magnitude by degree combination
    print(f"\n4. ERROR PATTERNS BY DEGREE COMBINATION:")
    avg_mae_by_degree = degree_metrics_df.groupby('degree_combination')['mae'].mean().sort_values(ascending=False)
    print(f"   Highest error: {avg_mae_by_degree.index[0]} (MAE: {avg_mae_by_degree.iloc[0]:.4f})")
    print(f"   Lowest error: {avg_mae_by_degree.index[-1]} (MAE: {avg_mae_by_degree.iloc[-1]:.4f})")
    
    # 5. Sample size considerations
    print(f"\n5. SAMPLE SIZE CONSIDERATIONS:")
    min_samples_by_degree = degree_metrics_df.groupby('degree_combination')['n_samples'].min().sort_values()
    print(f"   Smallest sample size: {min_samples_by_degree.index[0]} ({min_samples_by_degree.iloc[0]} samples)")
    print(f"   Largest sample size: {min_samples_by_degree.index[-1]} ({min_samples_by_degree.iloc[-1]} samples)")
    
    low_sample_combinations = min_samples_by_degree[min_samples_by_degree < 100]
    if len(low_sample_combinations) > 0:
        print(f"   ⚠ Low sample size combinations (<100): {list(low_sample_combinations.index)}")
    
else:
    print("No degree metrics available for enhanced recommendations")

No degree metrics available for enhanced recommendations


## 7. Save Enhanced Results

In [11]:
# Save aggregated degree metrics
if not degree_metrics_df.empty:
    degree_metrics_file = summary_dir / 'aggregate_degree_metrics.csv'
    degree_metrics_df.to_csv(degree_metrics_file, index=False)
    print(f"Saved aggregated degree metrics to: {degree_metrics_file}")
    
    # Save summary statistics
    summary_stats = {
        'total_edge_types_analyzed': len(successful_degree_analyses),
        'degree_combinations_found': degree_metrics_df['degree_combination'].nunique(),
        'models_analyzed': degree_metrics_df['model'].nunique(),
        'total_records': len(degree_metrics_df),
        'analysis_mode': 'small_graph' if small_graph_mode else 'full_scale',
        'successful_edge_types': successful_degree_analyses,
        'failed_edge_types': failed_degree_analyses
    }
    
    summary_file = summary_dir / 'degree_analysis_summary.json'
    with open(summary_file, 'w') as f:
        json.dump(summary_stats, f, indent=2)
    print(f"Saved summary statistics to: {summary_file}")

# List all generated files
print(f"\n{'='*80}")
print(f"ANALYSIS COMPLETE")
print(f"{'='*80}")
print(f"\nGenerated files in {summary_dir}:")
for file in sorted(summary_dir.glob('*')):
    print(f"  - {file.name}")

print(f"\nDetailed degree analysis files in {degree_analysis_dir}:")
analysis_files = list(degree_analysis_dir.glob('*'))
print(f"  Total files: {len(analysis_files)}")
print(f"  CSV files: {len([f for f in analysis_files if f.suffix == '.csv'])}")
print(f"  PNG files: {len([f for f in analysis_files if f.suffix == '.png'])}")

if small_graph_mode:
    print(f"\n✓ Small graph validation complete!")
    print(f"✓ Framework ready for HPC deployment")
else:
    print(f"\n✓ Full-scale degree analysis complete!")
    print(f"✓ Results ready for publication")


ANALYSIS COMPLETE

Generated files in /projects/lgillenwater@xsede.org/repositories/Context-Aware-Path-Probability/results/model_comparison_summary_with_degree:

Detailed degree analysis files in /projects/lgillenwater@xsede.org/repositories/Context-Aware-Path-Probability/results/degree_analysis_enhanced:
  Total files: 0
  CSV files: 0
  PNG files: 0

✓ Small graph validation complete!
✓ Framework ready for HPC deployment
