In [1]:
import pandas as pd
import numpy as np
import json
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns
import re 
import warnings

In [2]:
class CorrectExtendedHypergraphResultsAggregator:
    """Load per-method hypergraph benchmark results and print comparison tables.

    Results are read from ``<results_dir>/<method>/*.json``.  Each file is
    expected to hold a dict with a ``'results'`` list whose entries are dicts
    carrying a ``'Hypergraph'`` name plus one value per ML metric.  A metric
    value may be a plain number or a ``"mean ± std"`` string; only the mean is
    used.  Files that do not follow this layout are ignored by the reports
    instead of raising.
    """

    # Metrics where a smaller value is the better one.  Every other metric is
    # ranked highest-first when the best performer is selected.
    LOWER_IS_BETTER_METRICS = frozenset({'Log Loss'})

    # Suffix stripped from a JSON file's stem to obtain its task name.
    _RESULTS_SUFFIX = '_results'

    def __init__(self, results_dir="results"):
        """Configure the aggregator; no file is touched until ``load_all_results``.

        Args:
            results_dir: Directory that contains one sub-directory per method.
        """
        self.results_dir = Path(results_dir)
        self.methods = ['HPRA', 'Word_Embeddings', 'HyperGNN', 'CHESHIRE']

        # File task name (file stem without "_results") -> display task name.
        self.task_file_mapping = {
            'hypergraph_reconstruction': 'hypergraph_reconstruction',
            'hypergraph_prediction': 'prediction',
            'hypergraph_temporal_prediction': 'temporal_prediction'
        }

        self.display_tasks = ['hypergraph_reconstruction', 'prediction', 'temporal_prediction']

        # Keys looked up in every result entry; they must match the JSON exactly.
        self.ml_metrics = [
            'F1 Score', 'Recall', 'Precision', 'Accuracy', 'ROC-AUC', 'Log Loss', 'Matthews Coefficient'
        ]

        self.example_names = []  # Filled from the loaded data
        self.name_mapping = {}   # Displayed example name -> name used in the data
        self.all_results = {}    # method -> display task -> parsed JSON document

    # ------------------------------------------------------------------
    # Value parsing and lookup helpers
    # ------------------------------------------------------------------
    def _try_parse_value(self, value):
        """Return *value* as a float, or ``None`` when it is not numeric.

        Strings are split on ``±`` and the part before it (the mean) is handed
        to ``float``, so signs and scientific notation are accepted and a
        malformed number yields ``None`` instead of an exception.
        """
        if isinstance(value, (int, float)):
            return float(value)
        if not isinstance(value, str):
            return None
        mean_text = value.partition('±')[0].strip()
        try:
            return float(mean_text)
        except ValueError:
            return None

    def _parse_value(self, value):
        """
        Parse value that might be a number or a string like "0.7811 ± 0.0988".
        Returns the numeric value (mean if it's mean±std format), or 0.0 when
        the value cannot be interpreted as a number.
        """
        parsed = self._try_parse_value(value)
        return 0.0 if parsed is None else parsed

    def _get_results(self, method, task_name):
        """Return the result entries loaded for *method*/*task_name*.

        Returns ``None`` when nothing usable was loaded (unknown method or
        task, or a document without a ``'results'`` list); otherwise the list
        of dict entries, which may be empty.
        """
        task_data = self.all_results.get(method, {}).get(task_name)
        if not isinstance(task_data, dict):
            return None
        results = task_data.get('results')
        if not isinstance(results, list):
            return None
        return [entry for entry in results if isinstance(entry, dict)]

    def _metric_values(self, results, metric):
        """Collect every parseable value of *metric* from *results*.

        Unparseable values are skipped; genuine zeros are kept, including
        zeros written as strings such as ``"0.0000 ± 0.0000"``.
        """
        values = []
        for entry in results:
            if metric not in entry:
                continue
            parsed = self._try_parse_value(entry[metric])
            if parsed is not None:
                values.append(parsed)
        return values

    @staticmethod
    def _find_example_result(results, example_name):
        """Return the first entry whose ``'Hypergraph'`` equals *example_name*, else ``None``."""
        for entry in results:
            if entry.get('Hypergraph') == example_name:
                return entry
        return None

    @staticmethod
    def _print_table(df, decimals=4):
        """Print *df* without its index, floats rendered with *decimals* places."""
        print(df.to_string(index=False, float_format=lambda x: f'{x:.{decimals}f}'))

    # ------------------------------------------------------------------
    # Loading
    # ------------------------------------------------------------------
    def load_all_results(self):
        """Read every ``*.json`` file of every configured method.

        Parsed documents are stored in ``self.all_results[method][task]``.
        Missing method directories and unreadable or invalid files are
        reported on stdout and skipped.  Example names are refreshed afterwards.
        """
        for method in self.methods:
            method_dir = self.results_dir / method
            if not method_dir.exists():
                print(f"⚠️  Warning: Directory {method_dir} not found")
                continue

            self.all_results[method] = {}
            # Sorted so the load order (and the printed log) is reproducible.
            json_files = sorted(method_dir.glob("*.json"))
            print(f"\n🔍 {method}: Found {len(json_files)} files")

            for json_file in json_files:
                # Extract task name from filename
                file_task_name = json_file.stem
                if file_task_name.endswith(self._RESULTS_SUFFIX):
                    file_task_name = file_task_name[:-len(self._RESULTS_SUFFIX)]

                # Map to display task name
                display_task = self.task_file_mapping.get(file_task_name, file_task_name)

                try:
                    # Explicit UTF-8: values contain "±", which must not depend
                    # on the platform's default encoding.
                    with open(json_file, 'r', encoding='utf-8') as f:
                        data = json.load(f)
                except (OSError, ValueError) as e:
                    # ValueError covers both invalid JSON and undecodable bytes.
                    print(f"  ❌ Error loading {json_file}: {e}")
                    continue

                self.all_results[method][display_task] = data
                print(f"  ✅ {display_task} (from {json_file.name})")

        self._extract_example_names()

    def _extract_example_names(self):
        """Extract actual example names from the loaded data."""
        example_names_set = set()

        for method, tasks in self.all_results.items():
            for task in tasks:
                for result in self._get_results(method, task) or []:
                    if 'Hypergraph' in result:
                        example_names_set.add(result['Hypergraph'])

        if example_names_set:
            self.example_names = sorted(example_names_set)
            print(f"\n📋 Found examples: {self.example_names}")
            self._abbreviate_example_names()
        else:
            print("\n⚠️  No examples found in data")

    def _abbreviate_example_names(self):
        """Abbreviate example names for better table formatting.

        The map is currently empty, so names pass through unchanged; the
        commented entries show the intended format of an abbreviation.
        """
        abbreviation_map = {
            # 'Email Communication': 'Email Comm.',
            # 'Sci. Collaboration': 'Sci. Collab.',
            # 'Random Hypergraph': 'Random',
            # 'Karate Club': 'Karate',
            # 'Les Miserables': 'Les Mis.',
            # 'Social Events': 'Soc. Events',
            # 'Southern Women': 'South W.',
            # 'Temporal Random': 'Temporal R.'
        }

        original_names = self.example_names.copy()
        self.example_names = [abbreviation_map.get(name, name) for name in self.example_names]
        print(f"📝 Abbreviated to: {self.example_names}")

        # Store mapping for reverse lookup
        self.name_mapping = dict(zip(self.example_names, original_names))

    # ------------------------------------------------------------------
    # Reports
    # ------------------------------------------------------------------
    def _build_method_comparison(self, task_name):
        """Build the per-method average table for *task_name* without printing.

        Returns a DataFrame with one row per method that has data for the
        task (a metric without any usable value is reported as 0.0), or
        ``None`` when no method has data.
        """
        comparison_data = []
        for method in self.methods:
            results = self._get_results(method, task_name)
            if results is None:
                continue
            row = {'Method': method}
            for metric in self.ml_metrics:
                values = self._metric_values(results, metric)
                row[metric] = np.mean(values) if values else 0.0
            comparison_data.append(row)

        return pd.DataFrame(comparison_data) if comparison_data else None

    def create_extended_method_comparison(self, task_name):
        """Create comparison table showing all methods for a specific task.

        Prints the table and returns it as a DataFrame, or returns ``None``
        when no method has data for *task_name*.
        """
        print(f"\n{'='*100}")
        print(f"📊 EXTENDED METHOD COMPARISON FOR TASK: {task_name.upper()}")
        print(f"All {len(self.ml_metrics)} ML Performance Metrics")
        print(f"{'='*100}")

        df = self._build_method_comparison(task_name)
        if df is None:
            print("❌ No data available for this task")
            return None

        self._print_table(df, decimals=4)
        return df

    def create_example_based_comparison(self, task_name):
        """Create comparison tables for each example.

        Returns a dict mapping each displayed example name to a DataFrame
        with one row per method that reported that example.
        """
        print(f"\n{'='*100}")
        print(f"📊 EXAMPLE-BASED COMPARISON FOR TASK: {task_name.upper()}")
        print(f"Performance of Methods on Different Examples")
        print(f"{'='*100}")

        example_comparisons = {}

        for example_abbrev in self.example_names:
            original_name = self.name_mapping.get(example_abbrev, example_abbrev)
            print(f"\n📈 {example_abbrev.upper()}:")
            print("-" * 80)

            comparison_data = []
            for method in self.methods:
                results = self._get_results(method, task_name)
                if results is None:
                    continue
                example_result = self._find_example_result(results, original_name)
                if example_result is None:
                    continue
                row = {'Method': method}
                for metric in self.ml_metrics:
                    row[metric] = self._parse_value(example_result.get(metric, 0))
                comparison_data.append(row)

            if comparison_data:
                df = pd.DataFrame(comparison_data)
                example_comparisons[example_abbrev] = df
                self._print_table(df, decimals=3)
            else:
                print(f"❌ No data available for {example_abbrev}")

        return example_comparisons

    def create_cross_example_analysis(self, task_name):
        """Create analysis showing how each method performs across different examples.

        For every metric a methods-by-examples table is printed; an example a
        method did not report is shown as 0.0.  Tables with more than four
        examples are printed in two halves to keep them readable.
        """
        print(f"\n{'='*100}")
        print(f"📊 CROSS-EXAMPLE ANALYSIS FOR TASK: {task_name.upper()}")
        print(f"How Methods Perform Across Different Examples")
        print(f"{'='*100}")

        for metric in self.ml_metrics:
            print(f"\n📈 {metric.upper()} ACROSS EXAMPLES:")
            print("-" * 80)

            # Create matrix: methods vs examples
            matrix_data = []
            for method in self.methods:
                results = self._get_results(method, task_name)
                if results is None:
                    continue
                row = {'Method': method}
                for example_abbrev in self.example_names:
                    original_name = self.name_mapping.get(example_abbrev, example_abbrev)
                    entry = self._find_example_result(results, original_name)
                    if entry is None:
                        row[example_abbrev] = 0.0
                    else:
                        row[example_abbrev] = self._parse_value(entry.get(metric, 0))
                matrix_data.append(row)

            if not matrix_data:
                continue

            df = pd.DataFrame(matrix_data)

            # Split into two parts if too wide
            if len(self.example_names) > 4:
                mid_point = len(self.example_names) // 2
                print(f"\nPart 1:")
                self._print_table(df[['Method'] + self.example_names[:mid_point]], decimals=4)
                print(f"\nPart 2:")
                self._print_table(df[['Method'] + self.example_names[mid_point:]], decimals=4)
            else:
                self._print_table(df, decimals=4)

            print()

    def _best_method_for_metric(self, task, metric):
        """Return ``(method, average)`` of the best method for *metric* on *task*.

        Methods without any usable value for the metric are not candidates.
        The lowest average wins for metrics in ``LOWER_IS_BETTER_METRICS``,
        the highest for all others; ties keep the method listed first.
        Returns ``(None, None)`` when no method qualifies.
        """
        lower_is_better = metric in self.LOWER_IS_BETTER_METRICS
        best_method = None
        best_score = None

        for method in self.methods:
            results = self._get_results(method, task)
            if results is None:
                continue
            values = self._metric_values(results, metric)
            if not values:
                continue
            average = float(np.mean(values))
            if np.isnan(average):
                # NaN cannot be ranked; it would otherwise block every later candidate.
                continue
            if best_score is None:
                improved = True
            elif lower_is_better:
                improved = average < best_score
            else:
                improved = average > best_score
            if improved:
                best_method, best_score = method, average

        return best_method, best_score

    def create_best_performers_extended(self):
        """Print, per task and metric, the method with the best average score."""
        print(f"\n{'='*100}")
        print("🏆 EXTENDED BEST PERFORMERS SUMMARY")
        print(f"{'='*100}")

        for task in self.display_tasks:
            if not any(task in self.all_results.get(method, {}) for method in self.methods):
                print(f"\n⚠️  No data available for task: {task}")
                continue

            print(f"\n🎯 BEST PERFORMERS FOR {task.replace('_', ' ').upper()}:")
            print("=" * 60)

            # Overall best performers (averaged across examples)
            print(f"\n📊 Overall Best (Averaged Across Examples):")
            print("-" * 50)

            for metric in self.ml_metrics:
                best_method, best_score = self._best_method_for_metric(task, metric)
                if best_method is not None:
                    print(f"{metric:<30}: {best_method:<15} ({best_score:.4f})")
                else:
                    print(f"{metric:<30}: No data available")

    def export_extended_results(self, output_file="extended_ml_analysis_correct.xlsx"):
        """Export all results to Excel.

        Writes a ``Complete_Data`` sheet with every raw result entry and one
        ``Methods_<task>`` sheet per task with the per-method averages.  The
        comparison tables are built silently, so exporting does not print
        them again.  Failures are reported on stdout instead of raised.

        Args:
            output_file: Path of the workbook to create.
        """
        print(f"\n💾 Exporting results to {output_file}...")

        # Complete data sheet
        summary_data = []
        for method in self.methods:
            for task in self.display_tasks:
                results = self._get_results(method, task)
                if results is None:
                    continue
                for result in results:
                    row = {'Method': method, 'Task': task}
                    row.update(result)
                    summary_data.append(row)

        # Method comparison sheets
        comparison_sheets = []
        for task in self.display_tasks:
            df = self._build_method_comparison(task)
            if df is not None:
                # Excel limits sheet names to 31 characters.
                comparison_sheets.append((f'Methods_{task}'[:31], df))

        if not summary_data and not comparison_sheets:
            # A workbook without sheets cannot be saved; do not create the file.
            print("⚠️  Export failed: no data available to export")
            return

        try:
            with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
                if summary_data:
                    summary_df = pd.DataFrame(summary_data)
                    summary_df.to_excel(writer, sheet_name='Complete_Data', index=False)
                    print(f"✅ Exported {len(summary_data)} rows to Complete_Data sheet")

                for sheet_name, df in comparison_sheets:
                    df.to_excel(writer, sheet_name=sheet_name, index=False)

            print(f"✅ Export completed: {output_file}")
        except Exception as e:
            # Best-effort export: a missing engine or an unwritable path must
            # not abort the analysis run.
            print(f"⚠️  Export failed: {e}")

def run_correct_extended_analysis(results_dir="results"):
    """Run the complete reporting pipeline and return the aggregator used.

    Loads every result file below *results_dir*, prints the three per-task
    reports for each display task, prints the best-performers summary and
    finally exports the results with the aggregator's default output file.
    """
    aggregator = CorrectExtendedHypergraphResultsAggregator(results_dir)
    aggregator.load_all_results()

    # Per-task reports, in the order they are printed for every task.
    per_task_reports = (
        aggregator.create_extended_method_comparison,
        aggregator.create_example_based_comparison,
        aggregator.create_cross_example_analysis,
    )
    for task in aggregator.display_tasks:
        for report in per_task_reports:
            report(task)

    aggregator.create_best_performers_extended()
    aggregator.export_extended_results()

    return aggregator

# # Test function
# def test_correct_version():
#     """Test the correct version with one simple analysis."""
#     print("🧪 TESTING CORRECT VERSION")
#     print("="*40)
    
#     aggregator = CorrectExtendedHypergraphResultsAggregator("results")
#     aggregator.load_all_results()
    
#     # Test one method comparison
#     if aggregator.display_tasks:
#         first_task = aggregator.display_tasks[0]
#         print(f"\n🔬 Testing with task: {first_task}")
#         df = aggregator.create_extended_method_comparison(first_task)
        
#         if df is not None:
#             print("\n✅ SUCCESS! Real values found:")
#             print(df.head())
#         else:
#             print("\n❌ Still no data found")
    
#     return aggregator

# test_aggregator = test_correct_version()
# Run the full analysis against the "results" directory and keep the returned
# aggregator in a module-level name.
aggregator = run_correct_extended_analysis("results")


🔍 HPRA: Found 3 files
  ✅ temporal_prediction (from hypergraph_temporal_prediction_results.json)
  ✅ hypergraph_reconstruction (from hypergraph_reconstruction_results.json)
  ✅ prediction (from hypergraph_prediction_results.json)

🔍 Word_Embeddings: Found 3 files
  ✅ temporal_prediction (from hypergraph_temporal_prediction_results.json)
  ✅ hypergraph_reconstruction (from hypergraph_reconstruction_results.json)
  ✅ prediction (from hypergraph_prediction_results.json)

🔍 HyperGNN: Found 3 files
  ✅ temporal_prediction (from hypergraph_temporal_prediction_results.json)
  ✅ hypergraph_reconstruction (from hypergraph_reconstruction_results.json)
  ✅ prediction (from hypergraph_prediction_results.json)

🔍 CHESHIRE: Found 1 files
  ✅ hypergraph_reconstruction (from hypergraph_reconstruction_results.json)

📋 Found examples: ['Email Communication', 'Karate Club', 'Les Miserables', 'Random Hypergraph', 'Sci. Collaboration', 'Social Events', 'Southern Women', 'Temporal Random']
📝 Abbreviated to