# Load Cached Results Comparison

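This notebook loads pre-computed evaluation results from the `enhanced_v3_summary.csv` file in each
result directory (falling back to any other `*summary*.csv` file found there) and builds side-by-side
comparisons of the synthetic-data generation methods without recalculating any metrics.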

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from pathlib import Path
from datetime import datetime
from typing import Dict, List, Tuple, Optional
import warnings

# Suppress every warning category for the rest of the session so the cell
# output stays readable.
# NOTE(review): this also hides deprecation and numerical warnings raised by
# pandas/matplotlib; consider narrowing the filter.
warnings.filterwarnings('ignore')

# Global plot styling: matplotlib style sheet first, then the seaborn palette.
# NOTE(review): the 'seaborn-v0_8-*' style names presumably require a recent
# matplotlib release; confirm against the target environment.
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette('husl')

# Per-run timestamp (local time, second resolution). It is used below to name
# the output directory so that successive runs do not overwrite each other.
TIMESTAMP = datetime.now().strftime("%Y%m%d_%H%M%S")
print(f"Analysis timestamp: {TIMESTAMP}")
print("Libraries loaded successfully!")

## 1. Configuration

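Define the output directory, the paths to every synthetic dataset's results directory, and the key metrics (with their preferred direction and weight) used for scoring and ranking.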

In [None]:
# =============================================================================
# CONFIGURATION - Modify these paths to match your data
# =============================================================================

# All artefacts of this run go into a directory named after the run timestamp,
# created relative to the notebook's current working directory.
OUTPUT_DIR = f'./cached_results_comparison_{TIMESTAMP}'
# exist_ok=True keeps a re-run of this cell in the same session from failing.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Root of the results tree. Set the BGP_RESULTS_ROOT environment variable to
# run the notebook on another machine without editing every path below; the
# default reproduces the original absolute paths exactly.
_RESULTS_ROOT = os.environ.get(
    'BGP_RESULTS_ROOT', '/home/smotaali/BGP_Traffic_Generation'
).rstrip('/')
_ZEND_DIR = f'{_RESULTS_ROOT}/results_zend'
_HUARIE_DIR = f'{_RESULTS_ROOT}/results_huarie/results'


def _rrc_variants(prefix: str) -> Dict[str, str]:
    """Return the two evaluation directories that share ``prefix``.

    Every method except SCAPY stores its results in a pair of directories
    named ``<prefix>_rrc05`` (evaluated against the same collector) and
    ``<prefix>_rrc04`` (evaluated against a different collector).
    """
    return {
        'same_rrc05': f'{prefix}_rrc05',
        'diff_rrc04': f'{prefix}_rrc04',
    }


# Mapping: method name -> {variant name -> results directory}.
SYNTHETIC_DATASETS = {
    # SCAPY (direct generation - only one version)
    'SCAPY': {'generated': f'{_ZEND_DIR}/Scapy_enhanced_1215_v3'},

    # GAN Default Values
    'GAN_LSTM_default': _rrc_variants(f'{_HUARIE_DIR}/gan_outputs/Correlation_GAN_LSTM'),
    'GAN_TimeGAN_default': _rrc_variants(f'{_HUARIE_DIR}/gan_outputs/Correlation_GAN_TIME'),
    'GAN_DoppelGanger_default': _rrc_variants(f'{_HUARIE_DIR}/gan_outputs/Correlation_GAN_Doppelganger'),

    # GAN Enhanced/Tuned Parameters
    'GAN_LSTM_enhanced': _rrc_variants(f'{_HUARIE_DIR}/gan_outputs_improved/Correlation_GAN_LSTM'),
    'GAN_TimeGAN_enhanced': _rrc_variants(f'{_HUARIE_DIR}/gan_outputs_improved/Correlation_GAN_TIME'),
    'GAN_DoppelGanger_enhanced': _rrc_variants(f'{_HUARIE_DIR}/gan_outputs_improved/Correlation_GAN_Doppelganger'),

    # SMOTE Variants
    'SMOTE_normal': _rrc_variants(f'{_ZEND_DIR}/SMOTE_enhanced/normal'),
    'SMOTE_borderline': _rrc_variants(f'{_ZEND_DIR}/SMOTE_enhanced/borderline'),
    'SMOTE_kmeans': _rrc_variants(f'{_ZEND_DIR}/SMOTE_enhanced/kmeans_v3'),
    'SMOTE_adasyn': _rrc_variants(f'{_ZEND_DIR}/SMOTE_enhanced/adasyn'),

    # Hybrid (SMOTE + GAN)
    'Hybrid_SMOTE_GAN': _rrc_variants(f'{_HUARIE_DIR}/synthetic_hybrid/compare_hybrid'),

    # Copula
    'Copula': _rrc_variants(f'{_ZEND_DIR}/copula'),
}

# Key metrics configuration.
# Each spec is (metric name, preferred direction, weight); 'lower' means a
# smaller value is better, 'higher' means a larger value is better.
_METRIC_SPECS = [
    ('Mean KS Statistic', 'lower', 1.5),
    ('Mean Wasserstein Distance', 'lower', 1.5),
    ('Weighted Wasserstein Distance', 'lower', 1.5),
    ('PCA Centroid Distance', 'lower', 1.0),
    ('Correlation Similarity (Pearson)', 'higher', 2.0),
    ('Correlation Similarity (Spearman)', 'higher', 2.0),
    ('Distribution Score (Weighted)', 'higher', 1.0),
    ('Distribution Score (Unweighted)', 'higher', 1.0),
    ('Correlation Score', 'higher', 1.5),
    ('Effect Size Score (Weighted)', 'higher', 1.0),
    ('Effect Size Score (Unweighted)', 'higher', 1.0),
    ('Wasserstein Score (Weighted)', 'higher', 1.0),
    ('KS Excellent Features', 'higher', 1.0),
    ('KS Good or Better Features', 'higher', 1.0),
    ('Negligible Effect Features', 'higher', 1.0),
]

# Mapping: metric name -> {'direction': ..., 'weight': ...}, in spec order.
KEY_METRICS = {
    metric_name: {'direction': preferred, 'weight': metric_weight}
    for metric_name, preferred, metric_weight in _METRIC_SPECS
}

# Echo the effective configuration so it is recorded in the cell output.
print(f"Output directory: {OUTPUT_DIR}")
print(f"Number of methods configured: {len(SYNTHETIC_DATASETS)}")

## 2. Helper Functions

In [None]:
def find_summary_csv(directory: str) -> Optional[str]:
    """Locate the summary CSV inside a results directory.

    The canonical file name ``enhanced_v3_summary.csv`` is preferred. If it is
    absent, the first regular file (in sorted name order, so the choice is
    deterministic) whose name contains ``summary`` (case-insensitive) and
    ends with ``.csv`` is used instead.

    Args:
        directory: Path of the results directory to search.

    Returns:
        The path of the summary file, or ``None`` when ``directory`` is not an
        existing directory or contains no matching file.
    """
    # A missing path, or a path that points at a file, simply has no summary;
    # without this guard os.listdir would raise on a non-directory.
    if not os.path.isdir(directory):
        return None

    summary_file = os.path.join(directory, 'enhanced_v3_summary.csv')
    if os.path.isfile(summary_file):
        return summary_file

    # Fallback: any other "*summary*.csv" file. os.listdir order is arbitrary,
    # so sort the names to make the pick reproducible across runs.
    for name in sorted(os.listdir(directory)):
        if 'summary' in name.lower() and name.endswith('.csv'):
            candidate = os.path.join(directory, name)
            if os.path.isfile(candidate):
                return candidate

    return None


def _parse_metric_value(value):
    """Convert one raw summary cell to ``float`` when possible.

    Strings such as ``"100.0/100"`` are reduced to the part before the first
    slash. Anything that still cannot be converted is returned unchanged.
    """
    candidate = value
    if isinstance(value, str) and '/' in value:
        candidate = value.split('/')[0]
    try:
        return float(candidate)
    except (TypeError, ValueError):
        # Deliberate best effort: keep textual values (e.g. grades) as they are.
        return value


def load_summary_csv(filepath: str) -> Optional[Dict]:
    """Load a summary CSV into a ``{metric: value}`` dictionary.

    The ``Metric`` and ``Value`` columns are used when both are present;
    otherwise the first two columns are taken as metric name and value.
    Values are converted to ``float`` where possible (``"95.5/100"`` becomes
    ``95.5``); values that cannot be converted are kept as read.

    Args:
        filepath: Path of the CSV file to read.

    Returns:
        The metric dictionary, or ``None`` (after printing a message) when the
        file cannot be read or has fewer than two columns.
    """
    try:
        df = pd.read_csv(filepath)
    except Exception as e:
        # Best effort by design: one unreadable file must not abort the run.
        print(f"Error loading {filepath}: {e}")
        return None

    if 'Metric' in df.columns and 'Value' in df.columns:
        metric_col, value_col = 'Metric', 'Value'
    elif len(df.columns) >= 2:
        metric_col, value_col = df.columns[0], df.columns[1]
    else:
        print(f"Error loading {filepath}: expected at least two columns")
        return None

    return {
        metric: _parse_metric_value(value)
        for metric, value in zip(df[metric_col], df[value_col])
    }


def load_all_results(datasets: Dict) -> Dict:
    """Collect the cached metric summaries for every method and variant.

    For each ``{method: {variant: directory}}`` entry the summary CSV is
    located and parsed, and one status line is printed per variant. Methods
    always get an entry in the result, even when none of their variants could
    be loaded.

    Returns:
        ``{method: {variant: {metric: value}}}`` holding only the variants
        whose summary was found and parsed to a non-empty dictionary.
    """
    loaded: Dict = {}

    for method_name, variants in datasets.items():
        per_variant: Dict = {}
        loaded[method_name] = per_variant

        for variant_name, path in variants.items():
            summary_file = find_summary_csv(path)
            if not summary_file:
                print(f"✗ Not found: {method_name} - {variant_name} ({path})")
                continue

            data = load_summary_csv(summary_file)
            if not data:
                # Covers both a read failure (None) and an empty summary ({}).
                print(f"✗ Failed to parse: {method_name} - {variant_name}")
                continue

            per_variant[variant_name] = data
            print(f"✓ Loaded: {method_name} - {variant_name}")

    return loaded


def create_comparison_dataframe(results: Dict, evaluation_type: str = 'same_rrc05') -> pd.DataFrame:
    """Assemble one row of metrics per method for a given evaluation type.

    A method contributes its ``evaluation_type`` variant when it has one,
    otherwise its ``'generated'`` variant (the SCAPY case); methods with
    neither are left out.

    Returns:
        A DataFrame indexed by ``Method`` with one column per metric, or an
        empty DataFrame when no method qualifies.
    """
    records = []

    for method_name, variants in results.items():
        # First matching key wins: the requested variant, then 'generated'.
        chosen = next(
            (key for key in (evaluation_type, 'generated') if key in variants),
            None,
        )
        if chosen is None:
            continue
        records.append({'Method': method_name, **variants[chosen]})

    if not records:
        return pd.DataFrame()

    return pd.DataFrame(records).set_index('Method')


def calculate_overall_score(row: pd.Series, metrics: Dict = KEY_METRICS) -> float:
    """Calculate an overall weighted score for one method.

    Each metric found in ``row`` is mapped onto a "higher is better" scale and
    combined as a weighted mean, multiplied by 100.

    Normalisation rules, selected by direction and metric name:

    * ``'higher'``: names containing ``Score`` are divided by 100; names
      containing ``Correlation`` are used as-is; anything else is divided by
      100 and capped at 1.0.
    * ``'lower'``: names containing ``KS`` use ``1 - value``; names containing
      ``Wasserstein`` or ``Distance`` use ``1 - value / 2``; anything else
      uses ``1 - value``. All three are floored at 0.

    Metrics that are missing, NaN, or not numeric are skipped and do not count
    towards the total weight.

    Args:
        row: Metric values for one method, indexed by metric name.
        metrics: ``{metric: {'direction': ..., 'weight': ...}}`` configuration.

    Returns:
        The weighted score, or ``0.0`` when no metric could be used.
    """
    weighted_sum = 0.0
    total_weight = 0.0

    for metric, config in metrics.items():
        if metric not in row:
            continue

        try:
            value = float(row[metric])
        except (TypeError, ValueError):
            # Values the loader could not parse stay as text; skip them
            # instead of failing on the arithmetic below.
            continue
        if pd.isna(value):
            continue

        weight = config['weight']

        if config['direction'] == 'higher':
            if 'Score' in metric:
                normalized = value / 100
            elif 'Correlation' in metric:
                normalized = value
            else:
                # NOTE(review): presumably feature counts or percentages;
                # confirm the expected range with the summary producer.
                normalized = min(value / 100, 1.0)
        else:
            if 'KS' in metric:
                normalized = max(0, 1 - value)
            elif 'Wasserstein' in metric or 'Distance' in metric:
                normalized = max(0, 1 - value / 2)
            else:
                normalized = max(0, 1 - value)

        weighted_sum += normalized * weight
        total_weight += weight

    return (weighted_sum / total_weight * 100) if total_weight > 0 else 0.0


def create_ranking_table(df: pd.DataFrame, metrics: List[str] = None) -> pd.DataFrame:
    """Rank the methods (rows of ``df``) on each requested metric.

    Rank 1 is the best method for a metric: the smallest value for metrics
    configured as ``'lower'`` in ``KEY_METRICS``, the largest value otherwise.
    Missing values are ranked last. Metrics that are not columns of ``df`` are
    ignored; ``metrics=None`` means every metric in ``KEY_METRICS``.

    Returns:
        A DataFrame of per-metric ranks plus an ``Average Rank`` column,
        sorted by that average in ascending order.
    """
    requested = list(KEY_METRICS.keys()) if metrics is None else metrics

    rankings = pd.DataFrame(index=df.index)
    for name in requested:
        if name not in df.columns:
            continue
        direction = KEY_METRICS.get(name, {'direction': 'higher'})['direction']
        lower_is_better = direction == 'lower'
        rankings[name] = df[name].rank(ascending=lower_is_better, na_option='bottom')

    rankings['Average Rank'] = rankings.mean(axis=1)
    return rankings.sort_values('Average Rank')

# Confirmation message; it is reached only if every definition above executed.
print("Helper functions defined!")

## 3. Load All Cached Results

In [None]:
# Read the cached summary of every configured method/variant.
# load_all_results prints one status line per variant (loaded, failed to
# parse, or not found) and returns {method: {variant: {metric: value}}}.
print("Loading results from enhanced_v3_summary.csv files...")
print("=" * 60)
results = load_all_results(SYNTHETIC_DATASETS)
print("\nLoading complete!")

## 4. Create Comparison DataFrames

In [None]:
# Build one comparison frame per evaluation setting (one row per method).
# create_comparison_dataframe already falls back to a method's 'generated'
# variant when the requested variant is missing, so SCAPY (which only has
# 'generated') appears in both frames without any extra handling here.
df_same = create_comparison_dataframe(results, 'same_rrc05')
df_diff = create_comparison_dataframe(results, 'diff_rrc04')

print(f"Same dataset (rrc05): {len(df_same)} methods")
print(f"Different dataset (rrc04): {len(df_diff)} methods")

## 5. Display Comparison Tables

In [None]:
# Select key metrics to display
display_metrics = [
    'Mean KS Statistic',
    'Mean Wasserstein Distance',
    'Correlation Similarity (Pearson)',
    'Correlation Similarity (Spearman)',
    'PCA Centroid Distance',
    'Distribution Score (Weighted)',
    'Correlation Score',
    'Effect Size Score (Weighted)',
    'KS Excellent Features',
    'KS Good or Better Features',
    'Negligible Effect Features'
]

available_display = [m for m in display_metrics if m in df_same.columns]

# Split the columns by preferred direction so that green always means
# "better": lower-is-better metrics get the reversed colormap.
lower_better_same = [
    m for m in available_display
    if KEY_METRICS.get(m, {}).get('direction') == 'lower'
]
higher_better_same = [m for m in available_display if m not in lower_better_same]

print("\n" + "=" * 80)
print("SAME DATASET (rrc05) COMPARISON")
print("=" * 80)
if not df_same.empty and available_display:
    styled_same = df_same[available_display].round(4).style
    if higher_better_same:
        styled_same = styled_same.background_gradient(
            cmap='RdYlGn', axis=0, subset=higher_better_same)
    if lower_better_same:
        styled_same = styled_same.background_gradient(
            cmap='RdYlGn_r', axis=0, subset=lower_better_same)
    display(styled_same)

In [None]:
available_display_diff = [m for m in display_metrics if m in df_diff.columns]

# Split the columns by preferred direction so that green always means
# "better": lower-is-better metrics get the reversed colormap.
lower_better_diff = [
    m for m in available_display_diff
    if KEY_METRICS.get(m, {}).get('direction') == 'lower'
]
higher_better_diff = [m for m in available_display_diff if m not in lower_better_diff]

print("\n" + "=" * 80)
print("DIFFERENT DATASET (rrc04) COMPARISON")
print("=" * 80)
if not df_diff.empty and available_display_diff:
    styled_diff = df_diff[available_display_diff].round(4).style
    if higher_better_diff:
        styled_diff = styled_diff.background_gradient(
            cmap='RdYlGn', axis=0, subset=higher_better_diff)
    if lower_better_diff:
        styled_diff = styled_diff.background_gradient(
            cmap='RdYlGn_r', axis=0, subset=lower_better_diff)
    display(styled_diff)

## 6. Overall Rankings

In [None]:
# Overall weighted score per method, best first, for both evaluation settings.
scores_same, scores_diff = (
    frame.apply(calculate_overall_score, axis=1).sort_values(ascending=False)
    for frame in (df_same, df_diff)
)

# Print both leaderboards with the same layout.
for heading, ranked_scores in (
    ("OVERALL RANKINGS - Same Dataset (rrc05)", scores_same),
    ("OVERALL RANKINGS - Different Dataset (rrc04)", scores_diff),
):
    print("\n" + "=" * 60, heading, "=" * 60, sep="\n")
    for rank, (method, score) in enumerate(ranked_scores.items(), 1):
        print(f"{rank:2d}. {method:30s}: {score:.2f}")

## 7. Visualizations

In [None]:
# Overall Score Comparison: one horizontal bar chart per evaluation setting.
fig, axes = plt.subplots(1, 2, figsize=(16, 8))

score_panels = [
    (axes[0], scores_same, 'Same Dataset (rrc05) - Overall Score'),
    (axes[1], scores_diff, 'Different Dataset (rrc04) - Overall Score'),
]

for panel_ax, panel_scores, panel_title in score_panels:
    positions = range(len(panel_scores))
    # The scores are sorted best-first, so run the colormap from its green end
    # (0.9) to its orange end (0.3): the best method is green, the worst orange.
    bar_colors = plt.cm.RdYlGn(np.linspace(0.9, 0.3, len(panel_scores)))
    panel_ax.barh(positions, panel_scores.values, color=bar_colors)
    panel_ax.set_yticks(positions)
    panel_ax.set_yticklabels(panel_scores.index)
    # barh draws position 0 at the bottom; flip the axis so rank 1 is on top.
    panel_ax.invert_yaxis()
    panel_ax.set_xlabel('Overall Score')
    panel_ax.set_title(panel_title, fontsize=14)
    for position, value in enumerate(panel_scores.values):
        panel_ax.text(value, position, f' {value:.1f}', va='center', fontsize=9)

plt.tight_layout()
plt.savefig(os.path.join(OUTPUT_DIR, 'overall_comparison.png'), dpi=150)
plt.show()

In [None]:
# Heatmap - Same Dataset
key_metric_names = list(KEY_METRICS.keys())
available_heatmap = [m for m in key_metric_names if m in df_same.columns]

if len(available_heatmap) >= 2:
    # Summary values that could not be parsed stay as text in the frame;
    # coerce them to NaN so that min/max and the heatmap itself work.
    plot_data = df_same[available_heatmap].apply(pd.to_numeric, errors='coerce')

    # Min-max normalise each column to [0, 1] with 1 = best method.
    for col in plot_data.columns:
        config = KEY_METRICS.get(col, {'direction': 'higher'})
        values = plot_data[col]
        min_val, max_val = values.min(), values.max()
        if max_val > min_val:
            normalized = (values - min_val) / (max_val - min_val)
            if config['direction'] == 'lower':
                normalized = 1 - normalized
        else:
            # All methods tie on this metric (or it has no data): show the
            # neutral midpoint rather than raw values on a 0-1 colour scale.
            normalized = values.where(values.isna(), 0.5)
        plot_data[col] = normalized

    fig, ax = plt.subplots(figsize=(14, 8))
    sns.heatmap(plot_data, annot=True, fmt='.2f', cmap='RdYlGn',
                ax=ax, vmin=0, vmax=1, cbar_kws={'label': 'Normalized Score (higher=better)'})
    ax.set_title('Method Performance Heatmap (Same Dataset - rrc05)\n(Normalized: 1=best, 0=worst)', fontsize=14)
    ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')
    plt.tight_layout()
    plt.savefig(os.path.join(OUTPUT_DIR, 'heatmap_same_rrc05.png'), dpi=150)
    plt.show()

In [None]:
# Heatmap - Different Dataset
# Iterate KEY_METRICS directly (same order as its key list) so this cell does
# not depend on a helper variable defined in the previous cell.
available_heatmap_diff = [m for m in KEY_METRICS if m in df_diff.columns]

if len(available_heatmap_diff) >= 2:
    # Summary values that could not be parsed stay as text in the frame;
    # coerce them to NaN so that min/max and the heatmap itself work.
    plot_data = df_diff[available_heatmap_diff].apply(pd.to_numeric, errors='coerce')

    # Min-max normalise each column to [0, 1] with 1 = best method.
    for col in plot_data.columns:
        config = KEY_METRICS.get(col, {'direction': 'higher'})
        values = plot_data[col]
        min_val, max_val = values.min(), values.max()
        if max_val > min_val:
            normalized = (values - min_val) / (max_val - min_val)
            if config['direction'] == 'lower':
                normalized = 1 - normalized
        else:
            # All methods tie on this metric (or it has no data): show the
            # neutral midpoint rather than raw values on a 0-1 colour scale.
            normalized = values.where(values.isna(), 0.5)
        plot_data[col] = normalized

    fig, ax = plt.subplots(figsize=(14, 8))
    sns.heatmap(plot_data, annot=True, fmt='.2f', cmap='RdYlGn',
                ax=ax, vmin=0, vmax=1, cbar_kws={'label': 'Normalized Score (higher=better)'})
    ax.set_title('Method Performance Heatmap (Different Dataset - rrc04)\n(Normalized: 1=best, 0=worst)', fontsize=14)
    ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')
    plt.tight_layout()
    plt.savefig(os.path.join(OUTPUT_DIR, 'heatmap_diff_rrc04.png'), dpi=150)
    plt.show()

In [None]:
# Key Metric Bar Charts: (metric, preferred direction) for each of the 4 panels.
key_metrics_to_plot = [
    ('Mean KS Statistic', 'lower'),
    ('Correlation Similarity (Pearson)', 'higher'),
    ('Correlation Score', 'higher'),
    ('Distribution Score (Weighted)', 'higher')
]

fig, axes = plt.subplots(2, 2, figsize=(16, 12))
axes = axes.flatten()

for idx, (metric, direction) in enumerate(key_metrics_to_plot):
    if metric in df_same.columns:
        # Sort best-first: ascending for lower-is-better, descending otherwise.
        values = df_same[metric].dropna().sort_values(ascending=(direction == 'lower'))
        # Because the bars are ordered best-first for both directions, the
        # colormap always runs from its green end to its red end.
        colors = plt.cm.RdYlGn(np.linspace(0.8, 0.2, len(values)))

        axes[idx].barh(range(len(values)), values, color=colors)
        axes[idx].set_yticks(range(len(values)))
        axes[idx].set_yticklabels(values.index)
        axes[idx].set_xlabel(metric)
        axes[idx].set_title(f'{metric} (Same Dataset - rrc05)')

        for i, (method, v) in enumerate(values.items()):
            axes[idx].text(v, i, f' {v:.4f}', va='center', fontsize=8)
    else:
        # No data for this metric: hide the panel instead of an empty frame.
        axes[idx].set_visible(False)

plt.tight_layout()
plt.savefig(os.path.join(OUTPUT_DIR, 'key_metrics_comparison.png'), dpi=150)
plt.show()

## 8. Ranking Tables

In [None]:
# Per-metric rank tables (rank 1 = best) for both evaluation settings.
rankings_same, rankings_diff = (
    create_ranking_table(frame) for frame in (df_same, df_diff)
)

print(
    "\n" + "=" * 80,
    "RANKING TABLE - Same Dataset (rrc05)",
    "(Lower rank = better performance)",
    "=" * 80,
    sep="\n",
)
# Reversed colormap: small (good) ranks are green, large ranks are red.
styled_rankings_same = rankings_same.round(2).style.background_gradient(cmap='RdYlGn_r', axis=0)
display(styled_rankings_same)

In [None]:
print(
    "\n" + "=" * 80,
    "RANKING TABLE - Different Dataset (rrc04)",
    "(Lower rank = better performance)",
    "=" * 80,
    sep="\n",
)
# Reversed colormap: small (good) ranks are green, large ranks are red.
styled_rankings_diff = rankings_diff.round(2).style.background_gradient(cmap='RdYlGn_r', axis=0)
display(styled_rankings_diff)

## 9. Best Methods by Category

In [None]:
# Metric groups reported in the "best method" listing.
categories = {
    'Distribution Similarity': ['Mean KS Statistic', 'Mean Wasserstein Distance'],
    'Correlation Preservation': ['Correlation Similarity (Pearson)', 'Correlation Similarity (Spearman)'],
    'Overall Scores': ['Distribution Score (Weighted)', 'Correlation Score', 'Effect Size Score (Weighted)']
}

print("\n" + "=" * 80)
print("BEST METHODS BY METRIC CATEGORY")
print("=" * 80)

for eval_type, comparison_df in [('Same Dataset (rrc05)', df_same), ('Different Dataset (rrc04)', df_diff)]:
    print(f"\n{eval_type}:")
    print("-" * 40)

    for category, metrics in categories.items():
        print(f"\n  {category}:")
        for metric in metrics:
            if metric not in comparison_df.columns:
                continue

            # Unparsed text values and missing results must not take part in
            # the comparison; idxmax/idxmin need at least one numeric value.
            numeric_values = pd.to_numeric(comparison_df[metric], errors='coerce').dropna()
            if numeric_values.empty:
                print(f"    {metric}: n/a (no numeric values)")
                continue

            config = KEY_METRICS.get(metric, {'direction': 'higher'})
            if config['direction'] == 'higher':
                best = numeric_values.idxmax()
                value = numeric_values.max()
            else:
                best = numeric_values.idxmin()
                value = numeric_values.min()
            print(f"    {metric}: {best} ({value:.4f})")

## 10. Save Results

In [None]:
# Write the comparison and ranking tables, keeping the method index.
table_exports = [
    (df_same, 'comparison_same_rrc05.csv'),
    (df_diff, 'comparison_diff_rrc04.csv'),
    (rankings_same, 'rankings_same_rrc05.csv'),
    (rankings_diff, 'rankings_diff_rrc04.csv'),
]
for table, filename in table_exports:
    table.to_csv(os.path.join(OUTPUT_DIR, filename))

# Write the overall scores as two-column tables (Method, Overall Score).
score_exports = [
    (scores_same, 'overall_scores_same_rrc05.csv'),
    (scores_diff, 'overall_scores_diff_rrc04.csv'),
]
for score_series, filename in score_exports:
    score_table = pd.DataFrame({'Method': score_series.index, 'Overall Score': score_series.values})
    score_table.to_csv(os.path.join(OUTPUT_DIR, filename), index=False)

print(f"\nAll results saved to: {OUTPUT_DIR}")
print("\nSaved files:")
# Lists everything in the output directory, including the figures saved earlier.
for f in os.listdir(OUTPUT_DIR):
    print(f"  - {f}")

## 11. Summary

In [None]:
# Closing recap: method counts and the three best methods per setting.
print("\n" + "=" * 80, "SUMMARY", "=" * 80, sep="\n")

print(f"\nTotal methods compared: {len(results)}")
print(f"Methods with same dataset (rrc05) results: {len(df_same)}")
print(f"Methods with different dataset (rrc04) results: {len(df_diff)}")

print("\nTop 3 Methods Overall:")
for setting_heading, setting_scores in (
    ("\n  Same Dataset (rrc05):", scores_same),
    ("\n  Different Dataset (rrc04):", scores_diff),
):
    print(setting_heading)
    # The score series are already sorted best-first, so head(3) is the podium.
    for rank, (method, score) in enumerate(setting_scores.head(3).items(), 1):
        print(f"    {rank}. {method}: {score:.2f}")

print("\n" + "=" * 80, "Analysis complete!", "=" * 80, sep="\n")