# Load Cached Results Comparison - Anomaly Data

This notebook compares synthetic anomaly data generated by various methods against:
1. **Train Dataset**: `/home/smotaali/BGP_Traffic_Generation/RIPE/RIPE_INCIDENTS/all_incidents_anomalies_reinforced_v2.csv`
2. **Extended Dataset**: `/home/smotaali/BGP_Traffic_Generation/RIPE/RIPE_INCIDENTS_EXTENDED/all_incidents_anomalies_extended_reinforced.csv`

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from matplotlib.gridspec import GridSpec
import seaborn as sns
import os
from pathlib import Path
from datetime import datetime
from typing import Dict, List, Tuple, Optional
from PIL import Image
import warnings

# Silence every warning category for the rest of the kernel session. This keeps
# cell output tidy, but it also hides deprecation notices from pandas/matplotlib.
warnings.filterwarnings('ignore')

# Set style
# NOTE(review): the 'seaborn-v0_8-*' style names appear to exist only in newer
# matplotlib releases - confirm against the environment this notebook targets.
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette('husl')

# Captured once per kernel run; the configuration cell embeds it in OUTPUT_DIR
# so each run writes into its own timestamped folder.
TIMESTAMP = datetime.now().strftime("%Y%m%d_%H%M%S")
print(f"Analysis timestamp: {TIMESTAMP}")
print("Libraries loaded successfully!")

## 1. Configuration

Define paths to all synthetic anomaly dataset results directories.

**IMPORTANT**: Modify the `SYNTHETIC_ANOMALY_DATASETS` dictionary below to include your synthetic data paths.

In [None]:
# =============================================================================
# CONFIGURATION - Modify these paths to match your anomaly data
# =============================================================================

# Base path for results
RESULTS_BASE_PATH = '/home/smotaali/BGP_Traffic_Generation/results_evaluation_anomaly'

# Output directory
# One sub-folder per run (keyed by TIMESTAMP). It is created eagerly here, so
# simply running this cell touches the filesystem.
OUTPUT_DIR = f'{RESULTS_BASE_PATH}/comprehensive_evaluation/{TIMESTAMP}'
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Reference datasets for anomaly comparison
# 'same' = comparison against the same/train dataset (all_incidents_anomalies_reinforced_v2.csv)
# 'diff' = comparison against a different/extended dataset (all_incidents_anomalies_extended_reinforced.csv)
# Within this cell these paths are only checked for existence and echoed in the
# configuration summary printed at the bottom.
REFERENCE_DATASETS = {
    'same': '/home/smotaali/BGP_Traffic_Generation/RIPE/RIPE_INCIDENTS/all_incidents_anomalies_reinforced_v2.csv',
    'diff': '/home/smotaali/BGP_Traffic_Generation/RIPE/RIPE_INCIDENTS_EXTENDED/all_incidents_anomalies_extended_reinforced.csv'
}

# =============================================================================
# SYNTHETIC ANOMALY DATASETS - ADD YOUR PATHS HERE
# Format: MethodName=dict(
#     diff='path/to/results/comparing/against/different/extended/dataset',
#     same='path/to/results/comparing/against/same/train/dataset',
# )
# =============================================================================

# MODIFY THESE PATHS TO YOUR ACTUAL RESULT DIRECTORIES
# Each result directory should contain enhanced_v3_summary.csv and PNG files.
# Keyword-form dict() is used because every method name is a valid identifier;
# insertion order (method order, then diff before same) is significant for
# the printed listings.
SYNTHETIC_ANOMALY_DATASETS = dict(
    # GAN Default Values
    GAN_LSTM_default=dict(
        diff='/home/smotaali/BGP_Traffic_Generation/results_huarie/results/gan_outputs/Correlation_GAN_LSTM_diff',
        same='/home/smotaali/BGP_Traffic_Generation/results_huarie/results/gan_outputs/Correlation_GAN_LSTM_same',
    ),
    GAN_TimeGAN_default=dict(
        diff='/home/smotaali/BGP_Traffic_Generation/results_huarie/results/gan_outputs/Correlation_GAN_TIME_diff',
        same='/home/smotaali/BGP_Traffic_Generation/results_huarie/results/gan_outputs/Correlation_GAN_TIME_same',
    ),
    GAN_DoppelGanger_default=dict(
        diff='/home/smotaali/BGP_Traffic_Generation/results_huarie/results/gan_outputs/Correlation_GAN_Doppelganger_diff',
        same='/home/smotaali/BGP_Traffic_Generation/results_huarie/results/gan_outputs/Correlation_GAN_Doppelganger_same',
    ),

    # GAN Enhanced/Tuned Parameters
    GAN_LSTM_enhanced=dict(
        diff='/home/smotaali/BGP_Traffic_Generation/results_huarie/results/gan_outputs_improved/Correlation_GAN_LSTM_diff',
        same='/home/smotaali/BGP_Traffic_Generation/results_huarie/results/gan_outputs_improved/Correlation_GAN_LSTM_same',
    ),
    GAN_TimeGAN_enhanced=dict(
        diff='/home/smotaali/BGP_Traffic_Generation/results_huarie/results/gan_outputs_improved/Correlation_GAN_TIME_diff',
        same='/home/smotaali/BGP_Traffic_Generation/results_huarie/results/gan_outputs_improved/Correlation_GAN_TIME_same',
    ),
    GAN_DoppelGanger_enhanced=dict(
        diff='/home/smotaali/BGP_Traffic_Generation/results_huarie/results/gan_outputs_improved/Correlation_GAN_Doppelganger_diff',
        same='/home/smotaali/BGP_Traffic_Generation/results_huarie/results/gan_outputs_improved/Correlation_GAN_Doppelganger_same',
    ),

    # SMOTE Variants
    SMOTE_normal=dict(
        diff='/home/smotaali/BGP_Traffic_Generation/results_zend/SMOTE_enhanced/normal_diff',
        same='/home/smotaali/BGP_Traffic_Generation/results_zend/SMOTE_enhanced/normal_same',
    ),
    SMOTE_borderline=dict(
        diff='/home/smotaali/BGP_Traffic_Generation/results_zend/SMOTE_enhanced/borderline_diff',
        same='/home/smotaali/BGP_Traffic_Generation/results_zend/SMOTE_enhanced/borderline_same',
    ),
    SMOTE_kmeans=dict(
        diff='/home/smotaali/BGP_Traffic_Generation/results_zend/SMOTE_enhanced/kmeans_v3_diff',
        same='/home/smotaali/BGP_Traffic_Generation/results_zend/SMOTE_enhanced/kmeans_v3_same',
    ),
    SMOTE_adasyn=dict(
        diff='/home/smotaali/BGP_Traffic_Generation/results_zend/SMOTE_enhanced/adasyn_diff',
        same='/home/smotaali/BGP_Traffic_Generation/results_zend/SMOTE_enhanced/adasyn_same',
    ),

    # Hybrid (SMOTE + GAN)
    Hybrid_SMOTE_GAN=dict(
        diff='/home/smotaali/BGP_Traffic_Generation/results_huarie/results/synthetic_hybrid/compare_hybrid_diff',
        same='/home/smotaali/BGP_Traffic_Generation/results_huarie/results/synthetic_hybrid/compare_hybrid_same',
    ),

    # Copula
    Copula=dict(
        diff='/home/smotaali/BGP_Traffic_Generation/results_zend/copula_diff',
        same='/home/smotaali/BGP_Traffic_Generation/results_zend/copula_same',
    ),
)

# PNG files to collect and combine
# Maps a short plot-type key (used for lookups and output names) to the file
# name expected inside each result directory.
PNG_FILES = dict(
    tsne='enhanced_v3_tsne_overlay.png',
    correlation_comparison='enhanced_v3_correlation_comparison.png',
    correlation_scatter='enhanced_v3_correlation_scatter.png',
    distribution_comparison='enhanced_v3_distribution_comparison.png',
    distribution_tests='enhanced_v3_distribution_tests.png',
    effect_sizes='enhanced_v3_effect_sizes.png',
    pca_centroid='enhanced_v3_pca_centroid_analysis.png',
    quality_dashboard='enhanced_v3_quality_dashboard.png',
    top_k_worst='enhanced_v3_top_k_worst_features.png',
    calibration='calibration_check_visualization.png',
)

# Key metrics configuration
# Each spec row is (metric name, preferred direction, weight). 'direction'
# says whether a lower or a higher value is better; 'weight' is the metric's
# share in the weighted overall score.
KEY_METRICS = {
    metric_name: {'direction': direction, 'weight': weight}
    for metric_name, direction, weight in (
        ('Mean KS Statistic', 'lower', 1.5),
        ('Mean Wasserstein Distance', 'lower', 1.5),
        ('Weighted Wasserstein Distance', 'lower', 1.5),
        ('PCA Centroid Distance', 'lower', 1.0),
        ('Correlation Similarity (Pearson)', 'higher', 2.0),
        ('Correlation Similarity (Spearman)', 'higher', 2.0),
        ('Distribution Score (Weighted)', 'higher', 1.0),
        ('Distribution Score (Unweighted)', 'higher', 1.0),
        ('Correlation Score', 'higher', 1.5),
        ('Effect Size Score (Weighted)', 'higher', 1.0),
        ('Effect Size Score (Unweighted)', 'higher', 1.0),
        ('Wasserstein Score (Weighted)', 'higher', 1.0),
        ('KS Excellent Features', 'higher', 1.0),
        ('KS Good or Better Features', 'higher', 1.0),
        ('Negligible Effect Features', 'higher', 1.0),
    )
}

# Echo the active configuration so a reader can sanity-check it before any
# loading happens.
print(f"Output directory: {OUTPUT_DIR}")
print(f"Number of methods configured: {len(SYNTHETIC_ANOMALY_DATASETS)}")
print(f"Number of plot types to combine: {len(PNG_FILES)}")
print(f"\nReference datasets:")
for name, path in REFERENCE_DATASETS.items():
    # An empty/None path counts as missing without touching the filesystem.
    status = 'EXISTS' if path and os.path.exists(path) else 'NOT FOUND'
    print(f"  {name}: {path} [{status}]")

## 2. Helper Functions for CSV Data

In [None]:
def find_summary_csv(directory: str) -> Optional[str]:
    """Locate the summary CSV of one result directory.

    Args:
        directory: Path of the result directory to inspect.

    Returns:
        The path of ``enhanced_v3_summary.csv`` when it exists; otherwise the
        alphabetically first ``*.csv`` file whose name contains "summary"
        (case-insensitive); ``None`` when ``directory`` is not a directory or
        holds no such file.
    """
    # isdir rather than exists: a regular file at this path would otherwise
    # reach os.listdir below and raise NotADirectoryError.
    if not os.path.isdir(directory):
        return None

    preferred = os.path.join(directory, 'enhanced_v3_summary.csv')
    if os.path.isfile(preferred):
        return preferred

    # Fallback: any summary-like CSV. os.listdir order is arbitrary, so sort to
    # make the pick reproducible when several candidates exist.
    for entry in sorted(os.listdir(directory)):
        lowered = entry.lower()
        if 'summary' in lowered and lowered.endswith('.csv'):
            return os.path.join(directory, entry)

    return None


def _parse_metric_value(value):
    """Coerce one raw CSV cell to ``float`` when possible, else return it unchanged."""
    # Handle strings like "100.0/100": only the part before the slash is parsed.
    candidate = value.split('/')[0] if isinstance(value, str) and '/' in value else value
    try:
        return float(candidate)
    except (TypeError, ValueError, OverflowError):
        # Not numeric (e.g. a textual grade): keep the original cell as-is.
        return value


def load_summary_csv(filepath: str) -> Optional[Dict]:
    """Load a summary CSV and convert it to a ``{metric: value}`` dictionary.

    The ``Metric`` / ``Value`` columns are used when both are present;
    otherwise the first two columns are taken as metric name and value.
    Values are converted to ``float`` where possible ("95.5/100" becomes
    ``95.5``); anything else is kept unchanged.

    Args:
        filepath: Path of the CSV file to read.

    Returns:
        The parsed dictionary, or ``None`` (after printing a message) when the
        file cannot be read or has fewer than two columns.
    """
    try:
        df = pd.read_csv(filepath)
        columns = df.columns.tolist()
        if 'Metric' in columns and 'Value' in columns:
            metric_col, value_col = 'Metric', 'Value'
        elif len(columns) >= 2:
            # Try first two columns
            metric_col, value_col = columns[0], columns[1]
        else:
            print(f"Error loading {filepath}: expected at least two columns, found {len(columns)}")
            return None

        # On duplicate metric names the later row wins.
        return {
            row[metric_col]: _parse_metric_value(row[value_col])
            for _, row in df.iterrows()
        }
    except Exception as e:
        # Best effort: a broken file is reported and skipped, not fatal.
        print(f"Error loading {filepath}: {e}")
        return None


def load_all_results(datasets: Dict) -> Dict:
    """Read the summary CSV of every configured method/variant directory.

    Args:
        datasets: ``{method_name: {variant_name: directory}}``.

    Returns:
        ``{method_name: {variant_name: metrics_dict}}``. Every method gets an
        entry; variants whose summary is missing or unparsable are left out
        (a status line is printed for each variant either way).
    """
    loaded = {}

    for method_name, variants in datasets.items():
        method_results = loaded.setdefault(method_name, {})
        for variant_name, path in variants.items():
            summary_file = find_summary_csv(path)
            if not summary_file:
                print(f"✗ Not found: {method_name} - {variant_name} ({path})")
                continue

            data = load_summary_csv(summary_file)
            if not data:
                # Covers both a failed read (None) and an empty summary ({}).
                print(f"✗ Failed to parse: {method_name} - {variant_name}")
                continue

            method_results[variant_name] = data
            print(f"✓ Loaded: {method_name} - {variant_name}")

    return loaded


def create_comparison_dataframe(results: Dict, evaluation_type: str = 'same') -> pd.DataFrame:
    """Build a methods-by-metrics table for one evaluation type.

    Args:
        results: ``{method_name: {variant_name: metrics_dict}}``.
        evaluation_type: Variant to extract for every method.

    Returns:
        A DataFrame indexed by ``Method`` with one column per metric, or an
        empty DataFrame when no method has usable data.
    """
    records = []

    for method_name, variants in results.items():
        # Prefer the requested variant; methods that only ship a 'generated'
        # variant fall back to it; anything else is skipped.
        chosen = next(
            (key for key in (evaluation_type, 'generated') if key in variants),
            None,
        )
        if chosen is None:
            continue
        records.append({'Method': method_name, **variants[chosen]})

    if not records:
        return pd.DataFrame()

    return pd.DataFrame(records).set_index('Method')


def calculate_overall_score(row: pd.Series, metrics: Dict = KEY_METRICS) -> float:
    """Calculate an overall weighted score (nominally 0-100) for one method.

    Every metric present in ``row`` is mapped to a "higher is better" value
    and combined as a weighted average over the metrics that were usable.

    Args:
        row: One method's metrics, indexed by metric name.
        metrics: ``{metric_name: {'direction': 'lower'|'higher', 'weight': w}}``.

    Returns:
        The weighted average times 100, or 0 when no metric was usable.
        Metrics that are missing, NaN, or not numeric are ignored.
    """
    score = 0.0
    total_weight = 0.0

    for metric, config in metrics.items():
        if metric not in row:
            continue

        # Summary values that could not be parsed as numbers may still be text;
        # skip them rather than crash in the arithmetic below.
        try:
            value = float(row[metric])
        except (TypeError, ValueError):
            continue
        if pd.isna(value):
            continue

        weight = config['weight']

        if config['direction'] == 'higher':
            if 'Score' in metric:
                # "... Score" metrics are divided by 100 (assumes a 0-100 scale).
                normalized = value / 100
            elif 'Correlation' in metric:
                # Correlation similarities are used unscaled.
                normalized = value
            else:
                # Remaining higher-is-better metrics: /100, capped at 1.
                normalized = min(value / 100, 1.0)
        else:
            if 'KS' in metric:
                normalized = max(0, 1 - value)
            elif 'Wasserstein' in metric or 'Distance' in metric:
                # Distances are halved before inversion and floored at 0.
                normalized = max(0, 1 - value / 2)
            else:
                normalized = max(0, 1 - value)

        score += normalized * weight
        total_weight += weight

    return (score / total_weight * 100) if total_weight > 0 else 0.0


def create_ranking_table(df: pd.DataFrame, metrics: List[str] = None) -> pd.DataFrame:
    """Rank the methods (rows of ``df``) on each metric; rank 1 is best.

    Args:
        df: Methods-by-metrics table.
        metrics: Metric names to rank on; defaults to every key of
            ``KEY_METRICS``. Names that are not columns of ``df`` are ignored.

    Returns:
        One rank column per metric plus ``Average Rank``, sorted by it
        ascending. Missing values receive the worst rank.
    """
    candidate_metrics = list(KEY_METRICS.keys()) if metrics is None else metrics
    rankings = pd.DataFrame(index=df.index)

    for metric in candidate_metrics:
        if metric not in df.columns:
            continue
        # Metrics unknown to KEY_METRICS are treated as higher-is-better.
        direction = KEY_METRICS.get(metric, {'direction': 'higher'})['direction']
        rankings[metric] = df[metric].rank(
            ascending=(direction == 'lower'),
            na_option='bottom',
        )

    rankings['Average Rank'] = rankings.mean(axis=1)
    return rankings.sort_values('Average Rank')

# Visible confirmation that the helper-definition cell ran.
print("CSV helper functions defined!")

## 3. Helper Functions for PNG Image Collection

In [None]:
def find_png_file(directory: str, png_filename: str) -> Optional[str]:
    """Find a specific PNG file in a result directory.

    Args:
        directory: Result directory to search.
        png_filename: Expected file name, e.g. ``enhanced_v3_tsne_overlay.png``.

    Returns:
        The exact path when ``png_filename`` exists; otherwise the
        alphabetically first ``.png`` whose name contains the expected base
        name (``.png`` and the ``enhanced_v3_`` prefix removed, compared
        case-insensitively); ``None`` when nothing matches or ``directory`` is
        not a directory.
    """
    # isdir rather than exists, so a regular file never reaches os.listdir.
    if not os.path.isdir(directory):
        return None

    exact_path = os.path.join(directory, png_filename)
    if os.path.exists(exact_path):
        return exact_path

    # Try to find similar named files. Both sides are lower-cased so the
    # comparison really is case-insensitive.
    base_name = png_filename.replace('.png', '').replace('enhanced_v3_', '').lower()
    # Sorted: os.listdir order is arbitrary and several files may match.
    for entry in sorted(os.listdir(directory)):
        lowered = entry.lower()
        if lowered.endswith('.png') and base_name in lowered:
            return os.path.join(directory, entry)

    return None


def collect_all_pngs(datasets: Dict, png_files: Dict) -> Dict:
    """Gather PNG paths, grouped first by plot type and then by method key.

    Args:
        datasets: ``{method_name: {variant_name: directory}}``.
        png_files: ``{plot_type: png_filename}``.

    Returns:
        ``{plot_type: {key: png_path}}`` where ``key`` is
        ``"<method>_<variant>"`` (or just the method name for the
        ``'generated'`` variant). Plot types with no hits map to ``{}``.
    """
    collected = {plot_type: {} for plot_type in png_files}

    for method_name, variants in datasets.items():
        for variant_name, path in variants.items():
            # 'generated' variants carry no suffix; the rest are tagged with
            # the variant so both evaluations of a method can coexist.
            key = method_name if variant_name == 'generated' else f"{method_name}_{variant_name}"

            for plot_type, png_filename in png_files.items():
                png_path = find_png_file(path, png_filename)
                if png_path:
                    collected[plot_type][key] = png_path

    return collected


def load_image_safe(filepath: str) -> Optional[np.ndarray]:
    """Load an image file into a NumPy array without raising.

    Args:
        filepath: Path of the image to read.

    Returns:
        The pixel data as an array, or ``None`` (after printing the error)
        when the file cannot be opened or decoded.
    """
    try:
        # The context manager closes the underlying file handle once the
        # pixels have been copied into the array, so handles do not pile up
        # when many plots are combined.
        with Image.open(filepath) as img:
            return np.array(img)
    except Exception as e:
        print(f"Error loading {filepath}: {e}")
        return None


def create_combined_plot_grid(images_dict: Dict[str, str], plot_title: str, 
                               output_path: str, ncols: int = 3,
                               figsize_per_image: Tuple[int, int] = (6, 5)):
    """
    Tile several saved images into one figure, one panel per method.

    Args:
        images_dict: ``{method_key: image_path}``; panels are laid out in
            sorted key order.
        plot_title: Figure-level title.
        output_path: Where the combined PNG is written.
        ncols: Number of panel columns.
        figsize_per_image: ``(width, height)`` in inches of a single panel.

    Returns:
        The matplotlib figure, or ``None`` when ``images_dict`` is empty.
    """
    if not images_dict:
        print(f"No images found for {plot_title}")
        return None

    n_images = len(images_dict)
    # Ceiling division: a partially filled last row still needs a row.
    full_rows, leftover = divmod(n_images, ncols)
    nrows = full_rows + (1 if leftover else 0)

    fig, axes_grid = plt.subplots(
        nrows, ncols,
        figsize=(figsize_per_image[0] * ncols, figsize_per_image[1] * nrows),
        squeeze=False,  # always 2-D, whatever the grid shape
    )
    fig.suptitle(plot_title, fontsize=16, fontweight='bold', y=1.02)

    # Row-major 1-D view so panels can be addressed by position.
    axes = axes_grid.ravel()

    # Sort methods for consistent ordering
    for ax, method_name in zip(axes, sorted(images_dict)):
        img = load_image_safe(images_dict[method_name])

        if img is None:
            ax.text(0.5, 0.5, f'Failed to load\n{method_name}', 
                    ha='center', va='center', fontsize=10)
            ax.set_title(method_name, fontsize=10)
        else:
            ax.imshow(img)
            # Create a cleaner title
            display_name = method_name.replace('_', ' ').replace(' same', ' (same)').replace(' diff', ' (diff)')
            ax.set_title(display_name, fontsize=10, fontweight='bold')

        ax.axis('off')

    # Hide empty subplots
    for ax in axes[n_images:]:
        ax.axis('off')

    plt.tight_layout()
    plt.savefig(output_path, dpi=150, bbox_inches='tight', facecolor='white')
    plt.show()

    print(f"Saved: {output_path}")
    return fig


def create_side_by_side_comparison(images_same: Dict[str, str], images_diff: Dict[str, str],
                                    plot_title: str, output_path: str):
    """
    Create a side-by-side comparison of same vs diff dataset evaluations.

    Args:
        images_same: ``{"<method>_same": image_path}``.
        images_diff: ``{"<method>_diff": image_path}``.
        plot_title: Figure-level title.
        output_path: Where the combined PNG is written.

    Returns:
        The matplotlib figure (one row per method present in both inputs,
        left = same, right = diff), or ``None`` when there is no common method.
    """
    def _strip_suffix(key: str, suffix: str) -> str:
        # Only a trailing marker is removed, so a method whose own name
        # happens to contain "_same"/"_diff" is not mangled.
        return key[:-len(suffix)] if key.endswith(suffix) else key

    # Get common methods
    methods_same = {_strip_suffix(k, '_same'): v for k, v in images_same.items()}
    methods_diff = {_strip_suffix(k, '_diff'): v for k, v in images_diff.items()}
    common_methods = sorted(set(methods_same) & set(methods_diff))

    if not common_methods:
        print(f"No common methods found for {plot_title}")
        return None

    n_methods = len(common_methods)
    # squeeze=False keeps axes 2-D even when there is a single method.
    fig, axes = plt.subplots(n_methods, 2, figsize=(14, 5 * n_methods), squeeze=False)
    fig.suptitle(f"{plot_title}\nSame Dataset vs Different Dataset", 
                 fontsize=16, fontweight='bold', y=1.01)

    for row_idx, method in enumerate(common_methods):
        label = method.replace('_', ' ')
        panels = ((methods_same[method], 'Same'), (methods_diff[method], 'Diff'))
        for col_idx, (image_path, variant_label) in enumerate(panels):
            ax = axes[row_idx, col_idx]
            img = load_image_safe(image_path)
            # A failed load leaves the panel blank but still titled.
            if img is not None:
                ax.imshow(img)
            ax.set_title(f"{label} - {variant_label}", fontsize=10, fontweight='bold')
            ax.axis('off')

    plt.tight_layout()
    plt.savefig(output_path, dpi=150, bbox_inches='tight', facecolor='white')
    plt.show()

    print(f"Saved: {output_path}")
    return fig

# Visible confirmation that the helper-definition cell ran.
print("PNG helper functions defined!")

## 4. Load All Cached Results (CSV Data)

In [None]:
# Parse the summary CSV of every configured method/variant directory.
# load_all_results prints one status line per variant as it goes.
print("Loading results from enhanced_v3_summary.csv files...")
print("=" * 60)
results = load_all_results(SYNTHETIC_ANOMALY_DATASETS)
print("\nLoading complete!")

## 5. Collect All PNG Files

In [None]:
print("Collecting PNG files from all directories...")
print("=" * 60)

# {plot_type: {method_key: png_path}} for every plot found on disk.
all_pngs = collect_all_pngs(SYNTHETIC_ANOMALY_DATASETS, PNG_FILES)

print("\nPNG files found:")
for plot_type, images in all_pngs.items():
    print(f"  {plot_type}: {len(images)} images")
    # Only the method keys are listed; the full paths stay in all_pngs.
    for method in images:
        print(f"    - {method}")

## 6. Create Comparison DataFrames

In [None]:
# Create comparison DataFrames
df_same = create_comparison_dataframe(results, 'same')
df_diff = create_comparison_dataframe(results, 'diff')

# For methods which only have 'generated', include it in both.
# create_comparison_dataframe already falls back to the 'generated' variant, so
# this is only a safety net. Each frame is checked on its own, so a method is
# never appended to a frame that already contains it.
for method, variants in results.items():
    if 'generated' not in variants:
        continue
    row_df = pd.DataFrame([variants['generated']], index=[method])
    if method not in df_same.index:
        df_same = pd.concat([df_same, row_df])
    if method not in df_diff.index:
        df_diff = pd.concat([df_diff, row_df])

print(f"Same dataset (train): {len(df_same)} methods")
print(f"Different dataset (extended): {len(df_diff)} methods")

## 7. Display Comparison Tables

In [None]:
# Select key metrics to display
display_metrics = [
    'Mean KS Statistic',
    'Mean Wasserstein Distance',
    'Correlation Similarity (Pearson)',
    'Correlation Similarity (Spearman)',
    'PCA Centroid Distance',
    'Distribution Score (Weighted)',
    'Correlation Score',
    'Effect Size Score (Weighted)',
    'KS Excellent Features',
    'KS Good or Better Features',
    'Negligible Effect Features'
]

available_display = [m for m in display_metrics if m in df_same.columns]

print("\n" + "=" * 80)
print("SAME DATASET COMPARISON (Train)")
print("(all_incidents_anomalies_reinforced_v2.csv)")
print("=" * 80)
if not df_same.empty and available_display:
    # Green must always mean "better": lower-is-better metrics (per KEY_METRICS)
    # get the reversed colormap, everything else the regular one.
    lower_cols = [m for m in available_display
                  if KEY_METRICS.get(m, {}).get('direction') == 'lower']
    higher_cols = [m for m in available_display if m not in lower_cols]

    styled = df_same[available_display].round(4).style
    if higher_cols:
        styled = styled.background_gradient(cmap='RdYlGn', axis=0, subset=higher_cols)
    if lower_cols:
        styled = styled.background_gradient(cmap='RdYlGn_r', axis=0, subset=lower_cols)
    display(styled)
else:
    print("No data available for Same dataset comparison.")

In [None]:
available_display_diff = [m for m in display_metrics if m in df_diff.columns]

print("\n" + "=" * 80)
print("DIFFERENT DATASET COMPARISON (Extended)")
print("(all_incidents_anomalies_extended_reinforced.csv)")
print("=" * 80)
if not df_diff.empty and available_display_diff:
    # Green must always mean "better": lower-is-better metrics (per KEY_METRICS)
    # get the reversed colormap, everything else the regular one.
    lower_cols_diff = [m for m in available_display_diff
                       if KEY_METRICS.get(m, {}).get('direction') == 'lower']
    higher_cols_diff = [m for m in available_display_diff if m not in lower_cols_diff]

    styled_diff = df_diff[available_display_diff].round(4).style
    if higher_cols_diff:
        styled_diff = styled_diff.background_gradient(cmap='RdYlGn', axis=0, subset=higher_cols_diff)
    if lower_cols_diff:
        styled_diff = styled_diff.background_gradient(cmap='RdYlGn_r', axis=0, subset=lower_cols_diff)
    display(styled_diff)
else:
    print("No data available for Different dataset comparison.")

## 8. Overall Rankings

In [None]:
# Calculate overall scores (best first). The empty fallback carries an explicit
# float dtype so it behaves like a real score series on every pandas version.
scores_same = (
    df_same.apply(calculate_overall_score, axis=1).sort_values(ascending=False)
    if not df_same.empty else pd.Series(dtype=float)
)
scores_diff = (
    df_diff.apply(calculate_overall_score, axis=1).sort_values(ascending=False)
    if not df_diff.empty else pd.Series(dtype=float)
)

# Same report layout for both evaluations: banner, reference file, then either
# the ranked list or a "no data" line.
for heading, reference_file, scores in (
    ("OVERALL RANKINGS - Same Dataset (Train)",
     "(all_incidents_anomalies_reinforced_v2.csv)", scores_same),
    ("OVERALL RANKINGS - Different Dataset (Extended)",
     "(all_incidents_anomalies_extended_reinforced.csv)", scores_diff),
):
    print("\n" + "=" * 60)
    print(heading)
    print(reference_file)
    print("=" * 60)
    if scores.empty:
        print("No data available.")
        continue
    for rank, (method, score) in enumerate(scores.items(), 1):
        print(f"{rank:2d}. {method:35s}: {score:.2f}")

---
# Combined PNG Visualizations
---

## 9. t-SNE Plots Combined

In [None]:
# Combine all t-SNE plots
# Guard first: a missing key and an empty mapping both mean "nothing to plot".
if not all_pngs.get('tsne'):
    print("No t-SNE plots found.")
else:
    print("\n" + "=" * 80)
    print("t-SNE OVERLAY PLOTS - ALL ANOMALY METHODS")
    print("=" * 80)

    create_combined_plot_grid(
        images_dict=all_pngs['tsne'],
        plot_title='t-SNE Overlay Comparison - All Anomaly Methods',
        output_path=os.path.join(OUTPUT_DIR, 'combined_tsne_all.png'),
        ncols=3,
        figsize_per_image=(7, 6),
    )

## 10. Correlation Comparison Plots Combined

In [None]:
# Combine all correlation comparison plots
# Guard first: a missing key and an empty mapping both mean "nothing to plot".
if not all_pngs.get('correlation_comparison'):
    print("No correlation comparison plots found.")
else:
    print("\n" + "=" * 80)
    print("CORRELATION COMPARISON PLOTS - ALL ANOMALY METHODS")
    print("=" * 80)

    create_combined_plot_grid(
        images_dict=all_pngs['correlation_comparison'],
        plot_title='Correlation Comparison - All Anomaly Methods',
        output_path=os.path.join(OUTPUT_DIR, 'combined_correlation_comparison_all.png'),
        ncols=3,
        figsize_per_image=(7, 6),
    )

## 11. Correlation Scatter Plots Combined

In [None]:
# Combine all correlation scatter plots
# Guard first: a missing key and an empty mapping both mean "nothing to plot".
if not all_pngs.get('correlation_scatter'):
    print("No correlation scatter plots found.")
else:
    print("\n" + "=" * 80)
    print("CORRELATION SCATTER PLOTS - ALL ANOMALY METHODS")
    print("=" * 80)

    create_combined_plot_grid(
        images_dict=all_pngs['correlation_scatter'],
        plot_title='Correlation Scatter - All Anomaly Methods',
        output_path=os.path.join(OUTPUT_DIR, 'combined_correlation_scatter_all.png'),
        ncols=3,
        figsize_per_image=(7, 6),
    )

## 12. Distribution Comparison Plots Combined

In [None]:
# Combine all distribution comparison plots
# Guard first: a missing key and an empty mapping both mean "nothing to plot".
if not all_pngs.get('distribution_comparison'):
    print("No distribution comparison plots found.")
else:
    print("\n" + "=" * 80)
    print("DISTRIBUTION COMPARISON PLOTS - ALL ANOMALY METHODS")
    print("=" * 80)

    create_combined_plot_grid(
        images_dict=all_pngs['distribution_comparison'],
        plot_title='Distribution Comparison - All Anomaly Methods',
        output_path=os.path.join(OUTPUT_DIR, 'combined_distribution_comparison_all.png'),
        ncols=3,
        figsize_per_image=(7, 6),
    )

## 13. Distribution Tests Plots Combined

In [None]:
# Combine all distribution test plots
# Guard first: a missing key and an empty mapping both mean "nothing to plot".
if not all_pngs.get('distribution_tests'):
    print("No distribution test plots found.")
else:
    print("\n" + "=" * 80)
    print("DISTRIBUTION TESTS PLOTS - ALL ANOMALY METHODS")
    print("=" * 80)

    create_combined_plot_grid(
        images_dict=all_pngs['distribution_tests'],
        plot_title='Distribution Tests - All Anomaly Methods',
        output_path=os.path.join(OUTPUT_DIR, 'combined_distribution_tests_all.png'),
        ncols=3,
        figsize_per_image=(7, 6),
    )

## 14. Effect Sizes Plots Combined

In [None]:
# Combine all effect sizes plots
# Guard first: a missing key and an empty mapping both mean "nothing to plot".
if not all_pngs.get('effect_sizes'):
    print("No effect sizes plots found.")
else:
    print("\n" + "=" * 80)
    print("EFFECT SIZES PLOTS - ALL ANOMALY METHODS")
    print("=" * 80)

    create_combined_plot_grid(
        images_dict=all_pngs['effect_sizes'],
        plot_title='Effect Sizes - All Anomaly Methods',
        output_path=os.path.join(OUTPUT_DIR, 'combined_effect_sizes_all.png'),
        ncols=3,
        figsize_per_image=(7, 6),
    )

## 15. PCA Centroid Analysis Plots Combined

In [None]:
# Combine all PCA centroid plots
# Guard first: a missing key and an empty mapping both mean "nothing to plot".
if not all_pngs.get('pca_centroid'):
    print("No PCA centroid plots found.")
else:
    print("\n" + "=" * 80)
    print("PCA CENTROID ANALYSIS PLOTS - ALL ANOMALY METHODS")
    print("=" * 80)

    create_combined_plot_grid(
        images_dict=all_pngs['pca_centroid'],
        plot_title='PCA Centroid Analysis - All Anomaly Methods',
        output_path=os.path.join(OUTPUT_DIR, 'combined_pca_centroid_all.png'),
        ncols=3,
        figsize_per_image=(7, 6),
    )

## 16. Quality Dashboard Plots Combined

In [None]:
# Combine all quality dashboard plots
# Guard first: a missing key and an empty mapping both mean "nothing to plot".
if not all_pngs.get('quality_dashboard'):
    print("No quality dashboard plots found.")
else:
    print("\n" + "=" * 80)
    print("QUALITY DASHBOARD PLOTS - ALL ANOMALY METHODS")
    print("=" * 80)

    # Dashboards get slightly larger panels than the other plot types.
    create_combined_plot_grid(
        images_dict=all_pngs['quality_dashboard'],
        plot_title='Quality Dashboard - All Anomaly Methods',
        output_path=os.path.join(OUTPUT_DIR, 'combined_quality_dashboard_all.png'),
        ncols=3,
        figsize_per_image=(8, 7),
    )

## 17. Top K Worst Features Plots Combined

In [None]:
# Combine all top k worst features plots
# Guard first: a missing key and an empty mapping both mean "nothing to plot".
if not all_pngs.get('top_k_worst'):
    print("No top k worst features plots found.")
else:
    print("\n" + "=" * 80)
    print("TOP K WORST FEATURES PLOTS - ALL ANOMALY METHODS")
    print("=" * 80)

    create_combined_plot_grid(
        images_dict=all_pngs['top_k_worst'],
        plot_title='Top K Worst Features - All Anomaly Methods',
        output_path=os.path.join(OUTPUT_DIR, 'combined_top_k_worst_all.png'),
        ncols=3,
        figsize_per_image=(7, 6),
    )

## 18. Calibration Check Plots Combined

In [None]:
# Combine all calibration check plots
# Guard first: a missing key and an empty mapping both mean "nothing to plot".
if not all_pngs.get('calibration'):
    print("No calibration check plots found.")
else:
    print("\n" + "=" * 80)
    print("CALIBRATION CHECK PLOTS - ALL ANOMALY METHODS")
    print("=" * 80)

    create_combined_plot_grid(
        images_dict=all_pngs['calibration'],
        plot_title='Calibration Check - All Anomaly Methods',
        output_path=os.path.join(OUTPUT_DIR, 'combined_calibration_all.png'),
        ncols=3,
        figsize_per_image=(7, 6),
    )

---
# Separated by Evaluation Type (vs Train and vs Extended)
---

## 19. t-SNE Plots - vs Train Dataset Only

In [None]:
# Filter for same only
# A key is kept when it is tagged '_same', mentions 'generated', or carries no
# '_diff' tag at all (i.e. everything except pure diff-evaluation plots).
tsne_same = {
    key: png_path
    for key, png_path in all_pngs.get('tsne', {}).items()
    if '_same' in key or 'generated' in key or '_diff' not in key
}

if not tsne_same:
    print("No t-SNE plots for Same dataset.")
else:
    print("\n" + "=" * 80)
    print("t-SNE PLOTS - SAME DATASET (Train)")
    print("(all_incidents_anomalies_reinforced_v2.csv)")
    print("=" * 80)

    create_combined_plot_grid(
        images_dict=tsne_same,
        plot_title='t-SNE Overlay - Same Dataset (Train)',
        output_path=os.path.join(OUTPUT_DIR, 'combined_tsne_same.png'),
        ncols=3,
        figsize_per_image=(7, 6),
    )

## 20. t-SNE Plots - vs Extended Dataset Only

In [None]:
# Filter for diff only
# Keep just the plots whose key carries the '_diff' tag.
tsne_diff = {
    key: png_path
    for key, png_path in all_pngs.get('tsne', {}).items()
    if '_diff' in key
}

if not tsne_diff:
    print("No t-SNE plots for Different dataset.")
else:
    print("\n" + "=" * 80)
    print("t-SNE PLOTS - DIFFERENT DATASET (Extended)")
    print("(all_incidents_anomalies_extended_reinforced.csv)")
    print("=" * 80)

    create_combined_plot_grid(
        images_dict=tsne_diff,
        plot_title='t-SNE Overlay - Different Dataset (Extended)',
        output_path=os.path.join(OUTPUT_DIR, 'combined_tsne_diff.png'),
        ncols=3,
        figsize_per_image=(7, 6),
    )

---
# Metric Visualizations
---

## 21. Overall Score Comparison

In [None]:
# Overall Score Comparison
fig, axes = plt.subplots(1, 2, figsize=(16, 8))

# One panel per evaluation: (axis, scores, title with data, title without data).
score_panels = (
    (axes[0], scores_same,
     'Same Dataset (Train) - Overall Score\n(all_incidents_anomalies_reinforced_v2.csv)',
     'Same Dataset - Overall Score'),
    (axes[1], scores_diff,
     'Different Dataset (Extended) - Overall Score\n(all_incidents_anomalies_extended_reinforced.csv)',
     'Different Dataset - Overall Score'),
)

for ax, scores, title, empty_title in score_panels:
    if scores.empty:
        ax.text(0.5, 0.5, 'No data available', ha='center', va='center', fontsize=14)
        ax.set_title(empty_title)
        continue

    positions = range(len(scores))
    # Scores arrive sorted best-first, so the gradient runs from green (0.9)
    # down towards red (0.3): the best method gets the greenest bar.
    bar_colors = plt.cm.RdYlGn(np.linspace(0.9, 0.3, len(scores)))
    ax.barh(positions, scores, color=bar_colors)
    ax.set_yticks(positions)
    ax.set_yticklabels(scores.index)
    ax.set_xlabel('Overall Score')
    ax.set_xlim(0, 100)
    ax.set_title(title, fontsize=12)
    # Annotate each bar with its score, just past the bar end.
    for position, score in enumerate(scores):
        ax.text(score, position, f' {score:.1f}', va='center', fontsize=9)

plt.tight_layout()
plt.savefig(os.path.join(OUTPUT_DIR, 'overall_comparison_anomaly.png'), dpi=150)
plt.show()

## 22. Performance Heatmaps

In [None]:
# Heatmap - Same Dataset
key_metric_names = list(KEY_METRICS.keys())
available_heatmap = [m for m in key_metric_names if m in df_same.columns]

if len(available_heatmap) >= 2 and not df_same.empty:
    # Coerce to numbers first: cells that are not numeric become NaN (blank in
    # the heatmap) instead of breaking min/max below.
    plot_data = df_same[available_heatmap].apply(pd.to_numeric, errors='coerce')

    # Normalize each column to [0, 1] with 1 = best, flipping lower-is-better
    # metrics so the colour scale means the same thing everywhere.
    for col in plot_data.columns:
        config = KEY_METRICS.get(col, {'direction': 'higher'})
        values = plot_data[col]
        min_val, max_val = values.min(), values.max()
        if max_val > min_val:
            normalized = (values - min_val) / (max_val - min_val)
            if config['direction'] == 'lower':
                normalized = 1 - normalized
        else:
            # All methods tie on this metric (or it is entirely missing): show
            # a neutral 0.5 rather than leaking raw values into a 0-1 scale.
            normalized = values.where(values.isna(), 0.5)
        plot_data[col] = normalized

    fig, ax = plt.subplots(figsize=(14, 8))
    sns.heatmap(plot_data, annot=True, fmt='.2f', cmap='RdYlGn',
                ax=ax, vmin=0, vmax=1, cbar_kws={'label': 'Normalized Score (higher=better)'})
    ax.set_title('Method Performance Heatmap (Same Dataset - Train)\n(Normalized: 1=best, 0=worst)', fontsize=14)
    ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')
    plt.tight_layout()
    plt.savefig(os.path.join(OUTPUT_DIR, 'heatmap_same.png'), dpi=150)
    plt.show()
else:
    print("Not enough data for heatmap (Same dataset).")

In [None]:
# Heatmap - Different Dataset
# Same layout as the train heatmap, but for df_diff. Relies on
# `key_metric_names` having been defined by the previous heatmap cell.
available_heatmap_diff = [m for m in key_metric_names if m in df_diff.columns]

if len(available_heatmap_diff) >= 2 and not df_diff.empty:
    plot_data = df_diff[available_heatmap_diff].copy()

    # Min-max normalise each metric column to [0, 1].
    for col in plot_data.columns:
        config = KEY_METRICS.get(col, {'direction': 'higher'})
        values = plot_data[col]
        min_val, max_val = values.min(), values.max()
        if max_val > min_val:
            normalized = (values - min_val) / (max_val - min_val)
            # Flip lower-is-better metrics so that 1 always means "best".
            if config['direction'] == 'lower':
                normalized = 1 - normalized
        else:
            # Constant column (single method, or all methods tied): there is no
            # spread to scale, so use a neutral 0.5 instead of leaving raw,
            # un-normalised values on a heatmap fixed to [0, 1]. NaNs stay NaN.
            normalized = values.astype(float).where(values.isna(), 0.5)
        plot_data[col] = normalized

    fig, ax = plt.subplots(figsize=(14, 8))
    sns.heatmap(plot_data, annot=True, fmt='.2f', cmap='RdYlGn',
                ax=ax, vmin=0, vmax=1, cbar_kws={'label': 'Normalized Score (higher=better)'})
    ax.set_title('Method Performance Heatmap (Different Dataset - Extended)\n(Normalized: 1=best, 0=worst)', fontsize=14)
    ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')
    plt.tight_layout()
    plt.savefig(os.path.join(OUTPUT_DIR, 'heatmap_diff.png'), dpi=150)
    plt.show()
else:
    print("Not enough data for heatmap (Different dataset).")

## 23. Key Metric Bar Charts

In [None]:
# Key Metric Bar Charts
# One horizontal bar chart per selected metric (train/"same" reference only),
# laid out on a grid of at most two columns.
# Each entry is (metric column name, direction in which the metric improves).
key_metrics_to_plot = [
    ('Mean KS Statistic', 'lower'),
    ('Correlation Similarity (Pearson)', 'higher'),
    ('Correlation Score', 'higher'),
    ('Distribution Score (Weighted)', 'higher')
]

available_to_plot = [(m, d) for m, d in key_metrics_to_plot if m in df_same.columns]

if available_to_plot and not df_same.empty:
    n_plots = len(available_to_plot)
    ncols = min(2, n_plots)
    nrows = (n_plots + ncols - 1) // ncols  # ceiling division

    fig, axes = plt.subplots(nrows, ncols, figsize=(8 * ncols, 6 * nrows))
    # plt.subplots returns a bare Axes for a 1x1 grid; normalise to a flat sequence.
    if n_plots == 1:
        axes = [axes]
    else:
        axes = axes.flatten()

    for idx, (metric, direction) in enumerate(available_to_plot):
        # Ascending for lower-is-better, descending otherwise, so `values` is
        # always ordered best-first.
        values = df_same[metric].dropna().sort_values(ascending=(direction == 'lower'))
        # Best-first ordering means the colour ramp always runs green -> red.
        # Previously higher-is-better metrics drew their best bar in red while
        # lower-is-better metrics drew theirs in green.
        colors = plt.cm.RdYlGn(np.linspace(0.8, 0.2, len(values)))

        axes[idx].barh(range(len(values)), values, color=colors)
        axes[idx].set_yticks(range(len(values)))
        axes[idx].set_yticklabels(values.index)
        axes[idx].set_xlabel(metric)
        axes[idx].set_title(f'{metric} (Same Dataset - Train)')

        # Print the numeric value just past the end of each bar.
        for i, (method, v) in enumerate(values.items()):
            axes[idx].text(v, i, f' {v:.4f}', va='center', fontsize=8)

    # Hide unused axes
    for idx in range(len(available_to_plot), len(axes)):
        axes[idx].axis('off')

    plt.tight_layout()
    plt.savefig(os.path.join(OUTPUT_DIR, 'key_metrics_comparison_anomaly.png'), dpi=150)
    plt.show()
else:
    print("No key metrics available to plot.")

## 24. Ranking Tables

In [None]:
# Ranking table for the train ("same") reference.
# Both ranking frames start out empty so that the save cell can test `.empty`
# on them even when no ranking was produced.
rankings_same, rankings_diff = pd.DataFrame(), pd.DataFrame()

if df_same.empty:
    print("No data for ranking table (Same dataset).")
else:
    rankings_same = create_ranking_table(df_same)

    rule = "=" * 80
    for header_line in (
        "\n" + rule,
        "RANKING TABLE - Same Dataset (Train)",
        "(Lower rank = better performance)",
        rule,
    ):
        print(header_line)

    # Reversed RdYlGn, applied per column (axis=0).
    styled_same = rankings_same.round(2).style.background_gradient(cmap='RdYlGn_r', axis=0)
    display(styled_same)

In [None]:
# Ranking table for the extended ("different") reference.
# `rankings_diff` is only reassigned here when df_diff has rows.
if df_diff.empty:
    print("No data for ranking table (Different dataset).")
else:
    rankings_diff = create_ranking_table(df_diff)

    rule = "=" * 80
    for header_line in (
        "\n" + rule,
        "RANKING TABLE - Different Dataset (Extended)",
        "(Lower rank = better performance)",
        rule,
    ):
        print(header_line)

    # Reversed RdYlGn, applied per column (axis=0).
    styled_diff = rankings_diff.round(2).style.background_gradient(cmap='RdYlGn_r', axis=0)
    display(styled_diff)

## 25. Best Methods by Category

In [None]:
# Report, for each reference dataset, the best method for every metric,
# grouped into display categories.
categories = {
    'Distribution Similarity': ['Mean KS Statistic', 'Mean Wasserstein Distance'],
    'Correlation Preservation': ['Correlation Similarity (Pearson)', 'Correlation Similarity (Spearman)'],
    'Overall Scores': ['Distribution Score (Weighted)', 'Correlation Score', 'Effect Size Score (Weighted)']
}

print("\n" + "=" * 80)
print("BEST METHODS BY METRIC CATEGORY - ANOMALY DATA")
print("=" * 80)

for eval_type, df in [('Same Dataset (Train)', df_same), ('Different Dataset (Extended)', df_diff)]:
    if df.empty:
        continue
    print(f"\n{eval_type}:")
    print("-" * 40)

    for category, metrics in categories.items():
        print(f"\n  {category}:")
        for metric in metrics:
            if metric not in df.columns:
                continue

            # Drop missing values first: an all-NaN column has no best method,
            # and idxmax/idxmin on it yields NaN or raises depending on the
            # pandas version.
            valid = df[metric].dropna()
            if valid.empty:
                print(f"    {metric}: n/a (no valid values)")
                continue

            # Metrics missing from KEY_METRICS fall back to higher-is-better.
            # NOTE(review): confirm that distance-style metrics (e.g. 'Mean
            # Wasserstein Distance') are registered there with 'lower'.
            config = KEY_METRICS.get(metric, {'direction': 'higher'})
            if config['direction'] == 'higher':
                best = valid.idxmax()
                value = valid.max()
            else:
                best = valid.idxmin()
                value = valid.min()
            print(f"    {metric}: {best} ({value:.4f})")

## 26. Save Results

In [None]:
# Save all results
# Comparison and ranking tables are written with their index; empty frames are
# skipped so no empty CSVs are produced.
indexed_exports = [
    ('comparison_same.csv', df_same),
    ('comparison_diff.csv', df_diff),
    ('rankings_same.csv', rankings_same),
    ('rankings_diff.csv', rankings_diff),
]
for csv_name, table in indexed_exports:
    if table.empty:
        continue
    table.to_csv(os.path.join(OUTPUT_DIR, csv_name))

# Save overall scores as two-column tables (method name, score) without an index.
score_exports = [
    ('overall_scores_same.csv', scores_same),
    ('overall_scores_diff.csv', scores_diff),
]
for csv_name, overall in score_exports:
    if overall.empty:
        continue
    score_table = pd.DataFrame({'Method': overall.index, 'Overall Score': overall.values})
    score_table.to_csv(os.path.join(OUTPUT_DIR, csv_name), index=False)

print(f"\nAll results saved to: {OUTPUT_DIR}")
print("\nSaved files:")
if not os.path.exists(OUTPUT_DIR):
    print("  (Output directory not yet created - run cells to generate results)")
else:
    # List everything currently in the output directory, alphabetically.
    for saved_name in sorted(os.listdir(OUTPUT_DIR)):
        print(f"  - {saved_name}")

## 27. Summary

In [None]:
# Final textual summary: reference datasets, method counts, collected plots
# and the three highest-ranked methods per reference dataset.
print("\n" + "=" * 80)
print("SUMMARY - ANOMALY DATA COMPARISON")
print("=" * 80)

print("\nReference Datasets:")
print(f"  Same (Train): {REFERENCE_DATASETS.get('same', 'Not set')}")
print(f"  Diff (Extended): {REFERENCE_DATASETS.get('diff', 'Not set')}")

print(f"\nTotal methods configured: {len(SYNTHETIC_ANOMALY_DATASETS)}")
print(f"Methods with Same dataset results: {len(df_same)}")
print(f"Methods with Different dataset results: {len(df_diff)}")

print("\nPNG plots collected:")
for plot_type, images in all_pngs.items():
    print(f"  {plot_type}: {len(images)} images")

# Print the section header whenever either list follows; it used to be tied to
# scores_same only, leaving the Different-dataset list without a header when
# only scores_diff had data.
if not scores_same.empty or not scores_diff.empty:
    print("\nTop 3 Methods Overall:")

# head(3) is reported as the top three, i.e. the score series are presumably
# ordered best-first by the cell that built them (TODO confirm).
if not scores_same.empty:
    print("\n  Same Dataset (Train):")
    for rank, (method, score) in enumerate(scores_same.head(3).items(), 1):
        print(f"    {rank}. {method}: {score:.2f}")

if not scores_diff.empty:
    print("\n  Different Dataset (Extended):")
    for rank, (method, score) in enumerate(scores_diff.head(3).items(), 1):
        print(f"    {rank}. {method}: {score:.2f}")

print("\n" + "=" * 80)
print("Analysis complete!")
print("=" * 80)