In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

def load_and_clean_data(csv_path, ttc_min=0, ttc_max=100):
    """Load the TTC results CSV and remove unusable rows.

    Non-numeric entries (e.g. the string 'invalid') in ``TTC_Lidar``,
    ``TTC_Camera`` and ``TTC_Difference`` are coerced to NaN. A row is then
    dropped when

    * both ``TTC_Lidar`` and ``TTC_Camera`` are NaN, or
    * either TTC value is a number outside ``[ttc_min, ttc_max]`` (inclusive).

    A row in which exactly one of the two TTC values is NaN is kept, so that
    ``calculate_performance_metrics`` can count it as a failed computation.
    (``Series.between`` evaluates to False for NaN, so filtering with it alone
    would silently discard those rows.)

    Args:
        csv_path: Anything ``pandas.read_csv`` accepts (path or file-like).
        ttc_min: Smallest TTC value treated as plausible. Defaults to 0.
        ttc_max: Largest TTC value treated as plausible. Defaults to 100.

    Returns:
        pandas.DataFrame with the surviving rows; the original index labels
        are preserved.

    Raises:
        KeyError: If one of the three TTC columns is missing from the CSV.
    """
    df = pd.read_csv(csv_path)

    # Convert 'invalid' strings (and any other non-numeric text) to NaN
    for col in ['TTC_Lidar', 'TTC_Camera', 'TTC_Difference']:
        df[col] = pd.to_numeric(df[col], errors='coerce')

    # Remove rows where both TTC values are invalid
    df = df.dropna(subset=['TTC_Lidar', 'TTC_Camera'], how='all')

    def _plausible(values):
        # NaN means "no estimate" and is not an outlier; only numeric values
        # outside the accepted range are rejected.
        return values.isna() | values.between(ttc_min, ttc_max)

    # Filter out extreme outliers (likely computation errors)
    keep = _plausible(df['TTC_Lidar']) & _plausible(df['TTC_Camera'])
    return df[keep]

def calculate_performance_metrics(df):
    """Calculate performance metrics for each detector/descriptor combination.

    Rows are grouped by (``Detector``, ``Descriptor``). Within a group only
    rows where both ``TTC_Lidar`` and ``TTC_Camera`` are present contribute to
    the error metrics; groups without any such row are omitted from the result.

    Args:
        df: DataFrame with the columns ``Detector``, ``Descriptor``,
            ``TTC_Lidar``, ``TTC_Camera``, ``Keypoints`` and ``Matches``.

    Returns:
        pandas.DataFrame with one row per combination and the columns
        ``Detector``, ``Descriptor``, ``Combination``, ``MAE``, ``RMSE``,
        ``Relative_Error_%``, ``Stability_StdDev``, ``Success_Rate_%``,
        ``Outlier_Rate_%``, ``Avg_Keypoints``, ``Avg_Matches``,
        ``Valid_Frames`` and ``Total_Frames``. The columns are present even
        when no combination qualifies, so callers can index them safely.
    """
    columns = [
        'Detector', 'Descriptor', 'Combination', 'MAE', 'RMSE',
        'Relative_Error_%', 'Stability_StdDev', 'Success_Rate_%',
        'Outlier_Rate_%', 'Avg_Keypoints', 'Avg_Matches',
        'Valid_Frames', 'Total_Frames',
    ]
    results = []

    for (detector, descriptor), data in df.groupby(['Detector', 'Descriptor']):
        # Only calculate metrics where both TTC values are valid
        valid_data = data.dropna(subset=['TTC_Lidar', 'TTC_Camera'])
        valid_frames = len(valid_data)
        if valid_frames == 0:
            continue
        total_frames = len(data)

        # Absolute camera-vs-lidar error, shared by all error metrics below
        abs_error = (valid_data['TTC_Lidar'] - valid_data['TTC_Camera']).abs()
        mae = abs_error.mean()
        rmse = np.sqrt((abs_error ** 2).mean())

        # Relative error (percentage). A Lidar TTC of exactly 0 yields +/-inf
        # here; those frames are left out of the mean so a single one cannot
        # turn the whole metric into inf.
        relative = abs_error / valid_data['TTC_Lidar']
        rel_error = relative.replace([np.inf, -np.inf], np.nan).mean() * 100

        # Standard deviation of camera TTC (stability measure), population
        # form (ddof=0)
        stability = np.std(valid_data['TTC_Camera'].to_numpy())

        # Success rate (percentage of frames with both TTC values valid)
        success_rate = (valid_frames / total_frames) * 100

        # Outlier rate (>50% error). An infinite relative error still counts
        # as an outlier because inf > 0.5.
        outliers = int((relative > 0.5).sum())
        outlier_rate = (outliers / valid_frames) * 100

        results.append({
            'Detector': detector,
            'Descriptor': descriptor,
            'Combination': f"{detector}+{descriptor}",
            'MAE': mae,
            'RMSE': rmse,
            'Relative_Error_%': rel_error,
            'Stability_StdDev': stability,
            'Success_Rate_%': success_rate,
            'Outlier_Rate_%': outlier_rate,
            # Keypoint/match averages use every frame of the group, not only
            # the valid ones
            'Avg_Keypoints': data['Keypoints'].mean(),
            'Avg_Matches': data['Matches'].mean(),
            'Valid_Frames': valid_frames,
            'Total_Frames': total_frames
        })

    return pd.DataFrame(results, columns=columns)

def create_summary_table(metrics_df):
    """Build the ranked summary table from the per-combination metrics.

    A composite ``Ranking_Score`` (lower is better) is computed as
    ``0.4 * MAE + 0.3 * Stability_StdDev + 0.01 * Outlier_Rate_%
    + 0.02 * (100 - Success_Rate_%)``.

    Side effect: the score is written into ``metrics_df`` itself as a new
    ``Ranking_Score`` column, so the caller's frame is modified.

    Returns:
        A new DataFrame sorted by ascending score, with a 1-based ``Rank``
        column and the numeric values rounded to three decimals. Only the
        summary columns are included (``Ranking_Score`` is not among them).
    """
    # Accumulate the weighted terms in a fixed order: MAE, stability,
    # outlier rate, then the failure rate.
    score = metrics_df['MAE'] * 0.4
    score = score + metrics_df['Stability_StdDev'] * 0.3
    score = score + metrics_df['Outlier_Rate_%'] * 0.01
    score = score + (100 - metrics_df['Success_Rate_%']) * 0.02
    metrics_df['Ranking_Score'] = score

    # Best (lowest) score first; assign() returns a fresh frame, so the
    # caller's frame does not receive the Rank column.
    ranked = metrics_df.sort_values('Ranking_Score').assign(
        Rank=range(1, len(metrics_df) + 1)
    )

    # Columns exposed in the summary, in display order
    ordered_columns = ['Rank', 'Combination', 'MAE', 'RMSE']
    ordered_columns += ['Relative_Error_%', 'Stability_StdDev']
    ordered_columns += ['Success_Rate_%', 'Outlier_Rate_%']
    ordered_columns += ['Avg_Keypoints', 'Avg_Matches']

    return ranked.loc[:, ordered_columns].round(3)

def create_comparison_table(df):
    """Create the frame-by-frame comparison table.

    Args:
        df: DataFrame with the columns ``Frame``, ``Detector``,
            ``Descriptor``, ``TTC_Lidar`` and ``TTC_Camera``.

    Returns:
        pandas.DataFrame with one row per distinct frame (ascending) and the
        columns ``Frame``, ``TTC_Lidar`` and one ``"<Detector>+<Descriptor>"``
        column per combination. A combination's column holds its camera TTC
        for that frame, or NaN when the combination has no row for the frame.
    """
    # Lidar TTC per frame (should be the same for all combinations).
    # GroupBy.first() skips missing values, so a frame whose first row has no
    # Lidar value still gets one if any other row for that frame has it.
    lidar_by_frame = df.groupby('Frame')['TTC_Lidar'].first()
    frames = lidar_by_frame.index.to_numpy()

    table = {'Frame': frames, 'TTC_Lidar': lidar_by_frame.to_numpy()}

    # Group once (not once per frame) and align each combination on the
    # shared frame axis.
    for (detector, descriptor), combo_data in df.groupby(['Detector', 'Descriptor']):
        # Keep the first row per frame so a missing camera TTC stays NaN
        camera_by_frame = (
            combo_data.drop_duplicates(subset='Frame')
            .set_index('Frame')['TTC_Camera']
        )
        table[f"{detector}+{descriptor}"] = camera_by_frame.reindex(frames).to_numpy()

    return pd.DataFrame(table)

def _save_and_close(fig, path):
    """Tighten the layout, write ``fig`` to ``path`` at 300 dpi and close it."""
    fig.tight_layout()
    fig.savefig(path, dpi=300, bbox_inches='tight')
    plt.close(fig)


def _plot_mae_ranking(metrics_df, out_dir):
    """Plot 1: horizontal bar chart of MAE for the ten best-ranked combinations."""
    # Use the composite 'Ranking_Score' when the column is present; otherwise
    # fall back to plain MAE instead of failing with a KeyError.
    rank_col = 'Ranking_Score' if 'Ranking_Score' in metrics_df.columns else 'MAE'
    top_10 = metrics_df.sort_values(rank_col).head(10)

    fig, ax = plt.subplots(figsize=(12, 8))
    positions = range(len(top_10))
    bars = ax.barh(positions, top_10['MAE'])
    ax.set_yticks(positions)
    ax.set_yticklabels(top_10['Combination'])
    ax.set_xlabel('Mean Absolute Error (seconds)')
    ax.set_title('Top 10 Detector/Descriptor Combinations by MAE')
    ax.invert_yaxis()  # best-ranked combination at the top

    # Add value labels on bars
    for bar in bars:
        width = bar.get_width()
        ax.text(width + 0.01, bar.get_y() + bar.get_height() / 2,
                f'{width:.3f}', ha='left', va='center')

    _save_and_close(fig, out_dir / "ranking_mae.png")


def _plot_best_correlation(df, metrics_df, out_dir):
    """Plot 2: Lidar-vs-camera TTC scatter for the combination with the lowest MAE."""
    best_combo = metrics_df.loc[metrics_df['MAE'].idxmin()]
    best_data = df[(df['Detector'] == best_combo['Detector']) &
                   (df['Descriptor'] == best_combo['Descriptor'])]

    fig, ax = plt.subplots(figsize=(10, 8))
    ax.scatter(best_data['TTC_Lidar'], best_data['TTC_Camera'], alpha=0.6, s=50)

    # Add diagonal line (perfect correlation)
    min_val = min(best_data['TTC_Lidar'].min(), best_data['TTC_Camera'].min())
    max_val = max(best_data['TTC_Lidar'].max(), best_data['TTC_Camera'].max())
    ax.plot([min_val, max_val], [min_val, max_val], 'r--', alpha=0.8,
            label='Perfect correlation')

    ax.set_xlabel('TTC Lidar (seconds)')
    ax.set_ylabel('TTC Camera (seconds)')
    ax.set_title(f'TTC Correlation - Best Performer: {best_combo["Combination"]}')
    ax.legend()
    ax.grid(True, alpha=0.3)

    _save_and_close(fig, out_dir / "best_correlation.png")


def _plot_success_vs_accuracy(metrics_df, out_dir):
    """Plot 3: success rate against MAE, coloured by average keypoint count."""
    fig, ax = plt.subplots(figsize=(10, 8))
    scatter = ax.scatter(metrics_df['Success_Rate_%'], metrics_df['MAE'],
                         s=100, alpha=0.6, c=metrics_df['Avg_Keypoints'],
                         cmap='viridis')

    # Add labels for interesting points
    for _, row in metrics_df.iterrows():
        if row['MAE'] < 2 or row['Success_Rate_%'] > 95:  # Good performers
            ax.annotate(row['Combination'], (row['Success_Rate_%'], row['MAE']),
                        xytext=(5, 5), textcoords='offset points', fontsize=8)

    ax.set_xlabel('Success Rate (%)')
    ax.set_ylabel('Mean Absolute Error (seconds)')
    ax.set_title('Success Rate vs Accuracy Trade-off')
    fig.colorbar(scatter, ax=ax, label='Average Keypoints')
    ax.grid(True, alpha=0.3)

    _save_and_close(fig, out_dir / "success_vs_accuracy.png")


def _plot_frame_comparison(df, metrics_df, out_dir):
    """Plot 4: per-frame TTC of the five lowest-MAE combinations against Lidar."""
    top_5 = metrics_df.sort_values('MAE').head(5)

    fig, ax = plt.subplots(figsize=(14, 8))

    # Plot Lidar TTC as reference: one value per frame, computed in a single
    # pass. GroupBy.first() returns the first non-missing value per frame.
    lidar_by_frame = df.groupby('Frame')['TTC_Lidar'].first()
    ax.plot(lidar_by_frame.index.to_numpy(), lidar_by_frame.to_numpy(), 'k-',
            linewidth=2, label='Lidar (Ground Truth)', marker='o')

    # Plot top 5 camera methods
    colors = plt.cm.tab10(np.linspace(0, 1, 5))
    for color, (_, row) in zip(colors, top_5.iterrows()):
        combo_data = df[(df['Detector'] == row['Detector']) &
                        (df['Descriptor'] == row['Descriptor'])]
        # Draw each line in frame order regardless of the row order in df
        combo_data = combo_data.sort_values('Frame', kind='stable')
        ax.plot(combo_data['Frame'], combo_data['TTC_Camera'],
                color=color, label=f"{row['Combination']} (MAE: {row['MAE']:.3f})",
                marker='s', alpha=0.7)

    ax.set_xlabel('Frame Number')
    ax.set_ylabel('Time to Collision (seconds)')
    ax.set_title('TTC Estimates - Top 5 Performing Methods')
    ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    ax.grid(True, alpha=0.3)

    _save_and_close(fig, out_dir / "frame_comparison.png")


def create_visualizations(df, metrics_df, output_dir="ttc_analysis_plots"):
    """Create the four analysis plots and save them as PNG files.

    Files written into ``output_dir`` (created, including missing parent
    directories, if necessary): ``ranking_mae.png``, ``best_correlation.png``,
    ``success_vs_accuracy.png`` and ``frame_comparison.png``.

    Args:
        df: Cleaned per-frame data with the columns ``Frame``, ``Detector``,
            ``Descriptor``, ``TTC_Lidar`` and ``TTC_Camera``.
        metrics_df: Per-combination metrics with the columns ``Detector``,
            ``Descriptor``, ``Combination``, ``MAE``, ``Success_Rate_%`` and
            ``Avg_Keypoints``. A ``Ranking_Score`` column is used for the
            first plot when present; otherwise that plot ranks by ``MAE``.
        output_dir: Directory that receives the PNG files.

    Raises:
        ValueError: If ``metrics_df`` has no rows.
    """
    if metrics_df.empty:
        raise ValueError("metrics_df is empty; nothing to visualize")

    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    plt.style.use('default')

    _plot_mae_ranking(metrics_df, out_dir)
    _plot_best_correlation(df, metrics_df, out_dir)
    _plot_success_vs_accuracy(metrics_df, out_dir)
    _plot_frame_comparison(df, metrics_df, out_dir)

    print(f"Visualizations saved to {output_dir}/")

def main(csv_file="ttc_analysis_results.csv", output_dir="ttc_analysis_plots"):
    """Run the complete TTC analysis: load, score, tabulate, report and plot.

    Writes ``ttc_summary_table.csv``, ``ttc_comparison_table.csv`` and
    ``ttc_detailed_metrics.csv`` to the current working directory, prints the
    best and worst performers, and saves the plots into ``output_dir``.

    Args:
        csv_file: CSV with the per-frame TTC results (the original comment
            describes it as the output of the C++ analysis).
        output_dir: Directory for the PNG plots.
    """
    print("Loading and cleaning data...")
    df = load_and_clean_data(csv_file)
    print(f"Loaded {len(df)} valid data points")

    print("Calculating performance metrics...")
    metrics_df = calculate_performance_metrics(df)

    # Without any scored combination the ranking, the reports and the plots
    # below have nothing to work on; stop with a clear message instead of a
    # KeyError/IndexError further down.
    if metrics_df.empty:
        print("No detector/descriptor combination has a frame with both TTC values valid; nothing to report.")
        return

    # NOTE: create_summary_table() also adds the 'Ranking_Score' column to
    # metrics_df in place, so it must run before metrics_df is saved or plotted.
    print("Creating summary table...")
    summary_table = create_summary_table(metrics_df)

    print("Creating comparison table...")
    comparison_table = create_comparison_table(df)

    # Save tables to CSV
    summary_table.to_csv("ttc_summary_table.csv", index=False)
    comparison_table.to_csv("ttc_comparison_table.csv", index=False)
    metrics_df.to_csv("ttc_detailed_metrics.csv", index=False)

    # Display results
    print("\n" + "="*80)
    print("TTC ANALYSIS SUMMARY - TOP 10 PERFORMERS")
    print("="*80)
    print(summary_table.head(10).to_string(index=False))

    print("\n" + "="*50)
    print("WORST 5 PERFORMERS")
    print("="*50)
    print(summary_table.tail(5).to_string(index=False))

    # Show some interesting statistics
    best = summary_table.iloc[0]
    worst = summary_table.iloc[-1]
    print(f"\nBest performer: {best['Combination']} (MAE: {best['MAE']:.3f}s)")
    print(f"Worst performer: {worst['Combination']} (MAE: {worst['MAE']:.3f}s)")
    print(f"Most stable: {metrics_df.loc[metrics_df['Stability_StdDev'].idxmin(), 'Combination']} (StdDev: {metrics_df['Stability_StdDev'].min():.3f}s)")
    print(f"Highest success rate: {metrics_df.loc[metrics_df['Success_Rate_%'].idxmax(), 'Combination']} ({metrics_df['Success_Rate_%'].max():.1f}%)")

    print("Creating visualizations...")
    create_visualizations(df, metrics_df, output_dir)

    print("\nAnalysis complete! Files saved:")
    print("- ttc_summary_table.csv (ranked performance summary)")
    print("- ttc_comparison_table.csv (frame-by-frame comparison)")
    print("- ttc_detailed_metrics.csv (detailed metrics)")
    print(f"- {output_dir}/ (visualization plots)")

# Entry point: run the full analysis when this code is executed directly.
# In a notebook kernel __name__ is "__main__", so running the cell calls main().
if __name__ == "__main__":
    main()

Loading and cleaning data...
Loaded 469 valid data points
Calculating performance metrics...
Creating summary table...
Creating comparison table...

TTC ANALYSIS SUMMARY - TOP 10 PERFORMERS
 Rank     Combination   MAE  RMSE  Relative_Error_%  Stability_StdDev  Success_Rate_%  Outlier_Rate_%  Avg_Keypoints  Avg_Matches
    1      FAST+BRISK 1.609 1.941            14.922             0.870           100.0           0.000       5282.333     2241.000
    2      FAST+FREAK 1.609 1.941            14.922             0.870           100.0           0.000       5282.333     2241.000
    3      FAST+BRIEF 1.609 1.941            14.922             0.870           100.0           0.000       5282.333     2241.000
    4        FAST+ORB 1.607 1.978            14.723             1.095           100.0           0.000       5282.333     2853.056
    5      SIFT+FREAK 0.870 1.130             7.353             2.696           100.0           0.000       1983.056      774.667
    6      SIFT+BRIEF 0.870 1.

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')