# SuperGlue Feature Matching Demo

This notebook demonstrates SuperGlue feature matching using video input from the videos directory. It shows:
- Real-time feature detection with SuperPoint
- Feature matching with SuperGlue (including Sinkhorn algorithm)
- Visualization of keypoints and matches
- Parameter tuning (a sweep over Sinkhorn iteration counts)
- Performance analysis and statistics

Based on Magic Leap's pretrained SuperGlue implementation (`SuperGluePretrainedNetwork`).

In [None]:
# Setup and imports
import os
import sys
import cv2
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from pathlib import Path
import torch
import time
from collections import OrderedDict

# Add SuperGlue path
superglue_path = '../modules/hloc/third_party/SuperGluePretrainedNetwork'
sys.path.append(superglue_path)

from models.matching import Matching
from models.utils import AverageTimer, VideoStreamer, make_matching_plot_fast, frame2tensor

# Inference only: switch autograd off globally so no computation graphs are built
torch.set_grad_enabled(False)

# Query CUDA availability once and reuse the answer for the report below
_cuda_ok = torch.cuda.is_available()

print("SuperGlue Demo Environment Setup Complete")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {_cuda_ok}")
if _cuda_ok:
    print(f"CUDA device: {torch.cuda.get_device_name()}")

## Configuration

Configure SuperGlue parameters including the Sinkhorn algorithm iterations.

In [None]:
# Input / output locations, relative to the notebook's working directory.
# The output directory is created up front so later cells can write into it.
INPUT_DIR = Path("../videos")
OUTPUT_DIR = Path("../outputs/superglue_demo")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Model settings handed to `Matching`: one sub-dict per network.
config = dict(
    superpoint=dict(
        nms_radius=4,
        keypoint_threshold=0.005,
        max_keypoints=1024,
    ),
    superglue=dict(
        weights='outdoor',         # 'indoor' or 'outdoor'
        sinkhorn_iterations=20,    # Number of Sinkhorn iterations (key parameter!)
        match_threshold=0.2,
    ),
)

# Frame-processing knobs
RESIZE = [640, 480]    # [width, height]
SKIP_FRAMES = 1        # Process every N-th frame
MAX_FRAMES = 200       # Maximum frames to process (None for all)
SAVE_MATCHES = True    # Save matching visualizations

# Prefer the GPU when one is visible to PyTorch, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

print(f"Running inference on device: {device}")
print(f"Configuration: {config}")
print(f"Sinkhorn iterations: {config['superglue']['sinkhorn_iterations']}")

## Load Video Input

Load and preview video files from the videos directory.

In [None]:
# Find available videos
video_extensions = {'.mp4', '.avi', '.mov', '.mkv', '.wmv', '.flv', '.webm'}

# Path.iterdir() yields entries in arbitrary (filesystem-dependent) order, so the
# listing is sorted: the default "first video" selection is then reproducible.
if INPUT_DIR.exists():
    available_videos = sorted(
        file_path
        for file_path in INPUT_DIR.iterdir()
        if file_path.is_file() and file_path.suffix.lower() in video_extensions
    )
else:
    available_videos = []

print(f"Found {len(available_videos)} video files:")
for i, video in enumerate(available_videos):
    print(f"  {i}: {video.name}")

if not available_videos:
    print("No videos found! Please add video files to the videos/ directory.")
    # Sorted so the message is stable between runs (set iteration order is not)
    print(f"Supported formats: {', '.join(sorted(video_extensions))}")
else:
    # Select first video by default
    selected_video = available_videos[0]
    print(f"\nSelected video: {selected_video.name}")

    # Get video info; the capture handle is released on every path, including
    # when reading or plotting raises.
    cap = cv2.VideoCapture(str(selected_video))
    try:
        if cap.isOpened():
            fps = cap.get(cv2.CAP_PROP_FPS)
            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            # Guard against containers that report an FPS of 0
            duration = frame_count / fps if fps > 0 else 0

            print(f"Video info:")
            print(f"  Resolution: {width}x{height}")
            print(f"  FPS: {fps:.2f}")
            print(f"  Frames: {frame_count}")
            print(f"  Duration: {duration:.2f} seconds")

            # Show first frame (OpenCV decodes to BGR; matplotlib expects RGB)
            ret, first_frame = cap.read()
            if ret:
                plt.figure(figsize=(10, 6))
                plt.imshow(cv2.cvtColor(first_frame, cv2.COLOR_BGR2RGB))
                plt.title(f"First frame from {selected_video.name}")
                plt.axis('off')
                plt.show()
        else:
            print(f"Error: Could not open video {selected_video}")
    finally:
        cap.release()

## Initialize SuperGlue Matcher

Initialize the SuperGlue matching system with SuperPoint feature detector.

In [None]:
if available_videos:
    # Initialize SuperGlue matching pipeline in eval mode on the selected device
    print("Initializing SuperGlue matcher...")
    matching = Matching(config).eval().to(device)
    # SuperPoint outputs that are cached for the anchor frame (suffixed with '0' below)
    keys = ['keypoints', 'scores', 'descriptors']

    print(f"SuperGlue model loaded successfully!")
    print(f"Model configuration:")
    print(f"  SuperPoint config: {matching.superpoint.config}")
    print(f"  SuperGlue config: {matching.superglue.config}")
    print(f"  Sinkhorn iterations: {matching.superglue.config['sinkhorn_iterations']}")

    # Initialize video streamer.
    # NOTE(review): the upstream SuperGluePretrainedNetwork signature is
    #   VideoStreamer(basedir, resize, skip, image_glob, max_length=1000000)
    # i.e. `image_glob` is required (it is only consulted for image directories),
    # and `max_length` must be a number. Confirm against the vendored copy.
    vs = VideoStreamer(
        str(selected_video),
        RESIZE,
        SKIP_FRAMES,
        image_glob=['*.png', '*.jpg', '*.jpeg'],
        # MAX_FRAMES may be None ("all frames"); map that to the upstream default
        max_length=MAX_FRAMES if MAX_FRAMES is not None else 1000000,
    )

    # Process first frame to initialize
    frame, ret = vs.next_frame()
    if ret:
        print(f"Video streamer initialized. Frame shape: {frame.shape}")

        # Convert to tensor and extract features; these become the '...0' entries
        # that later cells match every subsequent frame against.
        frame_tensor = frame2tensor(frame, device)
        last_data = matching.superpoint({'image': frame_tensor})
        last_data = {k+'0': last_data[k] for k in keys}
        last_data['image0'] = frame_tensor
        last_frame = frame
        last_frame_id = 0

        print(f"First frame processed:")
        print(f"  Keypoints detected: {len(last_data['keypoints0'][0])}")
        print(f"  Descriptor shape: {last_data['descriptors0'].shape}")

        # Visualize first frame keypoints
        kpts = last_data['keypoints0'][0].cpu().numpy()
        plt.figure(figsize=(12, 8))
        # Frames from the streamer are expected to be single-channel grayscale;
        # a BGR->RGB conversion raises on such input, so only convert colour frames.
        if frame.ndim == 2:
            plt.imshow(frame, cmap='gray')
        else:
            plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        plt.scatter(kpts[:, 0], kpts[:, 1], c='red', s=2, alpha=0.7)
        plt.title(f"SuperPoint keypoints on first frame ({len(kpts)} points)")
        plt.axis('off')
        plt.show()
    else:
        print("Error: Could not read first frame from video")
else:
    print("Skipping SuperGlue initialization - no videos available")

## SuperGlue Feature Matching Demo

Process the video frame by frame and match each frame against the first (anchor) frame using SuperGlue, whose assignment step is solved with the Sinkhorn algorithm.

In [None]:
if available_videos and 'last_data' in locals():
    print("Starting SuperGlue feature matching demo...")

    # Initialize statistics tracking: every list gets one entry per processed pair
    timer = AverageTimer()
    match_stats = {
        'frame_pairs': [],
        'keypoints_0': [],
        'keypoints_1': [],
        'matches_count': [],
        'match_scores': [],
        'processing_times': [],   # seconds
        'sinkhorn_iterations': []
    }

    processed_frames = 0
    visualization_frames = []

    print(f"Processing up to {MAX_FRAMES if MAX_FRAMES else 'all'} frames...")

    # MAX_FRAMES of None (or 0) means "no limit"
    frame_limit = MAX_FRAMES or float('inf')
    # CUDA kernels run asynchronously; without an explicit synchronize the
    # wall-clock timer would stop before the forward pass has actually finished.
    sync_cuda = str(device).startswith('cuda')

    while processed_frames < frame_limit:
        frame, ret = vs.next_frame()
        if not ret:
            print("End of video reached")
            break

        timer.update('data')
        current_frame_id = vs.i - 1

        # Convert frame to tensor and run SuperGlue.
        # `last_data` is never replaced in this loop, so every frame is matched
        # against the same anchor frame prepared by the initialization cell.
        frame_tensor = frame2tensor(frame, device)

        if sync_cuda:
            torch.cuda.synchronize()
        start_time = time.perf_counter()
        pred = matching({**last_data, 'image1': frame_tensor})
        if sync_cuda:
            torch.cuda.synchronize()
        processing_time = time.perf_counter() - start_time

        # Extract results (batch index 0) as NumPy arrays
        kpts0 = last_data['keypoints0'][0].cpu().numpy()
        kpts1 = pred['keypoints1'][0].cpu().numpy()
        matches = pred['matches0'][0].cpu().numpy()
        confidence = pred['matching_scores0'][0].cpu().numpy()

        timer.update('forward')

        # Calculate matches: entries > -1 are treated as valid indices into kpts1
        valid = matches > -1
        mkpts0 = kpts0[valid]
        mkpts1 = kpts1[matches[valid]]
        match_confidence = confidence[valid]

        # Store statistics
        match_stats['frame_pairs'].append((last_frame_id, current_frame_id))
        match_stats['keypoints_0'].append(len(kpts0))
        match_stats['keypoints_1'].append(len(kpts1))
        match_stats['matches_count'].append(len(mkpts0))
        match_stats['match_scores'].append(match_confidence.mean() if len(match_confidence) > 0 else 0)
        match_stats['processing_times'].append(processing_time)
        match_stats['sinkhorn_iterations'].append(config['superglue']['sinkhorn_iterations'])

        # Create visualization.
        # The plotting helper slices the colours as a 2-D array (color[:, :3]), so
        # the zero-match case must be an empty (0, 4) array rather than a plain list.
        if len(match_confidence) > 0:
            color = cm.jet(match_confidence)
        else:
            color = np.zeros((0, 4))
        text = [
            'SuperGlue Demo',
            f'Keypoints: {len(kpts0)}:{len(kpts1)}',
            f'Matches: {len(mkpts0)}'
        ]
        small_text = [
            f'Sinkhorn iterations: {config["superglue"]["sinkhorn_iterations"]}',
            f'Match threshold: {config["superglue"]["match_threshold"]}',
            f'Frame pair: {last_frame_id:03d}:{current_frame_id:03d}',
            f'Processing: {processing_time*1000:.1f}ms'
        ]

        out = make_matching_plot_fast(
            last_frame, frame, kpts0, kpts1, mkpts0, mkpts1, color, text,
            path=None, show_keypoints=True, small_text=small_text
        )

        # Keep every 10th frame in memory for the visualization cell
        if processed_frames % 10 == 0:
            visualization_frames.append((out, current_frame_id, len(mkpts0)))

        # Save output if requested
        if SAVE_MATCHES and processed_frames < 20:  # Save first 20 for demo
            output_file = OUTPUT_DIR / f"match_{last_frame_id:03d}_{current_frame_id:03d}.png"
            cv2.imwrite(str(output_file), out)

        timer.update('viz')

        # Progress update
        if processed_frames % 20 == 0:
            print(f"Processed {processed_frames} frames - Matches: {len(mkpts0)} - Time: {processing_time*1000:.1f}ms")

        processed_frames += 1

    vs.cleanup()
    print(f"\nCompleted processing {processed_frames} frames!")

else:
    print("Skipping demo - no video or initialization failed")

## Visualization Results

Display sample matching results and visualizations.

In [None]:
# Render up to four of the match visualizations collected by the matching loop,
# stacked vertically in one figure.
if 'visualization_frames' not in locals() or len(visualization_frames) == 0:
    print("No visualization frames available")
else:
    print(f"Displaying {len(visualization_frames)} sample matching results:")

    # Show first few matching results
    num_display = min(4, len(visualization_frames))

    # squeeze=False always returns a 2-D array of axes, so the single-row case
    # needs no special handling.
    fig, axes = plt.subplots(num_display, 1, figsize=(15, 5*num_display), squeeze=False)

    for ax, (out, frame_id, match_count) in zip(axes[:, 0], visualization_frames[:num_display]):
        # OpenCV images are BGR; matplotlib expects RGB
        ax.imshow(cv2.cvtColor(out, cv2.COLOR_BGR2RGB))
        ax.set_title(f"Frame {frame_id}: {match_count} matches")
        ax.axis('off')

    plt.tight_layout()
    plt.show()

## Performance Analysis

Analyze SuperGlue performance and the impact of Sinkhorn iterations.

In [None]:
if 'match_stats' in locals() and len(match_stats['matches_count']) > 0:
    print("SuperGlue Performance Analysis")
    print("=" * 40)

    # Per-pair mean confidences, excluding pairs that produced no matches
    # (those were recorded as 0 by the matching loop).
    valid_scores = [s for s in match_stats['match_scores'] if s > 0]

    # Calculate statistics
    total_frames = len(match_stats['matches_count'])
    avg_keypoints_0 = np.mean(match_stats['keypoints_0'])
    avg_keypoints_1 = np.mean(match_stats['keypoints_1'])
    avg_matches = np.mean(match_stats['matches_count'])
    # np.mean([]) returns nan and emits a RuntimeWarning; report 0 instead when
    # no frame pair produced any match.
    avg_match_score = float(np.mean(valid_scores)) if valid_scores else 0.0
    avg_processing_time = np.mean(match_stats['processing_times'])  # seconds
    fps = 1.0 / avg_processing_time if avg_processing_time > 0 else 0

    print(f"Processed frames: {total_frames}")
    print(f"Average keypoints per frame: {avg_keypoints_0:.1f} / {avg_keypoints_1:.1f}")
    print(f"Average matches per frame: {avg_matches:.1f}")
    print(f"Average match score: {avg_match_score:.3f}")
    print(f"Average processing time: {avg_processing_time*1000:.1f}ms")
    print(f"Processing FPS: {fps:.1f}")
    print(f"Sinkhorn iterations used: {config['superglue']['sinkhorn_iterations']}")

    # Matching efficiency: matches divided by the keypoint count of both frames
    match_ratios = [m/(k0+k1) if (k0+k1) > 0 else 0
                    for m, k0, k1 in zip(match_stats['matches_count'],
                                         match_stats['keypoints_0'],
                                         match_stats['keypoints_1'])]
    avg_match_ratio = np.mean(match_ratios)
    print(f"Average match ratio: {avg_match_ratio:.3f} (matches / total keypoints)")

    # Create performance plots
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))

    # Matches over time
    axes[0, 0].plot(match_stats['matches_count'], 'b-', alpha=0.7)
    axes[0, 0].axhline(y=avg_matches, color='r', linestyle='--', label=f'Avg: {avg_matches:.1f}')
    axes[0, 0].set_title('Matches per Frame')
    axes[0, 0].set_xlabel('Frame')
    axes[0, 0].set_ylabel('Number of Matches')
    axes[0, 0].legend()
    axes[0, 0].grid(True, alpha=0.3)

    # Processing time over time (converted from seconds to milliseconds)
    processing_times_ms = [t*1000 for t in match_stats['processing_times']]
    axes[0, 1].plot(processing_times_ms, 'g-', alpha=0.7)
    axes[0, 1].axhline(y=avg_processing_time*1000, color='r', linestyle='--',
                       label=f'Avg: {avg_processing_time*1000:.1f}ms')
    axes[0, 1].set_title('Processing Time per Frame')
    axes[0, 1].set_xlabel('Frame')
    axes[0, 1].set_ylabel('Time (ms)')
    axes[0, 1].legend()
    axes[0, 1].grid(True, alpha=0.3)

    # Keypoints distribution
    axes[1, 0].hist(match_stats['keypoints_0'], bins=20, alpha=0.7, label='Frame 0', color='blue')
    axes[1, 0].hist(match_stats['keypoints_1'], bins=20, alpha=0.7, label='Frame 1', color='orange')
    axes[1, 0].set_title('Keypoints Distribution')
    axes[1, 0].set_xlabel('Number of Keypoints')
    axes[1, 0].set_ylabel('Frequency')
    axes[1, 0].legend()
    axes[1, 0].grid(True, alpha=0.3)

    # Match scores distribution
    axes[1, 1].set_title('Match Confidence Distribution')
    if valid_scores:
        axes[1, 1].hist(valid_scores, bins=20, alpha=0.7, color='purple')
        axes[1, 1].set_xlabel('Average Match Score')
        axes[1, 1].set_ylabel('Frequency')
        axes[1, 1].grid(True, alpha=0.3)
    else:
        # Make the empty panel self-explanatory instead of leaving blank axes
        axes[1, 1].text(0.5, 0.5, 'No matches found', ha='center', va='center',
                        transform=axes[1, 1].transAxes)
        axes[1, 1].axis('off')

    plt.tight_layout()
    plt.show()

else:
    print("No performance data available")

## Sinkhorn Algorithm Analysis

Analyze the impact of different Sinkhorn iteration counts on matching performance.

In [None]:
if available_videos and 'last_data' in locals():
    print("Analyzing Sinkhorn Algorithm Impact")
    print("=" * 40)

    # Test different Sinkhorn iteration counts
    sinkhorn_test_values = [5, 10, 20, 50, 100]
    sinkhorn_results = []

    # Re-open the video from the beginning so every run sees the same frame pair.
    # NOTE(review): upstream VideoStreamer requires `image_glob` (only consulted
    # for image directories) — confirm against the vendored copy.
    vs_test = VideoStreamer(
        str(selected_video),
        RESIZE,
        SKIP_FRAMES,
        image_glob=['*.png', '*.jpg', '*.jpeg'],
        max_length=10,
    )
    try:
        test_frame1, _ = vs_test.next_frame()
        test_frame2, _ = vs_test.next_frame()
    finally:
        # Release the streamer on every path, including when frames cannot be read
        vs_test.cleanup()

    if test_frame1 is not None and test_frame2 is not None:
        test_tensor1 = frame2tensor(test_frame1, device)
        test_tensor2 = frame2tensor(test_frame2, device)

        # CUDA work is asynchronous; synchronize around the timed region so the
        # measurement covers the whole forward pass.
        sync_cuda = str(device).startswith('cuda')

        print("Testing different Sinkhorn iteration counts...")

        for sinkhorn_iters in sinkhorn_test_values:
            print(f"Testing {sinkhorn_iters} iterations...")

            # Build a per-run config WITHOUT touching the global one.
            # dict.copy() is shallow: writing through the copy's 'superglue' entry
            # would overwrite the shared nested dict and leave the global config at
            # the last tested value. Rebuild the nested dict instead.
            test_config = {
                **config,
                'superglue': {
                    **config['superglue'],
                    'sinkhorn_iterations': sinkhorn_iters,
                },
            }

            # Initialize new matcher
            test_matching = Matching(test_config).eval().to(device)

            # Extract features for first frame
            test_data1 = test_matching.superpoint({'image': test_tensor1})
            test_data1 = {k+'0': test_data1[k] for k in keys}
            test_data1['image0'] = test_tensor1
            test_inputs = {**test_data1, 'image1': test_tensor2}

            # Untimed warm-up pass so one-off start-up costs (e.g. lazy CUDA
            # initialization) are not charged to whichever setting runs first.
            test_matching(test_inputs)

            # Measure performance
            if sync_cuda:
                torch.cuda.synchronize()
            start_time = time.perf_counter()
            pred = test_matching(test_inputs)
            if sync_cuda:
                torch.cuda.synchronize()
            processing_time = time.perf_counter() - start_time

            # Calculate results
            kpts0 = test_data1['keypoints0'][0].cpu().numpy()
            kpts1 = pred['keypoints1'][0].cpu().numpy()
            matches = pred['matches0'][0].cpu().numpy()
            confidence = pred['matching_scores0'][0].cpu().numpy()

            valid = matches > -1
            # Plain Python numbers keep the table formatting below type-independent
            num_matches = int(np.sum(valid))
            avg_confidence = float(confidence[valid].mean()) if num_matches > 0 else 0.0

            sinkhorn_results.append({
                'iterations': sinkhorn_iters,
                'matches': num_matches,
                'confidence': avg_confidence,
                'time_ms': processing_time * 1000,
                'keypoints_0': len(kpts0),
                'keypoints_1': len(kpts1)
            })

        # Display results
        print("\nSinkhorn Iteration Analysis Results:")
        print("Iterations | Matches | Avg Confidence | Time (ms)")
        print("-" * 50)
        for result in sinkhorn_results:
            print(f"{result['iterations']:9d} | {result['matches']:7d} | {result['confidence']:12.3f} | {result['time_ms']:8.1f}")

        # Plot comparison
        fig, axes = plt.subplots(1, 3, figsize=(18, 5))

        iterations = [r['iterations'] for r in sinkhorn_results]
        matches = [r['matches'] for r in sinkhorn_results]
        confidences = [r['confidence'] for r in sinkhorn_results]
        times = [r['time_ms'] for r in sinkhorn_results]

        # Matches vs iterations
        axes[0].plot(iterations, matches, 'bo-', linewidth=2, markersize=8)
        axes[0].set_xlabel('Sinkhorn Iterations')
        axes[0].set_ylabel('Number of Matches')
        axes[0].set_title('Matches vs Sinkhorn Iterations')
        axes[0].grid(True, alpha=0.3)

        # Confidence vs iterations
        axes[1].plot(iterations, confidences, 'ro-', linewidth=2, markersize=8)
        axes[1].set_xlabel('Sinkhorn Iterations')
        axes[1].set_ylabel('Average Match Confidence')
        axes[1].set_title('Confidence vs Sinkhorn Iterations')
        axes[1].grid(True, alpha=0.3)

        # Processing time vs iterations
        axes[2].plot(iterations, times, 'go-', linewidth=2, markersize=8)
        axes[2].set_xlabel('Sinkhorn Iterations')
        axes[2].set_ylabel('Processing Time (ms)')
        axes[2].set_title('Processing Time vs Sinkhorn Iterations')
        axes[2].grid(True, alpha=0.3)

        plt.tight_layout()
        plt.show()

        # Recommendations
        print("\nRecommendations:")
        best_match_idx = np.argmax(matches)
        best_confidence_idx = np.argmax(confidences)
        fastest_idx = np.argmin(times)

        print(f"• Best match count: {iterations[best_match_idx]} iterations ({matches[best_match_idx]} matches)")
        print(f"• Best confidence: {iterations[best_confidence_idx]} iterations ({confidences[best_confidence_idx]:.3f} confidence)")
        print(f"• Fastest processing: {iterations[fastest_idx]} iterations ({times[fastest_idx]:.1f}ms)")

        # Find sweet spot (balance of matches and speed).
        # Both series are scaled to [0, 1]; the floor of 1 on the match count
        # avoids a division by zero when no run produced any match.
        normalized_matches = np.array(matches) / max(max(matches), 1)
        normalized_times = np.array(times) / max(times)
        efficiency_score = normalized_matches - 0.3 * normalized_times  # Weight matches more than speed
        best_efficiency_idx = np.argmax(efficiency_score)

        print(f"• Recommended (efficiency): {iterations[best_efficiency_idx]} iterations")
        print(f"  ({matches[best_efficiency_idx]} matches, {confidences[best_efficiency_idx]:.3f} confidence, {times[best_efficiency_idx]:.1f}ms)")

    else:
        print("Could not load test frames for Sinkhorn analysis")
else:
    print("Sinkhorn analysis not available - no video or initialization failed")

## Summary and Conclusions

Summary of the SuperGlue demo results and key findings about the Sinkhorn algorithm.

In [None]:
# Final report: recap the run statistics (when the matching loop produced any),
# the general findings, and where the outputs were written.
print("SuperGlue Demo Summary")
print("=" * 50)

if 'match_stats' in locals() and len(match_stats['matches_count']) > 0:
    # processing_times holds SECONDS per frame pair, so frames-per-second is
    # 1 / mean (not 1000 / mean, which would overstate the speed 1000x).
    mean_seconds = np.mean(match_stats['processing_times'])
    summary_fps = 1.0 / mean_seconds if mean_seconds > 0 else 0

    print(f"✓ Successfully processed {len(match_stats['matches_count'])} frame pairs")
    print(f"✓ Average processing speed: {summary_fps:.1f} FPS")
    print(f"✓ Average matches per frame: {np.mean(match_stats['matches_count']):.1f}")
    print(f"✓ Using {config['superglue']['sinkhorn_iterations']} Sinkhorn iterations")

    if SAVE_MATCHES:
        print(f"✓ Saved matching visualizations to: {OUTPUT_DIR}")
else:
    print("⚠ No processing statistics available")

print("\nKey Findings:")
print("• SuperGlue combines SuperPoint feature detection with learned matching")
print("• The Sinkhorn algorithm enables optimal transport-based feature assignment")
print("• More Sinkhorn iterations generally improve match quality but increase computation time")
print("• Typical sweet spot: 10-20 iterations for real-time applications, 50-100 for accuracy")

# Only present when the Sinkhorn analysis cell has been run
if 'sinkhorn_results' in locals():
    print("\nSinkhorn Algorithm Insights:")
    print(f"• Tested iteration counts: {[r['iterations'] for r in sinkhorn_results]}")
    print(f"• Performance varies with scene content and motion")
    print(f"• Consider adjusting based on application requirements (speed vs accuracy)")

print("\nOutput Files:")
print(f"📁 Output directory: {OUTPUT_DIR}")
if SAVE_MATCHES:
    print("📄 Matching visualizations: match_XXX_YYY.png")
print("📊 Performance statistics available in notebook variables")

print("\nNext Steps:")
print("• Experiment with different SuperGlue weights (indoor/outdoor)")
print("• Try different keypoint thresholds and NMS settings")
print("• Test with your own video content")
print("• Integrate with SLAM pipeline for pose estimation")

print("=" * 50)
print("SuperGlue Demo Complete!")