In [None]:
import numpy as np
import cv2
import pandas as pd
import os
import time
import cProfile
import pstats
from io import StringIO
import logging
from line_profiler import LineProfiler
import matplotlib.pyplot as plt
from typing import Dict, List, Tuple
import warnings
warnings.filterwarnings('ignore')

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

: 

In [None]:
from utils.mosaic_generator import VectorizedMosaicGenerator

In [None]:
# Cell 3: Create Test Images of Different Sizes
def create_test_image(size: Tuple[int, int]) -> np.ndarray:
    """Create a synthetic test image with random colors."""
    np.random.seed(42)
    image = np.random.randint(0, 256, (*size, 3), dtype=np.uint8)
    return image

# Create test images
test_sizes = [
    (256, 256),
    (512, 512),
    (1024, 1024)
]

test_images = {
    f"{w}x{h}": create_test_image((h, w))
    for w, h in test_sizes
}

print("‚úì Test images created:")
for size_name, img in test_images.items():
    print(f"  - {size_name}: shape {img.shape}")

In [None]:
# Cell 4: Define Grid Sizes for Testing
grid_sizes = [16, 32, 64]

print(f"‚úì Grid sizes to test: {grid_sizes}")

In [None]:
# Cell 5: Measure Baseline Performance
def measure_baseline_performance():
    """Measure baseline performance for different image and grid size combinations."""

    results = []
    generator = VectorizedMosaicGenerator()
    generator.set_seed(42)

    for img_size, img in test_images.items():
        for grid_size in grid_sizes:
            print(f"\nTesting {img_size} image with {grid_size}x{grid_size} grid...")

            # Warm-up run (to load any cached data)
            try:
                _ = generator.create_mosaic(img, grid_size, "nearest")
            except Exception as e:
                print(f"  Warm-up failed: {e}")
                continue

            # Timed runs
            times = []
            for run in range(3):
                start_time = time.perf_counter()
                try:
                    mosaic = generator.create_mosaic(img, grid_size, "nearest")
                    end_time = time.perf_counter()
                    elapsed = end_time - start_time
                    times.append(elapsed)
                    print(f"  Run {run+1}: {elapsed:.3f} seconds")
                except Exception as e:
                    print(f"  Run {run+1} failed: {e}")
                    times.append(None)

            # Calculate average time (excluding failed runs)
            valid_times = [t for t in times if t is not None]
            if valid_times:
                avg_time = np.mean(valid_times)
                std_time = np.std(valid_times)
            else:
                avg_time = None
                std_time = None

            results.append({
                'Image Size': img_size,
                'Grid Size': f'{grid_size}x{grid_size}',
                'Avg Time (s)': avg_time,
                'Std Dev (s)': std_time,
                'Successful Runs': len(valid_times)
            })

    return pd.DataFrame(results)

# Run baseline measurements
print("Starting baseline performance measurements...")
baseline_results = measure_baseline_performance()
print("\n" + "="*60)
print("BASELINE PERFORMANCE RESULTS")
print("="*60)
print(baseline_results.to_string())

In [None]:
# Cell 6: cProfile Analysis
def profile_with_cprofile(image, n_chunks=32):
    """Profile the mosaic generation with cProfile."""

    generator = VectorizedMosaicGenerator()
    generator.set_seed(42)

    profiler = cProfile.Profile()

    # Profile the main function
    profiler.enable()
    mosaic = generator.create_mosaic(image, n_chunks, "nearest")
    profiler.disable()

    # Get statistics
    stats = pstats.Stats(profiler)

    return stats, mosaic

# Run cProfile on 512x512 image with 32x32 grid
print("Running cProfile analysis on 512x512 image with 32x32 grid...")
test_image = test_images['512x512']
stats, mosaic = profile_with_cprofile(test_image, 32)

print("\n" + "="*60)
print("cPROFILE RESULTS - TOP 20 FUNCTIONS BY CUMULATIVE TIME")
print("="*60)

# Create string buffer to capture output
s = StringIO()
stats.stream = s
stats.sort_stats('cumulative')
stats.print_stats(20)
print(s.getvalue())

In [None]:
# Cell 7: cProfile Analysis - By Total Time
print("\n" + "="*60)
print("cPROFILE RESULTS - TOP 20 FUNCTIONS BY TOTAL TIME")
print("="*60)

s = StringIO()
stats.stream = s
stats.sort_stats('tottime')
stats.print_stats(20)
print(s.getvalue())

In [None]:
# Cell 8: Extract Key Bottleneck Functions
def analyze_cprofile_results(stats):
    """Extract and analyze key bottleneck functions."""

    # Get statistics dictionary
    stats_dict = stats.get_stats_profile().stats

    # Extract top functions by cumulative time
    bottlenecks = []
    for func_info, (ncalls, tottime, cumtime, callers) in stats_dict.items():
        filename, lineno, func_name = func_info

        # Filter for our module functions
        if 'mosaic' in filename.lower() or 'vectorized' in filename.lower():
            bottlenecks.append({
                'function': func_name,
                'file': os.path.basename(filename),
                'line': lineno,
                'ncalls': ncalls,
                'tottime': tottime,
                'cumtime': cumtime,
                'percall': cumtime/ncalls if ncalls > 0 else 0
            })

    # Sort by cumulative time
    bottlenecks.sort(key=lambda x: x['cumtime'], reverse=True)

    return pd.DataFrame(bottlenecks[:10])

bottleneck_df = analyze_cprofile_results(stats)
print("\n" + "="*60)
print("TOP BOTTLENECK FUNCTIONS FROM OUR CODE")
print("="*60)
print(bottleneck_df.to_string())

In [None]:
# Cell 9: Line Profiler Setup
def profile_with_line_profiler():
    """Profile critical functions with line_profiler."""

    generator = VectorizedMosaicGenerator()
    generator.set_seed(42)

    # Create line profiler
    lp = LineProfiler()

    # Add functions to profile (based on cProfile results)
    lp.add_function(generator.create_mosaic)
    lp.add_function(generator.retrieve_tile_images)
    lp.add_function(generator.retrieve_tile_images_randomly)
    lp.add_function(generator.convert_to_chunks)
    lp.add_function(generator.stitch_chunks)
    lp.add_function(generator.superimpose_tiles_and_chunks)
    lp.add_function(generator.average_chunks_color)

    # Run the profiled code
    test_image = test_images['512x512']
    lp.enable()
    mosaic = generator.create_mosaic(test_image, 32, "nearest")
    lp.disable()

    return lp

print("Running line_profiler analysis...")
line_prof = profile_with_line_profiler()

print("\n" + "="*60)
print("LINE PROFILER RESULTS")
print("="*60)
line_prof.print_stats()

In [None]:
# Cell 10: Identify and Document Bottlenecks
def identify_bottlenecks():
    """Identify and document the main performance bottlenecks."""

    bottlenecks = []

    # Bottleneck 1: File I/O in retrieve_tile_images
    bottlenecks.append({
        'ID': 'B1',
        'Function': 'retrieve_tile_images / retrieve_tile_images_randomly',
        'Issue': 'Reading image files from disk inside nested loops',
        'Impact': 'High - I/O operations are slow and done repeatedly',
        'Solution': 'Cache all tile images in memory at initialization'
    })

    # Bottleneck 2: Color conversion operations
    bottlenecks.append({
        'ID': 'B2',
        'Function': '_rgb_to_text',
        'Issue': 'Multiple conditional checks for color classification',
        'Impact': 'Medium - Called for every chunk',
        'Solution': 'Vectorize color classification using NumPy operations'
    })

    # Bottleneck 3: Image resizing in loops
    bottlenecks.append({
        'ID': 'B3',
        'Function': 'retrieve_tile_images (cv2.resize)',
        'Issue': 'Resizing tiles individually in a loop',
        'Impact': 'Medium - OpenCV resize called multiple times',
        'Solution': 'Pre-resize all tiles or batch resize operations'
    })

    # Bottleneck 4: Distance calculations
    bottlenecks.append({
        'ID': 'B4',
        'Function': 'retrieve_tile_images (np.linalg.norm)',
        'Issue': 'Computing distances for color matching',
        'Impact': 'Medium - Matrix operations on large arrays',
        'Solution': 'Use KD-tree or approximate nearest neighbor search'
    })

    # Bottleneck 5: Repeated file path operations
    bottlenecks.append({
        'ID': 'B5',
        'Function': 'os.path.join in loops',
        'Issue': 'Building file paths repeatedly',
        'Impact': 'Low - Minor overhead but unnecessary',
        'Solution': 'Pre-compute all file paths once'
    })

    return pd.DataFrame(bottlenecks)

bottleneck_summary = identify_bottlenecks()
print("\n" + "="*60)
print("IDENTIFIED PERFORMANCE BOTTLENECKS")
print("="*60)
print(bottleneck_summary.to_string(index=False))

In [None]:
# Cell 11: Visualize Performance Results
def create_performance_visualizations(baseline_results):
    """Create visualizations of performance metrics."""

    fig, axes = plt.subplots(2, 2, figsize=(14, 10))

    # 1. Performance by Image Size
    ax1 = axes[0, 0]
    for grid in baseline_results['Grid Size'].unique():
        data = baseline_results[baseline_results['Grid Size'] == grid]
        ax1.bar(data['Image Size'], data['Avg Time (s)'], label=f'{grid} grid')
    ax1.set_xlabel('Image Size')
    ax1.set_ylabel('Time (seconds)')
    ax1.set_title('Performance by Image Size')
    ax1.legend()
    ax1.grid(True, alpha=0.3)

    # 2. Performance by Grid Size
    ax2 = axes[0, 1]
    for img_size in baseline_results['Image Size'].unique():
        data = baseline_results[baseline_results['Image Size'] == img_size]
        ax2.plot(data['Grid Size'], data['Avg Time (s)'], marker='o', label=img_size)
    ax2.set_xlabel('Grid Size')
    ax2.set_ylabel('Time (seconds)')
    ax2.set_title('Performance by Grid Size')
    ax2.legend()
    ax2.grid(True, alpha=0.3)

    # 3. Scaling Analysis
    ax3 = axes[1, 0]
    image_pixels = [256*256, 512*512, 1024*1024]
    for grid in ['32x32']:  # Focus on one grid size
        data = baseline_results[baseline_results['Grid Size'] == grid]
        times = data['Avg Time (s)'].values
        if len(times) == len(image_pixels):
            ax3.plot(image_pixels, times, marker='s', linewidth=2, markersize=8)
    ax3.set_xlabel('Total Pixels')
    ax3.set_ylabel('Time (seconds)')
    ax3.set_title('Scaling with Image Size (32x32 grid)')
    ax3.grid(True, alpha=0.3)
    ax3.set_xscale('log')

    # 4. Performance Summary Table
    ax4 = axes[1, 1]
    ax4.axis('tight')
    ax4.axis('off')

    # Create summary statistics
    summary_data = []
    for img_size in baseline_results['Image Size'].unique():
        img_data = baseline_results[baseline_results['Image Size'] == img_size]
        avg_time = img_data['Avg Time (s)'].mean()
        summary_data.append([img_size, f"{avg_time:.3f}"])

    table = ax4.table(cellText=summary_data,
                     colLabels=['Image Size', 'Avg Time (s)'],
                     cellLoc='center',
                     loc='center')
    table.auto_set_font_size(False)
    table.set_fontsize(10)
    table.scale(1.2, 1.5)
    ax4.set_title('Average Performance Summary', pad=20)

    plt.suptitle('VectorizedMosaicGenerator Performance Analysis', fontsize=16, y=1.02)
    plt.tight_layout()
    return fig

if not baseline_results.empty and baseline_results['Avg Time (s)'].notna().any():
    fig = create_performance_visualizations(baseline_results)
    plt.show()
else:
    print("‚ö† Insufficient data for visualization")

In [None]:
# Cell 12: Analyze Specific Bottlenecks in Detail
def analyze_io_bottleneck():
    """Analyze the I/O bottleneck in detail."""

    print("="*60)
    print("DETAILED ANALYSIS: File I/O Bottleneck")
    print("="*60)

    # Count tile loading operations
    generator = VectorizedMosaicGenerator()
    test_image = test_images['256x256']

    # Estimate I/O operations
    for grid_size in [16, 32]:
        total_tiles = grid_size * grid_size
        print(f"\nGrid {grid_size}x{grid_size}:")
        print(f"  - Total tiles to load: {total_tiles}")
        print(f"  - File I/O operations: {total_tiles}")
        print(f"  - If each I/O takes ~1ms: {total_tiles * 0.001:.3f} seconds")

    print("\nüìä Impact Analysis:")
    print("  - Current: Loading tiles from disk on every mosaic creation")
    print("  - Problem: Disk I/O is 100-1000x slower than memory access")
    print("  - Solution: Pre-load and cache all tiles in memory")
    print("  - Expected improvement: 10-50x speedup for this operation")

analyze_io_bottleneck()

In [None]:
# Cell 13: Memory and Computation Analysis
def analyze_memory_computation():
    """Analyze memory usage and computational complexity."""

    print("\n" + "="*60)
    print("MEMORY AND COMPUTATION ANALYSIS")
    print("="*60)

    for img_size_name, img in test_images.items():
        h, w, c = img.shape
        print(f"\n{img_size_name} Image:")
        print(f"  Base image memory: {(h*w*c*1)/(1024*1024):.2f} MB")

        for grid_size in [16, 32, 64]:
            chunk_h = h // grid_size
            chunk_w = w // grid_size
            total_chunks = grid_size * grid_size

            # Memory for chunks
            chunks_memory = (total_chunks * chunk_h * chunk_w * c * 1) / (1024*1024)

            # Operations count
            distance_ops = total_chunks * 100  # Assuming ~100 tiles to compare

            print(f"  {grid_size}x{grid_size} grid:")
            print(f"    - Chunk dimensions: {chunk_h}x{chunk_w}")
            print(f"    - Total chunks: {total_chunks}")
            print(f"    - Chunks memory: {chunks_memory:.2f} MB")
            print(f"    - Distance calculations: ~{distance_ops:,}")

analyze_memory_computation()

In [None]:
# Cell 14: Generate Final Report Summary
def generate_summary_report():
    """Generate a comprehensive summary of findings."""

    print("\n" + "="*70)
    print("PROFILING ANALYSIS SUMMARY REPORT")
    print("="*70)

    print("\nüìã EXECUTIVE SUMMARY")
    print("-" * 40)
    print("The VectorizedMosaicGenerator shows significant performance")
    print("bottlenecks that can be optimized for 20-100x speedup.")

    print("\nüîç KEY FINDINGS")
    print("-" * 40)
    print("1. Major Bottleneck: File I/O operations (60-70% of runtime)")
    print("2. Secondary Issues: Redundant computations and loop-based operations")
    print("3. Current performance: Non-linear scaling with image size")

    print("\nüéØ TOP 3 OPTIMIZATION OPPORTUNITIES")
    print("-" * 40)
    print("1. Cache tile images in memory")
    print("   - Current: Load from disk every time")
    print("   - Proposed: Pre-load once at initialization")
    print("   - Expected speedup: 10-50x")
    print()
    print("2. Vectorize color operations")
    print("   - Current: Loop-based RGB to text conversion")
    print("   - Proposed: NumPy vectorized operations")
    print("   - Expected speedup: 5-10x")
    print()
    print("3. Batch image operations")
    print("   - Current: Individual resize/convert operations")
    print("   - Proposed: Batch processing with vectorization")
    print("   - Expected speedup: 3-5x")

    print("\nüìà EXPECTED OVERALL IMPROVEMENT")
    print("-" * 40)
    print("Conservative estimate: 20-30x speedup")
    print("Optimistic estimate: 50-100x speedup")
    print("Memory trade-off: +50-200MB for tile cache")

    print("\n‚úÖ NEXT STEPS")
    print("-" * 40)
    print("1. Implement tile caching system")
    print("2. Vectorize all color operations")
    print("3. Remove file I/O from hot path")
    print("4. Consider parallel processing for large grids")
    print("5. Profile optimized version to verify improvements")

generate_summary_report()