## Setup: Install Dependencies

In [None]:
# Install required packages if needed
import sys
import subprocess

packages = [
    'rasterio',
    'numpy',
    'matplotlib',
    'opencv-python',
    'scipy',
    'utm',
    'pillow',
    'scikit-learn'
]

for package in packages:
    try:
        # Handle special case: scikit-learn imports as sklearn
        import_name = 'sklearn' if package == 'scikit-learn' else package.replace('-', '_')
        __import__(import_name)
    except ImportError:
        print(f"Installing {package}...")
        subprocess.check_call([sys.executable, '-m', 'pip', 'install', package])

print("✓ Dependencies installed")

Installing opencv-python...
Defaulting to user installation because normal site-packages is not writeable
Installing pillow...
Defaulting to user installation because normal site-packages is not writeable
✓ Dependencies installed


## Step 1: Setup - Imports and Paths

In [2]:
import numpy as np
import rasterio
from rasterio.transform import xy
from rasterio.warp import transform as transform_coords
from pathlib import Path
import cv2
import matplotlib.pyplot as plt
from scipy import ndimage
import json
import csv
import utm
from typing import Dict, List, Tuple, Optional
import warnings
warnings.filterwarnings('ignore')

# Setup paths
data_dir = Path("/Users/mauriciohessflores/Documents/Code/Data/New Westminster Oct _25")
output_dir = Path("outputs")

# Input files
basemap_path = data_dir / "Michael_RTK_orthos" / "TestsiteNewWest_Spexigeo_RTK.tiff"
gcp_csv_path = data_dir / "25-3288-CONTROL-NAD83-UTM10N-EGM2008.csv"
ortho_no_gcps_path = output_dir / "orthomosaics" / "orthomosaic_no_gcps.tif"
ortho_with_gcps_path = output_dir / "orthomosaics" / "orthomosaic_with_gcps.tif"

# Output directories
gcp_matching_dir = output_dir / "gcp_matching"
gcp_matching_dir.mkdir(parents=True, exist_ok=True)

patches_dir = gcp_matching_dir / "patches"
patches_dir.mkdir(exist_ok=True)

matches_dir = gcp_matching_dir / "matches"
matches_dir.mkdir(exist_ok=True)

registered_dir = gcp_matching_dir / "registered"
registered_dir.mkdir(exist_ok=True)

print(f"✓ Output directory: {gcp_matching_dir}")
print(f"  - Patches: {patches_dir}")
print(f"  - Matches: {matches_dir}")
print(f"  - Registered: {registered_dir}")

✓ Output directory: outputs/gcp_matching
  - Patches: outputs/gcp_matching/patches
  - Matches: outputs/gcp_matching/matches
  - Registered: outputs/gcp_matching/registered


In [3]:
# Load GCPs from UTM CSV file
def load_gcps_from_csv(csv_path: Path) -> List[Dict]:
    """
    Load GCPs from UTM CSV file and convert to WGS84.
    
    Expected format: ID, Northing, Easting, Elevation, Name
    """
    import csv
    
    gcps = []
    
    with open(csv_path, 'r') as f:
        # Try to detect if there's a header
        first_line = f.readline().strip()
        f.seek(0)  # Reset to beginning
        
        # Check if first line is numeric (no header)
        try:
            float(first_line.split(',')[0])
            has_header = False
        except (ValueError, IndexError):
            has_header = True
        
        reader = csv.reader(f) if not has_header else csv.DictReader(f)
        
        for row_idx, row in enumerate(reader):
            try:
                if has_header:
                    # Try to find columns
                    northing = float(row.get('Northing', row.get('northing', row.get('Y', 0))))
                    easting = float(row.get('Easting', row.get('easting', row.get('X', 0))))
                    gcp_id = row.get('Name', row.get('name', row.get('ID', row.get('id', f"GCP_{row_idx+1}"))))
                else:
                    # Positional format: ID, Northing, Easting, Elevation, Name
                    if len(row) < 3:
                        continue
                    gcp_id = row[0].strip() if row[0] else f"GCP_{row_idx+1}"
                    northing = float(row[1])  # Column 1 = Northing
                    easting = float(row[2])   # Column 2 = Easting
                
                # Convert UTM to WGS84 (UTM Zone 10N)
                lat, lon = utm.to_latlon(easting, northing, 10, 'N')
                
                gcps.append({
                    'id': gcp_id,
                    'lat': lat,
                    'lon': lon,
                    'x_utm': easting,
                    'y_utm': northing
                })
            except (ValueError, IndexError) as e:
                print(f"⚠️  Skipping row {row_idx+1}: {e}")
                continue
    
    return gcps

## Step 2: Load GCPs from CSV and Convert to WGS84

In [4]:
# Load GCPs - try existing WGS84 files first, otherwise parse CSV
import json

# Check for existing WGS84 GCP files from ground control comparison
gcps_wgs84_geojson = output_dir / "ground_control_comparison" / "gcps_wgs84.geojson"
gcps_wgs84_csv = output_dir / "ground_control_comparison" / "gcps_wgs84.csv"

gcps = []

# Try GeoJSON first (preferred)
if gcps_wgs84_geojson.exists():
    print(f"Loading GCPs from GeoJSON: {gcps_wgs84_geojson}")
    with open(gcps_wgs84_geojson, 'r') as f:
        geojson_data = json.load(f)
    
    if 'features' in geojson_data:
        for feature in geojson_data['features']:
            props = feature.get('properties', {})
            geom = feature.get('geometry', {})
            
            if geom.get('type') == 'Point':
                coords = geom.get('coordinates', [])
                if len(coords) >= 2:
                    lon, lat = coords[0], coords[1]
                    
                    # Get UTM coordinates from properties or convert
                    x_utm = props.get('x_utm')
                    y_utm = props.get('y_utm')
                    
                    if x_utm is None or y_utm is None:
                        # Convert WGS84 to UTM
                        import utm
                        x_utm, y_utm, zone_num, zone_letter = utm.from_latlon(lat, lon)
                    
                    gcps.append({
                        'id': props.get('id', props.get('name', f"GCP_{len(gcps)+1}")),
                        'lat': lat,
                        'lon': lon,
                        'x_utm': float(x_utm),
                        'y_utm': float(y_utm)
                    })
    
    print(f"✓ Loaded {len(gcps)} GCPs from GeoJSON")

# Try CSV if GeoJSON not found
elif gcps_wgs84_csv.exists():
    print(f"Loading GCPs from WGS84 CSV: {gcps_wgs84_csv}")
    with open(gcps_wgs84_csv, 'r') as f:
        reader = csv.DictReader(f)
        for row in reader:
            try:
                lat = float(row.get('lat', row.get('latitude', 0)))
                lon = float(row.get('lon', row.get('longitude', row.get('lon', 0))))
                gcp_id = row.get('id', row.get('name', row.get('label', f"GCP_{len(gcps)+1}")))
                
                # Get UTM from row or convert
                x_utm_str = row.get('x_utm', '')
                y_utm_str = row.get('y_utm', '')
                
                if x_utm_str and y_utm_str:
                    x_utm = float(x_utm_str)
                    y_utm = float(y_utm_str)
                else:
                    import utm
                    x_utm, y_utm, zone_num, zone_letter = utm.from_latlon(lat, lon)
                
                gcps.append({
                    'id': gcp_id,
                    'lat': lat,
                    'lon': lon,
                    'x_utm': x_utm,
                    'y_utm': y_utm
                })
            except (ValueError, KeyError) as e:
                print(f"⚠️  Skipping row: {e}")
                continue
    
    print(f"✓ Loaded {len(gcps)} GCPs from WGS84 CSV")

# Fallback to parsing UTM CSV
if len(gcps) == 0:
    print(f"\nNo WGS84 GCP files found, parsing UTM CSV: {gcp_csv_path}")
    gcps = load_gcps_from_csv(gcp_csv_path)
    print(f"✓ Loaded {len(gcps)} GCPs from UTM CSV")

if len(gcps) > 0:
    print(f"\nFirst few GCPs:")
    for gcp in gcps[:3]:
        print(f"  {gcp['id']}: UTM=({gcp['x_utm']:.2f}, {gcp['y_utm']:.2f}), WGS84=({gcp['lat']:.6f}, {gcp['lon']:.6f})")
else:
    print(f"⚠️  No GCPs loaded!")
    print(f"   Checked:")
    print(f"   - {gcps_wgs84_geojson}")
    print(f"   - {gcps_wgs84_csv}")
    print(f"   - {gcp_csv_path}")


Loading GCPs from GeoJSON: outputs/ground_control_comparison/gcps_wgs84.geojson
✓ Loaded 23 GCPs from GeoJSON

First few GCPs:
  GCP1: UTM=(506914.12, 5450945.53), WGS84=(49.211262, -122.905068)
  GCP2: UTM=(506657.79, 5450730.01), WGS84=(49.209326, -122.908591)
  GCP3: UTM=(506577.77, 5450480.01), WGS84=(49.207078, -122.909694)


## Step 3: Convert GCPs to Pixel Coordinates in Basemap

In [5]:
# Convert GCPs (UTM) to pixel coordinates in basemap
def gcp_to_pixel_coords_from_utm(gcp_x_utm: float, gcp_y_utm: float, raster_path: Path) -> Optional[Tuple[int, int]]:
    """
    Convert GCP UTM coordinates to pixel coordinates in raster.
    
    Args:
        gcp_x_utm: UTM Easting (EPSG:32610)
        gcp_y_utm: UTM Northing (EPSG:32610)
        raster_path: Path to raster file
    
    Returns:
        (col, row) or None if outside bounds.
    """
    with rasterio.open(raster_path) as src:
        # Raster should be in EPSG:32610 (UTM Zone 10N)
        if src.crs != 'EPSG:32610':
            # Transform UTM to raster CRS if needed
            x, y = transform_coords(
                'EPSG:32610',
                src.crs,
                [gcp_x_utm],
                [gcp_y_utm]
            )
            utm_x, utm_y = x[0], y[0]
        else:
            utm_x, utm_y = gcp_x_utm, gcp_y_utm
        
        # Convert to pixel coordinates
        row, col = rasterio.transform.rowcol(src.transform, utm_x, utm_y)
        
        # Check if within bounds
        if 0 <= row < src.height and 0 <= col < src.width:
            return (col, row)
        else:
            return None

# Get basemap CRS and transform
with rasterio.open(basemap_path) as basemap_src:
    basemap_crs = basemap_src.crs
    basemap_transform = basemap_src.transform
    basemap_width = basemap_src.width
    basemap_height = basemap_src.height
    basemap_bounds = basemap_src.bounds

print(f"Basemap CRS: {basemap_crs}")
print(f"Basemap dimensions: {basemap_width}x{basemap_height}")
print(f"Basemap bounds: {basemap_bounds}")
print(f"Basemap transform: {basemap_transform}")

# Convert all GCPs to pixel coordinates
gcp_pixel_coords = {}
for gcp in gcps:
    # Debug: show GCP UTM coordinates
    print(f"\nGCP {gcp['id']}: UTM=({gcp['x_utm']:.2f}, {gcp['y_utm']:.2f})")
    
    pixel_coords = gcp_to_pixel_coords_from_utm(gcp['x_utm'], gcp['y_utm'], basemap_path)
    if pixel_coords:
        gcp_pixel_coords[gcp['id']] = {
            'gcp': gcp,
            'pixel_col': pixel_coords[0],
            'pixel_row': pixel_coords[1],
            'utm_x': gcp.get('x_utm'),
            'utm_y': gcp.get('y_utm'),
        }
        print(f"  ✓ Found at pixel: col={pixel_coords[0]}, row={pixel_coords[1]}")
    else:
        # Debug: show why it's outside bounds
        with rasterio.open(basemap_path) as src:
            row, col = rasterio.transform.rowcol(src.transform, gcp['x_utm'], gcp['y_utm'])
            print(f"  ⚠️  Outside bounds: col={col}, row={row}")
            print(f"     Basemap: {src.width}x{src.height}")
            print(f"     Basemap bounds: {src.bounds}")
            # Check if coordinates are in bounds in UTM space
            in_x = src.bounds.left <= gcp['x_utm'] <= src.bounds.right
            in_y = src.bounds.bottom <= gcp['y_utm'] <= src.bounds.top
            print(f"     UTM X in bounds: {in_x} ({src.bounds.left:.2f} <= {gcp['x_utm']:.2f} <= {src.bounds.right:.2f})")
            print(f"     UTM Y in bounds: {in_y} ({src.bounds.bottom:.2f} <= {gcp['y_utm']:.2f} <= {src.bounds.top:.2f})")

print(f"\n✓ Found {len(gcp_pixel_coords)} GCPs within basemap bounds")
if len(gcp_pixel_coords) > 0:
    print(f"\nFirst few GCP pixel coordinates:")
    for gcp_id, coords in list(gcp_pixel_coords.items())[:3]:
        print(f"  {gcp_id}: col={coords['pixel_col']}, row={coords['pixel_row']}")


Basemap CRS: EPSG:32610
Basemap dimensions: 90129x90188
Basemap bounds: BoundingBox(left=506424.37839793676, bottom=5450017.622213458, right=507501.0951215451, top=5451095.043774429)
Basemap transform: | 0.01, 0.00, 506424.38|
| 0.00,-0.01, 5451095.04|
| 0.00, 0.00, 1.00|

GCP GCP1: UTM=(506914.12, 5450945.53)
  ✓ Found at pixel: col=40995, row=12515

GCP GCP2: UTM=(506657.79, 5450730.01)
  ✓ Found at pixel: col=19538, row=30556

GCP GCP3: UTM=(506577.77, 5450480.01)
  ✓ Found at pixel: col=12840, row=51482

GCP GCP4: UTM=(506765.03, 5450578.63)
  ✓ Found at pixel: col=28515, row=43227

GCP GCP5: UTM=(506926.13, 5450715.96)
  ✓ Found at pixel: col=42000, row=31732

GCP GCP6: UTM=(507071.92, 5450992.66)
  ✓ Found at pixel: col=54203, row=8570

GCP GCP7: UTM=(507089.40, 5450794.23)
  ✓ Found at pixel: col=55667, row=25180

GCP GCP8: UTM=(507315.01, 5450717.85)
  ✓ Found at pixel: col=74551, row=31574

GCP GCP9: UTM=(507252.65, 5450536.03)
  ✓ Found at pixel: col=69332, row=46793

GCP GCP

## Step 4: Extract Patches from Basemap

In [6]:
# Extract patches from basemap centered on GCPs
def extract_patch(raster_path: Path, center_col: int, center_row: int, patch_size: int) -> Optional[np.ndarray]:
    """
    Extract a patch from raster centered on given pixel coordinates.
    
    Args:
        raster_path: Path to raster file
        center_col: Center column (x)
        center_row: Center row (y)
        patch_size: Size of patch (must be odd, e.g., 29, 39, 49)
    
    Returns:
        Patch array (H, W, C) or None if out of bounds
    """
    half_size = patch_size // 2
    
    with rasterio.open(raster_path) as src:
        # Calculate bounds
        col_start = max(0, center_col - half_size)
        col_end = min(src.width, center_col + half_size + 1)
        row_start = max(0, center_row - half_size)
        row_end = min(src.height, center_row + half_size + 1)
        
        # Check if patch would be out of bounds
        if col_end - col_start < patch_size or row_end - row_start < patch_size:
            return None
        
        # Read patch
        patch = src.read(
            window=rasterio.windows.Window(col_start, row_start, col_end - col_start, row_end - row_start)
        )
        
        # Transpose to (H, W, C) format
        if len(patch.shape) == 3:
            patch = np.transpose(patch, (1, 2, 0))
        
        # If single band, convert to 3-channel grayscale
        if len(patch.shape) == 2:
            patch = np.stack([patch, patch, patch], axis=-1)
        
        return patch

def create_gcp_patch_visualization(
    patch: np.ndarray,
    patch_size: int,
    output_path: Path
):
    """
    Create visualization of patch with GCP location marked.
    """
    import matplotlib.pyplot as plt
    import matplotlib.patches as mpatches
    
    # Normalize patch if needed
    if patch.dtype != np.uint8:
        patch_min = patch.min()
        patch_max = patch.max()
        if patch_max > patch_min:
            patch = ((patch - patch_min) / (patch_max - patch_min) * 255).astype(np.uint8)
        else:
            patch = np.zeros_like(patch, dtype=np.uint8)
    
    # Create figure
    fig, ax = plt.subplots(1, 1, figsize=(10, 10))
    ax.imshow(patch)
    
    # Mark center (GCP location) with bright red dot
    center_row, center_col = patch.shape[0] // 2, patch.shape[1] // 2
    ax.plot(center_col, center_row, 'ro', markersize=15, markeredgewidth=2, markeredgecolor='white')
    
    # Draw yellow square around patch boundary
    rect = mpatches.Rectangle(
        (0, 0), patch.shape[1], patch.shape[0],
        linewidth=3, edgecolor='yellow', facecolor='none'
    )
    ax.add_patch(rect)
    
    ax.set_title(f'Matched Patch ({patch_size}x{patch_size})', fontsize=14, fontweight='bold')
    ax.axis('off')
    
    plt.tight_layout()
    plt.savefig(output_path, dpi=150, bbox_inches='tight')
    plt.close()

# Extract patches for different patch sizes
patch_sizes = [49, 59, 79, 99, 119]  # Larger patches for better matching
basemap_patches = {}

for patch_size in patch_sizes:
    basemap_patches[patch_size] = {}
    
    for gcp_id, coords in gcp_pixel_coords.items():
        patch = extract_patch(
            basemap_path,
            coords['pixel_col'],
            coords['pixel_row'],
            patch_size
        )
        
        if patch is not None:
            basemap_patches[patch_size][gcp_id] = patch
            
            # Save patch as image for visualization
            patch_path = patches_dir / f"basemap_{gcp_id}_{patch_size}x{patch_size}.png"
            plt.imsave(patch_path, patch.astype(np.uint8))
    
    print(f"✓ Extracted {len(basemap_patches[patch_size])} patches of size {patch_size}x{patch_size}")

print(f"\n✓ Patch extraction complete!")

✓ Extracted 23 patches of size 49x49
✓ Extracted 23 patches of size 59x59
✓ Extracted 23 patches of size 79x79
✓ Extracted 23 patches of size 99x99
✓ Extracted 23 patches of size 119x119

✓ Patch extraction complete!


## Step 5: Reproject Orthomosaics to Match Basemap CRS

In [7]:
from rasterio.warp import calculate_default_transform, reproject, Resampling, transform_bounds
from rasterio.transform import from_bounds
from rasterio.enums import Resampling as RasterioResampling
from affine import Affine

# Reproject orthos to match basemap CRS and resolution
def reproject_ortho_to_basemap(ortho_path: Path, basemap_path: Path, output_path: Path) -> Path:
    """
    Reproject orthomosaic to match basemap CRS and bounds.
    Uses manual transform construction to avoid CPLE_AppDefinedError.
    """
    if output_path.exists():
        print(f"  ✓ Already reprojected: {output_path}")
        return output_path
    
    with rasterio.open(basemap_path) as basemap_src:
        target_crs = basemap_src.crs
        target_bounds = basemap_src.bounds
        target_transform = basemap_src.transform
        target_width = basemap_src.width
        target_height = basemap_src.height
    
    with rasterio.open(ortho_path) as ortho_src:
        source_crs = ortho_src.crs
        source_bounds = ortho_src.bounds
        
        if source_crs == target_crs:
            print(f"  ✓ Already in target CRS")
            import shutil
            shutil.copy(ortho_path, output_path)
            return output_path
        
        # Transform source bounds to target CRS
        print(f"  Transforming source bounds to target CRS...")
        src_bounds_target_crs = transform_bounds(
            source_crs, target_crs,
            source_bounds.left, source_bounds.bottom,
            source_bounds.right, source_bounds.top
        )
        
        print(f"  Source bounds in target CRS: {src_bounds_target_crs}")
        
        # Get target pixel size
        target_pixel_size_x = abs(target_transform[0])
        target_pixel_size_y = abs(target_transform[4])
        
        # Use intersection of bounds
        output_left = max(src_bounds_target_crs[0], target_bounds.left)
        output_bottom = max(src_bounds_target_crs[1], target_bounds.bottom)
        output_right = min(src_bounds_target_crs[2], target_bounds.right)
        output_top = min(src_bounds_target_crs[3], target_bounds.top)
        
        print(f"  Output bounds (intersection): left={output_left:.2f}, bottom={output_bottom:.2f}, right={output_right:.2f}, top={output_top:.2f}")
        
        # Validate bounds
        if output_right <= output_left or output_top <= output_bottom:
            raise ValueError(f"Invalid output bounds: width={output_right-output_left}, height={output_top-output_bottom}")
        
        # Calculate dimensions using target pixel size
        width = int((output_right - output_left) / target_pixel_size_x)
        height = int((output_top - output_bottom) / target_pixel_size_y)
        
        # Validate dimensions
        if width <= 0 or height <= 0:
            raise ValueError(f"Invalid dimensions: width={width}, height={height}")
        
        # Create transform for output
        transform = Affine.translation(output_left, output_top) * Affine.scale(target_pixel_size_x, -target_pixel_size_y)
        
        print(f"  ✓ Transform calculated: {width}x{height} pixels")
        
        # Read source data
        source_data = ortho_src.read()
        source_count = ortho_src.count
        
        # Reproject
        reprojected_data = np.zeros((source_count, height, width), dtype=source_data.dtype)
        
        for band_idx in range(1, source_count + 1):
            reproject(
                source=rasterio.band(ortho_src, band_idx),
                destination=reprojected_data[band_idx - 1],
                src_transform=ortho_src.transform,
                src_crs=source_crs,
                dst_transform=transform,
                dst_crs=target_crs,
                resampling=Resampling.bilinear
            )
        
        # Save
        with rasterio.open(
            output_path,
            'w',
            driver='GTiff',
            height=height,
            width=width,
            count=source_count,
            dtype=reprojected_data.dtype,
            crs=target_crs,
            transform=transform,
            compress='jpeg',
            jpeg_quality=90,
            BIGTIFF='YES',
            tiled=True,
            blockxsize=512,
            blockysize=512
        ) as dst:
            dst.write(reprojected_data)
    
    return output_path

# Check for existing reprojected files from test_matching notebook
existing_reprojected_dir = output_dir / "test_matching" / "reprojected"
reprojected_dir = gcp_matching_dir / "reprojected"
reprojected_dir.mkdir(exist_ok=True)

ortho_paths = {
    'no_gcps': ortho_no_gcps_path,
    'with_gcps': ortho_with_gcps_path
}

reprojected_paths = {}
for ortho_name, ortho_path in ortho_paths.items():
    if not ortho_path.exists():
        print(f"⚠️  Ortho not found: {ortho_path}")
        continue
    
    # Check for existing reprojected file from test_matching
    existing_reprojected = existing_reprojected_dir / f"{ortho_name}_reprojected.tif"
    if existing_reprojected.exists():
        print(f"\nFound existing reprojected file: {existing_reprojected}")
        # Copy to our directory
        import shutil
        reprojected_path = reprojected_dir / f"{ortho_name}_reprojected.tif"
        if not reprojected_path.exists():
            shutil.copy(existing_reprojected, reprojected_path)
            print(f"  ✓ Copied to: {reprojected_path}")
        else:
            print(f"  ✓ Already exists: {reprojected_path}")
        reprojected_paths[ortho_name] = reprojected_path
        continue
    
    # Otherwise, reproject
    print(f"\nReprojecting {ortho_name}...")
    reprojected_path = reproject_ortho_to_basemap(
        ortho_path,
        basemap_path,
        reprojected_dir / f"{ortho_name}_reprojected.tif"
    )
    reprojected_paths[ortho_name] = reprojected_path

print(f"\n✓ Reprojection complete!")


Found existing reprojected file: outputs/test_matching/reprojected/no_gcps_reprojected.tif
  ✓ Already exists: outputs/gcp_matching/reprojected/no_gcps_reprojected.tif

Found existing reprojected file: outputs/test_matching/reprojected/with_gcps_reprojected.tif
  ✓ Already exists: outputs/gcp_matching/reprojected/with_gcps_reprojected.tif

✓ Reprojection complete!


## Step 6: Find GCP Patches in Orthomosaics Using Template Matching

In [8]:
# Find GCP patches in orthomosaics using template matching
def find_patch_in_ortho(
    template_patch: np.ndarray,
    ortho_path: Path,
    search_center_col: int,
    search_center_row: int,
    search_radius: int = 300  # Reduced for more precise matching
) -> Optional[Tuple[int, int, float]]:
    """
    Find template patch in orthomosaic using template matching.
    
    Returns:
        (col, row, confidence) or None if not found
    """
    # Convert template to grayscale if needed
    if len(template_patch.shape) == 3:
        template_gray = cv2.cvtColor(template_patch.astype(np.uint8), cv2.COLOR_RGB2GRAY)
    else:
        template_gray = template_patch.astype(np.uint8)
    
    with rasterio.open(ortho_path) as ortho_src:
        # Define search window
        search_col_start = max(0, search_center_col - search_radius)
        search_col_end = min(ortho_src.width, search_center_col + search_radius)
        search_row_start = max(0, search_center_row - search_radius)
        search_row_end = min(ortho_src.height, search_center_row + search_radius)
        
        # Read search region
        search_window = rasterio.windows.Window(
            search_col_start,
            search_row_start,
            search_col_end - search_col_start,
            search_row_end - search_row_start
        )
        
        search_region = ortho_src.read(window=search_window)
        
        # Convert to (H, W, C) and then grayscale
        if len(search_region.shape) == 3:
            search_region = np.transpose(search_region, (1, 2, 0))
            if search_region.shape[2] == 1:
                search_gray = search_region[:, :, 0]
            else:
                search_gray = cv2.cvtColor(search_region.astype(np.uint8), cv2.COLOR_RGB2GRAY)
        else:
            search_gray = search_region
        
        # Normalize to uint8
        if search_gray.dtype != np.uint8:
            search_min = search_gray.min()
            search_max = search_gray.max()
            if search_max > search_min:
                search_gray = ((search_gray - search_min) / (search_max - search_min) * 255).astype(np.uint8)
            else:
                search_gray = np.zeros_like(search_gray, dtype=np.uint8)
        
        # Template matching
        result = cv2.matchTemplate(search_gray, template_gray, cv2.TM_CCOEFF_NORMED)
        
        # Find best match
        min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result)
        
        # Convert back to global coordinates
        match_col = search_col_start + max_loc[0] + template_gray.shape[1] // 2
        match_row = search_row_start + max_loc[1] + template_gray.shape[0] // 2
        
        # Return if confidence is high enough
        if max_val > 0.5:  # Threshold for match confidence
            return (match_col, match_row, float(max_val))
        else:
            return None

# Find GCPs in each orthomosaic
# Create directory for matching patches
matching_patches_dir = gcp_matching_dir / "matching_patches"
matching_patches_dir.mkdir(exist_ok=True)
matching_patches_dir.mkdir(exist_ok=True)

matching_results = {}

for ortho_name, reprojected_path in reprojected_paths.items():

    matching_results[ortho_name] = {}
    
    # Get ortho transform for coordinate conversion
    with rasterio.open(reprojected_path) as ortho_src:
        ortho_transform = ortho_src.transform
    
    # Try different patch sizes
    best_patch_size = None
    best_matches = 0
    
    for patch_size in patch_sizes:
        matches_found = 0
        
        for gcp_id, coords in gcp_pixel_coords.items():
            if gcp_id not in basemap_patches[patch_size]:
                continue
            
            template = basemap_patches[patch_size][gcp_id]
            
            # Convert GCP UTM coordinates to pixel coordinates in THIS ortho
            gcp_utm_x = coords.get('utm_x') or coords.get('x_utm')
            gcp_utm_y = coords.get('utm_y') or coords.get('y_utm')
            
            if gcp_utm_x is not None and gcp_utm_y is not None:
                # Convert UTM to pixel coordinates using ortho's transform
                expected_col, expected_row = ~ortho_transform * (gcp_utm_x, gcp_utm_y)
                expected_col = int(expected_col)
                expected_row = int(expected_row)
            else:
                # Fallback: use pixel coordinates from basemap
                expected_col = coords['pixel_col']
                expected_row = coords['pixel_row']
            
            # Search for patch using multi-scale matching
            if 'find_patch_in_ortho_multiscale' in globals():
                match = find_patch_in_ortho_multiscale(
                    template,
                    reprojected_path,
                    expected_col,
                    expected_row,
                    search_radius=300
                )
            else:
                match = find_patch_in_ortho(
                    template,
                    reprojected_path,
                    expected_col,
                    expected_row,
                    search_radius=300
                )
            
            # Validate match quality
            if match and match[2] < 0.3:  # Confidence threshold
                match = None
            
            if match:
                match_col, match_row, confidence = match
                matches_found += 1
                
                if gcp_id not in matching_results[ortho_name]:
                    matching_results[ortho_name][gcp_id] = {}
                
                matching_results[ortho_name][gcp_id][patch_size] = {
                    'expected_col': expected_col,
                    'expected_row': expected_row,
                    'matched_col': match_col,
                    'matched_row': match_row,
                    'offset_col': match_col - expected_col,
                    'offset_row': match_row - expected_row,
                    'confidence': confidence
                }
        
        print(f"  Patch size {patch_size}x{patch_size}: {matches_found}/{len(gcp_pixel_coords)} matches")
        
        if matches_found > best_matches:
            best_matches = matches_found
            best_patch_size = patch_size
    
    print(f"\n  ✓ Best patch size: {best_patch_size}x{best_patch_size} ({best_matches} matches)")

    print(f"\n  ✓ Best patch size: {best_patch_size}x{best_patch_size} ({best_matches} matches)")

    # Create subdirectory for this ortho's matching patches
    ortho_patches_dir = matching_patches_dir / ortho_name
    ortho_patches_dir.mkdir(exist_ok=True)

    # Save matching patches for visual verification
    print(f"  Saving matching patches to {ortho_patches_dir}...")
    for gcp_id, match_data in matching_results[ortho_name].items():
        if best_patch_size in match_data:
            match = match_data[best_patch_size]
            
            # Extract patch from ortho at matched location
            matched_col = match['matched_col']
            matched_row = match['matched_row']
            
            # Extract patch (same size as template)
            patch = extract_patch(
                reprojected_path,
                matched_col,
                matched_row,
                best_patch_size
            )
            
            if patch is not None:
                # Normalize patch for saving
                if patch.dtype != np.uint8:
                    patch_min = patch.min()
                    patch_max = patch.max()
                    if patch_max > patch_min:
                        patch = ((patch - patch_min) / (patch_max - patch_min) * 255).astype(np.uint8)
                    else:
                        patch = np.zeros_like(patch, dtype=np.uint8)
                
                # Save matching patch
                match_patch_path = ortho_patches_dir / f"{gcp_id}_{best_patch_size}x{best_patch_size}_matched.png"
                plt.imsave(match_patch_path, patch)
                
                # Also create visualization with GCP location marked
                vis_patch_path = ortho_patches_dir / f"{gcp_id}_{best_patch_size}x{best_patch_size}_matched_vis.png"
                create_gcp_patch_visualization(patch, best_patch_size, vis_patch_path)
    
    print(f"  ✓ Saved {len([g for g in matching_results[ortho_name].keys() if best_patch_size in matching_results[ortho_name][g]])} matching patches")

print(f"\n✓ Patch matching complete!")

# Create comprehensive visualization
print(f"\nCreating visualization of GCP matches...")

def create_gcp_matching_visualization(
    basemap_path: Path,
    ortho_paths: Dict[str, Path],
    gcp_pixel_coords: Dict,
    matching_results: Dict,
    output_path: Path,
    max_dimension: int = 4000
):
    """
    Create visualization showing basemap with GCPs and orthos with matched patches.
    """
    # Load basemap
    with rasterio.open(basemap_path) as src:
        basemap_data = src.read()
        basemap_transform = src.transform
        
        # Convert to (H, W, C)
        if len(basemap_data.shape) == 3:
            basemap_img = np.transpose(basemap_data, (1, 2, 0))
            if basemap_img.shape[2] == 1:
                basemap_img = np.stack([basemap_img[:, :, 0]] * 3, axis=-1)
            elif basemap_img.shape[2] == 4:
                basemap_img = basemap_img[:, :, :3]  # Take RGB
        else:
            basemap_img = np.stack([basemap_data] * 3, axis=-1)
        
        # Normalize to uint8
        if basemap_img.dtype != np.uint8:
            basemap_min = basemap_img.min()
            basemap_max = basemap_img.max()
            if basemap_max > basemap_min:
                basemap_img = ((basemap_img - basemap_min) / (basemap_max - basemap_min) * 255).astype(np.uint8)
            else:
                basemap_img = np.zeros_like(basemap_img, dtype=np.uint8)
    
    # Downsample if too large
    h, w = basemap_img.shape[:2]
    if max(h, w) > max_dimension:
        scale = max_dimension / max(h, w)
        new_h, new_w = int(h * scale), int(w * scale)
        basemap_img = cv2.resize(basemap_img, (new_w, new_h), interpolation=cv2.INTER_AREA)
        scale_factor = scale
    else:
        scale_factor = 1.0
    
    # Load orthos and create panels
    num_orthos = len(ortho_paths)
    fig, axes = plt.subplots(1, num_orthos + 1, figsize=(8 * (num_orthos + 1), 8))
    
    # Basemap panel (left)
    ax = axes[0]
    basemap_display = basemap_img.copy()
    
    # Draw GCP positions on basemap
    for gcp_id, coords in gcp_pixel_coords.items():
        # Scale coordinates
        col = int(coords['pixel_col'] * scale_factor)
        row = int(coords['pixel_row'] * scale_factor)
        
        if 0 <= row < basemap_display.shape[0] and 0 <= col < basemap_display.shape[1]:
            # Draw red circle
            cv2.circle(basemap_display, (col, row), 10, (255, 0, 0), 3)
    
    ax.imshow(basemap_display)
    ax.set_title('Basemap with GCP Locations', fontsize=14, fontweight='bold')
    
    # Add GCP labels
    for gcp_id, coords in gcp_pixel_coords.items():
        col = int(coords['pixel_col'] * scale_factor)
        row = int(coords['pixel_row'] * scale_factor)
        if 0 <= row < basemap_display.shape[0] and 0 <= col < basemap_display.shape[1]:
            ax.text(col, row - 15, gcp_id, color='red', fontsize=8, fontweight='bold',
                   ha='center', va='bottom')
    
    ax.axis('off')
    
    # Ortho panels (right)
    for ortho_idx, (ortho_name, ortho_path) in enumerate(ortho_paths.items(), 1):
        ax = axes[ortho_idx]
        
        # Load ortho
        with rasterio.open(ortho_path) as src:
            ortho_data = src.read()
            
            # Convert to (H, W, C)
            if len(ortho_data.shape) == 3:
                ortho_img = np.transpose(ortho_data, (1, 2, 0))
                if ortho_img.shape[2] == 1:
                    ortho_img = np.stack([ortho_img[:, :, 0]] * 3, axis=-1)
                elif ortho_img.shape[2] == 4:
                    ortho_img = ortho_img[:, :, :3]
            else:
                ortho_img = np.stack([ortho_data] * 3, axis=-1)
            
            # Normalize
            if ortho_img.dtype != np.uint8:
                ortho_min = ortho_img.min()
                ortho_max = ortho_img.max()
                if ortho_max > ortho_min:
                    ortho_img = ((ortho_img - ortho_min) / (ortho_max - ortho_min) * 255).astype(np.uint8)
                else:
                    ortho_img = np.zeros_like(ortho_img, dtype=np.uint8)
        
        # Downsample if too large
        h, w = ortho_img.shape[:2]
        if max(h, w) > max_dimension:
            scale = max_dimension / max(h, w)
            new_h, new_w = int(h * scale), int(w * scale)
            ortho_img = cv2.resize(ortho_img, (new_w, new_h), interpolation=cv2.INTER_AREA)
            ortho_scale = scale
        else:
            ortho_scale = 1.0
        
        ortho_display = ortho_img.copy()
        
        # Draw matched patch centers
        if ortho_name in matching_results:
            for gcp_id, match_data in matching_results[ortho_name].items():
                # Get best patch size match
                best_patch_size = max(match_data.keys()) if match_data else None
                if best_patch_size:
                    match = match_data[best_patch_size]
                    matched_col = int(match['matched_col'] * ortho_scale)
                    matched_row = int(match['matched_row'] * ortho_scale)
                    
                    if 0 <= matched_row < ortho_display.shape[0] and 0 <= matched_col < ortho_display.shape[1]:
                        # Draw yellow circle
                        cv2.circle(ortho_display, (matched_col, matched_row), 10, (255, 255, 0), 3)
        
        ax.imshow(ortho_display)
        ax.set_title(f'{ortho_name.replace("_", " ").title()} with Matched Patches', fontsize=14, fontweight='bold')
        
        # Add labels
        if ortho_name in matching_results:
            for gcp_id, match_data in matching_results[ortho_name].items():
                best_patch_size = max(match_data.keys()) if match_data else None
                if best_patch_size:
                    match = match_data[best_patch_size]
                    matched_col = int(match['matched_col'] * ortho_scale)
                    matched_row = int(match['matched_row'] * ortho_scale)
                    if 0 <= matched_row < ortho_display.shape[0] and 0 <= matched_col < ortho_display.shape[1]:
                        ax.text(matched_col, matched_row - 15, gcp_id, color='yellow', fontsize=8, fontweight='bold',
                               ha='center', va='bottom')
        
        ax.axis('off')
    
    plt.tight_layout()
    plt.savefig(output_path, dpi=300, bbox_inches='tight', format='PNG')
    plt.close()
    
    print(f"✓ Visualization saved: {output_path}")

# Create visualization for each ortho
for ortho_name in reprojected_paths.keys():
    if ortho_name not in matching_results:
        continue
    
    vis_path = matches_dir / f"gcp_matching_visualization_{ortho_name}.png"

    # Check if visualization already exists
    if vis_path.exists():
        print(f"  ✓ Visualization already exists: {vis_path}")
        print(f"  Skipping visualization creation...")
        continue

    
    create_gcp_matching_visualization(
        basemap_path,
        {ortho_name: reprojected_paths[ortho_name]},
        gcp_pixel_coords,
        matching_results,
        vis_path,
        max_dimension=4000
    )


  Patch size 49x49: 21/23 matches
  Patch size 59x59: 21/23 matches
  Patch size 79x79: 19/23 matches
  Patch size 99x99: 18/23 matches
  Patch size 119x119: 18/23 matches

  ✓ Best patch size: 49x49 (21 matches)

  ✓ Best patch size: 49x49 (21 matches)
  Saving matching patches to outputs/gcp_matching/matching_patches/no_gcps...
  ✓ Saved 21 matching patches
  Patch size 49x49: 21/23 matches
  Patch size 59x59: 20/23 matches
  Patch size 79x79: 19/23 matches
  Patch size 99x99: 18/23 matches
  Patch size 119x119: 17/23 matches

  ✓ Best patch size: 49x49 (21 matches)

  ✓ Best patch size: 49x49 (21 matches)
  Saving matching patches to outputs/gcp_matching/matching_patches/with_gcps...
  ✓ Saved 21 matching patches

✓ Patch matching complete!

Creating visualization of GCP matches...
  ✓ Visualization already exists: outputs/gcp_matching/matches/gcp_matching_visualization_no_gcps.png
  Skipping visualization creation...
  ✓ Visualization already exists: outputs/gcp_matching/matches/gc

## Step 7: Compute 2D Shift or Affine Transformation

In [9]:
import numpy as np
from typing import Dict, List, Optional, Tuple
import cv2

def remove_outliers_ransac(src_points: np.ndarray, dst_points: np.ndarray, threshold: float = 50.0, min_samples: int = 3) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Remove outliers using RANSAC with proper model fitting.
    
    Returns:
        (inlier_src, inlier_dst, inlier_mask)
    """
    
    if len(src_points) < min_samples:
        mask = np.ones(len(src_points), dtype=bool)
        return src_points, dst_points, mask
    
    # Convert to numpy arrays if needed
    src_points = np.array(src_points, dtype=np.float32)
    dst_points = np.array(dst_points, dtype=np.float32)
    
    # Use RANSAC for X and Y separately, then combine
    # For 2D shift, we fit a simple translation model
    # Compute median shift as initial estimate
    shifts = dst_points - src_points
    median_shift = np.median(shifts, axis=0)
    
    # Compute distances from median shift
    expected_dst = src_points + median_shift
    distances = np.sqrt(np.sum((dst_points - expected_dst)**2, axis=1))
    
    # Use IQR method for outlier detection
    q1 = np.percentile(distances, 25)
    q3 = np.percentile(distances, 75)
    iqr = q3 - q1
    outlier_threshold = q3 + 2.5 * iqr  # More aggressive (was 1.5)
    
    # Also use absolute threshold (in pixels)
    absolute_threshold = max(threshold, 100.0)  # At least 100 pixels
    
    # Mark outliers
    inlier_mask = (distances <= outlier_threshold) & (distances <= absolute_threshold)
    
    # Ensure we have at least min_samples inliers
    if np.sum(inlier_mask) < min_samples:
        # Keep the min_samples points closest to the median
        sorted_indices = np.argsort(distances)
        inlier_mask = np.zeros(len(src_points), dtype=bool)
        inlier_mask[sorted_indices[:min_samples]] = True
    
    # Ensure inlier_mask is a proper boolean array
    inlier_mask = np.asarray(inlier_mask, dtype=bool)
    
    # Return filtered points
    return src_points[inlier_mask], dst_points[inlier_mask], inlier_mask

def compute_transformation(matches: Dict, transformation_type: str = 'shift', match_distances: Optional[List[float]] = None) -> Dict:
    """
    Compute transformation from GCP matches.
    
    Args:
        matches: Dictionary with GCP matches
        transformation_type: 'shift', 'affine', 'homography', or 'deformable'
        match_distances: Optional list of match distances for RANSAC weighting
    
    Returns:
        Dictionary with transformation parameters
    """
    # Collect source and destination points
    src_points = []
    dst_points = []
    
    for gcp_id, match_data in matches.items():
        # Use the best patch size match
        best_patch_size = max(match_data.keys())
        match = match_data[best_patch_size]
        
        src_points.append([match['expected_col'], match['expected_row']])
        dst_points.append([match['matched_col'], match['matched_row']])
    
    src_points = np.array(src_points, dtype=np.float32)
    dst_points = np.array(dst_points, dtype=np.float32)

    # Remove outliers using RANSAC with distance weighting
    src_points, dst_points, inlier_mask = remove_outliers_ransac(src_points, dst_points, threshold=100.0, min_samples=3)
    
    if len(src_points) < 3:
        return {'type': 'insufficient_points', 'error': 'Need at least 3 matches after outlier removal'}
    
    # Compute transformation based on type
    if transformation_type == 'shift':
        # Compute 2D shift (mean offset)
        offsets = dst_points - src_points
        shift_x = float(np.mean(offsets[:, 0]))
        shift_y = float(np.mean(offsets[:, 1]))
        
        # Compute RMSE
        errors = offsets - np.array([shift_x, shift_y])
        rmse = float(np.sqrt(np.mean(np.sum(errors**2, axis=1))))
        
        return {
            'type': 'shift',
            'shift_x': shift_x,
            'shift_y': shift_y,
            'rmse': rmse,
            'num_points': len(src_points)
        }
    
    elif transformation_type == 'affine':
        # Compute affine transformation using least squares (all points)
        if len(src_points) < 3:
            return {'type': 'insufficient_points', 'error': 'Need at least 3 points for affine'}
        
        # Build system: A * params = b
        A = np.zeros((2 * len(src_points), 6))
        b = np.zeros(2 * len(src_points))
        
        for k in range(len(src_points)):
            x, y = src_points[k]
            xp, yp = dst_points[k]
            A[2*k, :] = [x, y, 1, 0, 0, 0]
            b[2*k] = xp
            A[2*k+1, :] = [0, 0, 0, x, y, 1]
            b[2*k+1] = yp
        
        # Solve using least squares
        params, residuals, rank, s = np.linalg.lstsq(A, b, rcond=None)
        transform_matrix = params.reshape(2, 3)
        
        # Apply to all points to compute error
        ones = np.ones((len(src_points), 1))
        src_homogeneous = np.hstack([src_points, ones])
        transformed = (transform_matrix @ src_homogeneous.T).T
        
        errors = dst_points - transformed
        rmse = float(np.sqrt(np.mean(np.sum(errors**2, axis=1))))
        
        return {
            'type': 'affine',
            'matrix': transform_matrix.tolist(),
            'rmse': rmse,
            'num_points': len(src_points)
        }
    
    elif transformation_type == 'homography':
        # Compute homography transformation (8 parameters, requires at least 4 points)
        if len(src_points) < 4:
            return {'type': 'insufficient_points', 'error': 'Need at least 4 points for homography'}
        
        # Use cv2.findHomography with RANSAC
        try:
            homography_matrix, inlier_mask = cv2.findHomography(
                src_points.reshape(-1, 1, 2),
                dst_points.reshape(-1, 1, 2),
                method=cv2.RANSAC,
                ransacReprojThreshold=5.0,
                maxIters=2000,
                confidence=0.99
            )
            
            if homography_matrix is not None:
                inlier_src = src_points[inlier_mask.ravel() == 1]
                
                # Apply to all points to compute error
                ones = np.ones((len(src_points), 1))
                src_homogeneous = np.hstack([src_points, ones])
                transformed = (homography_matrix @ src_homogeneous.T).T
                transformed = transformed[:, :2] / transformed[:, 2:3]
                
                errors = dst_points - transformed
                rmse = float(np.sqrt(np.mean(np.sum(errors**2, axis=1))))
                
                return {
                    'type': 'homography',
                    'matrix': homography_matrix.tolist(),
                    'rmse': rmse,
                    'num_points': len(inlier_src),
                    'num_inliers': int(np.sum(inlier_mask))
                }
            else:
                return {'type': 'homography_failed', 'error': 'Homography computation failed'}
        except Exception as e:
            return {'type': 'homography_error', 'error': str(e)}
    
    elif transformation_type == 'deformable':
        # Compute deformable (curvilinear) transformation using thin-plate spline
        if len(src_points) < 3:
            return {'type': 'insufficient_points', 'error': 'Need at least 3 points for deformable transformation'}
        
        try:
            from scipy.interpolate import RBFInterpolator
            
            # Use RANSAC to select inliers based on distance
            if match_distances is not None and len(match_distances) == len(src_points):
                # Weight by inverse distance (lower distance = higher weight)
                weights = 1.0 / (np.array(match_distances) + 1e-6)
                weights = weights / weights.sum()
                # Select top 80% of points by weight (inliers)
                sorted_indices = np.argsort(weights)[::-1]
                num_inliers = max(3, int(0.8 * len(src_points)))
                inlier_indices = sorted_indices[:num_inliers]
                inlier_src = src_points[inlier_indices]
                inlier_dst = dst_points[inlier_indices]
            else:
                inlier_src = src_points
                inlier_dst = dst_points
            
            # Fit RBF interpolator (thin-plate spline)
            rbf = RBFInterpolator(inlier_src, inlier_dst, kernel='thin_plate_spline', smoothing=0.0)
            
            # Evaluate on all points to compute error
            transformed = rbf(src_points)
            errors = dst_points - transformed
            rmse = float(np.sqrt(np.mean(np.sum(errors**2, axis=1))))
            
            return {
                'type': 'deformable',
                'rmse': rmse,
                'num_points': len(inlier_src),
                'inlier_indices': inlier_indices.tolist() if 'inlier_indices' in locals() else None,
                'src_points': inlier_src.tolist() if 'inlier_src' in locals() else src_points.tolist(),
                'dst_points': inlier_dst.tolist() if 'inlier_dst' in locals() else dst_points.tolist()
            }
        except ImportError:
            return {'type': 'deformable_error', 'error': 'scipy.interpolate.RBFInterpolator not available'}
        except Exception as e:
            return {'type': 'deformable_error', 'error': str(e)}
    
    else:
        return {'type': 'unknown', 'error': f'Unknown transformation type: {transformation_type}'}

# Compute transformations for each ortho
print("=" * 60)
print("Step 7: Compute Transformations")
print("=" * 60)

# Check if matching_results is defined, load from file if not
try:
    _ = matching_results
    print("✓ matching_results found in memory")
except NameError:
    print("matching_results not in memory, attempting to load from file...")
    from pathlib import Path
    import json
    try:
        try:
            _ = output_dir
        except NameError:
            output_dir = Path("outputs")
        try:
            _ = gcp_matching_dir
        except NameError:
            gcp_matching_dir = output_dir / "gcp_matching"
        matches_dir = gcp_matching_dir / "matches"
        
        matching_results_file = matches_dir / "matching_results.json"
        if matching_results_file.exists():
            with open(matching_results_file, 'r') as f:
                matching_results = json.load(f)
            print(f"✓ Loaded matching_results from {matching_results_file}")
        else:
            raise FileNotFoundError(f"matching_results.json not found at {matching_results_file}")
    except Exception as e:
        print(f"❌ Could not load matching_results: {e}")
        print("Please run Step 6 (Patch Matching) first.")
        raise

# Compute transformations for each ortho
transformations = {}

for ortho_name in matching_results.keys():
    print(f"\nProcessing {ortho_name}...")
    
    # Get matches for this ortho
    matches = matching_results[ortho_name]
    
    if len(matches) < 3:
        print(f"  ⚠️  Insufficient matches ({len(matches)}) for {ortho_name}")
        transformations[ortho_name] = {'error': f'Insufficient matches: {len(matches)}'}
        continue
    
    # Extract match distances for RANSAC weighting
    match_distances = []
    for gcp_id, match_data in matches.items():
        best_patch_size = max(match_data.keys())
        match = match_data[best_patch_size]
        if 'distance_m' in match:
            match_distances.append(match['distance_m'])
    
    if len(match_distances) != len(matches):
        match_distances = None
    
    # Compute all transformation types
    transformation_results = {}
    
    for trans_type in ['shift', 'affine', 'homography', 'deformable']:
        try:
            result = compute_transformation(matches, trans_type, match_distances)
            if 'error' not in result:
                transformation_results[trans_type] = result
                print(f"  ✓ {trans_type}: RMSE = {result.get('rmse', 'N/A'):.2f} pixels")
            else:
                print(f"  ⚠️  {trans_type}: {result.get('error', 'Unknown error')}")
        except Exception as e:
            print(f"  ❌ {trans_type} failed: {e}")
    
    # Select top two transformations by RMSE
    if len(transformation_results) == 0:
        print(f"  ❌ No valid transformations for {ortho_name}")
        transformations[ortho_name] = {'error': 'No valid transformations'}
        continue
    
    # Sort by RMSE (lower is better)
    sorted_transforms = sorted(
        transformation_results.items(),
        key=lambda x: x[1].get('rmse', float('inf'))
    )
    
    # Primary (best) transformation
    primary_type, primary_trans = sorted_transforms[0]
    print(f"\n  Primary: {primary_type} (RMSE: {primary_trans.get('rmse', 'N/A'):.2f} pixels)")
    
    # Secondary (second best) transformation if available
    secondary_trans = None
    if len(sorted_transforms) > 1:
        secondary_type, secondary_trans = sorted_transforms[1]
        print(f"  Secondary: {secondary_type} (RMSE: {secondary_trans.get('rmse', 'N/A'):.2f} pixels)")
    
    # Store transformations
    transformations[ortho_name] = {
        'primary': primary_trans,
        'secondary': secondary_trans
    }

# Save transformations to file
try:
    _ = matches_dir
except NameError:
    from pathlib import Path
    try:
        _ = output_dir
    except NameError:
        output_dir = Path("outputs")
    try:
        _ = gcp_matching_dir
    except NameError:
        gcp_matching_dir = output_dir / "gcp_matching"
    matches_dir = gcp_matching_dir / "matches"
    matches_dir.mkdir(parents=True, exist_ok=True)

transformations_file = matches_dir / "transformations.json"

# Convert numpy types to native Python types for JSON serialization
def convert_to_native_types(obj):
    """Recursively convert numpy types to native Python types."""
    if isinstance(obj, np.integer):
        return int(obj)
    elif isinstance(obj, np.floating):
        return float(obj)
    elif isinstance(obj, np.ndarray):
        return obj.tolist()
    elif isinstance(obj, dict):
        return {key: convert_to_native_types(value) for key, value in obj.items()}
    elif isinstance(obj, list):
        return [convert_to_native_types(item) for item in obj]
    return obj

transformations_serializable = convert_to_native_types(transformations)

with open(transformations_file, 'w') as f:
    json.dump(transformations_serializable, f, indent=2)

print(f"\n✓ Transformations saved to {transformations_file}")
print(f"  Total orthos processed: {len(transformations)}")


Step 7: Compute Transformations
✓ matching_results found in memory

Processing no_gcps...
  ✓ shift: RMSE = 55.19 pixels
  ✓ affine: RMSE = 43.13 pixels
  ✓ homography: RMSE = 43.30 pixels
  ✓ deformable: RMSE = 0.00 pixels

  Primary: deformable (RMSE: 0.00 pixels)
  Secondary: affine (RMSE: 43.13 pixels)

Processing with_gcps...
  ✓ shift: RMSE = 46.51 pixels
  ✓ affine: RMSE = 19.70 pixels
  ✓ homography: RMSE = 17.30 pixels
  ✓ deformable: RMSE = 0.00 pixels

  Primary: deformable (RMSE: 0.00 pixels)
  Secondary: homography (RMSE: 17.30 pixels)

✓ Transformations saved to outputs/gcp_matching/matches/transformations.json
  Total orthos processed: 2


## Step 8: Apply Transformation and Register Orthomosaics

In [10]:
# Apply transformation to orthomosaic
def apply_transformation(
    ortho_path: Path,
    transformation: Dict,
    output_path: Path,
    basemap_path: Path
) -> Path:
    """
    Apply transformation to register orthomosaic to basemap.
    """
    # Import required modules at function level
    from scipy import ndimage
    import numpy as np
    
    with rasterio.open(basemap_path) as basemap_src:
        target_width = basemap_src.width
        target_height = basemap_src.height
        target_transform = basemap_src.transform
        target_crs = basemap_src.crs
    
    with rasterio.open(ortho_path) as ortho_src:
        source_data = ortho_src.read()
        source_count = ortho_src.count
        source_height = ortho_src.height
        source_width = ortho_src.width
        
        # Tile size for processing (used by all transformation types)
        tile_size = 2048  # Process 2048x2048 pixel tiles
        
        # Initialize output array
        registered_data = np.zeros((source_count, target_height, target_width), dtype=source_data.dtype)
        
        # Apply transformation based on type
        if transformation['type'] == 'shift':
            shift_x = transformation['shift_x']
            shift_y = transformation['shift_y']
            
            print(f"  Processing {source_count} band(s) in tiles of {tile_size}x{tile_size}...")
            
            for band_idx in range(source_count):
                print(f"    Band {band_idx + 1}/{source_count}...")
                
                # Process in tiles
                for tile_y in range(0, target_height, tile_size):
                    for tile_x in range(0, target_width, tile_size):
                        tile_h = min(tile_size, target_height - tile_y)
                        tile_w = min(tile_size, target_width - tile_x)
                        
                        # Create coordinate grid for this tile
                        tgt_y_tile, tgt_x_tile = np.mgrid[
                            tile_y:tile_y+tile_h,
                            tile_x:tile_x+tile_w
                        ].astype(np.float32)
                        
                        # Apply inverse shift to get source coordinates
                        src_x_tile = tgt_x_tile - shift_x
                        src_y_tile = tgt_y_tile - shift_y
                        
                        # Clamp to source bounds
                        src_x_tile = np.clip(src_x_tile, 0, source_width - 1)
                        src_y_tile = np.clip(src_y_tile, 0, source_height - 1)
                        
                        # Sample from source
                        tile_data = ndimage.map_coordinates(
                            source_data[band_idx],
                            [src_y_tile, src_x_tile],
                            order=1,
                            mode='constant',
                            cval=0
                        )
                        
                        registered_data[band_idx, tile_y:tile_y+tile_h, tile_x:tile_x+tile_w] = tile_data
                        
                        # Progress update
                        progress = 100.0 * (tile_y * target_width + tile_x) / (target_height * target_width)
                        if int(progress) % 10 == 0 and tile_x == 0:
                            print(f"      Progress: {progress:.1f}%")
        
        elif transformation['type'] == 'affine':
            transform_matrix = np.array(transformation['matrix'], dtype=np.float32)
            
            # Extract transformation components
            matrix_2x2 = transform_matrix[:2, :2]
            offset = transform_matrix[:2, 2]
            
            print(f"  Processing {source_count} band(s) in tiles of {tile_size}x{tile_size}...")
            
            for band_idx in range(source_count):
                print(f"    Band {band_idx + 1}/{source_count}...")
                
                # Process in tiles
                for tile_y in range(0, target_height, tile_size):
                    for tile_x in range(0, target_width, tile_size):
                        tile_h = min(tile_size, target_height - tile_y)
                        tile_w = min(tile_size, target_width - tile_x)
                        
                        # Create coordinate grid for this tile
                        tgt_y_tile, tgt_x_tile = np.mgrid[
                            tile_y:tile_y+tile_h,
                            tile_x:tile_x+tile_w
                        ].astype(np.float32)
                        
                        # Apply inverse affine to get source coordinates
                        inv_matrix = np.linalg.inv(matrix_2x2)
                        coords = np.stack([tgt_x_tile.ravel() - offset[0], tgt_y_tile.ravel() - offset[1]], axis=1)
                        src_coords = (inv_matrix @ coords.T).T
                        
                        src_x_tile = src_coords[:, 0].reshape(tile_h, tile_w)
                        src_y_tile = src_coords[:, 1].reshape(tile_h, tile_w)
                        
                        # Clamp to source bounds
                        src_x_tile = np.clip(src_x_tile, 0, source_width - 1)
                        src_y_tile = np.clip(src_y_tile, 0, source_height - 1)
                        
                        # Sample from source
                        tile_data = ndimage.map_coordinates(
                            source_data[band_idx],
                            [src_y_tile, src_x_tile],
                            order=1,
                            mode='constant',
                            cval=0
                        )
                        
                        registered_data[band_idx, tile_y:tile_y+tile_h, tile_x:tile_x+tile_w] = tile_data
                        
                        # Progress update
                        progress = 100.0 * (tile_y * target_width + tile_x) / (target_height * target_width)
                        if int(progress) % 10 == 0 and tile_x == 0:
                            print(f"      Progress: {progress:.1f}%")
        
        elif transformation['type'] == 'homography':
            homography_matrix = np.array(transformation['matrix'], dtype=np.float32)
            
            print(f"  Processing {source_count} band(s) in tiles of {tile_size}x{tile_size}...")
            
            for band_idx in range(source_count):
                print(f"    Band {band_idx + 1}/{source_count}...")
                
                # Process in tiles
                for tile_y in range(0, target_height, tile_size):
                    for tile_x in range(0, target_width, tile_size):
                        tile_h = min(tile_size, target_height - tile_y)
                        tile_w = min(tile_size, target_width - tile_x)
                        
                        # Create coordinate grid for this tile
                        tgt_y_tile, tgt_x_tile = np.mgrid[
                            tile_y:tile_y+tile_h,
                            tile_x:tile_x+tile_w
                        ].astype(np.float32)
                        
                        # Apply inverse homography to get source coordinates
                        inv_homography = np.linalg.inv(homography_matrix)
                        
                        # Convert to homogeneous coordinates
                        tgt_coords_hom = np.stack([
                            tgt_x_tile.ravel(),
                            tgt_y_tile.ravel(),
                            np.ones(tgt_x_tile.size)
                        ], axis=0)
                        
                        src_coords_hom = inv_homography @ tgt_coords_hom
                        src_coords_hom = src_coords_hom / src_coords_hom[2, :]  # Normalize
                        
                        src_x_tile = src_coords_hom[0, :].reshape(tile_h, tile_w)
                        src_y_tile = src_coords_hom[1, :].reshape(tile_h, tile_w)
                        
                        # Clamp to source bounds
                        src_x_tile = np.clip(src_x_tile, 0, source_width - 1)
                        src_y_tile = np.clip(src_y_tile, 0, source_height - 1)
                        
                        # Sample from source
                        tile_data = ndimage.map_coordinates(
                            source_data[band_idx],
                            [src_y_tile, src_x_tile],
                            order=1,
                            mode='constant',
                            cval=0
                        )
                        
                        registered_data[band_idx, tile_y:tile_y+tile_h, tile_x:tile_x+tile_w] = tile_data
                        
                        # Progress update
                        progress = 100.0 * (tile_y * target_width + tile_x) / (target_height * target_width)
                        if int(progress) % 10 == 0 and tile_x == 0:
                            print(f"      Progress: {progress:.1f}%")
        
        elif transformation['type'] == 'deformable':
            from scipy.interpolate import RBFInterpolator
            
            # Recreate RBF interpolator from stored points
            src_points = np.array(transformation.get('src_points', []), dtype=np.float32)
            dst_points = np.array(transformation.get('dst_points', []), dtype=np.float32)
            
            if len(src_points) < 3:
                raise ValueError('Insufficient points for deformable transformation')
            
            # Fit RBF interpolator (reverse: target -> source)
            rbf = RBFInterpolator(dst_points, src_points, kernel='thin_plate_spline', smoothing=0.0)
            
            print(f"  Processing {source_count} band(s) in tiles of {tile_size}x{tile_size}...")
            
            for band_idx in range(source_count):
                print(f"    Band {band_idx + 1}/{source_count}...")
                
                # Process in tiles
                for tile_y in range(0, target_height, tile_size):
                    for tile_x in range(0, target_width, tile_size):
                        tile_h = min(tile_size, target_height - tile_y)
                        tile_w = min(tile_size, target_width - tile_x)
                        
                        # Create coordinate grid for this tile
                        tgt_y_tile, tgt_x_tile = np.mgrid[
                            tile_y:tile_y+tile_h,
                            tile_x:tile_x+tile_w
                        ].astype(np.float32)
                        
                        # Get target pixel coordinates
                        tgt_coords = np.stack([tgt_x_tile.ravel(), tgt_y_tile.ravel()], axis=1)
                        
                        # Apply RBF to get source coordinates
                        src_coords = rbf(tgt_coords)
                        
                        src_x_tile = src_coords[:, 0].reshape(tile_h, tile_w)
                        src_y_tile = src_coords[:, 1].reshape(tile_h, tile_w)
                        
                        # Clamp to source bounds
                        src_x_tile = np.clip(src_x_tile, 0, source_width - 1)
                        src_y_tile = np.clip(src_y_tile, 0, source_height - 1)
                        
                        # Sample from source
                        tile_data = ndimage.map_coordinates(
                            source_data[band_idx],
                            [src_y_tile, src_x_tile],
                            order=1,
                            mode='constant',
                            cval=0
                        )
                        
                        registered_data[band_idx, tile_y:tile_y+tile_h, tile_x:tile_x+tile_w] = tile_data
                        
                        # Progress update
                        progress = 100.0 * (tile_y * target_width + tile_x) / (target_height * target_width)
                        if int(progress) % 10 == 0 and tile_x == 0:
                            print(f"      Progress: {progress:.1f}%")
        
        else:
            raise ValueError(f"Unknown transformation type: {transformation.get('type', 'unknown')}")
        
        # Save registered orthomosaic
        with rasterio.open(
            output_path,
            'w',
            driver='GTiff',
            height=target_height,
            width=target_width,
            count=source_count,
            dtype=registered_data.dtype,
            crs=target_crs,
            transform=target_transform,
            compress='jpeg',
            jpeg_quality=90,
            BIGTIFF='YES',
            tiled=True,
            blockxsize=512,
            blockysize=512
        ) as dst:
            dst.write(registered_data)
        
        return output_path


In [None]:
# Load transformations from Step 7
try:
    _ = transformations
except NameError:
    # Load from file
    try:
        _ = matches_dir
    except NameError:
        from pathlib import Path
        try:
            _ = output_dir
        except NameError:
            output_dir = Path("outputs")
        try:
            _ = gcp_matching_dir
        except NameError:
            gcp_matching_dir = output_dir / "gcp_matching"
        matches_dir = gcp_matching_dir / "matches"
    
    transformations_file = matches_dir / "transformations.json"
    if transformations_file.exists():
        import json
        with open(transformations_file, 'r') as f:
            transformations = json.load(f)
        print(f"✓ Loaded transformations from {transformations_file}")
    else:
        print(f"⚠️  Transformations file not found: {transformations_file}")
        print("  Please run Step 7 first.")
        transformations = {}

# Load reprojected paths
try:
    _ = reprojected_paths
except NameError:
    # Try to load from Step 5
    try:
        _ = gcp_matching_dir
    except NameError:
        from pathlib import Path
        try:
            _ = output_dir
        except NameError:
            output_dir = Path("outputs")
        gcp_matching_dir = output_dir / "gcp_matching"
    
    reprojected_dir = gcp_matching_dir / "reprojected"
    reprojected_paths = {}
    for ortho_name in ['no_gcps', 'with_gcps']:
        reprojected_path = reprojected_dir / f"{ortho_name}_reprojected.tif"
        if reprojected_path.exists():
            reprojected_paths[ortho_name] = reprojected_path
    
    if len(reprojected_paths) == 0:
        print("⚠️  No reprojected orthos found. Please run Step 5 first.")

# Ensure registered_dir exists
try:
    _ = registered_dir
except NameError:
    try:
        _ = gcp_matching_dir
    except NameError:
        from pathlib import Path
        try:
            _ = output_dir
        except NameError:
            output_dir = Path("outputs")
        gcp_matching_dir = output_dir / "gcp_matching"
    registered_dir = gcp_matching_dir / "registered"
    registered_dir.mkdir(parents=True, exist_ok=True)

# Ensure basemap_path exists
try:
    _ = basemap_path
except NameError:
    from pathlib import Path
    data_dir = Path("/Users/mauriciohessflores/Documents/Code/Data/New Westminster Oct _25")
    basemap_path = data_dir / "Michael_RTK_orthos" / "TestsiteNewWest_Spexigeo_RTK.tiff"

# Apply transformations to register orthomosaics
print(f"\n{'='*60}")
print(f"Step 8: Apply Transformation and Register Orthomosaics")
print(f"{'='*60}")

registered_paths = {}

for ortho_name in transformations.keys():
    if ortho_name not in reprojected_paths:
        print(f"\n⚠️  Reprojected ortho not found for {ortho_name}")
        continue
    
    reprojected_path = reprojected_paths[ortho_name]
    
    # Get transformations
    ortho_transforms = transformations[ortho_name]
    
    if 'error' in ortho_transforms:
        print(f"\n⚠️  Skipping {ortho_name}: {ortho_transforms['error']}")
        continue
    
    primary_trans = ortho_transforms.get('primary')
    secondary_trans = ortho_transforms.get('secondary')
    
    if not primary_trans:
        print(f"\n⚠️  No primary transformation for {ortho_name}")
        continue
    
    print(f"\nRegistering {ortho_name}...")
    
    # Apply primary transformation
    registered_path_primary = registered_dir / f"{ortho_name}_primary_registered.tif"
    registered_path_primary = registered_dir / f"{ortho_name}_primary_registered.tif"
    
    if not registered_path_primary.exists():
        print(f"  Applying primary transformation ({primary_trans.get('type', 'unknown')})...")
        registered_path_primary = apply_transformation(
            reprojected_path,
            primary_trans,
            registered_path_primary,
            basemap_path
        )
        print(f"  ✓ Saved primary: {registered_path_primary}")
    else:
        print(f"  ✓ Primary already exists: {registered_path_primary}")
    
    registered_paths[f"{ortho_name}_primary"] = registered_path_primary
    
    # Apply secondary transformation if available
    if secondary_trans:
        registered_path_secondary = registered_dir / f"{ortho_name}_secondary_registered.tif"
        
        if not registered_path_secondary.exists():
            print(f"  Applying secondary transformation ({secondary_trans.get('type', 'unknown')})...")
            registered_path_secondary = apply_transformation(
                reprojected_path,
                secondary_trans,
                registered_path_secondary,
                basemap_path
            )
            print(f"  ✓ Saved secondary: {registered_path_secondary}")
        else:
            print(f"  ✓ Secondary already exists: {registered_path_secondary}")
        
        registered_paths[f"{ortho_name}_secondary"] = registered_path_secondary
    
    # Also save the primary as the main registered version
    registered_path_main = registered_dir / f"{ortho_name}_registered.tif"
    if not registered_path_main.exists():
        import shutil
        shutil.copy(registered_path_primary, registered_path_main)
    registered_paths[ortho_name] = registered_path_main

print(f"\n✓ Registration complete!")
print(f"  Registered {len(registered_paths)} orthomosaics")



Step 8: Apply Transformation and Register Orthomosaics

Registering no_gcps...
  ✓ Primary already exists: outputs/gcp_matching/registered/no_gcps_primary_registered.tif
  Applying secondary transformation (affine)...
  Processing 4 band(s) in tiles of 2048x2048...
    Band 1/4...
      Progress: 0.0%


## Step 9: Evaluate Accuracy Improvement

In [1]:
# Check for required variables and set defaults if needed
try:
    _ = output_dir
except NameError:
    from pathlib import Path
    output_dir = Path("outputs")
    print(f"output_dir not defined, using default: {output_dir}")

try:
    _ = gcp_matching_dir
except NameError:
    gcp_matching_dir = output_dir / "gcp_matching"
    print(f"gcp_matching_dir not defined, using default: {gcp_matching_dir}")

try:
    _ = matches_dir
except NameError:
    matches_dir = gcp_matching_dir / "matches"
    matches_dir.mkdir(parents=True, exist_ok=True)
    print(f"matches_dir not defined, using default: {matches_dir}")

try:
    _ = registered_dir
except NameError:
    registered_dir = gcp_matching_dir / "registered"
    registered_dir.mkdir(parents=True, exist_ok=True)
    print(f"registered_dir not defined, using default: {registered_dir}")

try:
    _ = basemap_path
except NameError:
    from pathlib import Path
    data_dir = Path("/Users/mauriciohessflores/Documents/Code/Data/New Westminster Oct _25")
    basemap_path = data_dir / "Michael_RTK_orthos" / "TestsiteNewWest_Spexigeo_RTK.tiff"
    print(f"basemap_path not defined, using default: {basemap_path}")

try:
    _ = gcps
except NameError:
    print("⚠️  gcps not defined. Please run Step 2 first.")
    gcps = []

# Compare registered orthos to basemap

# Import required modules
from pathlib import Path
from typing import Dict, List, Optional, Tuple
import rasterio
import numpy as np

def evaluate_accuracy(ortho_path: Path, basemap_path: Path, gcps: List[Dict]) -> Dict:
    """
    Evaluate accuracy by comparing pixel values at GCP locations.
    """
    with rasterio.open(basemap_path) as basemap_src:
        basemap_data = basemap_src.read()
    
    with rasterio.open(ortho_path) as ortho_src:
        ortho_data = ortho_src.read()
    
    errors = []
    
    for gcp in gcps:
        pixel_coords = gcp_to_pixel_coords_from_utm(gcp['x_utm'], gcp['y_utm'], basemap_path)
        if not pixel_coords:
            continue
        
        col, row = pixel_coords
        
        if 0 <= row < basemap_data.shape[1] and 0 <= col < basemap_data.shape[2]:
            basemap_pixel = basemap_data[:, row, col]
            
            if 0 <= row < ortho_data.shape[1] and 0 <= col < ortho_data.shape[2]:
                ortho_pixel = ortho_data[:, row, col]
                
                # Compute error (Euclidean distance in pixel space)
                error = np.sqrt(np.sum((basemap_pixel.astype(float) - ortho_pixel.astype(float))**2))
                errors.append(error)
    
    if errors:
        return {
            'mean_error': float(np.mean(errors)),
            'rmse': float(np.sqrt(np.mean(np.array(errors)**2))),
            'max_error': float(np.max(errors)),
            'min_error': float(np.min(errors)),
            'num_points': len(errors)
        }
    else:
        return {
            'mean_error': 0.0,
            'rmse': 0.0,
            'max_error': 0.0,
            'min_error': 0.0,
            'num_points': 0
        }


# Evaluate accuracy for each registered ortho
print(f"\n{'='*60}")
print(f"Evaluating Accuracy Improvement")
print(f"{'='*60}")

# Check if registered_paths is defined
try:
    _ = registered_paths
except NameError:
    print("⚠️  registered_paths not defined. Please run Step 8 first.")
    registered_paths = {}

    # Try to load from transformations file or reconstruct from registered directory
    try:
        # Check if transformations file exists (from Step 7)
        transformations_file = matches_dir / "transformations.json"
        if transformations_file.exists():
            with open(transformations_file, 'r') as f:
                transformations = json.load(f)
            
            # Reconstruct registered_paths from transformations
            registered_paths = {}
            for ortho_name in transformations.keys():
                registered_path = registered_dir / f"{ortho_name}_registered.tif"
                if registered_path.exists():
                    registered_paths[ortho_name] = registered_path
            
            if len(registered_paths) > 0:
                print(f"✓ Reconstructed registered_paths from {transformations_file}")
                print(f"  Found {len(registered_paths)} registered orthos")
        else:
            # Try to find registered files directly
            if registered_dir.exists():
                # Try to find registered files directly
                # Look for all registered file patterns
                registered_files = list(registered_dir.glob("*_registered.tif"))
        
        if registered_files:
            registered_paths = {}
            for reg_file in registered_files:
                # Extract ortho name from filename
                # Patterns: no_gcps_primary_registered.tif, no_gcps_secondary_registered.tif, no_gcps_registered.tif
                stem = reg_file.stem  # e.g., 'no_gcps_primary_registered'
                
                # Remove '_registered' suffix
                if stem.endswith('_registered'):
                    ortho_name = stem[:-11]  # Remove '_registered'
                else:
                    ortho_name = stem
                
                # Store with the full name (including primary/secondary suffix if present)
                registered_paths[ortho_name] = reg_file
                
                # Also add main version if this is primary/secondary
                if '_primary' in ortho_name:
                    main_name = ortho_name.replace('_primary', '')
                    if main_name not in registered_paths:
                        registered_paths[main_name] = reg_file
                elif '_secondary' in ortho_name:
                    main_name = ortho_name.replace('_secondary', '')
                    if main_name not in registered_paths:
                        registered_paths[main_name] = reg_file
            
            if len(registered_paths) > 0:
                print(f"✓ Found {len(registered_paths)} registered orthos in {registered_dir}")
                for name, path in registered_paths.items():
                    print(f"  - {name}: {path.name}")
    except Exception as e:
        print(f"⚠️  Could not reconstruct registered_paths: {e}")

if len(registered_paths) == 0:
    print("⚠️  No registered orthos found. Please run Step 8 first.")
else:
    for ortho_name in registered_paths.keys():
        print(f"\nEvaluating {ortho_name}...")
        
        registered_path = registered_paths[ortho_name]
        
        if not registered_path.exists():
            print(f"  ⚠️  Registered ortho not found: {registered_path}")
            continue
        
        # Evaluate accuracy
        try:
            accuracy_metrics = evaluate_accuracy(
                registered_path,
                basemap_path,
                gcps
            )
            
            print(f"\n  Accuracy Metrics:")
            mean_err = accuracy_metrics.get('mean_error', 'N/A')
            if isinstance(mean_err, (int, float)):
                print(f"    Mean Error: {mean_err:.3f} m")
            else:
                print(f"    Mean Error: {mean_err}")
            
            rmse = accuracy_metrics.get('rmse', 'N/A')
            if isinstance(rmse, (int, float)):
                print(f"    RMSE: {rmse:.3f} m")
            else:
                print(f"    RMSE: {rmse}")
            
            max_err = accuracy_metrics.get('max_error', 'N/A')
            if isinstance(max_err, (int, float)):
                print(f"    Max Error: {max_err:.3f} m")
            else:
                print(f"    Max Error: {max_err}")
            
            print(f"    Points Evaluated: {accuracy_metrics.get('num_points', 0)}")

        except Exception as e:
            print(f"  ⚠️  Error evaluating {ortho_name}: {e}")
# Generate LaTeX report
latex_report_path = output_dir / "gcp_matching" / "accuracy_report.tex"
latex_report_path.parent.mkdir(parents=True, exist_ok=True)

latex_content = []
latex_content.append("\\documentclass[11pt]{article}")
latex_content.append("\\usepackage[utf8]{inputenc}")
latex_content.append("\\usepackage{graphicx}")
latex_content.append("\\usepackage{geometry}")
latex_content.append("\\geometry{a4paper, margin=1in}")
latex_content.append("\\usepackage{booktabs}")
latex_content.append("\\usepackage{float}")
latex_content.append("\\usepackage{caption}")
latex_content.append("\\begin{document}")
latex_content.append("\\title{GCP Matching Accuracy Evaluation Report}")
latex_content.append("\\author{Automated Analysis}")
latex_content.append("\\date{\\today}")
latex_content.append("\\maketitle")
latex_content.append("\\section{Executive Summary}")
latex_content.append("This report presents the accuracy evaluation of registered orthomosaics against the ground control basemap.")
latex_content.append("\\section{Accuracy Metrics}")
latex_content.append("\\begin{table}[H]")
latex_content.append("\\centering")
latex_content.append("\\begin{tabular}{lcccc}")
latex_content.append("\\toprule")
latex_content.append("Orthomosaic & Mean Error (m) & RMSE (m) & Max Error (m) & Points \\\\")
latex_content.append("\\midrule")

# Collect all accuracy metrics
all_accuracy_metrics = {}

for ortho_name in registered_paths.keys():
    registered_path = registered_paths[ortho_name]
    if not registered_path.exists():
        continue
    
    try:
        accuracy_metrics = evaluate_accuracy(registered_path, basemap_path, gcps)
        all_accuracy_metrics[ortho_name] = accuracy_metrics
        
        # Add to LaTeX table
        mean_err = accuracy_metrics.get('mean_error', 0.0)
        rmse = accuracy_metrics.get('rmse', 0.0)
        max_err = accuracy_metrics.get('max_error', 0.0)
        num_pts = accuracy_metrics.get('num_points', 0)
        
        mean_str = f"{mean_err:.3f}" if isinstance(mean_err, (int, float)) else "N/A"
        rmse_str = f"{rmse:.3f}" if isinstance(rmse, (int, float)) else "N/A"
        max_str = f"{max_err:.3f}" if isinstance(max_err, (int, float)) else "N/A"
        
        ortho_display = ortho_name.replace('_', ' ').title()
        latex_content.append(f"{ortho_display} & {mean_str} & {rmse_str} & {max_str} & {num_pts} \\\\")
    except Exception as e:
        print(f"  ⚠️  Error evaluating {ortho_name}: {e}")
        continue

latex_content.append("\\bottomrule")
latex_content.append("\\end{tabular}")
latex_content.append("\\caption{Accuracy metrics for registered orthomosaics}")
latex_content.append("\\label{tab:accuracy}")
latex_content.append("\\end{table}")

# Add visualization section if figures exist
latex_content.append("\\section{Visualizations}")

# Check for visualization files
vis_dir = gcp_matching_dir / "visualizations"
if vis_dir.exists():
    vis_files = list(vis_dir.glob("*.png")) + list(vis_dir.glob("*.jpg"))
    for vis_file in sorted(vis_files):
        # Copy to report directory for LaTeX
        report_vis_dir = latex_report_path.parent / "figures"
        report_vis_dir.mkdir(exist_ok=True)
        import shutil
        dest_file = report_vis_dir / vis_file.name
        if not dest_file.exists():
            shutil.copy(vis_file, dest_file)
        
        # Add figure to LaTeX
        fig_name = vis_file.stem.replace('_', ' ').title()
        latex_content.append(f"\\begin{{figure}}[H]")
        latex_content.append(f"\\centering")
        latex_content.append(f"\\includegraphics[width=0.8\\textwidth]{{figures/{vis_file.name}}}")
        latex_content.append(f"\\caption{{{fig_name}}}")
        latex_content.append(f"\\label{{fig:{vis_file.stem}}}")
        latex_content.append(f"\\end{{figure}}")

latex_content.append("\\section{Conclusion}")
latex_content.append("The registered orthomosaics show improved alignment with the ground control basemap.")
latex_content.append("\\end{document}")

# Write LaTeX file
with open(latex_report_path, 'w') as f:
    f.write('\n'.join(latex_content))

print(f"\n✓ LaTeX report generated: {latex_report_path}")
print(f"  To compile: pdflatex {latex_report_path.name}")


IndentationError: unexpected indent (2127697898.py, line 165)

## Step 10: Generalized Patch Matching with Evenly-Spaced Seed Points


In [2]:
# Step 10: Generalized Patch Matching with Evenly-Spaced Seed Points

# Check for required variables and set defaults if needed
try:
    _ = output_dir
except NameError:
    from pathlib import Path
    output_dir = Path("outputs")
    print(f"output_dir not defined, using default: {output_dir}")

try:
    _ = gcp_matching_dir
except NameError:
    gcp_matching_dir = output_dir / "gcp_matching"
    print(f"gcp_matching_dir not defined, using default: {gcp_matching_dir}")

try:
    _ = basemap_path
except NameError:
    from pathlib import Path
    data_dir = Path("/Users/mauriciohessflores/Documents/Code/Data/New Westminster Oct _25")
    basemap_path = data_dir / "Michael_RTK_orthos" / "TestsiteNewWest_Spexigeo_RTK.tiff"
    print(f"basemap_path not defined, using default: {basemap_path}")

try:
    _ = reprojected_paths
except NameError:
    print("⚠️  reprojected_paths not defined. Please run Step 5 first.")
    reprojected_paths = {}
    # Try to find reprojected files
    reprojected_dir = gcp_matching_dir / "reprojected"
    if reprojected_dir.exists():
        for ortho_name in ["no_gcps", "with_gcps"]:
            reproj_path = reprojected_dir / f"{ortho_name}_reprojected.tif"
            if reproj_path.exists():
                reprojected_paths[ortho_name] = reproj_path
                print(f"  Found: {reproj_path}")

print(f"\n{'='*60}")
print(f"Step 10: Generalized Patch Matching")
print(f"{'='*60}")

# Create output directories
generalized_dir = gcp_matching_dir / "generalized_patch_matching"
generalized_patches_dir = generalized_dir / "patches"
generalized_matching_patches_dir = generalized_dir / "matching_patches"
generalized_matches_dir = generalized_dir / "matches"

generalized_dir.mkdir(parents=True, exist_ok=True)
generalized_patches_dir.mkdir(exist_ok=True)
generalized_matching_patches_dir.mkdir(exist_ok=True)
generalized_matches_dir.mkdir(exist_ok=True)

# Import required modules
import numpy as np
import cv2
import matplotlib.pyplot as plt
import rasterio
from typing import Dict, List, Tuple, Optional

# Get basemap dimensions
with rasterio.open(basemap_path) as src:
    basemap_width = src.width
    basemap_height = src.height
    basemap_transform = src.transform

print(f"\nBasemap dimensions: {basemap_width} x {basemap_height}")

# Function to create evenly-spaced seed points
def create_evenly_spaced_seeds(width: int, height: int, num_points: int) -> List[Tuple[int, int]]:
    """
    Create evenly-spaced seed points on the basemap.
    Returns list of (col, row) tuples.
    """
    # Calculate grid dimensions
    # For num_points, we want roughly sqrt(num_points) x sqrt(num_points) grid
    grid_size = int(np.sqrt(num_points))
    
    # Calculate spacing
    col_spacing = width / (grid_size + 1)
    row_spacing = height / (grid_size + 1)
    
    seeds = []
    for i in range(1, grid_size + 1):
        for j in range(1, grid_size + 1):
            col = int(i * col_spacing)
            row = int(j * row_spacing)
            # Ensure within bounds
            col = max(0, min(col, width - 1))
            row = max(0, min(row, height - 1))
            seeds.append((col, row))
    
    return seeds

# Function to extract patch (reuse from Step 4)
def extract_patch(image_path: Path, center_col: int, center_row: int, patch_size: int) -> Optional[np.ndarray]:
    """
    Extract a square patch from the image centered at (center_col, center_row).
    """
    half_size = patch_size // 2
    
    with rasterio.open(image_path) as src:
        # Calculate bounds
        col_start = max(0, center_col - half_size)
        col_end = min(src.width, center_col + half_size + 1)
        row_start = max(0, center_row - half_size)
        row_end = min(src.height, center_row + half_size + 1)
        
        # Check if patch is within bounds
        if col_end <= col_start or row_end <= row_start:
            return None
        
        # Read patch
        patch = src.read(window=rasterio.windows.Window(col_start, row_start, col_end - col_start, row_end - row_start))
        
        # Convert to (H, W, C) format
        if len(patch.shape) == 3:
            patch = np.transpose(patch, (1, 2, 0))
            if patch.shape[2] == 1:
                patch = patch[:, :, 0]
        else:
            patch = patch[0]
        
        # Pad if necessary to get exact patch_size
        if patch.shape[0] < patch_size or patch.shape[1] < patch_size:
            padded = np.zeros((patch_size, patch_size), dtype=patch.dtype)
            pad_h = (patch_size - patch.shape[0]) // 2
            pad_w = (patch_size - patch.shape[1]) // 2
            padded[pad_h:pad_h+patch.shape[0], pad_w:pad_w+patch.shape[1]] = patch
            patch = padded
        
        return patch

# Function to find patch in ortho (reuse from Step 6)
def find_patch_in_ortho(
    template_patch: np.ndarray,
    ortho_path: Path,
    search_center_col: int,
    search_center_row: int,
    search_radius: int = 300
) -> Optional[Tuple[int, int, float]]:
    """
    Find template patch in orthomosaic using template matching.
    """
    # Convert template to grayscale if needed
    if len(template_patch.shape) == 3:
        template_gray = cv2.cvtColor(template_patch.astype(np.uint8), cv2.COLOR_RGB2GRAY)
    else:
        template_gray = template_patch.astype(np.uint8)
    
    with rasterio.open(ortho_path) as ortho_src:
        # Define search window
        search_col_start = max(0, search_center_col - search_radius)
        search_col_end = min(ortho_src.width, search_center_col + search_radius)
        search_row_start = max(0, search_center_row - search_radius)
        search_row_end = min(ortho_src.height, search_center_row + search_radius)
        
        # Read search region
        search_window = rasterio.windows.Window(
            search_col_start, search_row_start,
            search_col_end - search_col_start,
            search_row_end - search_row_start
        )
        
        search_region = ortho_src.read(window=search_window)
        
        # Convert to (H, W, C)
        if len(search_region.shape) == 3:
            search_img = np.transpose(search_region, (1, 2, 0))
            if search_img.shape[2] == 1:
                search_gray = search_img[:, :, 0]
            elif search_img.shape[2] >= 3:
                search_gray = cv2.cvtColor(search_img[:, :, :3].astype(np.uint8), cv2.COLOR_RGB2GRAY)
            else:
                search_gray = search_img[:, :, 0]
        else:
            search_gray = search_region[0]
        
        # Normalize to uint8
        if search_gray.dtype != np.uint8:
            search_min = search_gray.min()
            search_max = search_gray.max()
            if search_max > search_min:
                search_gray = ((search_gray - search_min) / (search_max - search_min) * 255).astype(np.uint8)
            else:
                search_gray = np.zeros_like(search_gray, dtype=np.uint8)
        
        # Perform template matching
        result = cv2.matchTemplate(search_gray, template_gray, cv2.TM_CCOEFF_NORMED)
        
        # Find best match
        min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result)
        
        # Convert match location to full image coordinates
        match_col = search_col_start + max_loc[0] + template_gray.shape[1] // 2
        match_row = search_row_start + max_loc[1] + template_gray.shape[0] // 2
        
        return (match_col, match_row, max_val)

# Function to find patch with multi-scale matching (reuse from Step 6)
def find_patch_in_ortho_multiscale(
    template_patch: np.ndarray,
    ortho_path: Path,
    search_center_col: int,
    search_center_row: int,
    search_radius: int = 300
) -> Optional[Tuple[int, int, float]]:
    """
    Find template patch using multi-scale matching.
    """
    best_match = None
    best_confidence = 0.0
    
    for scale in [0.5, 1.0, 2.0]:
        # Resize template
        if scale != 1.0:
            template_resized = cv2.resize(template_patch.astype(np.uint8), None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)
        else:
            template_resized = template_patch
        
        match = find_patch_in_ortho(
            template_resized,
            ortho_path,
            search_center_col,
            search_center_row,
            search_radius
        )
        
        if match and match[2] > best_confidence:
            best_match = match
            best_confidence = match[2]
    
    return best_match

# Process different numbers of seed points
num_seed_points_list = [50, 100]
patch_sizes = [49, 59, 79, 99, 119]

all_generalized_results = {}

for num_seed_points in num_seed_points_list:
    print(f"\n{'='*60}")
    print(f"Processing {num_seed_points} seed points")
    print(f"{'='*60}")
    
    # Create seed points
    seed_points = create_evenly_spaced_seeds(basemap_width, basemap_height, num_seed_points)
    print(f"✓ Created {len(seed_points)} seed points")
    
    # Extract patches from basemap
    basemap_patches = {}
    
    for patch_size in patch_sizes:
        basemap_patches[patch_size] = {}
        
        for seed_idx, (seed_col, seed_row) in enumerate(seed_points):
            patch = extract_patch(basemap_path, seed_col, seed_row, patch_size)
            
            if patch is not None:
                seed_id = f"seed_{seed_idx:03d}"
                basemap_patches[patch_size][seed_id] = {
                    'patch': patch,
                    'col': seed_col,
                    'row': seed_row
                }
                
                # Save patch as image
                patch_path = generalized_patches_dir / f"{num_seed_points}pts" / f"basemap_{seed_id}_{patch_size}x{patch_size}.png"
                patch_path.parent.mkdir(parents=True, exist_ok=True)
                
                # Normalize patch for saving
                if patch.dtype != np.uint8:
                    patch_min = patch.min()
                    patch_max = patch.max()
                    if patch_max > patch_min:
                        patch_normalized = ((patch - patch_min) / (patch_max - patch_min) * 255).astype(np.uint8)
                    else:
                        patch_normalized = np.zeros_like(patch, dtype=np.uint8)
                else:
                    patch_normalized = patch
                
                plt.imsave(patch_path, patch_normalized)
        
        print(f"  ✓ Extracted {len(basemap_patches[patch_size])} patches of size {patch_size}x{patch_size}")
    
    # Find patches in orthomosaics
    generalized_results = {}
    
    for ortho_name, reprojected_path in reprojected_paths.items():
        print(f"\n  Processing {ortho_name}...")
        
        if ortho_name not in generalized_results:
            generalized_results[ortho_name] = {}
        
        best_patch_size = None
        best_matches = 0
        
        for patch_size in patch_sizes:
            matches_found = 0
            
            for seed_id, patch_data in basemap_patches[patch_size].items():
                template = patch_data['patch']
                expected_col = patch_data['col']
                expected_row = patch_data['row']
                
                # Search for patch using multi-scale matching
                match = find_patch_in_ortho_multiscale(
                    template,
                    reprojected_path,
                    expected_col,
                    expected_row,
                    search_radius=300
                )
                
                # Validate match quality
                if match and match[2] >= 0.3:  # Confidence threshold
                    match_col, match_row, confidence = match
                    matches_found += 1
                    
                    if seed_id not in generalized_results[ortho_name]:
                        generalized_results[ortho_name][seed_id] = {}
                    
                    generalized_results[ortho_name][seed_id][patch_size] = {
                        'expected_col': expected_col,
                        'expected_row': expected_row,
                        'matched_col': match_col,
                        'matched_row': match_row,
                        'offset_col': match_col - expected_col,
                        'offset_row': match_row - expected_row,
                        'confidence': confidence
                    }
                    # Calculate offsets and convert to meters
                    offset_col_px = match_col - expected_col
                    offset_row_px = match_row - expected_row
                    
                    # Get pixel size from basemap to convert to meters
                    with rasterio.open(basemap_path) as basemap_src:
                        pixel_size_x = abs(basemap_src.transform[0])  # meters per pixel
                        pixel_size_y = abs(basemap_src.transform[4])  # meters per pixel
            
                    unmatched_count = 0
                    
                    offset_col_m = offset_col_px * pixel_size_x
                    offset_row_m = offset_row_px * pixel_size_y
                    offset_total_m = np.sqrt(offset_col_m**2 + offset_row_m**2)
                    offset_total_cm = offset_total_m * 100
                    
                    # Print detailed match information
                    print(f"      Seed {seed_id}: Basemap=({expected_col}, {expected_row}), "
                          f"Matched=({match_col}, {match_row}), "
                          f"Offset=({offset_col_px:.1f}, {offset_row_px:.1f}) px, "
                          f"Distance={offset_total_m:.3f} m ({offset_total_cm:.1f} cm), "
                          f"Confidence={confidence:.3f}")
            for seed_id, patch_data in basemap_patches[patch_size].items():
                if seed_id not in generalized_results[ortho_name] or patch_size not in generalized_results[ortho_name][seed_id]:
                    unmatched_count += 1
                    seed_col = patch_data['col']
                    seed_row = patch_data['row']
                    print(f"      Seed {seed_id}: Basemap=({seed_col}, {seed_row}), "
                          f"Match=NOT FOUND")
            
            if unmatched_count > 0:
                print(f"    Unmatched seeds: {unmatched_count}/{len(basemap_patches[patch_size])}")
        
        # Store results for this number of seed points
        all_generalized_results[num_seed_points] = generalized_results
    
    print(f"\n✓ Generalized patch matching complete for all seed point configurations!")

# Save results to JSON
try:
    _ = output_dir
except NameError:
    output_dir = Path("outputs")

try:
    _ = gcp_matching_dir
except NameError:
    gcp_matching_dir = output_dir / "gcp_matching"

generalized_dir = gcp_matching_dir / "generalized_patch_matching"
generalized_dir.mkdir(parents=True, exist_ok=True)

results_file = generalized_dir / "all_generalized_results.json"

# Convert numpy types to native Python types for JSON serialization
def convert_to_native_types(obj):
    """Recursively convert numpy types to native Python types."""
    import numpy as np
    if isinstance(obj, np.integer):
        return int(obj)
    elif isinstance(obj, np.floating):
        return float(obj)
    elif isinstance(obj, np.ndarray):
        return obj.tolist()
    elif isinstance(obj, dict):
        return {key: convert_to_native_types(value) for key, value in obj.items()}
    elif isinstance(obj, (list, tuple)):
        return [convert_to_native_types(item) for item in obj]
    return obj

all_generalized_results_serializable = convert_to_native_types(all_generalized_results)
with open(results_file, 'w') as f:
    json.dump(all_generalized_results_serializable, f, indent=2)
print(f"✓ Saved generalized results to: {results_file}")
            


⚠️  reprojected_paths not defined. Please run Step 5 first.
  Found: outputs/gcp_matching/reprojected/no_gcps_reprojected.tif
  Found: outputs/gcp_matching/reprojected/with_gcps_reprojected.tif

Step 10: Generalized Patch Matching

Basemap dimensions: 90129 x 90188

Processing 50 seed points
✓ Created 49 seed points
  ✓ Extracted 49 patches of size 49x49
  ✓ Extracted 49 patches of size 59x59
  ✓ Extracted 49 patches of size 79x79
  ✓ Extracted 49 patches of size 99x99
  ✓ Extracted 49 patches of size 119x119

  Processing no_gcps...
      Seed seed_000: Basemap=(11266, 11273), Matched=(10978, 10985), Offset=(-288.0, -288.0) px, Distance=4.866 m (486.6 cm), Confidence=1.000
      Seed seed_001: Basemap=(11266, 22547), Matched=(11504, 22270), Offset=(238.0, -277.0) px, Distance=4.363 m (436.3 cm), Confidence=0.474
      Seed seed_002: Basemap=(11266, 33820), Matched=(11467, 33553), Offset=(201.0, -267.0) px, Distance=3.992 m (399.2 cm), Confidence=0.365
      Seed seed_003: Basemap=(112

## Step 11: Compare Seed Point Configurations and Identify Best Matches with RANSAC

This step:
1. Compares 50 vs 100 seeds based on average distance
2. For the best performing configuration, uses RANSAC to identify inliers/outliers based on offset direction
3. Returns the list of best matches with their offsets


In [1]:
# Step 11: Compare configurations and identify best matches with RANSAC

import json
import numpy as np
from pathlib import Path
from typing import Dict, List, Tuple, Optional
import rasterio
# Check if sklearn is available, install if needed
try:
    import sklearn
except ImportError:
    import sys
    import subprocess
    print("Installing scikit-learn...")
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'scikit-learn'])
    import sklearn

from sklearn.linear_model import RANSACRegressor

# Setup paths
try:
    _ = output_dir
except NameError:
    output_dir = Path("outputs")

try:
    _ = gcp_matching_dir
except NameError:
    gcp_matching_dir = output_dir / "gcp_matching"

try:
    _ = basemap_path
except NameError:
    basemap_path = Path("/Users/mauriciohessflores/Documents/Code/Data/New Westminster Oct _25/Michael_RTK_orthos/TestsiteNewWest_Spexigeo_RTK.tiff")

generalized_dir = gcp_matching_dir / "generalized_patch_matching"
results_file = generalized_dir / "all_generalized_results.json"

# Load results if not in memory
try:
    _ = all_generalized_results
except NameError:
    if results_file.exists():
        with open(results_file, 'r') as f:
            all_generalized_results = json.load(f)
        # Convert string keys to integers for consistency (JSON keys are always strings)
        all_generalized_results = {int(k) if str(k).isdigit() else k: v for k, v in all_generalized_results.items()}
        print(f"✓ Loaded results from: {results_file}")
    else:
        raise FileNotFoundError(f"Results file not found: {results_file}. Please run Step 10 first.")

# Get pixel size from basemap for distance calculations
with rasterio.open(basemap_path) as basemap_src:
    pixel_size_x = abs(basemap_src.transform[0])  # meters per pixel
    pixel_size_y = abs(basemap_src.transform[4])  # meters per pixel

print("=" * 60)
print("Step 11: Compare Configurations and Identify Best Matches")
print("=" * 60)

# Function to calculate average distance for a configuration
def calculate_average_distance(results: Dict, ortho_name: str) -> Tuple[float, int]:
    """Calculate average distance in meters for all matches in a configuration.
    Only counts one match per seed (the best patch size match)."""
    total_distance = 0.0
    total_matches = 0
    
    for seed_id, seed_data in results.get(ortho_name, {}).items():
        # Only use the best patch size match for each seed (largest patch size = most reliable)
        # If multiple patch sizes exist, use the one with highest confidence or largest size
        best_match = None
        best_patch_size = None
        best_confidence = -1
        
        for patch_size, match_data in seed_data.items():
            if isinstance(match_data, dict) and 'offset_col' in match_data:
                confidence = match_data.get('confidence', 0.0)
                # Prefer higher confidence, or if equal, larger patch size
                if confidence > best_confidence or (confidence == best_confidence and 
                                                   (best_patch_size is None or patch_size > best_patch_size)):
                    best_match = match_data
                    best_patch_size = patch_size
                    best_confidence = confidence
        
        # Count only the best match for this seed
        if best_match is not None:
            offset_col_px = best_match['offset_col']
            offset_row_px = best_match['offset_row']
            
            offset_col_m = offset_col_px * pixel_size_x
            offset_row_m = offset_row_px * pixel_size_y
            distance_m = np.sqrt(offset_col_m**2 + offset_row_m**2)
            
            total_distance += distance_m
            total_matches += 1
    
    avg_distance = total_distance / total_matches if total_matches > 0 else float('inf')
    return avg_distance, total_matches

# Compare 50 vs 100 seeds for each ortho
comparison_results = {}

for ortho_name in ['no_gcps', 'with_gcps']:
    print(f"\n{'='*60}")
    print(f"Analyzing {ortho_name}")
    print(f"{'='*60}")
    
    distances_50 = []
    distances_100 = []
    matches_50 = 0
    matches_100 = 0
    
    # Calculate for 50 seeds (handle both string and int keys)
    seed_50_key = 50 if 50 in all_generalized_results else ('50' if '50' in all_generalized_results else None)
    if seed_50_key is not None:
        avg_dist_50, num_matches_50 = calculate_average_distance(all_generalized_results[seed_50_key], ortho_name)
        distances_50.append(avg_dist_50)
        matches_50 = num_matches_50
        print(f"  50 seeds: Average distance = {avg_dist_50:.3f} m, Matches = {num_matches_50}")
    
    # Calculate for 100 seeds (handle both string and int keys)
    seed_100_key = 100 if 100 in all_generalized_results else ('100' if '100' in all_generalized_results else None)
    if seed_100_key is not None:
        avg_dist_100, num_matches_100 = calculate_average_distance(all_generalized_results[seed_100_key], ortho_name)
        distances_100.append(avg_dist_100)
        matches_100 = num_matches_100
        print(f"  100 seeds: Average distance = {avg_dist_100:.3f} m, Matches = {num_matches_100}")
    
    # Determine best configuration
    if distances_50 and distances_100:
        if avg_dist_50 < avg_dist_100:
            best_num_seeds = 50
            best_avg_dist = avg_dist_50
            best_num_matches = matches_50
            print(f"\n  ✓ Best: 50 seeds (avg distance: {avg_dist_50:.3f} m < {avg_dist_100:.3f} m)")
        else:
            best_num_seeds = 100
            best_avg_dist = avg_dist_100
            best_num_matches = matches_100
            print(f"\n  ✓ Best: 100 seeds (avg distance: {avg_dist_100:.3f} m < {avg_dist_50:.3f} m)")
    elif distances_50:
        best_num_seeds = 50
        best_avg_dist = avg_dist_50
        best_num_matches = matches_50
        print(f"\n  ✓ Using 50 seeds (only configuration available)")
    elif distances_100:
        best_num_seeds = 100
        best_avg_dist = avg_dist_100
        best_num_matches = matches_100
        print(f"\n  ✓ Using 100 seeds (only configuration available)")
    else:
        print(f"  ⚠️  No matches found for {ortho_name}")
        continue
    
    comparison_results[ortho_name] = {
        'best_num_seeds': best_num_seeds,
        'best_avg_distance': best_avg_dist,
        'best_num_matches': best_num_matches
    }

# Now apply RANSAC to identify inliers/outliers for the best configuration
print(f"\n{'='*60}")
print("RANSAC Analysis for Best Matches")
print(f"{'='*60}")

def apply_ransac_to_matches(results: Dict, ortho_name: str) -> Dict:
    """Apply RANSAC to identify inliers and outliers based on offset direction."""
    
    # Collect all matches with their offsets
    matches_data = []
    
    for seed_id, seed_data in results.get(ortho_name, {}).items():
        # Only use the best patch size match for each seed (to avoid counting same seed multiple times)
        best_match = None
        best_patch_size = None
        best_confidence = -1
        
        for patch_size, match_data in seed_data.items():
            if isinstance(match_data, dict) and 'offset_col' in match_data:
                confidence = match_data.get('confidence', 0.0)
                # Prefer higher confidence, or if equal, larger patch size
                if confidence > best_confidence or (confidence == best_confidence and 
                                                   (best_patch_size is None or patch_size > best_patch_size)):
                    best_match = match_data
                    best_patch_size = patch_size
                    best_confidence = confidence
        
        # Add only the best match for this seed
        if best_match is not None:
            offset_col_px = best_match['offset_col']
            offset_row_px = best_match['offset_row']
            
            matches_data.append({
                'seed_id': seed_id,
                'patch_size': best_patch_size,
                'expected_col': best_match['expected_col'],
                'expected_row': best_match['expected_row'],
                'matched_col': best_match['matched_col'],
                'matched_row': best_match['matched_row'],
                'offset_col': offset_col_px,
                'offset_row': offset_row_px,
                'confidence': best_confidence
            })
    
    if len(matches_data) < 3:
        print(f"  ⚠️  Not enough matches ({len(matches_data)}) for RANSAC (need at least 3)")
        return {
            'inliers': matches_data,
            'outliers': [],
            'num_inliers': len(matches_data),
            'num_outliers': 0
        }
    
    # Prepare data for RANSAC
    # Use offset_col and offset_row as features
    X = np.array([[m['offset_col'], m['offset_row']] for m in matches_data])
    
    # RANSAC expects a target variable - we'll use the distance as the target
    # This will help identify matches that follow a consistent offset pattern
    y = np.array([np.sqrt(m['offset_col']**2 + m['offset_row']**2) for m in matches_data])
    
    # Apply RANSAC
    # We'll use a linear model to fit the offset pattern
    ransac = RANSACRegressor(
        random_state=42,
        min_samples=3,  # Minimum samples to fit model
        residual_threshold=2.0,  # Threshold for inliers (in pixels)
        max_trials=1000
    )
    
    # Fit RANSAC - we'll use offset_col as feature and offset_row as target
    # This models the relationship between x and y offsets
    X_col = X[:, 0].reshape(-1, 1)  # offset_col as feature
    y_row = X[:, 1]  # offset_row as target
    
    try:
        ransac.fit(X_col, y_row)
        inlier_mask = ransac.inlier_mask_
    except Exception as e:
        print(f"  ⚠️  RANSAC failed: {e}, using all matches as inliers")
        inlier_mask = np.ones(len(matches_data), dtype=bool)
    
    # Separate inliers and outliers
    inliers = [matches_data[i] for i in range(len(matches_data)) if inlier_mask[i]]
    outliers = [matches_data[i] for i in range(len(matches_data)) if not inlier_mask[i]]
    
    print(f"  Total matches: {len(matches_data)}")
    print(f"  Inliers: {len(inliers)} ({len(inliers)/len(matches_data)*100:.1f}%)")
    print(f"  Outliers: {len(outliers)} ({len(outliers)/len(matches_data)*100:.1f}%)")
    
    # Calculate statistics for inliers
    if inliers:
        inlier_offsets_col = [m['offset_col'] for m in inliers]
        inlier_offsets_row = [m['offset_row'] for m in inliers]
        
        avg_offset_col = np.mean(inlier_offsets_col)
        avg_offset_row = np.mean(inlier_offsets_row)
        std_offset_col = np.std(inlier_offsets_col)
        std_offset_row = np.std(inlier_offsets_row)
        
        # Convert to meters
        avg_offset_col_m = avg_offset_col * pixel_size_x
        avg_offset_row_m = avg_offset_row * pixel_size_y
        avg_distance_m = np.sqrt(avg_offset_col_m**2 + avg_offset_row_m**2)
        
        print(f"  Inlier statistics:")
        print(f"    Average offset: ({avg_offset_col:.1f}, {avg_offset_row:.1f}) px")
        print(f"    Average offset: ({avg_offset_col_m:.3f}, {avg_offset_row_m:.3f}) m")
        print(f"    Std deviation: ({std_offset_col:.1f}, {std_offset_row:.1f}) px")
        print(f"    Average distance: {avg_distance_m:.3f} m")
    
    return {
        'inliers': inliers,
        'outliers': outliers,
        'num_inliers': len(inliers),
        'num_outliers': len(outliers),
        'avg_offset_col_px': float(avg_offset_col) if inliers else 0.0,
        'avg_offset_row_px': float(avg_offset_row) if inliers else 0.0,
        'avg_offset_col_m': float(avg_offset_col_m) if inliers else 0.0,
        'avg_offset_row_m': float(avg_offset_row_m) if inliers else 0.0,
        'avg_distance_m': float(avg_distance_m) if inliers else 0.0
    }

# Apply RANSAC for each ortho using its best configuration
best_matches_results = {}

for ortho_name in comparison_results.keys():
    best_num_seeds = comparison_results[ortho_name]['best_num_seeds']
    print(f"\n{ortho_name} (using {best_num_seeds} seeds configuration):")
    
    # Handle both string and int keys
    best_key = best_num_seeds if best_num_seeds in all_generalized_results else str(best_num_seeds)
    ransac_results = apply_ransac_to_matches(all_generalized_results[best_key], ortho_name)
    best_matches_results[ortho_name] = {
        'num_seeds': best_num_seeds,
        **ransac_results
    }

# Save results
best_matches_file = generalized_dir / "best_matches_ransac.json"

def convert_to_native_types(obj):
    """Recursively convert numpy types to native Python types."""
    import numpy as np
    if isinstance(obj, np.integer):
        return int(obj)
    elif isinstance(obj, np.floating):
        return float(obj)
    elif isinstance(obj, np.ndarray):
        return obj.tolist()
    elif isinstance(obj, dict):
        return {key: convert_to_native_types(value) for key, value in obj.items()}
    elif isinstance(obj, (list, tuple)):
        return [convert_to_native_types(item) for item in obj]
    return obj

best_matches_serializable = convert_to_native_types(best_matches_results)
with open(best_matches_file, 'w') as f:
    json.dump(best_matches_serializable, f, indent=2)
print(f"\n✓ Saved best matches with RANSAC results to: {best_matches_file}")

# Print summary
print(f"\n{'='*60}")
print("Summary of Best Matches")
print(f"{'='*60}")

for ortho_name, results in best_matches_results.items():
    print(f"\n{ortho_name}:")
    print(f"  Configuration: {results['num_seeds']} seeds")
    print(f"  Inliers: {results['num_inliers']} matches")
    print(f"  Outliers: {results['num_outliers']} matches")
    if results['num_inliers'] > 0:
        print(f"  Average offset: ({results['avg_offset_col_m']:.3f}, {results['avg_offset_row_m']:.3f}) m")
        print(f"  Average distance: {results['avg_distance_m']:.3f} m")
        print(f"  Best matches (first 10 inliers):")
        for i, match in enumerate(results['inliers'][:10]):
            offset_col_m = match['offset_col'] * pixel_size_x
            offset_row_m = match['offset_row'] * pixel_size_y
            distance_m = np.sqrt(offset_col_m**2 + offset_row_m**2)
            print(f"    {i+1}. Seed {match['seed_id']}: "
                  f"Offset=({match['offset_col']:.1f}, {match['offset_row']:.1f}) px, "
                  f"Distance={distance_m:.3f} m, Confidence={match['confidence']:.3f}")

print(f"\n✓ Step 11 complete!")



✓ Loaded results from: outputs/gcp_matching/generalized_patch_matching/all_generalized_results.json
Step 11: Compare Configurations and Identify Best Matches

Analyzing no_gcps
  50 seeds: Average distance = 3.077 m, Matches = 40
  100 seeds: Average distance = 3.229 m, Matches = 90

  ✓ Best: 50 seeds (avg distance: 3.077 m < 3.229 m)

Analyzing with_gcps
  50 seeds: Average distance = 2.966 m, Matches = 42
  100 seeds: Average distance = 3.296 m, Matches = 88

  ✓ Best: 50 seeds (avg distance: 2.966 m < 3.296 m)

RANSAC Analysis for Best Matches

no_gcps (using 50 seeds configuration):
  Total matches: 40
  Inliers: 12 (30.0%)
  Outliers: 28 (70.0%)
  Inlier statistics:
    Average offset: (-203.8, -253.6) px
    Average offset: (-2.434, -3.029) m
    Std deviation: (150.7, 45.8) px
    Average distance: 3.886 m

with_gcps (using 50 seeds configuration):
  Total matches: 42
  Inliers: 11 (26.2%)
  Outliers: 31 (73.8%)
  Inlier statistics:
    Average offset: (-235.5, -214.7) px
    A



## Step 12: Register Orthos to Basemap Using Best Matches 2D Shift

This step:
1. Loads the best matches (inliers) from Step 11
2. Computes the optimal 2D shift for each ortho based on inlier matches
3. Applies the shift to register each ortho to the basemap
4. Saves registered orthos as LZW-compressed GeoTIFFs



In [1]:
# Step 12: Register Orthos to Basemap Using Best Matches 2D Shift

import json
import numpy as np
from pathlib import Path
from typing import Dict, Tuple
import rasterio
from rasterio.transform import Affine
from scipy import ndimage

# Setup paths
try:
    _ = output_dir
except NameError:
    output_dir = Path("outputs")

try:
    _ = gcp_matching_dir
except NameError:
    gcp_matching_dir = output_dir / "gcp_matching"

try:
    _ = basemap_path
except NameError:
    basemap_path = Path("/Users/mauriciohessflores/Documents/Code/Data/New Westminster Oct _25/Michael_RTK_orthos/TestsiteNewWest_Spexigeo_RTK.tiff")

try:
    _ = reprojected_paths
except NameError:
    # Try to find reprojected orthos
    reprojected_dir = gcp_matching_dir / "reprojected"
    reprojected_paths = {}
    for ortho_name in ['no_gcps', 'with_gcps']:
        reproj_path = reprojected_dir / f"{ortho_name}_reprojected.tif"
        if reproj_path.exists():
            reprojected_paths[ortho_name] = reproj_path
        else:
            # Try alternative location
            alt_dir = output_dir / "test_matching" / "reprojected"
            alt_path = alt_dir / f"{ortho_name}_reprojected.tif"
            if alt_path.exists():
                reprojected_paths[ortho_name] = alt_path

generalized_dir = gcp_matching_dir / "generalized_patch_matching"
best_matches_file = generalized_dir / "best_matches_ransac.json"

# Load best matches
try:
    _ = best_matches_results
except NameError:
    if best_matches_file.exists():
        with open(best_matches_file, 'r') as f:
            best_matches_results = json.load(f)
        print(f"✓ Loaded best matches from: {best_matches_file}")
    else:
        raise FileNotFoundError(f"Best matches file not found: {best_matches_file}. Please run Step 11 first.")

print("=" * 60)
print("Step 12: Register Orthos to Basemap Using 2D Shift")
print("=" * 60)

# Get basemap info
with rasterio.open(basemap_path) as basemap_src:
    basemap_width = basemap_src.width
    basemap_height = basemap_src.height
    basemap_transform = basemap_src.transform
    basemap_crs = basemap_src.crs
    pixel_size_x = abs(basemap_transform[0])  # meters per pixel
    pixel_size_y = abs(basemap_transform[4])  # meters per pixel

print(f"\nBasemap info:")
print(f"  Dimensions: {basemap_width} x {basemap_height}")
print(f"  CRS: {basemap_crs}")
print(f"  Pixel size: {pixel_size_x:.4f} m (X), {pixel_size_y:.4f} m (Y)")

# Function to compute 2D shift from inlier matches
def compute_2d_shift_from_matches(inliers: list) -> Tuple[float, float]:
    """Compute optimal 2D shift from inlier matches."""
    if not inliers:
        return 0.0, 0.0
    
    # Calculate average offset (median is more robust than mean)
    offsets_col = [m['offset_col'] for m in inliers]
    offsets_row = [m['offset_row'] for m in inliers]
    
    # Use median for robustness against outliers
    shift_x = float(np.median(offsets_col))
    shift_y = float(np.median(offsets_row))
    
    # Also calculate mean for comparison
    mean_shift_x = float(np.mean(offsets_col))
    mean_shift_y = float(np.mean(offsets_row))
    
    print(f"  Shift (median): ({shift_x:.2f}, {shift_y:.2f}) px")
    print(f"  Shift (mean): ({mean_shift_x:.2f}, {mean_shift_y:.2f}) px")
    print(f"  Shift (meters): ({shift_x * pixel_size_x:.3f}, {shift_y * pixel_size_y:.3f}) m")
    
    return shift_x, shift_y

# Function to apply 2D shift and register ortho to basemap
def register_ortho_with_shift(
    ortho_path: Path,
    basemap_path: Path,
    shift_x: float,
    shift_y: float,
    output_path: Path
) -> Path:
    """Apply 2D shift to ortho and register to basemap."""
    
    print(f"\n  Registering {ortho_path.name}...")
    print(f"    Applying shift: ({shift_x:.2f}, {shift_y:.2f}) px")
    
    # Open ortho
    with rasterio.open(ortho_path) as ortho_src:
        ortho_data = ortho_src.read()
        ortho_count = ortho_src.count
        ortho_dtype = ortho_data.dtype
        
        # Apply shift to each band
        shifted_data = np.zeros((ortho_count, basemap_height, basemap_width), dtype=ortho_dtype)
        
        for band_idx in range(ortho_count):
            band = ortho_data[band_idx]
            
            # Apply shift using scipy.ndimage.shift
            shifted_band = ndimage.shift(
                band,
                (shift_y, shift_x),  # Note: (row, col) order
                mode='constant',
                cval=0,
                order=1  # Linear interpolation
            )
            
            # Crop or pad to match basemap dimensions
            if shifted_band.shape[0] > basemap_height:
                shifted_band = shifted_band[:basemap_height, :]
            elif shifted_band.shape[0] < basemap_height:
                padded = np.zeros((basemap_height, shifted_band.shape[1]), dtype=shifted_band.dtype)
                padded[:shifted_band.shape[0], :] = shifted_band
                shifted_band = padded
            
            if shifted_band.shape[1] > basemap_width:
                shifted_band = shifted_band[:, :basemap_width]
            elif shifted_band.shape[1] < basemap_width:
                padded = np.zeros((shifted_band.shape[0], basemap_width), dtype=shifted_band.dtype)
                padded[:, :shifted_band.shape[1]] = shifted_band
                shifted_band = padded
            
            shifted_data[band_idx] = shifted_band
    
    # Update transform to account for shift
    # Shift in pixels needs to be converted to geographic coordinates
    new_transform = Affine(
        basemap_transform[0], basemap_transform[1],
        basemap_transform[2] - shift_x * pixel_size_x,  # Adjust X origin
        basemap_transform[3], basemap_transform[4],
        basemap_transform[5] - shift_y * abs(pixel_size_y)  # Adjust Y origin
    )
    
    # Save registered ortho with LZW compression
    output_path.parent.mkdir(parents=True, exist_ok=True)
    
    with rasterio.open(
        output_path,
        'w',
        driver='GTiff',
        height=basemap_height,
        width=basemap_width,
        count=ortho_count,
        dtype=ortho_dtype,
        crs=basemap_crs,
        transform=new_transform,
        compress='jpeg',
            jpeg_quality=90,
        BIGTIFF='YES',
        tiled=True,
        blockxsize=512,
        blockysize=512,
    ) as dst:
        dst.write(shifted_data)
    
    print(f"    ✓ Saved registered ortho to: {output_path}")
    
    return output_path

# Process each ortho
registered_orthos = {}
registered_dir = gcp_matching_dir / "generalized_patch_matching" / "registered"
registered_dir.mkdir(parents=True, exist_ok=True)

for ortho_name in ['no_gcps', 'with_gcps']:
    print(f"\n{'='*60}")
    print(f"Processing {ortho_name}")
    print(f"{'='*60}")
    
    # Check if we have matches for this ortho
    if ortho_name not in best_matches_results:
        print(f"  ⚠️  No matches found for {ortho_name}, skipping...")
        continue
    
    # Check if we have reprojected ortho
    if ortho_name not in reprojected_paths:
        print(f"  ⚠️  Reprojected ortho not found for {ortho_name}, skipping...")
        continue
    
    results = best_matches_results[ortho_name]
    inliers = results.get('inliers', [])
    
    if not inliers:
        print(f"  ⚠️  No inlier matches found for {ortho_name}, skipping...")
        continue
    
    print(f"  Using {len(inliers)} inlier matches")
    
    # Compute 2D shift
    shift_x, shift_y = compute_2d_shift_from_matches(inliers)
    
    # Register ortho
    output_path = registered_dir / f"{ortho_name}_registered.tif"
    
    if output_path.exists():
        print(f"  ⚠️  Registered ortho already exists: {output_path}")
        print(f"      Skipping registration (delete file to recompute)")
    else:
        registered_path = register_ortho_with_shift(
            reprojected_paths[ortho_name],
            basemap_path,
            shift_x,
            shift_y,
            output_path
        )
        registered_orthos[ortho_name] = registered_path

print(f"\n{'='*60}")
print("Registration Summary")
print(f"{'='*60}")

for ortho_name, reg_path in registered_orthos.items():
    print(f"\n{ortho_name}:")
    print(f"  Registered ortho: {reg_path}")
    if reg_path.exists():
        file_size_mb = reg_path.stat().st_size / (1024 * 1024)
        print(f"  File size: {file_size_mb:.2f} MB")

print(f"\n✓ Step 12 complete!")
print(f"  Registered orthos saved to: {registered_dir}")



✓ Loaded best matches from: outputs/gcp_matching/generalized_patch_matching/best_matches_ransac.json
Step 12: Register Orthos to Basemap Using 2D Shift

Basemap info:
  Dimensions: 90129 x 90188
  CRS: EPSG:32610
  Pixel size: 0.0119 m (X), 0.0119 m (Y)

Processing no_gcps
  Using 12 inlier matches
  Shift (median): (-275.00, -275.00) px
  Shift (mean): (-203.75, -253.58) px
  Shift (meters): (-3.285, -3.285) m

  Registering no_gcps_reprojected.tif...
    Applying shift: (-275.00, -275.00) px
    ✓ Saved registered ortho to: outputs/gcp_matching/generalized_patch_matching/registered/no_gcps_registered.tif

Processing with_gcps
  Using 11 inlier matches
  Shift (median): (-275.00, -275.00) px
  Shift (mean): (-235.45, -214.73) px
  Shift (meters): (-3.285, -3.285) m

  Registering with_gcps_reprojected.tif...
    Applying shift: (-275.00, -275.00) px
    ✓ Saved registered ortho to: outputs/gcp_matching/generalized_patch_matching/registered/with_gcps_registered.tif

Registration Summar