# Popup Detector - Extract Crops for OCR
## Cell 1: Detection & Cropping
This cell detects popups in images and crops them for OCR processing.

In [None]:
# Install dependencies (if needed)
!pip install ultralytics boto3 pyyaml opencv-python -q

In [None]:
import os
import random
import json
import yaml
import glob
from pathlib import Path
from datetime import datetime

from ultralytics import YOLO
import cv2

In [None]:
# Load configuration
# Read the YAML explicitly as UTF-8 so parsing does not depend on the
# platform's default locale encoding.
with open('config/config.yaml', 'r', encoding='utf-8') as f:
    config = yaml.safe_load(f)

# Fail fast: every key below is read by this notebook's detection cells, so a
# missing one should surface here instead of after the model has been loaded.
required_keys = (
    'model_path', 'conf_threshold', 'iou_threshold', 'augment',
    'input_folder', 'images_per_run', 'output_folder',
)
if not isinstance(config, dict):
    # yaml.safe_load returns None for an empty file and may return a
    # list/scalar for other non-mapping documents.
    raise ValueError("config/config.yaml must contain a top-level mapping")
missing_keys = [key for key in required_keys if key not in config]
if missing_keys:
    raise KeyError(f"config/config.yaml is missing required keys: {missing_keys}")

print("Configuration loaded:")
print(f"  Model: {config['model_path']}")
print(f"  Confidence: {config['conf_threshold']}")
print(f"  Images per run: {config['images_per_run']}")
print(f"  Output: {config['output_folder']}")

In [None]:
# Instantiate the YOLO detector from the weights path given in the config
model_path = config['model_path']
model = YOLO(model_path)
print(f"Model loaded: {model_path}")

In [None]:
# Get random images from test folder
input_folder = config['input_folder']
num_images = config['images_per_run']

# Match extensions case-insensitively with a single directory listing.
# Globbing '*.jpg' and '*.JPG' separately returns every file twice on
# case-insensitive filesystems (Windows, default macOS) and misses mixed-case
# names such as 'photo.Jpg'.
image_extensions = {'.jpg', '.jpeg', '.png'}
all_images = sorted(
    path
    for path in glob.glob(os.path.join(input_folder, '*'))
    if os.path.splitext(path)[1].lower() in image_extensions
)
# The list is sorted because glob returns entries in arbitrary, filesystem-
# dependent order; without a stable order the fixed seed below would not
# select the same images from one run/machine to the next.

random.seed(42)  # For reproducibility
image_files = random.sample(all_images, min(num_images, len(all_images)))

if not all_images:
    print(f"WARNING: no .jpg/.jpeg/.png images found in {input_folder}")
print(f"Selected {len(image_files)} random images from {input_folder}")

In [None]:
# Process images and detect popups
# For every selected image: run the detector, crop each detected box out of
# the original image, write the crop as a JPEG into the output folder, and
# record the box coordinates + confidence in `all_coordinates`.
output_folder = config['output_folder']
os.makedirs(output_folder, exist_ok=True)

all_coordinates = []
total_crops = 0
# One timestamp per run, embedded in every crop filename written by this cell.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

for i, img_path in enumerate(image_files):
    source_name = Path(img_path).name
    source_stem = Path(img_path).stem
    progress = f"[{i+1}/{len(image_files)}]"

    # Run detection
    results = model.predict(
        source=img_path,
        conf=config['conf_threshold'],
        iou=config['iou_threshold'],
        augment=config['augment'],
        verbose=False
    )

    if len(results) == 0 or results[0].boxes is None:
        print(f"{progress} {source_name}: No detections")
        continue

    result = results[0]
    img = result.orig_img
    boxes = result.boxes
    img_height, img_width = img.shape[:2]

    for j, box in enumerate(boxes):
        x1, y1, x2, y2 = (int(v) for v in box.xyxy[0].cpu().numpy())
        conf_score = float(box.conf[0])

        # Clamp to the image so a slightly out-of-range coordinate cannot turn
        # into a negative (wrap-around) slice index.
        x1, y1 = max(0, x1), max(0, y1)
        x2, y2 = min(img_width, x2), min(img_height, y2)

        # A box that collapses to zero width/height after integer truncation
        # yields an empty array, which cv2.imwrite rejects with an exception.
        if x2 <= x1 or y2 <= y1:
            print(f"{progress} {source_name}: box {j} is empty after truncation, skipped")
            continue

        # Crop
        crop = img[y1:y2, x1:x2]

        # Save crop
        # Ultralytics reads file-path sources with OpenCV, so `orig_img` is
        # already in BGR channel order -- exactly what cv2.imwrite expects.
        # Converting RGB->BGR here (as before) swapped red and blue in every
        # saved crop.
        filename = f"{source_stem}_box{j}_{timestamp}.jpg"
        filepath = os.path.join(output_folder, filename)
        if not cv2.imwrite(filepath, crop):
            # imwrite signals failure through its return value, not an
            # exception; do not record coordinates for a file that is missing.
            print(f"{progress} {source_name}: failed to write {filepath}, skipped")
            continue

        # Save coordinates
        all_coordinates.append({
            'filename': filename,
            'source_image': source_name,
            'x1': x1, 'y1': y1, 'x2': x2, 'y2': y2,
            'confidence': round(conf_score, 4)
        })
        total_crops += 1

    print(f"{progress} {source_name}: {len(boxes)} popups detected")

In [None]:
# Save coordinates JSON
# NOTE: the file name is fixed, so each run overwrites the coordinates written
# by the previous run into the same output folder (crop files, by contrast,
# carry a per-run timestamp in their names).
coords_file = os.path.join(output_folder, 'detection_coordinates.json')
with open(coords_file, 'w', encoding='utf-8') as f:
    json.dump(all_coordinates, f, indent=2)

# Run summary
separator = '=' * 50
print(f"\n{separator}")
print("DETECTION COMPLETE")
print(separator)
print(f"Images processed: {len(image_files)}")
print(f"Total crops: {total_crops}")
print(f"Output folder: {output_folder}")
print(f"Coordinates: {coords_file}")
# The check mark was previously mojibake ("âœ“": UTF-8 bytes decoded as cp1252).
print("\n✓ Run Cell 2 (S3 Upload) when ready!")