# S3 Upload - Cell 2
## Run this AFTER Cell 1 has generated the crops
This cell uploads the cropped images to S3 for OCR processing.

In [None]:
# Install dependencies (if needed)
!pip install boto3 -q

In [None]:
import os
import yaml
import glob
from pathlib import Path

import boto3

In [None]:
# Load configuration
# Read with an explicit UTF-8 encoding instead of the platform default, so the
# file parses the same way on every machine.
with open('config/config.yaml', 'r', encoding='utf-8') as f:
    config = yaml.safe_load(f)

# Fail early, with one readable message, if the file is empty or lacks any
# setting that this notebook reads.
required_keys = ('s3_bucket', 's3_prefix', 'aws_region', 'output_folder')
if not isinstance(config, dict):
    raise ValueError("config/config.yaml must contain a top-level mapping of settings")
missing_keys = [key for key in required_keys if key not in config]
if missing_keys:
    raise KeyError(f"config/config.yaml is missing required keys: {missing_keys}")

print("Configuration loaded:")
print(f"  S3 Bucket: {config['s3_bucket']}")
print(f"  S3 Prefix: {config['s3_prefix']}")
print(f"  AWS Region: {config['aws_region']}")

In [None]:
# Create the S3 client for the region named in the configuration
s3_client = boto3.client(
    's3',
    region_name=config['aws_region'],
)

# Pull out the settings that the following cells refer to by name
s3_bucket, s3_prefix, output_folder = (
    config[setting] for setting in ('s3_bucket', 's3_prefix', 'output_folder')
)

print(f"S3 client initialized for bucket: {s3_bucket}")

In [None]:
# Get files to upload
# Extensions are compared case-insensitively so crops saved as e.g. ".JPG" are
# not silently skipped on case-sensitive filesystems. Only regular files are
# kept, and the list is sorted so the upload order is the same on every run.
image_extensions = {'.jpg', '.jpeg', '.png'}
crop_dir = Path(output_folder)

if crop_dir.is_dir():
    files_to_upload = sorted(
        path for path in crop_dir.iterdir()
        if path.is_file() and path.suffix.lower() in image_extensions
    )
else:
    # A missing folder still yields an empty list (nothing to upload), but say
    # so explicitly instead of only reporting "0 files".
    print(f"WARNING: output folder not found: {crop_dir}")
    files_to_upload = []

print(f"Found {len(files_to_upload)} files to upload")

In [None]:
# Upload to S3
# Content-Type is chosen per file extension so PNG crops are not stored with a
# JPEG media type.
content_types = {
    '.jpg': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.png': 'image/png',
}

uploaded = 0
failed = 0

for file_path in files_to_upload:
    try:
        # NOTE(review): the prefix is concatenated as-is, so it presumably ends
        # with "/" in the config -- confirm.
        s3_key = f"{s3_prefix}{file_path.name}"
        content_type = content_types.get(
            file_path.suffix.lower(), 'application/octet-stream'
        )

        s3_client.upload_file(
            str(file_path),
            s3_bucket,
            s3_key,
            ExtraArgs={'ContentType': content_type}
        )

        uploaded += 1
        # Report progress after every tenth successful upload.
        if uploaded % 10 == 0:
            print(f"Uploaded {uploaded}/{len(files_to_upload)}...")

    except Exception as e:
        # Best-effort: count the failure, report it, and continue with the
        # remaining files.
        failed += 1
        print(f"ERROR: {file_path.name}: {e}")

In [None]:
# Summarize the upload run: counts of successes and failures plus the S3 target.
print(f"\n{'='*50}")
print("UPLOAD COMPLETE")
print(f"{'='*50}")
print(f"Successfully uploaded: {uploaded}")
print(f"Failed: {failed}")
print(f"S3 Location: s3://{s3_bucket}/{s3_prefix}")

# Only claim readiness when every file made it to S3. The check mark was
# previously a mis-encoded byte sequence and is restored here.
if failed == 0:
    print("\n✓ Ready for OCR processing!")
else:
    print(f"\nWARNING: {failed} file(s) failed to upload; re-run the upload cell before OCR processing.")