# Google Cloud Vision AI OCR — Interactive Tutorial

This notebook walks through the complete Vision AI OCR pipeline step by step.
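Run the cells in order to understand the core concepts and see live output; later cells reuse names defined in earlier ones (for example, `extractor` and `IMAGE_PATH` from section 2).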

**Prerequisites**
- A GCP project with the Vision API enabled
- `GOOGLE_APPLICATION_CREDENTIALS` set to the path of your service account key file
- Dependencies installed with `pip install -r ../requirements.txt`


## 1. Environment Setup

In [None]:
import os
import json
import sys
from pathlib import Path

# Add project root to path so the `src.*` imports used by later cells resolve.
# Guarded so that re-running this cell does not stack duplicate sys.path entries.
project_root = str(Path('..').resolve())
if project_root not in sys.path:
    sys.path.insert(0, project_root)

# Verify credentials: the variable must be set AND point at a regular file.
# `is_file()` is used instead of `exists()` so that a directory path is not
# mistakenly reported as a valid key file.
creds_path = os.environ.get('GOOGLE_APPLICATION_CREDENTIALS', '')
creds_ok = bool(creds_path) and Path(creds_path).is_file()
if creds_ok:
    print(f'✅ Credentials found: {creds_path}')
else:
    print('⚠️  GOOGLE_APPLICATION_CREDENTIALS not set or file not found.')
    print('    Set it with: export GOOGLE_APPLICATION_CREDENTIALS=/path/to/key.json')

## 2. Single Image Extraction

In [None]:
from src.ocr import DocumentExtractor

# A single extractor instance is created here; later cells refer to it by name.
extractor = DocumentExtractor()

# Replace with your own image path
IMAGE_PATH = '../samples/invoice.jpg'

if Path(IMAGE_PATH).exists():
    result = extractor.extract(IMAGE_PATH, language='en')

    # Preview is capped at the first 500 characters of the extracted text.
    print('=== Extracted Text ===')
    print(result['text'][:500])

    # Summary fields read straight from the result dict.
    print('\nWord count : {}'.format(result['word_count']))
    print('Confidence : {}'.format(result['confidence']))

    # Only the first five bounding-box entries are listed.
    print('\nFirst 5 bounding boxes:')
    for word_box in result['bounding_boxes'][:5]:
        print('  "{}" @ {}'.format(word_box['text'], word_box['vertices']))
else:
    # Nothing to extract: tell the user where to put a sample image.
    print('Sample file not found: {}'.format(IMAGE_PATH))
    print('Please add a test image to the samples/ directory.')

## 3. Document Text Detection (Dense Documents)

In [None]:
# Uses `extractor` and `IMAGE_PATH` defined in the single-image cell above.
if Path(IMAGE_PATH).exists():
    doc_result = extractor.extract_document(IMAGE_PATH, language='en')

    print('Pages detected : {}'.format(doc_result['page_count']))

    # One summary line per page, numbered from 1.
    print('\nPage metadata:')
    for page_no, page_meta in enumerate(doc_result['pages'], start=1):
        size = f'{page_meta["width"]}x{page_meta["height"]}px'
        blocks = f'{page_meta["block_count"]} blocks'
        confidence = f'confidence={page_meta["confidence"]:.2%}'
        print(f'  Page {page_no}: {size}, {blocks}, {confidence}')

    # Preview is limited to the first 300 characters of the document text.
    print('\nFirst 300 chars:')
    print(doc_result['text'][:300])

## 4. Handwriting Recognition

In [None]:
from src.handwriting import HandwritingExtractor

HANDWRITING_PATH = '../samples/handwriting.jpg'

if Path(HANDWRITING_PATH).exists():
    # The extractor is only constructed when there is a sample to process.
    hw_extractor = HandwritingExtractor()
    hw_result = hw_extractor.extract(HANDWRITING_PATH, language_hints=['en'])

    print('=== Handwriting Extraction ===')
    print(hw_result['text'])
    print('\nDetected language : {}'.format(hw_result['detected_language']))
    # Average confidence is rendered as a percentage with two decimals.
    print('Average confidence: {:.2%}'.format(hw_result['average_confidence']))
else:
    # No sample available: point the user at the expected location.
    print('Add a handwriting sample to: {}'.format(HANDWRITING_PATH))

## 5. Layout Analysis

In [None]:
from src.layout_analyzer import LayoutAnalyzer
import json

# Uses `IMAGE_PATH` defined in the single-image cell above.
if Path(IMAGE_PATH).exists():
    analyzer = LayoutAnalyzer()
    layout = analyzer.analyze(IMAGE_PATH)

    # Page dimensions and block count come directly from the layout object.
    print('Page size   : {} x {} px'.format(layout.page_width, layout.page_height))
    print('Block count : {}'.format(len(layout.blocks)))

    columns = analyzer.detect_columns(layout)
    print('Columns     : {}'.format(len(columns)))

    # List the first ten reading-order entries, each truncated to 80 characters
    # and numbered from 1 (right-aligned in a two-character field).
    print('\n=== Reading Order (first 10 blocks) ===')
    for rank, entry in enumerate(layout.reading_order[:10], start=1):
        print('  {:>2}. {}'.format(rank, entry[:80]))

## 6. Batch Processing

In [None]:
from src.batch_processor import BatchProcessor

INPUT_DIR = '../samples'
OUTPUT_DIR = '../results/tutorial'

# Process every file in INPUT_DIR with a pool of four workers.
processor = BatchProcessor(max_workers=4)
report = processor.process_directory(INPUT_DIR, OUTPUT_DIR)

# Headline counts taken from the returned report.
print('Processed : {} file(s)'.format(report['total']))
print('Succeeded : {}'.format(len(report['successful'])))
print('Failed    : {}'.format(len(report['failed'])))

# List each failure with its error, but only when there is at least one.
failures = report['failed']
if failures:
    print('\nFailed files:')
    for failure in failures:
        print('  {}: {}'.format(failure['file'], failure['error']))

## 7. Visualise Bounding Boxes

In [None]:
from PIL import Image, ImageDraw
import matplotlib.pyplot as plt

def draw_bounding_boxes(image_path: str, result: dict, max_boxes: int = 30) -> None:
    """Display an image with bounding boxes outlined in red.

    Args:
        image_path: Path of the image file to open.
        result: Mapping with a 'bounding_boxes' list. Each entry must provide
            a 'vertices' sequence whose items are indexable as (x, y).
        max_boxes: Upper bound on how many entries of the list are considered.

    Entries whose 'vertices' do not contain exactly four points are skipped.
    The figure is shown with matplotlib; nothing is returned.
    """
    # Open inside a context manager so the underlying file handle is released
    # promptly. convert() returns an independent in-memory copy, which stays
    # valid (and drawable) after the source image is closed.
    with Image.open(image_path) as source:
        img = source.convert('RGB')
    draw = ImageDraw.Draw(img)

    for box in result['bounding_boxes'][:max_boxes]:
        vertices = box['vertices']
        # Only quadrilaterals are drawn; anything else is silently ignored.
        if len(vertices) == 4:
            polygon = [(v[0], v[1]) for v in vertices]
            draw.polygon(polygon, outline='red')

    plt.figure(figsize=(14, 10))
    plt.imshow(img)
    plt.axis('off')
    plt.title(f'Bounding Boxes (first {max_boxes} words)')
    plt.tight_layout()
    plt.show()

# Re-extract the sample image and plot its bounding boxes.
# NOTE(review): unlike the call in section 2, no `language` argument is passed
# here - confirm whether that difference is intentional.
image_available = Path(IMAGE_PATH).exists()
if image_available:
    result = extractor.extract(IMAGE_PATH)
    draw_bounding_boxes(image_path=IMAGE_PATH, result=result)

## 8. Next Steps

- **GCS Integration**: See `integrations/gcs_loader.py` to process documents directly from Cloud Storage
- **BigQuery Export**: See `integrations/bigquery_export.py` to stream results to a data warehouse
- **Document AI**: See `integrations/document_ai_bridge.py` for structured form/invoice parsing
- **Deployment**: See `infrastructure/` for Docker, Cloud Run, and Terraform configurations
- **Cost Optimisation**: See `docs/pricing_optimization.md` for strategies to reduce API spend
