# Complete OCR Pipeline

This notebook demonstrates the complete end-to-end OCR pipeline using CRAFT for text detection and CRNN for text recognition.

## Pipeline Overview:
1. **Image Preprocessing** - Convert to grayscale, blur, and threshold
2. **Text Detection** - CRAFT detects text regions
3. **Text Cropping** - Extract detected regions
4. **Text Recognition** - CRNN recognizes characters
5. **Result Visualization** - Display and save results

# Complete OCR Pipeline

This notebook demonstrates the complete end-to-end OCR pipeline.

In [None]:
import os
import cv2
import numpy as np
import json
from pathlib import Path
import easyocr
import matplotlib.pyplot as plt
from datetime import datetime

In [None]:
def save_results_to_json(results, filename="ocr_results.json"):
    """Write ``results`` as indented JSON inside the ``processed_data`` folder.

    Args:
        results: JSON-serializable object (for example the dict produced by
            the OCR extraction step).
        filename: Name of the file to create inside ``processed_data``.

    Returns:
        Path: Location of the file that was written.
    """
    folder = Path('processed_data')
    # The folder may already exist from a previous run; that is not an error.
    folder.mkdir(exist_ok=True)
    destination = folder / filename

    with destination.open('w') as handle:
        json.dump(results, handle, indent=2)

    print(f"‚úÖ Results saved to: {destination}")
    return destination

def display_results_table(results):
    """Print the recognised text followed by a per-region table.

    Args:
        results: Mapping with a ``'full_text'`` string and a ``'detections'``
            list whose items carry ``'text'`` and ``'confidence'`` entries.
    """
    rule = "=" * 80
    row_template = "{:<5} {:<30} {:<15}"

    print("\n" + rule)
    print("OCR DETECTION RESULTS")
    print(rule)
    print(f"\nüìù FULL TEXT:\n{results['full_text']}\n")

    detections = results['detections']
    print(f"\nüîç DETAILED DETECTIONS ({len(detections)} regions):\n")

    print(row_template.format('No.', 'Text', 'Confidence'))
    print("-" * 50)

    for number, entry in enumerate(detections, start=1):
        shown = entry['text']
        # Keep the text column narrow: long strings are cut to 25 chars plus an ellipsis.
        if len(shown) > 25:
            shown = shown[:25] + "..."
        percent = f"{entry['confidence']:.2%}"
        print(row_template.format(number, shown, percent))

    print(rule + "\n")

# Progress marker: printed once the result helpers defined in this cell exist.
print("‚úÖ Results functions loaded")

## 5. Results Management

In [None]:
# Check for sample images in processed_data/raw.
# Extensions are compared case-insensitively so files such as "SCAN.JPG" are
# picked up too, and the listing is sorted so the preview below is the same on
# every run and platform. A missing folder is treated as "no images".
IMAGE_SUFFIXES = {'.jpg', '.jpeg', '.png'}

raw_dir = Path('processed_data/raw')
if raw_dir.is_dir():
    image_files = sorted(
        entry for entry in raw_dir.iterdir()
        if entry.is_file() and entry.suffix.lower() in IMAGE_SUFFIXES
    )
else:
    image_files = []

if image_files:
    print(f"Found {len(image_files)} image(s) in {raw_dir}")
    print("Sample images:", [f.name for f in image_files[:3]])
else:
    print(f"‚ö†Ô∏è  No images found in {raw_dir}")
    print("üì§ Please add images to 'processed_data/raw/' to process")

## 4. Process Sample Image

In [None]:
def extract_text_with_ocr(image_path, reader):
    """
    Extract text from image using EasyOCR (CRAFT + CRNN)

    Args:
        image_path: Path to the image file (``str`` or ``pathlib.Path``)
        reader: EasyOCR reader object; only ``reader.readtext(image)`` is
            used and it must yield ``(bbox, text, confidence)`` triples

    Returns:
        dict: ``image_path`` (str), ``full_text`` (detected texts joined by
        single spaces), ``detections`` (list of dicts with ``text``,
        ``confidence`` and integer ``bbox`` points) and an ISO ``timestamp``.
        ``None`` is returned when the image cannot be read.
    """
    # Work with a plain string: a pathlib.Path stored in the result dict would
    # make json.dump() fail later, since Path objects are not JSON serializable.
    image_path = str(image_path)

    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Could not read image {image_path}")
        return None

    print(f"Processing: {image_path}")
    results = reader.readtext(image)

    extracted_data = {
        'image_path': image_path,
        'full_text': '',
        'detections': [],
        'timestamp': datetime.now().isoformat()
    }

    for bbox, text, confidence in results:
        extracted_data['detections'].append({
            'text': text,
            # Plain float / nested int lists keep the result JSON-friendly.
            'confidence': float(confidence),
            'bbox': np.array(bbox, dtype=np.int32).tolist()
        })

    detections = extracted_data['detections']
    extracted_data['full_text'] = ' '.join(d['text'] for d in detections)

    print(f"  üìù Detected {len(detections)} text regions")
    if detections:
        mean_confidence = np.mean([d['confidence'] for d in detections])
        print(f"  üìä Average confidence: {mean_confidence:.2%}")
    else:
        # np.mean([]) would emit a RuntimeWarning and print "nan%".
        print("  üìä Average confidence: n/a")

    return extracted_data

def draw_detections(image_path, results):
    """
    Draw bounding boxes and text labels on image

    Args:
        image_path: Path to the image file (``str`` or ``pathlib.Path``)
        results: Results from extract_text_with_ocr; only the ``'detections'``
            list (items with ``'bbox'``, ``'text'``, ``'confidence'``) is read

    Returns:
        ndarray: Image with drawn bounding boxes, or ``None`` when the image
        cannot be read (an error line is printed in that case)
    """
    image_with_boxes = cv2.imread(str(image_path))
    if image_with_boxes is None:
        # Same failure handling as the other image helpers in this notebook.
        print(f"Error: Could not read image {image_path}")
        return None

    green = (0, 255, 0)
    for detection in results['detections']:
        bbox = np.array(detection['bbox'], dtype=np.int32)

        # Draw bounding box as a closed polygon
        cv2.polylines(image_with_boxes, [bbox], True, green, 2)

        # Put text label at the first corner; plain Python ints are passed
        # because numpy scalars are not accepted as point coordinates by
        # every OpenCV build.
        label = f"{detection['text']} ({detection['confidence']:.2f})"
        origin = tuple(int(value) for value in bbox[0])
        cv2.putText(image_with_boxes, label, origin,
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, green, 2)

    return image_with_boxes

def visualize_results(image_path, results):
    """Display original image with detections

    Shows two panels side by side: the untouched image and the copy annotated
    by ``draw_detections``.

    Args:
        image_path: Path to the image file (``str`` or ``pathlib.Path``)
        results: Results from extract_text_with_ocr (its ``'detections'``
            list is drawn and counted in the panel title)

    Returns:
        None. When the image cannot be read an error line is printed and no
        figure is created.
    """
    image = cv2.imread(str(image_path))
    if image is None:
        # Without this guard cv2.cvtColor below would fail on None.
        print(f"Error: Could not read image {image_path}")
        return

    image_with_boxes = draw_detections(image_path, results)

    fig, axes = plt.subplots(1, 2, figsize=(15, 6))

    # OpenCV loads BGR; matplotlib expects RGB.
    axes[0].imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    axes[0].set_title("Original Image")
    axes[0].axis('off')

    axes[1].imshow(cv2.cvtColor(image_with_boxes, cv2.COLOR_BGR2RGB))
    axes[1].set_title(f"Detections ({len(results['detections'])} regions)")
    axes[1].axis('off')

    plt.tight_layout()
    plt.show()

# Progress marker: printed once the OCR and drawing helpers defined in this cell exist.
print("‚úÖ OCR functions loaded")

## 3. Text Detection and Recognition Functions

In [None]:
# Create the single EasyOCR reader that the extraction helper receives as its
# ``reader`` argument; English is the only language requested.
ocr_languages = ['en']

print("Loading EasyOCR reader (CRAFT + CRNN)...")
reader = easyocr.Reader(ocr_languages)
print("‚úÖ OCR Reader loaded successfully")

## 2. Initialize EasyOCR (CRAFT + CRNN)

In [None]:
def preprocess_image(image_path, blur_kernel=(5, 5), threshold=150):
    """
    Preprocess image: convert to grayscale, apply blur, and threshold

    Args:
        image_path: Path to the image file (``str`` or ``pathlib.Path``)
        blur_kernel: ``(width, height)`` of the Gaussian kernel handed to
            ``cv2.GaussianBlur``; OpenCV expects odd, positive sizes
        threshold: Cut-off handed to ``cv2.threshold`` with ``THRESH_BINARY``
            and a maximum value of 255

    Returns:
        tuple: (original, gray, blurred, thresh); four ``None`` values when
        the image cannot be read
    """
    # Read image
    image = cv2.imread(str(image_path))
    if image is None:
        print(f"Error: Could not read image {image_path}")
        return None, None, None, None

    # Convert to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Apply Gaussian blur to remove noise (sigma 0 lets OpenCV derive it from the kernel size)
    blurred = cv2.GaussianBlur(gray, tuple(blur_kernel), 0)

    # Apply thresholding to enhance text; the first return value is the
    # threshold that was used and is not needed here
    _, thresh = cv2.threshold(blurred, threshold, 255, cv2.THRESH_BINARY)

    return image, gray, blurred, thresh

def display_preprocessing_steps(image_path):
    """Show the four preprocessing stages of one image in a 2x2 grid.

    Args:
        image_path: Path to the image file; nothing is drawn when
            ``preprocess_image`` reports it as unreadable.
    """
    original, gray, blurred, thresh = preprocess_image(image_path)
    if original is None:
        return

    fig, axes = plt.subplots(2, 2, figsize=(12, 10))

    # (axis, picture, title, extra imshow options) in reading order.
    # Only the colour original needs a BGR -> RGB conversion; the three
    # single-channel stages are rendered with a gray colormap.
    panels = [
        (axes[0, 0], cv2.cvtColor(original, cv2.COLOR_BGR2RGB), "Original Image", {}),
        (axes[0, 1], gray, "Grayscale", {'cmap': 'gray'}),
        (axes[1, 0], blurred, "Gaussian Blur", {'cmap': 'gray'}),
        (axes[1, 1], thresh, "Thresholded", {'cmap': 'gray'}),
    ]
    for axis, picture, title, options in panels:
        axis.imshow(picture, **options)
        axis.set_title(title)
        axis.axis('off')

    plt.tight_layout()
    plt.show()

# Progress marker: printed once the preprocessing helpers defined in this cell exist.
print("‚úÖ Preprocessing functions loaded")

## 1. Image Preprocessing Functions

In [3]:
# Binarise every image in processed_data/raw and store the result under
# processed_data/train with the same file name.
raw_dir = "processed_data/raw"
train_dir = "processed_data/train"

os.makedirs(train_dir, exist_ok=True)

for img_name in os.listdir(raw_dir):
    img_path = os.path.join(raw_dir, img_name)
    # preprocess_image returns (original, gray, blurred, thresh); only the
    # thresholded image is saved, because the contour-based region detection
    # that follows works on a binary image.
    _original, _gray, _blurred, processed = preprocess_image(img_path)
    if processed is None:
        # Unreadable / non-image file: preprocess_image already printed the error.
        continue
    cv2.imwrite(os.path.join(train_dir, img_name), processed)

print("Preprocessing done!")


Preprocessing done!


In [4]:
def detect_text_regions(image, min_width=30, min_height=10):
    """Return bounding boxes of the outer contours found in ``image``.

    Args:
        image: Single-channel image handed to ``cv2.findContours``.
            NOTE(review): OpenCV treats non-zero pixels as foreground, so this
            presumably expects light text on a dark background — confirm
            against the thresholding polarity used upstream.
        min_width: Boxes must be strictly wider than this many pixels.
        min_height: Boxes must be strictly taller than this many pixels.

    Returns:
        list: ``(x, y, w, h)`` tuples, one per contour that passes the size
        filter, in the order the contours were reported.
    """
    # OpenCV 3 returns (image, contours, hierarchy) while OpenCV 4 returns
    # (contours, hierarchy); the contours are second-to-last in both.
    contours = cv2.findContours(image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    boxes = []
    for cnt in contours:
        x, y, w, h = cv2.boundingRect(cnt)
        if w > min_width and h > min_height:   # filter noise
            boxes.append((x, y, w, h))
    return boxes


In [5]:
# Cut every detected region out of the preprocessed images and store each one
# as "<image stem>_<region index>.jpg" under processed_data/crops.
crop_dir = "processed_data/crops"
os.makedirs(crop_dir, exist_ok=True)

for img_name in os.listdir(train_dir):
    img_path = os.path.join(train_dir, img_name)
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # Stray non-image files must not abort the whole run.
        print(f"Error: Could not read image {img_path}")
        continue
    boxes = detect_text_regions(img)

    # splitext drops only the final extension, so "scan.v2.jpg" keeps the
    # stem "scan.v2" instead of being truncated to "scan".
    stem = os.path.splitext(img_name)[0]

    for i, (x, y, w, h) in enumerate(boxes):
        crop = img[y:y+h, x:x+w]
        crop_name = f"{stem}_{i}.jpg"
        cv2.imwrite(os.path.join(crop_dir, crop_name), crop)

print("Cropping done!")


Cropping done!


In [6]:
# Map every crop file name to the same stand-in label and store the mapping
# as JSON.
labels = dict.fromkeys(os.listdir(crop_dir), "TEXT")   # placeholder label for now

labels_path = "processed_data/labels.json"
with open(labels_path, "w") as labels_file:
    json.dump(labels, labels_file, indent=4)

print("labels.json created!")


labels.json created!


In [7]:
pip install pytesseract pillow

Collecting pytesseract
  Using cached pytesseract-0.3.13-py3-none-any.whl.metadata (11 kB)
Using cached pytesseract-0.3.13-py3-none-any.whl (14 kB)
Installing collected packages: pytesseract
Successfully installed pytesseract-0.3.13
Note: you may need to restart the kernel to use updated packages.


In [8]:
import pytesseract
from PIL import Image
import os


In [9]:
# Run Tesseract on every crop and print "<file name> -> <recognised text>".
crop_dir = "processed_data/crops"

for img_name in os.listdir(crop_dir):
    img_path = os.path.join(crop_dir, img_name)
    # The with-block closes each file as soon as it has been recognised, so
    # file handles do not pile up while looping over many crops.
    with Image.open(img_path) as img:
        text = pytesseract.image_to_string(img)
    print(f"{img_name} ‚Üí {text.strip()}")
