# 🪪 ID Card OCR — PaddleOCR + OmniMRZ

This notebook combines:
- **PaddleOCR** – general-purpose text detection & recognition on the card
- **OmniMRZ** – structured parsing of the Machine Readable Zone (MRZ)

Supported documents: passports, national ID cards, visas, driver's licences with an MRZ strip.

## 1 · Install Dependencies

In [None]:
# Run once ‚Äî restart the kernel after installation
!pip install paddlepaddle paddleocr omnimrz opencv-python pillow numpy matplotlib -q

# ‚îÄ‚îÄ GPU users: replace paddlepaddle with: ‚îÄ‚îÄ
# !pip install paddlepaddle-gpu paddleocr omnimrz opencv-python pillow numpy matplotlib -q

## 2 · Imports

In [None]:
import cv2
import json
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from pathlib import Path
from PIL import Image
from dataclasses import dataclass, field
from typing import Optional

from paddleocr import PaddleOCR
from omnimrz import MRZReader

# Status line for the notebook user (check-mark glyph restored from mis-encoded bytes).
print("✅ All imports successful")

## 3 · Data Classes

In [None]:
@dataclass
class MRZData:
    """Structured fields read from a Machine Readable Zone.

    Every field has a default, so ``MRZData()`` represents "no MRZ found":
    no raw lines, all text fields ``None`` and ``valid`` set to ``False``.
    """

    # Raw MRZ text lines; each instance gets its own fresh list.
    raw_lines: list[str] = field(default_factory=list)

    # Parsed text fields; None when the field was not provided.
    document_type: Optional[str] = None
    country_code: Optional[str] = None
    surname: Optional[str] = None
    given_names: Optional[str] = None
    document_number: Optional[str] = None
    nationality: Optional[str] = None
    date_of_birth: Optional[str] = None
    sex: Optional[str] = None
    expiry_date: Optional[str] = None
    personal_number: Optional[str] = None

    # Validity flag reported by the MRZ reader.
    valid: bool = False


@dataclass
class IDCardResult:
    """Everything extracted from a single ID-card image."""

    # Path of the processed image.
    image_path: str
    # PaddleOCR detections with bbox + confidence.
    raw_ocr_text: list[dict]
    # All OCR text joined top-to-bottom.
    full_text: str
    # Structured MRZ fields.
    mrz: MRZData
    # Average OCR confidence; stays 0.0 unless supplied.
    confidence_avg: float = 0.0

# Status line for the notebook user (check-mark glyph restored from mis-encoded bytes).
print("✅ Data classes defined")

## 4 · Image Preprocessing

In [None]:
def preprocess_image(image_path: str) -> np.ndarray:
    """Read an image from disk and prepare it for OCR.

    Steps, in order: upscale so the longer side reaches 1000 px (only when it
    is smaller), apply colour non-local-means denoising, then sharpen with a
    3x3 kernel.

    Args:
        image_path: Path of the image file to load.

    Returns:
        The processed image as returned by OpenCV.

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not load the file.
    """
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"Cannot load image: {image_path}")

    # Enlarge small inputs so the longer side is 1000 px.
    longest_side = max(image.shape[:2])
    if longest_side < 1000:
        factor = 1000 / longest_side
        image = cv2.resize(
            image,
            None,
            fx=factor,
            fy=factor,
            interpolation=cv2.INTER_CUBIC,
        )

    denoised = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)

    # Sharpening kernel: centre weight 5, the four direct neighbours -1.
    sharpen_kernel = np.array([
        [0, -1, 0],
        [-1, 5, -1],
        [0, -1, 0],
    ])
    return cv2.filter2D(denoised, -1, sharpen_kernel)


def extract_mrz_region(img: np.ndarray, fraction: float = 0.25) -> np.ndarray:
    """Crop the bottom strip of the card — the typical MRZ location.

    Args:
        img: Image array whose first two axes are (height, width).
        fraction: Share of the image height to keep, measured from the bottom
            edge. The default of 0.25 keeps the bottom 25%.

    Returns:
        The rows of ``img`` from ``int(height * (1 - fraction))`` to the
        bottom, across the full width.

    Raises:
        ValueError: If ``fraction`` is not in the interval (0, 1].
    """
    if not 0.0 < fraction <= 1.0:
        raise ValueError(f"fraction must be in (0, 1], got {fraction!r}")

    height, width = img.shape[:2]
    # 1 - 0.25 is exactly 0.75, so the default crop matches the fixed 25% rule.
    top = int(height * (1.0 - fraction))
    return img[top:height, 0:width]


def show_image(img: np.ndarray, title: str = "") -> None:
    """Render a BGR OpenCV image inline with Matplotlib.

    Args:
        img: Image in OpenCV's BGR channel order.
        title: Optional caption; no title is drawn when it is empty.
    """
    # Convert BGR to RGB before handing the image to Matplotlib.
    rgb_frame = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    plt.figure(figsize=(10, 6))
    plt.imshow(rgb_frame)
    plt.axis("off")

    if title:
        plt.title(title, fontsize=13)

    plt.tight_layout()
    plt.show()

# Status line for the notebook user (check-mark glyph restored from mis-encoded bytes).
print("✅ Preprocessing helpers defined")

## 5 · Initialise OCR Engines

> Models are downloaded automatically on first run (~200 MB total). This cell may take a minute.

In [None]:
# ── PaddleOCR ────────────────────────────────────────────────────────────────
# NOTE(review): use_gpu / show_log are PaddleOCR 2.x constructor arguments;
# confirm the installed paddleocr version accepts them.
paddle_ocr = PaddleOCR(
    use_angle_cls=True,   # auto-rotate tilted text
    lang="en",            # change to your document language if needed
    use_gpu=False,        # set True if CUDA is available
    det_db_thresh=0.3,    # lower = detect more (potentially noisy) text
    rec_batch_num=6,
    show_log=False,
)

# ── OmniMRZ ──────────────────────────────────────────────────────────────────
mrz_reader = MRZReader()

# Status line for the notebook user (check-mark glyph restored from mis-encoded bytes).
print("✅ PaddleOCR and OmniMRZ ready")

## 6 · Core OCR Functions

In [None]:
def run_paddle_ocr(img: np.ndarray) -> tuple[list[dict], float]:
    """Run the module-level PaddleOCR engine on ``img``.

    Only the first entry of the OCR result is used.

    Returns:
        A ``(detections, avg_conf)`` pair: ``detections`` is a list of
        ``{"text", "confidence", "bbox"}`` dicts (confidence rounded to four
        decimals) and ``avg_conf`` is the mean of the unrounded confidences,
        or ``0.0`` when nothing was detected.
    """
    pages = paddle_ocr.ocr(img, cls=True)
    first_page = pages[0] if pages else None

    # An empty result, or a first entry that is None/empty, means no text.
    if not first_page:
        return [], 0.0

    detections = [
        {
            "text": text,
            "confidence": round(float(score), 4),
            "bbox": box,   # [[x1,y1],[x2,y2],[x3,y3],[x4,y4]]
        }
        for box, (text, score) in first_page
    ]
    raw_scores = [score for _box, (_text, score) in first_page]
    return detections, float(np.mean(raw_scores))


def run_mrz(img: np.ndarray) -> MRZData:
    """Read the MRZ from ``img``, trying the full image before the bottom strip.

    Order of preference:
      1. the full-image parse, when the reader marks it valid;
      2. the bottom-strip parse, when the reader returns anything for it;
      3. the unvalidated full-image parse, so fields that were read are not
         thrown away just because the strip yielded nothing;
      4. an empty ``MRZData()`` when neither attempt produced a result.

    Args:
        img: BGR image (it is converted to RGB before being handed to the reader).

    Returns:
        A populated ``MRZData``, or an empty one when no MRZ was read.
    """

    def _parse(image: np.ndarray) -> Optional[MRZData]:
        """Run the MRZ reader on one image; return None when it reports nothing."""
        pil_img = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        res = mrz_reader.read(pil_img)
        if res is None:
            return None
        # NOTE(review): assumes the reader result exposes .fields (dict-like),
        # .mrz_lines and .valid — confirm against the installed omnimrz version.
        f = res.fields or {}
        return MRZData(
            raw_lines       = res.mrz_lines or [],
            valid           = res.valid,
            document_type   = f.get("document_type"),
            country_code    = f.get("country"),
            surname         = f.get("surname"),
            given_names     = f.get("given_names"),
            document_number = f.get("document_number"),
            nationality     = f.get("nationality"),
            date_of_birth   = f.get("birth_date"),
            sex             = f.get("sex"),
            expiry_date     = f.get("expiry_date"),
            personal_number = f.get("personal_number"),
        )

    # Try full image
    full_result = _parse(img)
    if full_result is not None and full_result.valid:
        return full_result

    # Fallback: bottom strip
    strip_result = _parse(extract_mrz_region(img))
    if strip_result is not None:
        return strip_result

    # The strip gave nothing: keep whatever the full-image pass read, even
    # though it was not marked valid, instead of returning an empty record.
    return full_result if full_result is not None else MRZData()


def process_id_card(image_path: str) -> IDCardResult:
    """Run the full pipeline: preprocess, PaddleOCR, OmniMRZ, IDCardResult.

    Args:
        image_path: Path of the ID-card image to process.

    Returns:
        An ``IDCardResult`` holding the detections in OCR order, the text
        joined top-to-bottom, the MRZ data and the average confidence.
    """

    def _top_edge(det: dict):
        # y coordinate of the first bbox corner
        return det["bbox"][0][1]

    prepared = preprocess_image(image_path)
    detections, avg_conf = run_paddle_ocr(prepared)

    # Order lines top-to-bottom so the joined text reads naturally.
    ordered = sorted(detections, key=_top_edge)
    text_lines = [det["text"] for det in ordered]

    mrz_data = run_mrz(prepared)

    return IDCardResult(
        image_path=image_path,
        raw_ocr_text=detections,
        full_text="\n".join(text_lines),
        mrz=mrz_data,
        confidence_avg=avg_conf,
    )

# Status line for the notebook user (check-mark glyph restored from mis-encoded bytes).
print("✅ Core OCR functions defined")

## 7 · Visualisation Helpers

In [None]:
def draw_ocr_boxes(img: np.ndarray, detections: list[dict],
                   conf_threshold: float = 0.5) -> None:
    """Draw PaddleOCR bounding boxes and labels on the image.

    Args:
        img: BGR image the detections refer to.
        detections: Dicts with ``"text"``, ``"confidence"`` and ``"bbox"``
            keys; ``"bbox"`` is a list of (x, y) corner points.
        conf_threshold: Detections with a confidence below this value are
            not drawn.
    """
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    _, ax = plt.subplots(figsize=(12, 8))
    ax.imshow(rgb)

    for det in detections:
        if det["confidence"] < conf_threshold:
            continue
        pts = np.array(det["bbox"], dtype=np.float32)
        poly = patches.Polygon(pts, closed=True,
                               edgecolor="#00aaff", linewidth=1.5,
                               facecolor="none")
        ax.add_patch(poly)
        # Label sits 4 px above the first corner of the box.
        ax.text(pts[0][0], pts[0][1] - 4,
                f"{det['text']}  ({det['confidence']:.0%})",
                color="#00aaff", fontsize=7,
                bbox=dict(boxstyle="round,pad=0.15",
                          facecolor="black", alpha=0.55, edgecolor="none"))

    ax.axis("off")
    # "≥" restored: the title previously contained mis-encoded bytes.
    ax.set_title(f"PaddleOCR detections  (threshold ≥ {conf_threshold:.0%})",
                 fontsize=12)
    plt.tight_layout()
    plt.show()


def print_result(result: IDCardResult) -> None:
    """Pretty-print the full IDCardResult to stdout.

    Prints the file name and average confidence, every detection (sorted
    top-to-bottom) with a 20-cell confidence bar, the joined text, the raw
    MRZ lines and each MRZ field that has a value.

    Args:
        result: The pipeline output to display.
    """
    # Box-drawing, bar and emoji glyphs below were restored from mis-encoded bytes.
    sep = "─" * 58
    print(f"\n{sep}")
    print(f"  FILE : {Path(result.image_path).name}")
    print(f"  OCR avg confidence : {result.confidence_avg:.1%}")
    print(sep)

    print("\n📝 All OCR Detections:")
    for det in sorted(result.raw_ocr_text, key=lambda d: d["bbox"][0][1]):
        # One full block per 5% of confidence, padded to 20 columns.
        bar = "█" * int(det["confidence"] * 20)
        print(f"  [{det['confidence']:.2f}] {bar:<20}  {det['text']}")

    print(f"\n📄 Full Extracted Text:\n{result.full_text}")

    m = result.mrz
    print(f"\n🔖 MRZ Data (checksum valid = {m.valid}):")
    for ln in m.raw_lines or ():
        print(f"   {ln}")
    fields = [
        ("Document type",   m.document_type),
        ("Country",         m.country_code),
        ("Surname",         m.surname),
        ("Given names",     m.given_names),
        ("Document No.",    m.document_number),
        ("Nationality",     m.nationality),
        ("Date of birth",   m.date_of_birth),
        ("Sex",             m.sex),
        ("Expiry date",     m.expiry_date),
        ("Personal number", m.personal_number),
    ]
    # Only fields that actually carry a value are listed.
    for label, val in fields:
        if val:
            print(f"   {label:<18}: {val}")
    print(sep)

# Status line for the notebook user (check-mark glyph restored from mis-encoded bytes).
print("✅ Visualisation helpers defined")

## 8 · Run on a Single Image

Set `IMAGE_PATH` to the path of your ID card image.

In [None]:
# ── ✏️  Set your image path here ─────────────────────────────────────────────
IMAGE_PATH = "sample_id.jpg"   # ← change to your file
# ─────────────────────────────────────────────────────────────────────────────

# Preprocess once and keep the result in `img`; later cells reuse it.
img = preprocess_image(IMAGE_PATH)
show_image(img, title="Preprocessed ID Card")

In [None]:
# Run the full pipeline (this preprocesses IMAGE_PATH again internally)
result = process_id_card(IMAGE_PATH)

# Show bounding boxes on the preprocessed image from the previous cell
draw_ocr_boxes(img, result.raw_ocr_text, conf_threshold=0.5)

# Print structured results
print_result(result)

## 9 · Inspect the MRZ Strip

In [None]:
# Crop the strip from the preprocessed image (`img`) and display it.
mrz_strip = extract_mrz_region(img)
show_image(mrz_strip, title="MRZ Region (bottom 25%)")

## 10 · Batch Processing

In [None]:
# ── ✏️  List all images to process ───────────────────────────────────────────
IMAGE_PATHS = [
    "sample_id.jpg",
    # "passport.png",
    # "drivers_licence.jpg",
]
# ─────────────────────────────────────────────────────────────────────────────

# Process each image independently; a failure on one file is reported and
# the loop moves on, so batch_results only holds the successful runs.
batch_results = []
for path in IMAGE_PATHS:
    try:
        r = process_id_card(path)
        print_result(r)
        batch_results.append(r)
    except Exception as e:
        # Deliberately broad: this is the per-file boundary of the batch.
        # Warning glyph restored from mis-encoded bytes.
        print(f"⚠️  {path}: {e}")

print(f"\n✅ Processed {len(batch_results)} / {len(IMAGE_PATHS)} images")

## 11 · Export Results to JSON

In [None]:
OUTPUT_JSON = "ocr_results.json"

# Flatten each IDCardResult into plain JSON-serialisable dicts.
# Bounding boxes are left out: only text and confidence are exported.
export = []
for r in batch_results:
    export.append({
        "image":            r.image_path,
        "confidence_avg":   round(r.confidence_avg, 4),
        "full_text":        r.full_text,
        "ocr_detections":  [
            {"text": d["text"], "confidence": d["confidence"]}
            for d in r.raw_ocr_text
        ],
        "mrz": {
            "valid":           r.mrz.valid,
            "raw_lines":       r.mrz.raw_lines,
            "document_type":   r.mrz.document_type,
            "country":         r.mrz.country_code,
            "surname":         r.mrz.surname,
            "given_names":     r.mrz.given_names,
            "document_number": r.mrz.document_number,
            "nationality":     r.mrz.nationality,
            "date_of_birth":   r.mrz.date_of_birth,
            "sex":             r.mrz.sex,
            "expiry_date":     r.mrz.expiry_date,
            "personal_number": r.mrz.personal_number,
        },
    })

# ensure_ascii=False writes non-ASCII text verbatim, so the file encoding must
# be fixed to UTF-8 instead of depending on the platform default.
with open(OUTPUT_JSON, "w", encoding="utf-8") as f:
    json.dump(export, f, indent=2, ensure_ascii=False)

print(f"✅ Saved → {OUTPUT_JSON}")
# Preview the first record with the same settings used for the file.
print(json.dumps(export[0] if export else {}, indent=2, ensure_ascii=False))