# Text Detection Comparison with MMOCR

This notebook uses [MMOCR](https://github.com/open-mmlab/mmocr) to run and compare three text detectors—**CRAFT**, **DBNet**, and **PSENet**—in pure Python, using VS Code on Windows.

Steps:
1. Install dependencies
2. Set up paths & imports
3. Utility functions
4. Initialize MMOCR detectors
5. Inference & qualitative comparison
6. Save side-by-side canvases



In [8]:
# 1. Install required packages (run once)
# Uncomment and run if needed:
# %pip install mmocr gevent-websocket munch anyconfig polygon3
# %pip install torch torchvision torchaudio

# 2. Imports & Global Paths
import os
import cv2
import numpy as np
import pandas as pd
from glob import glob
from PIL import Image
# MMOCR 1.x inferencer API (the `mmocr.apis.inferencers` package belongs to the 1.x line, not 0.x)
# For pure text detection
from mmocr.apis.inferencers.textdet_inferencer import TextDetInferencer

# Or for end‐to‐end spot+recognize
# from mmocr.apis.inferencers.textspot_inferencer import TextSpotInferencer  

# For MMOCR v1.x+, use:
# from mmocr.apis import MMOCRInferencer

# Data folders -- every path hangs off the directory the notebook is run from.
base_dir = os.getcwd()
data_dir, output_dir = (os.path.join(base_dir, leaf) for leaf in ('data', 'output'))
image_dir, gt_dir = (
    os.path.join(data_dir, leaf) for leaf in ('imgsForAllPages', 'annotations')
)
compare_dir = os.path.join(output_dir, 'comparisons')

# Creating the deepest output folder also creates `output_dir` on the way down.
os.makedirs(compare_dir, exist_ok=True)


## 3. Utility Functions

Load images and ground-truth polygons.


In [9]:

def load_image(path):
    """Read the image stored at ``path`` with OpenCV.

    Args:
        path: Location of the image file on disk.

    Returns:
        Whatever ``cv2.imread`` yields for ``path``: the decoded pixels as a
        numpy array in OpenCV's BGR channel order, or ``None`` when OpenCV
        cannot read the file (``imread`` does not raise in that case).
    """
    image_bgr = cv2.imread(path)
    return image_bgr


def load_ground_truth(path):
    """Load ground-truth polygons from a comma-separated TXT file.

    Each non-blank line holds the flattened vertex coordinates of one polygon
    (``x1,y1,x2,y2,...``; 8 values for a quadrilateral).

    Args:
        path: Location of the annotation file.

    Returns:
        A list of polygons, each a list of ``[x, y]`` float pairs. The list is
        empty when ``path`` does not exist.

    Raises:
        ValueError: If a non-blank line contains a non-numeric field or an odd
            number of coordinates.
    """
    if not os.path.exists(path):
        return []

    polys = []
    # 'utf-8-sig' drops a leading byte-order mark, which would otherwise be
    # glued to the first coordinate and make float() fail.
    with open(path, 'r', encoding='utf-8-sig') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Blank or trailing empty lines carry no polygon; float('')
                # would raise on them.
                continue
            coords = [float(token) for token in line.split(',')]
            polys.append(np.array(coords).reshape(-1, 2).tolist())
    return polys



## 4. Initialize MMOCR Detectors

We point to MMOCR's config files and local checkpoints.



In [16]:

# %pip install mmcv-full --find-links https://download.openmmlab.com/mmcv/dist/cpu/torch1.13.1/index.html
# NOTE(review): "No module named 'mmcv._ext'" means the installed mmcv has no
# compiled ops. The `mmocr.apis.inferencers` API used here belongs to MMOCR 1.x,
# whose installation guide asks for mmengine + mmcv 2.x + mmdet rather than the
# legacy `mmcv-full` wheel above -- confirm the versions against that guide.

# 1) Initialize one inferencer per model
# CRAFT stays disabled, so nothing below may reference `craft_inf` until this
# block is re-enabled.
# NOTE(review): MMOCR's textdet config zoo does not appear to include a CRAFT
# config, and `craft_mlt_25k.pth` looks like a checkpoint from the standalone
# CRAFT project -- confirm TextDetInferencer can load it before uncommenting.
# craft_inf = TextDetInferencer(
#     model='mmocr/configs/textdet/craft/craft_mlt_25k.py',
#     weights='CRAFTModel/weights/craft_mlt_25k.pth',
#     device='cpu'
# )
cfg_path = 'mmocr/configs/textdet/dbnetpp/dbnetpp_resnet50_fpnc_1200e_icdar2015.py'
dbnet_inf = TextDetInferencer(
    model=cfg_path,
    weights='DB/weights/dbnetpp_resnet50_fpnc_1200e_icdar2015_20221025_185550-013730aa.pth',
    device='cpu'
)

psenet_inf = TextDetInferencer(
    model='mmocr/configs/textdet/psenet/psenet_resnet50_fpnf_600e_ctw1500.py',
    weights='PSENet/weights/psenet_resnet50_fpnf_600e_ctw1500.pth',
    device='cpu'
)

# 2) Run inference with a single call
example_path = 'data/imgsForAllPages/image_1.png'
img = cv2.imread(example_path)
if img is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise FileNotFoundError(f'Could not read example image: {example_path}')

# The inferencer returns {'predictions': [...], 'visualization': [...]} with
# one prediction dict per input image; we passed a single image, so take [0].
# DBNet is used for the example because `craft_inf` is not defined above.
res = dbnet_inf(img)['predictions'][0]

polygons = res['polygons']        # flat [x1, y1, x2, y2, ...] list per detected instance
bboxes   = res.get('bboxes', [])  # only present when the model predicts boxes
scores   = res['scores']          # confidence per detection

# 3) Quick example of drawing them
vis = img.copy()
for poly in polygons:
    # reshape turns the flat coordinate list into the (N, 1, 2) int32 layout
    # that cv2.polylines expects.
    pts = np.array(poly, dtype=np.int32).reshape(-1,1,2)
    cv2.polylines(vis, [pts], True, (255,0,0), 2)
cv2.imwrite('dbnet_overlay.png', vis)


Loads checkpoint by local backend from path: DB/weights/dbnetpp_resnet50_fpnc_1200e_icdar2015_20221025_185550-013730aa.pth


ModuleNotFoundError: No module named 'mmcv._ext'


## 5. Sample Inference & Visualization

Run all three detectors on one example image and collect their polygons for the side-by-side comparison.



In [None]:
# Load one image
example_path = 'data/imgsForAllPages/image_1.png'
img = cv2.imread(example_path)
if img is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise FileNotFoundError(f'Could not read example image: {example_path}')


def to_polys(boundary):
    """Convert flat ``[x1, y1, x2, y2, ...]`` boundaries into lists of ``[x, y]`` pairs."""
    return [np.array(p).reshape(-1,2).tolist() for p in boundary]


def detect_polys(inferencer, image):
    """Run one text-detection inferencer on ``image`` and return its polygons.

    Args:
        inferencer: A callable MMOCR inferencer, or ``None`` when that detector
            has not been initialised.
        image: The image array to run detection on.

    Returns:
        A list of polygons, each a list of ``[x, y]`` pairs; empty when
        ``inferencer`` is ``None``.
    """
    if inferencer is None:
        return []
    # One prediction dict per input image; a single image was passed in.
    prediction = inferencer(image)['predictions'][0]
    return to_polys(prediction['polygons'])


# Run each detector
# `craft_inf` only exists if the (currently commented-out) CRAFT inferencer was
# enabled in the initialisation cell; otherwise its panel simply stays empty.
craft_polys  = detect_polys(globals().get('craft_inf'), img)
dbnet_polys  = detect_polys(dbnet_inf, img)
psenet_polys = detect_polys(psenet_inf, img)



## 6. Batch Inference & Save Comparisons

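The cell below builds the side-by-side canvas (ground truth, CRAFT, DBNet, PSENet) for the example image and saves it; wrap it in a loop over the images in `image_dir` to process the whole dataset.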


In [None]:
def draw_panel(image, polys, color, label):
    """Return an RGB copy of ``image`` with ``polys`` outlined and ``label`` drawn.

    Args:
        image: BGR image array (as produced by ``cv2.imread``).
        polys: Iterable of polygons, each a sequence of ``[x, y]`` points.
        color: BGR colour tuple used for both the outlines and the label.
        label: Text written in the top-left corner to identify the panel.

    Returns:
        A new array in RGB channel order; ``image`` itself is not modified.
    """
    panel = image.copy()
    for poly in polys:
        pts = np.array(poly, np.int32).reshape(-1,1,2)
        cv2.polylines(panel, [pts], True, color, 2)
    cv2.putText(panel, label, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1.0, color, 2, cv2.LINE_AA)
    # The canvas is saved through PIL, which expects RGB; OpenCV draws in BGR.
    return cv2.cvtColor(panel, cv2.COLOR_BGR2RGB)


h, w = img.shape[:2]
canvas = np.zeros((h, w*4, 3), dtype=np.uint8)

# Ground truth first, then one panel per detector (left to right).
gt = load_ground_truth(os.path.join(gt_dir, 'image_1.txt'))
panels = [
    ('GT', gt, (0,255,0)),
    ('CRAFT', craft_polys, (255,0,0)),
    ('DBNet', dbnet_polys, (255,0,0)),
    ('PSENet', psenet_polys, (255,0,0)),
]
for i, (name, polys, color) in enumerate(panels):
    canvas[:, i*w:(i+1)*w] = draw_panel(img, polys, color, name)

# Save
os.makedirs(compare_dir, exist_ok=True)
out_path = os.path.join(compare_dir, 'image_1_compare.png')
Image.fromarray(canvas).save(out_path)
print("Saved comparison image.")
