## Text Detection Comparison Notebook
#
### This notebook sets up a comparative evaluation of three text detection models: **CRAFT**, **DBNet**, and **PSENet**. We'll walk through:
#
#### 1.  **Setup & Imports**
#### 2.  **Utility Functions**
#### 3.  **Detector Wrappers**
#### 4.  **Data Loading**
#### 5.  **Inference Loop**
#### 6.  **Evaluation Metrics**
#### 7.  **Visualizations**
#### 8.  **Results & Discussion**
#


In [1]:
import re
log_path = 'DB/concern/log.py'
with open(log_path, 'r', encoding='utf-8') as f:
    lines = f.readlines()
new_lines = []
for line in lines:
    if re.search(r"\bos\.symlink\b", line):
        indent = re.match(r"(\s*)", line).group(1)
        new_lines.append(f"{indent}# os.symlink disabled on Windows\n")
    else:
        new_lines.append(line)
with open(log_path, 'w', encoding='utf-8') as f:
    f.writelines(new_lines)
print("Patched DB/concern/log.py to disable os.symlink calls.")

Patched DB/concern/log.py to disable os.symlink calls.


#### 1. Setup & Imports

Install or clone required repos and import common libraries.
Clone repositories (run once)

In [2]:

import os, subprocess, sys
repos = [
    ('https://github.com/kaoreill/CRAFTModel.git', 'CRAFTModel'),
    ('https://github.com/MhLiao/DB.git', 'DB'),
    ('https://github.com/whai362/PSENet.git', 'PSENet')
]
for url, folder in repos:
    if not os.path.exists(folder):
        subprocess.run(['git', 'clone', url], check=True)



#### Install Python dependencies


In [3]:
# !pip install -r CRAFTModel/requirements.txt 
# !pip install -r DB/requirements.txt
# !pip install -r PSENet/requirements.txt

import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from glob import glob
from PIL import Image
import urllib.request

# Set global paths

data_dir    = 'data'
image_dir   = os.path.join(data_dir, 'imgsForAllPages')
gt_dir      = os.path.join(data_dir, 'annotations')
output_dir  = 'output'
compare_dir = os.path.join(output_dir, 'comparisons')
os.makedirs(output_dir, exist_ok=True)



#### 2. Utility Functions

We define common helpers for image loading, saving overlays, sorting boxes, etc.



In [4]:
from PIL import Image as PILImage

def load_image(path):
    return np.array(PILImage.open(path).convert('RGB'))

def load_ground_truth(path):
    boxes = []
    if os.path.exists(path):
        with open(path, 'r') as f:
            for line in f:
                coords = list(map(float, line.strip().split(',')))
                boxes.append(np.array(coords).reshape(-1,2).tolist())
    return boxes




#### 3. Detector Wrappers

 Each detector implements a `detect(image)` method that returns polygon lists.



In [5]:
class TextDetector:
    def __init__(self, name, checkpoint):
        self.name = name
        self.checkpoint = checkpoint

    def detect(self, image):
        raise NotImplementedError


class CRAFTDetector(TextDetector):
    def detect(self, image):
        tmp = 'tmp_craft'
        os.makedirs(tmp, exist_ok=True)
        in_path = os.path.join(tmp, 'img.jpg')
        cv2.imwrite(in_path, image)

        out_dir = 'debug_output/craft'
        subprocess.run([
            sys.executable, 'CRAFTModel/test.py',
            f'--trained_model={self.checkpoint}',
            f'--test_folder={tmp}',
            f'--result_folder={out_dir}'
        ], check=True)

        txts = glob(os.path.join(out_dir + '_sorted', '*.txt'))
        boxes = []
        for txt in txts:
            with open(txt) as f:
                for line in f:
                    coords = list(map(float, line.strip().split(',')))
                    boxes.append(np.array(coords).reshape(-1, 2).tolist())
        print(f"{self.name} predicted {len(boxes)} boxes")
        return boxes


class DBNetDetector(TextDetector):
    def detect(self, image):
        tmp = 'tmp_db'
        os.makedirs(tmp, exist_ok=True)
        in_path = os.path.join(tmp, 'img.jpg')
        cv2.imwrite(in_path, image)

        cfg = 'configs/inference_cpu_resnet18.yaml'
        subprocess.run([
            sys.executable, 'demo.py',
            cfg,
            '--image_path', in_path,
            '--resume', self.checkpoint,
            '--polygon',
            '--box_thresh', '0.6',
            '--visualize'
        ], check=True, cwd='DB')

        result_dir = os.path.join('DB', 'outputs/workspace/DB')
        boxes = []
        for root, _, files in os.walk(result_dir):
            for fn in files:
                if fn.endswith('.txt'):
                    with open(os.path.join(root, fn)) as f:
                        for line in f:
                            coords = list(map(float, line.strip().split(',')))
                            boxes.append(np.array(coords).reshape(-1, 2).tolist())
        print(f"{self.name} predicted {len(boxes)} boxes")
        return boxes


class PSENetDetector(TextDetector):
    def detect(self, image):
        tmp = 'tmp_psenet'
        os.makedirs(tmp, exist_ok=True)
        in_path = os.path.join(tmp, 'img.jpg')
        cv2.imwrite(in_path, image)

        out_dir = 'debug_output/psenet'
        env = os.environ.copy()
        env['PYTHONPATH'] = os.path.abspath('PSENet') + os.pathsep + env.get('PYTHONPATH', '')

        subprocess.run([
            sys.executable, 'test.py',
            'config/psenet/psenet_r50_ic15_736.py',
            self.checkpoint
        ], check=True, cwd='PSENet', env=env)

        boxes = []
        for txt in glob(os.path.join(out_dir + '_sorted', '*.txt')):
            with open(txt) as f:
                for line in f:
                    coords = list(map(float, line.strip().split(',')))
                    boxes.append(np.array(coords).reshape(-1, 2).tolist())
        print(f"{self.name} predicted {len(boxes)} boxes")
        return boxes


#### 4. Save Comparisons

Loop through images, generate side-by-side canvases, and save them.


In [24]:
from glob import glob

craft_ckpt  = 'CRAFTModel/weights/craft_mlt_25k.pth'
dbnet_ckpt = 'DB/weights/dbnet_resnet18_fpnc_1200e_icdar2015_20220825_221614-7c0e94f2.pth'
psenet_ckpt = 'PSENet/weights/psenet_r50_ic15_1024.pth'

# Initialize detectors (ensure this matches your previous cell)
detectors = [
    CRAFTDetector('CRAFT', craft_ckpt),
    DBNetDetector('DBNet', dbnet_ckpt),
    PSENetDetector('PSENet', psenet_ckpt)
]

# Collect .png and .jpg files
img_paths = glob(os.path.join(image_dir, '*.png')) + glob(os.path.join(image_dir, '*.jpg'))
print(f"Found {len(img_paths)} images (PNG/JPG) in {image_dir}")

for img_path in img_paths:
    print(f"Processing {img_path}")
    img = cv2.imread(img_path)
    gt_path = img_path.replace(image_dir, gt_dir).rsplit('.',1)[0] + '.txt'
    gt  = load_ground_truth(gt_path)
    # Run detectors with error handling
    preds = {}
    for d in detectors:
        try:
            preds[d.name] = d.detect(img)
        except Exception as e:
            print(f"{d.name} detection failed on {img_path}: {e}")
            preds[d.name] = []

    # Create side-by-side canvas
    h, w = img.shape[:2]
    canvas = np.zeros((h, w*4, 3), dtype=np.uint8)
    # GT overlay
    vis_gt = img.copy()
    for poly in gt:
        pts = np.array(poly, dtype=np.int32).reshape(-1,1,2)
        cv2.polylines(vis_gt, [pts], True, (0,255,0), 2)
    canvas[:, 0:w] = cv2.cvtColor(vis_gt, cv2.COLOR_BGR2RGB)
    # Model predictions
    for i, (name, boxes) in enumerate(preds.items(), start=1):
        vis = img.copy()
        for poly in boxes:
            pts = np.array(poly, dtype=np.int32).reshape(-1,1,2)
            cv2.polylines(vis, [pts], True, (0,255,0), 2)
        canvas[:, i*w:(i+1)*w] = cv2.cvtColor(vis, cv2.COLOR_BGR2RGB)

    # Save using same extension
    base, ext = os.path.splitext(os.path.basename(img_path))
    save_name = f"{base}_compare{ext}"
    save_path = os.path.join(compare_dir, save_name)
    Image.fromarray(canvas).save(save_path)
    print(f"Saved comparison image to {save_path}")


Found 31 images (PNG/JPG) in data/imgsForAllPages
Processing data/imgsForAllPages\image_1.png
CRAFT predicted 0 boxes
DBNet detection failed on data/imgsForAllPages\image_1.png: Command '['c:\\Users\\katej\\OneDrive\\Documents\\GitHub\\RenAIssance\\venv\\Scripts\\python.exe', 'demo.py', 'configs/inference_cpu_resnet18.yaml', '--image_path', 'tmp_db\\img.jpg', '--resume', 'DB/weights/dbnet_resnet18_fpnc_1200e_icdar2015_20220825_221614-7c0e94f2.pth', '--polygon', '--box_thresh', '0.6', '--visualize']' returned non-zero exit status 1.
PSENet detection failed on data/imgsForAllPages\image_1.png: Command '['c:\\Users\\katej\\OneDrive\\Documents\\GitHub\\RenAIssance\\venv\\Scripts\\python.exe', 'test.py', 'config/psenet/psenet_r50_ic15_736.py', 'PSENet/weights/psenet_r50_ic15_1024.pth']' returned non-zero exit status 1.
Saved comparison image to compare_output\image_1_compare.png
Processing data/imgsForAllPages\image_10.png


KeyboardInterrupt: 

In [6]:
import os, subprocess, sys
from glob import glob
import cv2

# 1. Pick one test image
image_dir = "data/imgsForAllPages"  # <- make sure this is defined
img_files = glob(os.path.join(image_dir, '*.png')) + glob(os.path.join(image_dir, '*.jpg'))
if not img_files:
    raise FileNotFoundError(f"No images found in {image_dir}")
test_img = img_files[0]
print("Debug on image:", test_img)

# Read the image with validation
img = cv2.imread(test_img)
if img is None:
    raise ValueError(f"Failed to read test image: {test_img}")

# 2. Prepare temp & output directories
temp_dirs = ['tmp_craft', 'tmp_db', 'tmp_psenet']
output_dirs = ['debug_output/craft', 'debug_output/dbnet', 'debug_output/psenet']

for d in temp_dirs + output_dirs:
    os.makedirs(d, exist_ok=True)

# Save image to each tmp dir
for d in temp_dirs:
    save_path = os.path.join(d, 'img.jpg')
    success = cv2.imwrite(save_path, img)
    if not success:
        raise IOError(f"Failed to write image to {save_path}")

# 3. Helper to run subprocesses
def run_and_capture(cmd, name, cwd=None, env=None):
    print(f"\n=== {name} ===")
    proc = subprocess.run(cmd, capture_output=True, text=True, encoding='utf-8', cwd=cwd, env=env)
    print("Return code:", proc.returncode)
    

# 4. Paths to model weights
craft_ckpt  = 'CRAFTModel/weights/craft_mlt_25k.pth'
dbnet_ckpt = 'weights/dbnet_resnet18_fpnc_1200e_icdar2015_20220825_221614-7c0e94f2.pth'
psenet_ckpt = 'weights/psenet_r50_ic15_1024.pth'

# 6. Run CRAFT
run_and_capture([
    sys.executable, 'CRAFTModel/test.py',
    f'--trained_model={craft_ckpt}',
 '--test_folder=tmp_craft',
    '--result_folder=debug_output/craft'
], "CRAFT")

# 5. Run DBNet 
db_demo = 'demo.py'
db_cfg  = 'configs/inference_cpu_resnet18.yaml'
run_and_capture(
    [
        sys.executable, db_demo,
        db_cfg,
        '--image_path', 'tmp_db/img.jpg',
        '--resume', dbnet_ckpt,
        '--polygon',
        '--box_thresh', '0.6',
        '--visualize'
    ],
    "DBNet",
    cwd='DB'
)

# 8. Run PSENet (inside its folder so imports resolve)
env = os.environ.copy()
env['PYTHONPATH'] = os.path.abspath('PSENet') + os.pathsep + env.get('PYTHONPATH','')
run_and_capture([
    sys.executable, 'test.py',
    'config/psenet/psenet_r50_ic15_736.py',
    psenet_ckpt,
], "PSENet", cwd='PSENet', env=env)



Debug on image: data/imgsForAllPages\image_1.png

=== CRAFT ===
Return code: 0

=== DBNet ===
Return code: 1

=== PSENet ===
Return code: 0


In [None]:
folder_path = "PSENet/config/psenet" 
for file in os.listdir(folder_path):
    print(file)


In [8]:
import os, sys, subprocess
from glob import glob
import cv2
import numpy as np
from PIL import Image

# --- Setup ---
image_dir = "data/imgsForAllPages"
compare_dir = "compare_output"
os.makedirs(compare_dir, exist_ok=True)

# Pick one test image
img_files = glob(os.path.join(image_dir, '*.png')) + glob(os.path.join(image_dir, '*.jpg'))
if not img_files:
    raise FileNotFoundError(f"No images found in {image_dir}")
test_img = img_files[0]
print("Debug on image:", test_img)
img = cv2.imread(test_img)
if img is None:
    raise ValueError(f"Failed to read image: {test_img}")

# Create temp + output folders
temp_dirs = {'CRAFT': 'tmp_craft', 'DBNet': 'tmp_db', 'PSENet': 'tmp_psenet'}
output_dirs = {'CRAFT': 'debug_output/craft', 'DBNet': 'debug_output/dbnet', 'PSENet': 'debug_output/psenet'}
for d in list(temp_dirs.values()) + list(output_dirs.values()):
    os.makedirs(d, exist_ok=True)

# Save test image to all tmp dirs
for name, tmp in temp_dirs.items():
    cv2.imwrite(os.path.join(tmp, 'img.jpg'), img)

# --- Run Models ---
def run(cmd, name, cwd=None, env=None):
    print(f"\n=== {name} ===")
    proc = subprocess.run(cmd, capture_output=True, text=True, cwd=cwd, env=env)
    print("Return code:", proc.returncode)
    if proc.stdout.strip(): print("----- STDOUT -----\n", proc.stdout)
    if proc.stderr.strip(): print("----- STDERR -----\n", proc.stderr)

craft_ckpt = 'CRAFTModel/weights/craft_mlt_25k.pth'
dbnet_ckpt = 'weights/dbnet_resnet18_fpnc_1200e_icdar2015_20220825_221614-7c0e94f2.pth'
psenet_ckpt = 'weights/psenet_r50_ic15_1024.pth'

run([sys.executable, 'CRAFTModel/test.py',
     f'--trained_model={craft_ckpt}',
     '--test_folder=tmp_craft',
     '--result_folder=debug_output/craft'], "CRAFT")

run([sys.executable, 'demo.py',
     'configs/inference_cpu_resnet18.yaml',
     '--image_path', os.path.abspath('tmp_db/img.jpg'),
     '--resume', dbnet_ckpt,
     '--polygon', '--box_thresh', '0.6', '--visualize'],
    "DBNet", cwd='DB')

env = os.environ.copy()
env['PYTHONPATH'] = os.path.abspath('PSENet') + os.pathsep + env.get('PYTHONPATH', '')
run([sys.executable, 'test.py',
     'config/psenet/psenet_r50_ic15_736.py',
     psenet_ckpt],
    "PSENet", cwd='PSENet', env=env)

# --- Load Boxes ---
def load_boxes(out_dir):
    out_txts = glob(os.path.join(out_dir + '_sorted', '*.txt')) or glob(os.path.join(out_dir, '*.txt'))
    boxes = []
    for txt in out_txts:
        with open(txt) as f:
            for line in f:
                coords = list(map(float, line.strip().split(',')))
                boxes.append(np.array(coords).reshape(-1, 2))
    return boxes

preds = {
    'CRAFT': load_boxes(output_dirs['CRAFT']),
    'DBNet': load_boxes(output_dirs['DBNet']),
    'PSENet': load_boxes(output_dirs['PSENet']),
}

# --- Visualization ---
h, w = img.shape[:2]
canvas = np.zeros((h, w * 4, 3), dtype=np.uint8)
canvas[:, :w] = img  # original

for i, (name, boxes) in enumerate(preds.items(), start=1):
    vis = img.copy()
    for pts in boxes:
        pts = np.int32(pts).reshape((-1, 1, 2))
        cv2.polylines(vis, [pts], isClosed=True, color=(0, 255, 0), thickness=2)
    canvas[:, i*w:(i+1)*w] = vis

# Save comparison
filename = os.path.basename(test_img).rsplit('.', 1)[0] + '_compare.png'
save_path = os.path.join(compare_dir, filename)
Image.fromarray(canvas[..., ::-1]).save(save_path)
print(f"\n✅ Saved comparison image to {save_path}")


Debug on image: data/imgsForAllPages\image_1.png

=== CRAFT ===
Return code: 0
----- STDOUT -----
 Loading weights from checkpoint (CRAFTModel/weights/craft_mlt_25k.pth)
Test image 1/1: tmp_craft\img.jpg
elapsed time : 2.3046953678131104s

----- STDERR -----


=== DBNet ===
Return code: 1
----- STDOUT -----
 Resuming from weights/dbnet_resnet18_fpnc_1200e_icdar2015_20220825_221614-7c0e94f2.pth
Resumed from weights/dbnet_resnet18_fpnc_1200e_icdar2015_20220825_221614-7c0e94f2.pth

----- STDERR -----
 Traceback (most recent call last):
  File "c:\Users\katej\OneDrive\Documents\GitHub\RenAIssance\RenAIssance_CRNN_OCR_Kate_OReilly\DB\demo.py", line 147, in <module>
    main()
  File "c:\Users\katej\OneDrive\Documents\GitHub\RenAIssance\RenAIssance_CRNN_OCR_Kate_OReilly\DB\demo.py", line 43, in main
    Demo(experiment, experiment_args, cmd=args).inference(args['image_path'], args['visualize'])
  File "c:\Users\katej\OneDrive\Documents\GitHub\RenAIssance\RenAIssance_CRNN_OCR_Kate_OReilly\DB\