## Text Detection Comparison Notebook
#
### This notebook sets up a comparative evaluation of three text detection models: **CRAFT**, **DBNet**, and **PSENet**. We'll walk through:
#
#### 1.  **Setup & Imports**
#### 2.  **Utility Functions**
#### 3.  **Detector Wrappers**
#### 4.  **Data Loading**
#### 5.  **Inference Loop**
#### 6.  **Evaluation Metrics**
#### 7.  **Visualizations**
#### 8.  **Results & Discussion**
#


In [1]:
import re
log_path = 'DB/concern/log.py'
with open(log_path, 'r', encoding='utf-8') as f:
    lines = f.readlines()
new_lines = []
for line in lines:
    if re.search(r"\bos\.symlink\b", line):
        indent = re.match(r"(\s*)", line).group(1)
        new_lines.append(f"{indent}# os.symlink disabled on Windows\n")
    else:
        new_lines.append(line)
with open(log_path, 'w', encoding='utf-8') as f:
    f.writelines(new_lines)
print("Patched DB/concern/log.py to disable os.symlink calls.")

Patched DB/concern/log.py to disable os.symlink calls.


#### 1. Setup & Imports

Install or clone required repos and import common libraries.
Clone repositories (run once)

In [2]:

import os, subprocess, sys
repos = [
    ('https://github.com/kaoreill/CRAFTModel.git', 'CRAFTModel'),
    ('https://github.com/MhLiao/DB.git', 'DB'),
    ('https://github.com/whai362/PSENet.git', 'PSENet')
]
for url, folder in repos:
    if not os.path.exists(folder):
        subprocess.run(['git', 'clone', url], check=True)



#### Install Python dependencies


In [3]:
# !pip install -r CRAFTModel/requirements.txt 
# !pip install -r DB/requirements.txt
# !pip install -r PSENet/requirements.txt

import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from glob import glob
from PIL import Image
import urllib.request

# Set global paths

data_dir    = 'data'
image_dir   = os.path.join(data_dir, 'imgsForAllPages')
gt_dir      = os.path.join(data_dir, 'annotations')
output_dir  = 'output'
compare_dir = os.path.join(output_dir, 'comparisons')
os.makedirs(output_dir, exist_ok=True)



#### 2. Utility Functions

We define common helpers for image loading, saving overlays, sorting boxes, etc.



In [4]:
from PIL import Image as PILImage

def load_image(path):
    return np.array(PILImage.open(path).convert('RGB'))

def load_ground_truth(path):
    boxes = []
    if os.path.exists(path):
        with open(path, 'r') as f:
            for line in f:
                coords = list(map(float, line.strip().split(',')))
                boxes.append(np.array(coords).reshape(-1,2).tolist())
    return boxes




#### 3. Detector Wrappers

 Each detector implements a `detect(image)` method that returns polygon lists.



In [5]:



class TextDetector:
    def __init__(self, name, checkpoint):
        self.name = name; self.checkpoint = checkpoint
    def detect(self, image): raise NotImplementedError

class CRAFTDetector(TextDetector):
    def detect(self, image):
        tmp = 'tmp_craft'; os.makedirs(tmp, exist_ok=True)
        in_path = os.path.join(tmp, 'img.jpg'); cv2.imwrite(in_path, image)
        out = os.path.join(output_dir, 'craft');
        subprocess.run([sys.executable, 'CRAFTModel/test.py',
                        f'--trained_model={self.checkpoint}',
                        f'--test_folder={tmp}', f'--output_dir={out}'], check=True)
        txts = glob(os.path.join(out + '_sorted', '*.txt'))
        boxes=[]
        for txt in txts:
            with open(txt) as f:
                for l in f: coords=list(map(float,l.split(','))); boxes.append(np.array(coords).reshape(-1,2).tolist())
        return boxes

class DBNetDetector(TextDetector):
    

    def detect(self, image):
        # 1) Save temp image
        tmp = 'tmp_db'
        os.makedirs(tmp, exist_ok=True)
        in_path = os.path.join(tmp, 'img.jpg')
        cv2.imwrite(in_path, image)

        # 2) Run the demo script from inside DB/
        demo_script = 'demo.py'
        cfg = 'experiments/seg_detector/ic15_resnet50_deform_thre.yaml'
        out = 'outputs/workspace/DB'
        try:
            subprocess.run([
                sys.executable, demo_script,
                cfg,
                '--image_path', in_path,
                '--resume', self.checkpoint,
                '--polygon',
                '--box_thresh', '0.6',
                '--visualize'
            ], check=True, cwd='DB')
        except subprocess.CalledProcessError as e:
            # If it's that WinError 1314 (privilege), swallow it
            if e.returncode != 0 and 'required privilege' in str(e):
                print("⚠️ DBNet demo rename failed due to Windows privileges—continuing anyway.")
            else:
                raise

        # 3) Now load whatever boxes were written into DB/outputs/workspace/DB/*.txt
        boxes = []
        base_out = os.path.join('DB', out)
        # there should be one subfolder named by experiment, e.g. 'DB'
        for root, _, files in os.walk(base_out):
            for fn in files:
                if fn.endswith('.txt'):
                    with open(os.path.join(root, fn)) as f:
                        for line in f:
                            coords = list(map(float, line.strip().split(',')))
                            boxes.append(np.array(coords).reshape(-1,2).tolist())
        return boxes


class PSENetDetector(TextDetector):
    def detect(self, image):
        tmp='tmp_psenet'; os.makedirs(tmp, exist_ok=True)
        in_path=os.path.join(tmp,'img.jpg'); cv2.imwrite(in_path,image)
        out=os.path.join(output_dir,'psenet')
        subprocess.run([sys.executable,'PSENet/test.py','--trained_model',self.checkpoint,'--test_folder',tmp,'--output_dir',out],check=True)
        boxes=[]
        for txt in glob(os.path.join(out+'_sorted','*.txt')):
            with open(txt) as f:
                for l in f: coords=list(map(float,l.split(','))); boxes.append(np.array(coords).reshape(-1,2).tolist())
        return boxes

#### 4. Save Comparisons

Loop through images, generate side-by-side canvases, and save them.


In [7]:
from glob import glob

# Initialize detectors (ensure this matches your previous cell)
detectors = [
    CRAFTDetector('CRAFT','CRAFTModel/weights/craft_mlt_25k.pth'),
    DBNetDetector('DBNet','DB/weights/dbnetpp_resnet50_fpnc_1200e_icdar2015.pth'),
    PSENetDetector('PSENet','PSENet/weights/psenet_resnet50_fpnf_600e_ctw1500.pth')
]

# Collect .png and .jpg files
img_paths = glob(os.path.join(image_dir, '*.png')) + glob(os.path.join(image_dir, '*.jpg'))
print(f"Found {len(img_paths)} images (PNG/JPG) in {image_dir}")

for img_path in img_paths:
    print(f"Processing {img_path}")
    img = cv2.imread(img_path)
    gt_path = img_path.replace(image_dir, gt_dir).rsplit('.',1)[0] + '.txt'
    gt  = load_ground_truth(gt_path)
    # Run detectors with error handling
    preds = {}
    for d in detectors:
        try:
            preds[d.name] = d.detect(img)
        except Exception as e:
            print(f"{d.name} detection failed on {img_path}: {e}")
            preds[d.name] = []

    # Create side-by-side canvas
    h, w = img.shape[:2]
    canvas = np.zeros((h, w*4, 3), dtype=np.uint8)
    # GT overlay
    vis_gt = img.copy()
    for poly in gt:
        pts = np.array(poly, dtype=np.int32).reshape(-1,1,2)
        cv2.polylines(vis_gt, [pts], True, (0,255,0), 2)
    canvas[:, 0:w] = cv2.cvtColor(vis_gt, cv2.COLOR_BGR2RGB)
    # Model predictions
    for i, (name, boxes) in enumerate(preds.items(), start=1):
        vis = img.copy()
        for poly in boxes:
            pts = np.array(poly, dtype=np.int32).reshape(-1,1,2)
            cv2.polylines(vis, [pts], True, (0,255,0), 2)
        canvas[:, i*w:(i+1)*w] = cv2.cvtColor(vis, cv2.COLOR_BGR2RGB)

    # Save using same extension
    base, ext = os.path.splitext(os.path.basename(img_path))
    save_name = f"{base}_compare{ext}"
    save_path = os.path.join(compare_dir, save_name)
    Image.fromarray(canvas).save(save_path)
    print(f"Saved comparison image to {save_path}")


Found 31 images (PNG/JPG) in data\imgsForAllPages
Processing data\imgsForAllPages\image_1.png
CRAFT detection failed on data\imgsForAllPages\image_1.png: Command '['c:\\Users\\katej\\OneDrive\\Documents\\GitHub\\RenAIssance\\venv\\Scripts\\python.exe', 'CRAFTModel/test.py', '--trained_model=CRAFTModel/weights/craft_mlt_25k.pth', '--test_folder=tmp_craft', '--output_dir=output\\craft']' returned non-zero exit status 2.
DBNet detection failed on data\imgsForAllPages\image_1.png: Command '['c:\\Users\\katej\\OneDrive\\Documents\\GitHub\\RenAIssance\\venv\\Scripts\\python.exe', 'demo.py', 'experiments/seg_detector/ic15_resnet50_deform_thre.yaml', '--image_path', 'tmp_db\\img.jpg', '--resume', 'DB/weights/dbnetpp_resnet50_fpnc_1200e_icdar2015.pth', '--polygon', '--box_thresh', '0.6', '--visualize']' returned non-zero exit status 1.
PSENet detection failed on data\imgsForAllPages\image_1.png: Command '['c:\\Users\\katej\\OneDrive\\Documents\\GitHub\\RenAIssance\\venv\\Scripts\\python.exe', '

KeyboardInterrupt: 

In [None]:
print(" Entering debug cell")

# %%
import os, subprocess, sys
from glob import glob
import cv2

# 1. Pick one test image
img_files = glob(os.path.join(image_dir, '*.png')) + glob(os.path.join(image_dir, '*.jpg'))
if not img_files:
    raise FileNotFoundError(f"No images found in {image_dir}")
test_img = img_files[0]
print("Debug on image:", test_img)

# 2. Prepare temp & output directories
for d in ['tmp_craft','tmp_db','tmp_psenet','debug_output/craft','debug_output/dbnet','debug_output/psenet']:
    os.makedirs(d, exist_ok=True)
for d in ['tmp_craft','tmp_db','tmp_psenet']:
    cv2.imwrite(os.path.join(d,'img.jpg'), cv2.imread(test_img))

# 4. Helper to run & capture logs
def run_and_capture(cmd, name, cwd=None, env=None):
    print(f"\n=== {name} ===")
    proc = subprocess.run(cmd, capture_output=True, text=True, cwd=cwd, env=env)
    print("Return code:", proc.returncode)
    print("----- STDOUT -----")
    print(proc.stdout or "(none)")
    print("----- STDERR -----")
    print(proc.stderr or "(none)")

# 5. Paths to weights
craft_ckpt  = 'CRAFTModel/weights/craft_mlt_25k.pth'
dbnet_ckpt  = 'DB/weights/dbnetpp_resnet50_fpnc_1200e_icdar2015.pth'
psenet_ckpt = 'PSENet/weights/psenet_resnet50_fpnf_600e_ctw1500.pth'

# 6. Run CRAFT
#run_and_capture([
 #   sys.executable, 'CRAFTModel/test.py',
  #  f'--trained_model={craft_ckpt}',
   # '--test_folder=tmp_craft',
   # '--result_folder=debug_output/craft'
#], "CRAFT")

# 7. Run DBNet via demo.py
db_demo = 'demo.py'
db_cfg  = 'configs/inference_cpu_resnet18.yaml'  # use your actual patched file


run_and_capture(
    [
        sys.executable, db_demo,
        db_cfg,
        '--image_path', 'tmp_db/img.jpg',
        '--resume', dbnet_ckpt,
        '--polygon',
        '--box_thresh', '0.6',
        '--visualize'
    ],
    "DBNet",
    cwd='DB'   # <— run from the DB directory so 'experiments/...' is found
)

# 8. Run PSENet (inside its folder so imports resolve)
# print("\n=== PSENet (with PYTHONPATH fix) ===")
# env = os.environ.copy()
# env['PYTHONPATH'] = os.path.abspath('PSENet') + os.pathsep + env.get('PYTHONPATH','')
# run_and_capture([
#    sys.executable, 'test.py',
#    f'--trained_model={psenet_ckpt}',
#    '--test_folder=tmp_psenet',
#    '--output_dir=debug_output/psenet'
#], "PSENet", cwd='PSENet', env=env)

 Entering debug cell
Debug on image: data\imgsForAllPages\image_1.png

=== DBNet ===
Return code: 1
----- STDOUT -----
verbose: True
Initializing log dir for workspace\SegDetectorModel-seg_detector\resnet18\L1BalanceCELoss

----- STDERR -----
Traceback (most recent call last):
  File "c:\Users\katej\OneDrive\Documents\GitHub\RenAIssance\RenAIssance_CRNN_OCR_Kate_OReilly\DB\demo.py", line 147, in <module>
    main()
  File "c:\Users\katej\OneDrive\Documents\GitHub\RenAIssance\RenAIssance_CRNN_OCR_Kate_OReilly\DB\demo.py", line 41, in main
    experiment = Configurable.construct_class_from_config(experiment_args)
  File "c:\Users\katej\OneDrive\Documents\GitHub\RenAIssance\RenAIssance_CRNN_OCR_Kate_OReilly\DB\concern\config.py", line 132, in construct_class_from_config
    return cls(**args)
  File "c:\Users\katej\OneDrive\Documents\GitHub\RenAIssance\RenAIssance_CRNN_OCR_Kate_OReilly\DB\experiment.py", line 96, in __init__
    self.load_all(**kwargs)
  File "c:\Users\katej\OneDrive\Docu

In [12]:
folder_path = "db/experiments/seg_detector" 
for filename in os.listdir(folder_path):
    print(filename)


base.yaml
base_ic15.yaml
base_td500.yaml
base_totaltext.yaml
ic15_resnet18_deform_thre.yaml
ic15_resnet50_deform_thre.yaml
td500_resnet18_deform_thre.yaml
td500_resnet50_deform_thre.yaml
totaltext_mobilenet_v3_large_thre.yaml
totaltext_resnet18_deform_thre.yaml
totaltext_resnet50_deform_thre.yaml
