In [1]:
import os
import sys
import cv2
import numpy as np
import matplotlib.pyplot as plt
from types import SimpleNamespace
from pathlib import Path

# Add MindOCR paths
# The imports of `predict_system` and `config` further down resolve through
# these two sys.path entries: the local MindOCR checkout and its
# tools/infer/text directory.
mindocr_root = os.path.abspath("mindocr")
mindocr_tools_path = os.path.join(mindocr_root, "tools/infer/text")

# Notebook cells get re-run: drop any earlier occurrence before re-inserting so
# sys.path never accumulates duplicates. Insertion order keeps the tools
# directory ahead of the repository root, exactly as on a first run.
for _mindocr_path in (mindocr_root, mindocr_tools_path):
    while _mindocr_path in sys.path:
        sys.path.remove(_mindocr_path)
    sys.path.insert(0, _mindocr_path)

# Import MindOCR modules
import mindspore as ms
from predict_system import TextSystem
from config import parse_args

# Select MindSpore's graph execution mode (GRAPH_MODE is the named constant for mode 0)
ms.set_context(mode=ms.GRAPH_MODE)

# Sample image used by the cells below
TEST_IMG_PATH = 'data/HF_dataset/samples/ear-tags/sample_cow_ear_tag.png'

# Echo the resolved locations so a wrong working directory shows up immediately
print(f"MindOCR root: {mindocr_root}")
print(f"Test image: {TEST_IMG_PATH}")
print(f"Image exists: {Path(TEST_IMG_PATH).exists()}")


MindOCR root: /home/bonting/bonting-identification/mindocr
Test image: data/HF_dataset/samples/ear-tags/sample_cow_ear_tag.png
Image exists: True


In [None]:
# Hand-built argument namespace mirroring the options of run_mindocr_demo.sh.
# The keyword groups are unpacked in order, so the resulting namespace carries
# the same attributes, in the same order, as one flat SimpleNamespace(...) call.
args = SimpleNamespace(
    # --- Detection ---
    **dict(
        det_algorithm="DB++",
        det_model_dir=None,  # Use pretrained
        det_limit_side_len=736,
        det_limit_type="min",
        det_thresh=0.3,
        det_box_thresh=0.6,
        det_unclip_ratio=1.5,
        det_use_dilation=False,
        det_score_mode="slow",
        det_box_type="quad",
        det_amp_level="O0",
        det_batch_mode=False,
        det_batch_num=8,
    ),
    # --- Recognition ---
    **dict(
        rec_algorithm="CRNN",
        rec_model_dir=None,  # Use pretrained
        rec_image_shape="3, 32, 320",
        rec_batch_mode=False,
        rec_batch_num=8,
        rec_amp_level="O0",
        rec_char_dict_path=None,  # Will be set automatically based on algorithm
    ),
    # --- Classification (disabled) ---
    **dict(
        cls_algorithm=None,
        cls_model_dir=None,
        cls_batch_num=6,
        cls_amp_level="O0",
    ),
    # --- System ---
    **dict(
        mode=0,  # Graph mode
        drop_score=0.5,
        warmup=False,
        visualize_output=True,
        save_crop_res=False,
        save_cls_result=False,
    ),
    # --- Output locations ---
    **dict(
        draw_img_save_dir="./mindocr_output",
        crop_res_save_dir="./mindocr_crops",
        vis_font_path=None,
    ),
    # --- Input (will be set per image) ---
    image_dir=TEST_IMG_PATH,
)

# Make sure the visualisation directory exists before anything tries to write to it
Path(args.draw_img_save_dir).mkdir(parents=True, exist_ok=True)

print("Configuration created successfully!")
print(f"Detection algorithm: {args.det_algorithm}")
print(f"Recognition algorithm: {args.rec_algorithm}")
print(f"Output directory: {args.draw_img_save_dir}")


Configuration created successfully!
Detection algorithm: DB++
Recognition algorithm: CRNN
Output directory: ./mindocr_output


In [3]:
# Initialize the TextSystem (combines detection and recognition)
print("Initializing MindOCR TextSystem...")
text_system = TextSystem(args)
print("TextSystem initialized successfully!")

# Load the test image. cv2.imread does not raise when the file is missing or
# cannot be decoded -- it returns None -- so check explicitly instead of
# letting cv2.cvtColor fail later with an unrelated-looking assertion error.
image = cv2.imread(TEST_IMG_PATH)
if image is None:
    raise FileNotFoundError(f"Could not read image: {TEST_IMG_PATH}")

# OpenCV loads images as BGR; matplotlib expects RGB
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

print(f"Image shape: {image_rgb.shape}")
print(f"Image dtype: {image_rgb.dtype}")

# Display the original image
plt.figure(figsize=(12, 8))
plt.imshow(image_rgb)
plt.title(f"Original Test Image: {os.path.basename(TEST_IMG_PATH)}")
plt.axis('off')
plt.show()


Initializing MindOCR TextSystem...


`rec_image_shape` [' 32', ' 320'] dose not meet the network input requirement or is not optimal, which should be [32, None] under batch mode = False


AttributeError: 'types.SimpleNamespace' object has no attribute 'rec_char_dict_path'

In [None]:
# Detection-only pass, so the raw boxes can be inspected before recognition
print("Running text detection...")
det_res, data = text_system.text_detect(TEST_IMG_PATH, do_visualize=False)

# Pull out the two fields the cells below work with
polys, scores = det_res["polys"], det_res["scores"]

print(f"Detected {len(polys)} text regions")
print(f"Detection scores: {scores}")

# Create visualization with detected bounding boxes
def draw_boxes(image, boxes, scores=None, color=(0, 255, 0), thickness=2):
    """Return a copy of ``image`` with every polygon in ``boxes`` outlined.

    Args:
        image: Image array to annotate. It is copied; the input is not modified.
        boxes: Iterable of polygons, each an array-like of (x, y) vertices.
            NumPy arrays and plain nested lists are both accepted.
        scores: Optional per-box scores, indexed in step with ``boxes``. When
            given, each score is written with three decimals next to the
            polygon's first vertex.
        color: Colour tuple used for both the outline and the score text.
        thickness: Line thickness passed to ``cv2.polylines``.

    Returns:
        The annotated copy of ``image``.
    """
    # Lowest baseline allowed for a score label, so labels of boxes touching
    # the top edge are not drawn above the visible image area.
    min_label_y = 15

    img_with_boxes = image.copy()
    for i, box in enumerate(boxes):
        # Accept lists as well as arrays, truncate to integer pixel coordinates
        # and normalise to one (x, y) row per vertex.
        pts = np.asarray(box).astype(np.int32).reshape(-1, 2)
        cv2.polylines(img_with_boxes, [pts], True, color, thickness)

        # Add score text if available
        if scores is not None:
            # Hand OpenCV plain Python ints for the text origin rather than
            # NumPy scalars.
            label_x = int(pts[0][0])
            label_y = max(int(pts[0][1]) - 5, min_label_y)
            cv2.putText(img_with_boxes, f"{scores[i]:.3f}",
                        (label_x, label_y),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)
    return img_with_boxes

# Overlay the detected polygons (and their scores) on the RGB image
image_with_boxes = draw_boxes(image_rgb, polys, scores)

# Show the annotated image on an explicit figure/axes pair
fig, ax = plt.subplots(figsize=(12, 8))
ax.imshow(image_with_boxes)
ax.set_title(f"Detected Text Regions ({len(polys)} boxes)")
ax.axis('off')
plt.show()

# One line per detection: 1-based index, score and integer vertex coordinates
for box_no, (poly, score) in enumerate(zip(polys, scores), start=1):
    print(f"Box {box_no}: Score={score:.3f}, Points={poly.astype(int)}")


In [None]:
# End-to-end run: detection followed by recognition on the same image
print("Running complete OCR pipeline...")
boxes, text_scores, time_profile = text_system(TEST_IMG_PATH, do_visualize=False)

print("\nPipeline completed!")
print(f"Final results: {len(boxes)} text regions after filtering")
print(f"Time profile: {time_profile}")

# List every recognised string with its confidence, numbered from 1
print("\nRecognized texts:")
for rank, (text, score) in enumerate(text_scores, start=1):
    print(f"  {rank}. '{text}' (confidence: {score:.3f})")

# Create enhanced visualization with both boxes and text
def draw_boxes_with_text(image, boxes, text_scores, box_color=(0, 255, 0), text_color=(255, 0, 0)):
    """Return a copy of ``image`` with boxes outlined and labelled with their text.

    Args:
        image: Image array to annotate. It is copied; the input is not modified.
        boxes: Iterable of polygons, each an array-like of (x, y) vertices.
            NumPy arrays and plain nested lists are both accepted.
        text_scores: Iterable of ``(text, score)`` pairs, paired with ``boxes``
            by position. Pairing stops at the shorter of the two.
        box_color: Colour tuple for the polygon outline.
        text_color: Colour tuple for the label text.

    Returns:
        The annotated copy of ``image``.
    """
    # One set of font settings shared by the size measurement and the drawing
    # call, so the white background always matches the rendered text.
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.6
    font_thickness = 1

    img_result = image.copy()

    for box, (text, score) in zip(boxes, text_scores):
        # Accept lists as well as arrays, truncate to integer pixel coordinates
        # and normalise to one (x, y) row per vertex.
        pts = np.asarray(box).astype(np.int32).reshape(-1, 2)
        cv2.polylines(img_result, [pts], True, box_color, 2)

        # Prepare text with confidence
        display_text = f"{text} ({score:.2f})"

        # Label sits above the first vertex; the baseline never goes above
        # y=20 so the label stays visible. Plain Python ints keep every
        # coordinate handed to OpenCV of a single type.
        text_x = int(pts[0][0])
        text_y = max(int(pts[0][1]) - 10, 20)

        # White filled rectangle behind the text for readability
        (text_width, text_height), _ = cv2.getTextSize(
            display_text, font, font_scale, font_thickness
        )
        cv2.rectangle(img_result,
                      (text_x, text_y - text_height - 5),
                      (text_x + text_width, text_y + 5),
                      (255, 255, 255), -1)

        # Add text
        cv2.putText(img_result, display_text,
                    (text_x, text_y),
                    font, font_scale, text_color, font_thickness)

    return img_result

# Render boxes together with their recognised text on the RGB image
final_image = draw_boxes_with_text(image_rgb, boxes, text_scores)

# Show the result on an explicit figure/axes pair
fig, ax = plt.subplots(figsize=(15, 10))
ax.imshow(final_image)
ax.set_title(f"Complete OCR Results: {len(text_scores)} recognized texts")
ax.axis('off')
plt.show()


In [None]:
# Performance analysis and summary
# Recaps the inputs, settings and results produced by the cells above.
print("=" * 60)
print("MINDOCR PIPELINE SUMMARY")
print("=" * 60)

print(f"\nInput Image: {TEST_IMG_PATH}")
# shape is (height, width, channels); report as width x height
print(f"Image Size: {image_rgb.shape[1]}x{image_rgb.shape[0]} pixels")

print("\nAlgorithms Used:")
print(f"  - Detection: {args.det_algorithm}")
print(f"  - Recognition: {args.rec_algorithm}")
print(f"  - Classification: {'Disabled' if args.cls_algorithm is None else args.cls_algorithm}")

print("\nDetection Results:")
print(f"  - Total detected regions: {len(polys)}")
print(f"  - After confidence filtering (>{args.drop_score}): {len(boxes)}")

print("\nRecognition Results:")
for rank, (text, score) in enumerate(text_scores, start=1):
    print(f"  {rank:2d}. '{text}' (confidence: {score:.3f})")

if time_profile:
    print("\nPerformance Metrics:")
    # The 'all' entry is reported once, as the total. Listing it among the
    # stages as well would print the same figure twice, and folding it into the
    # fallback sum would double-count the run time.
    stage_times = {
        stage: seconds for stage, seconds in time_profile.items() if stage != 'all'
    }
    for stage, seconds in stage_times.items():
        print(f"  - {stage.capitalize()}: {seconds:.3f}s")

    # Prefer the pipeline's own end-to-end figure; otherwise add up the stages.
    total_time = time_profile['all'] if 'all' in time_profile else sum(stage_times.values())
    print(f"  - Total: {total_time:.3f}s")

# NOTE(review): both pipeline calls in this notebook pass do_visualize=False,
# so this directory may not contain any rendered output -- confirm before
# relying on this message.
print(f"\nOutput saved to: {args.draw_img_save_dir}")
print("\nPipeline demonstration completed successfully!")
