# Inference

In [12]:
# If you have already installed the packages, just cd into PaddleOCR to begin.

In [1]:
import sys
# Print the path of the Python interpreter running this kernel
# (handy for checking that the notebook uses the intended environment).
print(sys.executable)

/home/z890/.conda/envs/paddletest/bin/python


In [3]:
from paddleocr import PaddleOCR, draw_ocr

# The `lang` parameter accepts values such as `ch`, `en`, `french`, `german`, `korean`, `japan`.
# More languages: https://paddlepaddle.github.io/PaddleOCR/main/en/ppocr/blog/multi_languages.html#4-inference-and-deployment
# PaddleOCR consists of a text detection model, a text recognition model and an angle classifier model,
# which can be combined freely.
# Model zoo: https://paddlepaddle.github.io/PaddleOCR/main/en/ppocr/model_list.html#23-multilingual-recognition-modelupdating
# More parameters: https://paddlepaddle.github.io/PaddleOCR/main/en/ppocr/blog/inference_args.html
#
# The first time this runs, the models are downloaded from their URLs.
# The default location is '~/.paddleocr/', which creates paths like '~/.paddleocr/whl/cls'.
ocr = PaddleOCR(
    rec_model_dir='./models/en_PP-OCRv4_rec_infer/',
    use_angle_cls=True,
    lang='en',
    det_db_box_thresh=0.3,
    drop_score=0.3,
)

# Run detection + angle classification + recognition on the sample image
img_path = './sample_image.jpg'
result = ocr.ocr(img_path, cls=True)
print(result)

[2025/04/30 16:29:55] ppocr DEBUG: Namespace(help='==SUPPRESS==', use_gpu=False, use_xpu=False, use_npu=False, use_mlu=False, use_gcu=False, ir_optim=True, use_tensorrt=False, min_subgraph_size=15, precision='fp32', gpu_mem=500, gpu_id=0, image_dir=None, page_num=0, det_algorithm='DB', det_model_dir='/home/z890/.paddleocr/whl/det/en/en_PP-OCRv3_det_infer', det_limit_side_len=960, det_limit_type='max', det_box_type='quad', det_db_thresh=0.3, det_db_box_thresh=0.3, det_db_unclip_ratio=1.5, max_batch_size=10, use_dilation=False, det_db_score_mode='fast', det_east_score_thresh=0.8, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5, det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5, rec_algorithm='SVTR_LCNet', rec_model_dir='./models/en_PP-OCRv4_rec_infer/', rec_image_inverse=True, rec_image_shape='3, 48, 320', rec_batch_num=6, max_text_length=25, rec

In [4]:
import numpy as np
import cv2
from PIL import Image, ImageDraw, ImageFont

In [5]:
# plot the detection
import numpy as np
import cv2
from PIL import Image, ImageDraw, ImageFont

# Open the original image only to read its dimensions; the recognised text is
# re-drawn on a blank canvas of the same size below.
with Image.open(img_path) as src_img:
    img_width, img_height = src_img.size

# Create a white background image with same dimensions as original
img = Image.new('RGB', (img_width, img_height), (255, 255, 255))
draw = ImageDraw.Draw(img)

# Load font
# you can download from https://github.com/PaddlePaddle/PaddleOCR/tree/main/doc/fonts
font_path = None  # if None, Pillow's default font is used

# Font sizing heuristics
MIN_FONT_SIZE = 10
MAX_FONT_SIZE = 200
FONT_SCALE = 0.35  # Adjust factor as needed

# `result` holds one entry per input image. Guard against an empty result or a
# None entry (PaddleOCR is expected to yield None for an image without any
# detected text - TODO confirm for the installed version).
detections = (result[0] if result else None) or []

for item in detections:
    text = item[1][0]
    if not text:  # Skip empty text
        continue

    # Axis-aligned extent of the detection box. Taking min/max over every
    # corner (instead of trusting corner 0 and corner 2) keeps the width and
    # height positive even when the detected quadrilateral is rotated.
    box = item[0]
    xs = [point[0] for point in box]
    ys = [point[1] for point in box]
    x_min, x_max = min(xs), max(xs)
    y_min, y_max = min(ys), max(ys)

    # Calculate text width and height
    text_width = x_max - x_min
    text_height = y_max - y_min

    # Font size from the box height plus the average width per character,
    # clamped to [MIN_FONT_SIZE, MAX_FONT_SIZE]
    font_size = int((text_height + text_width / len(text)) * FONT_SCALE)
    font_size = max(MIN_FONT_SIZE, min(MAX_FONT_SIZE, font_size))

    # Use the TrueType font when one is configured and loadable; otherwise
    # fall back to the default font. Only the "cannot open font" error is
    # caught, so unrelated bugs are no longer silently swallowed.
    font = None
    if font_path:
        try:
            font = ImageFont.truetype(font_path, font_size)
        except OSError:
            font = None
    if font is None:
        font = ImageFont.load_default(size=font_size)

    # Measure the rendered text so it can be centred in the box
    text_bbox = draw.textbbox((0, 0), text, font=font)
    text_w = text_bbox[2] - text_bbox[0]
    text_h = text_bbox[3] - text_bbox[1]

    # Calculate position to center text in the box
    x_pos = x_min + (text_width - text_w) // 2
    y_pos = y_min + (text_height - text_h) // 2

    # Draw the text
    draw.text((x_pos, y_pos), text, font=font, fill=(0, 0, 0))

# Save or show the image
img.save('output_text.png')
img.show()

# Export to onnx

In [6]:
import sys
import os
import subprocess
import datetime
import re
import shutil

def get_conda_env_name():
    """Return the final path component of ``sys.prefix`` as the environment name."""
    _parent_dir, env_name = os.path.split(sys.prefix)
    return env_name

def fix_newlines(text):
    """Return ``text`` with every CRLF and every lone CR turned into a single LF."""
    # Collapse Windows-style pairs first so they do not become two line breaks
    without_crlf = text.replace('\r\n', '\n')
    # Any carriage return still present is a bare CR; swap each one for LF
    return '\n'.join(without_crlf.split('\r'))

def find_conda_bin():
    """Locate the conda executable.

    Candidates are tried in order and the first one that is an existing,
    executable file is returned:

    1. the path stored in the ``CONDA_EXE`` environment variable, if set
       (conda exports this variable when an environment is activated),
    2. ``<sys.prefix>/bin/conda``,
    3. a few common installation locations,
    4. whatever ``shutil.which("conda")`` finds on ``PATH``.

    Returns:
        str: path of the conda executable.

    Raises:
        FileNotFoundError: if none of the candidates is usable.
    """
    possible_paths = [
        os.environ.get("CONDA_EXE"),
        os.path.join(sys.prefix, "bin", "conda"),
        "/opt/miniconda3/bin/conda",
        os.path.expanduser("~/miniconda3/bin/conda"),
        os.path.expanduser("~/anaconda3/bin/conda"),
        shutil.which("conda"),
    ]
    for path in possible_paths:
        # Skip unset candidates, directories and files that cannot be executed
        if path and os.path.isfile(path) and os.access(path, os.X_OK):
            return path
    raise FileNotFoundError("Could not find 'conda' executable.")

def print_divider():
    """Print a blank line, an 80-character horizontal rule, and another blank line."""
    rule = "─" * 80
    print(f"\n{rule}\n")

def run_in_conda_advanced(command, verbose=True, raise_on_error=True):
    """Run a command inside the current conda environment using 'conda run'.

    Args:
        command: The command line to run. Either a single string, which is
            tokenised with shell-style quoting rules (so an argument that
            contains spaces can be quoted), or an already-split list/tuple
            of arguments.
        verbose: When true, print the output captured from the command.
        raise_on_error: When true, a non-zero exit status raises
            ``RuntimeError``; otherwise the failure is only reported.

    Returns:
        None. Everything is reported by printing.

    Raises:
        RuntimeError: the command exited with a non-zero status and
            ``raise_on_error`` is true.
        FileNotFoundError: raised by ``find_conda_bin()`` when no conda
            executable can be located.
    """
    import shlex  # local import keeps this function self-contained

    def _now():
        # Fresh timestamp for each log line, so the completion message shows
        # when the command finished rather than when it started.
        return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    if isinstance(command, str):
        # Plain str.split() would tear quoted arguments apart. Non-POSIX mode
        # is used on Windows so backslashes in paths are not read as escapes.
        args = shlex.split(command, posix=(os.name != "nt"))
    else:
        args = [str(arg) for arg in command]

    env_name = get_conda_env_name()
    conda_bin = find_conda_bin()
    full_command = [conda_bin, "run", "-n", env_name] + args

    print_divider()
    print(f"[{_now()}] Running:\n{' '.join(full_command)}")
    print_divider()

    try:
        result = subprocess.run(
            full_command,
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True
        )
    except subprocess.CalledProcessError as e:
        output = fix_newlines(e.stdout or "") + "\n" + fix_newlines(e.stderr or "")
        print_divider()
        print(f"[{_now()}] Command failed with return code {e.returncode}\n")
        if verbose:
            print(output)
        print_divider()
        if raise_on_error:
            # 'from None' keeps the traceback short; the captured output is
            # already part of the message.
            raise RuntimeError(f"Command failed: {output}") from None
        return None

    if verbose:
        print(fix_newlines(result.stdout))
        # Warnings are commonly written to stderr even when the command
        # succeeds; show them instead of silently dropping them.
        stderr_text = fix_newlines(result.stderr or "")
        if stderr_text.strip():
            print(stderr_text)
    print_divider()
    print(f"[{_now()}] Command executed successfully.\n")
    print_divider()
    return None


In [7]:
# Convert the recognition model to ONNX with paddle2onnx.
# NOTE: the recorded output of this cell reports opset 14 being used even
# though --opset_version 11 is requested here.
run_in_conda_advanced(
    "paddle2onnx"
    " --model_dir ./models/ch_PP-OCRv4_rec_infer"
    " --model_filename inference.pdmodel"
    " --params_filename inference.pdiparams"
    " --save_file ./inference/rec_onnx/model.onnx"
    " --opset_version 11"
    " --enable_onnx_checker True"
)


────────────────────────────────────────────────────────────────────────────────

[2025-04-30 16:30:16] Running:
/opt/miniconda3/bin/conda run -n paddletest paddle2onnx --model_dir ./models/ch_PP-OCRv4_rec_infer --model_filename inference.pdmodel --params_filename inference.pdiparams --save_file ./inference/rec_onnx/model.onnx --opset_version 11 --enable_onnx_checker True

────────────────────────────────────────────────────────────────────────────────

[Paddle2ONNX] Start parsing the Paddle model file...
[Paddle2ONNX] Use opset_version = 14 for ONNX export.
[Paddle2ONNX] PaddlePaddle model is exported as ONNX format now.
2025-04-30 16:30:18 [INFO]	Try to perform optimization on the ONNX model with onnxoptimizer.
2025-04-30 16:30:18 [INFO]	ONNX model saved in ./inference/rec_onnx/model.onnx.



────────────────────────────────────────────────────────────────────────────────

[2025-04-30 16:30:16] Command executed successfully.


────────────────────────────────────────────────────────

In [8]:
# Convert the detection model to ONNX with paddle2onnx.
# NOTE: the recorded output of this cell reports opset 14 being used even
# though --opset_version 11 is requested here.
run_in_conda_advanced(
    "paddle2onnx"
    " --model_dir ./models/ch_PP-OCRv4_det_infer"
    " --model_filename inference.pdmodel"
    " --params_filename inference.pdiparams"
    " --save_file ./inference/det_onnx/model.onnx"
    " --opset_version 11"
    " --enable_onnx_checker True"
)


────────────────────────────────────────────────────────────────────────────────

[2025-04-30 16:30:19] Running:
/opt/miniconda3/bin/conda run -n paddletest paddle2onnx --model_dir ./models/ch_PP-OCRv4_det_infer --model_filename inference.pdmodel --params_filename inference.pdiparams --save_file ./inference/det_onnx/model.onnx --opset_version 11 --enable_onnx_checker True

────────────────────────────────────────────────────────────────────────────────

[Paddle2ONNX] Start parsing the Paddle model file...
[Paddle2ONNX] Use opset_version = 14 for ONNX export.
[Paddle2ONNX] PaddlePaddle model is exported as ONNX format now.
2025-04-30 16:30:21 [INFO]	Try to perform optimization on the ONNX model with onnxoptimizer.
2025-04-30 16:30:21 [INFO]	ONNX model saved in ./inference/det_onnx/model.onnx.



────────────────────────────────────────────────────────────────────────────────

[2025-04-30 16:30:19] Command executed successfully.


────────────────────────────────────────────────────────

In [9]:
# Convert the angle classifier model to ONNX with paddle2onnx.
# NOTE: the recorded output of this cell reports opset 14 being used even
# though --opset_version 11 is requested here.
run_in_conda_advanced(
    "paddle2onnx"
    " --model_dir ./models/ch_ppocr_mobile_v2.0_cls_infer"
    " --model_filename inference.pdmodel"
    " --params_filename inference.pdiparams"
    " --save_file ./inference/cls_onnx/model.onnx"
    " --opset_version 11"
    " --enable_onnx_checker True"
)



────────────────────────────────────────────────────────────────────────────────

[2025-04-30 16:30:21] Running:
/opt/miniconda3/bin/conda run -n paddletest paddle2onnx --model_dir ./models/ch_ppocr_mobile_v2.0_cls_infer --model_filename inference.pdmodel --params_filename inference.pdiparams --save_file ./inference/cls_onnx/model.onnx --opset_version 11 --enable_onnx_checker True

────────────────────────────────────────────────────────────────────────────────

[Paddle2ONNX] Start parsing the Paddle model file...
[Paddle2ONNX] Use opset_version = 14 for ONNX export.
[Paddle2ONNX] PaddlePaddle model is exported as ONNX format now.
2025-04-30 16:30:22 [INFO]	Try to perform optimization on the ONNX model with onnxoptimizer.
2025-04-30 16:30:22 [INFO]	ONNX model saved in ./inference/cls_onnx/model.onnx.



────────────────────────────────────────────────────────────────────────────────

[2025-04-30 16:30:21] Command executed successfully.


───────────────────────────────────────────────

In [16]:
# Run PaddleOCR's predict_system.py on the sample image with the ONNX
# detection / recognition / classifier models (--use_onnx=True, --use_gpu=False).
# Drawn results are saved under ./output_sample (--draw_img_save_dir).
%run ./PaddleOCR/tools/infer/predict_system.py \
--use_gpu=False \
--use_onnx=True \
--det_model_dir=./inference/det_onnx/model.onnx \
--rec_model_dir=./inference/rec_onnx/model.onnx \
--cls_model_dir=./inference/cls_onnx/model.onnx \
--image_dir=./sample_image.jpg \
--rec_char_dict_path=./PaddleOCR/ppocr/utils/dict/ppocrv4_doc_dict.txt \
--vis_font_path=./PaddleOCR/doc/fonts/simfang.ttf \
--draw_img_save_dir=./output_sample


[2025/04/30 16:53:18] ppocr INFO: In PP-OCRv3, rec_image_shape parameter defaults to '3, 48, 320', if you are using recognition model with PP-OCRv2 or an older version, please set --rec_image_shape='3,32,320
[2025/04/30 16:53:18] ppocr DEBUG: dt_boxes num : 27, elapsed : 0.20322084426879883
[2025/04/30 16:53:19] ppocr DEBUG: rec_res num  : 27, elapsed : 0.34522271156311035
[2025/04/30 16:53:19] ppocr DEBUG: 0  Predict time of ./sample_image.jpg: 0.575s
[2025/04/30 16:53:19] ppocr DEBUG: 中华民國114年1-2月份, 0.911
[2025/04/30 16:53:19] ppocr DEBUG: 收銀機统票, 0.903
[2025/04/30 16:53:19] ppocr DEBUG: 收封胎, 0.580
[2025/04/30 16:53:19] ppocr DEBUG: HW22497914, 0.995
[2025/04/30 16:53:19] ppocr DEBUG: 大坪林三号小吃店, 0.957
[2025/04/30 16:53:19] ppocr DEBUG: NO:31433191, 0.990
[2025/04/30 16:53:19] ppocr DEBUG: 新北市新店區民授路12号1, 0.872
[2025/04/30 16:53:19] ppocr DEBUG: TEL:(02)2911-4777, 0.962
[2025/04/30 16:53:19] ppocr DEBUG: 页：1, 0.664
[2025/04/30 16:53:19] ppocr DEBUG: 2025-01-08, 0.998
[2025/04/30 16:53:19

In [None]:
# You can find the box dict in system_results.txt, together with an image named after the input that plots the detections.

# PPstructure
Paddle also offers document layout analysis. <br>
Try using PPStructure to split an image of a document into segments. <br>

In [13]:
import os
import cv2
from paddleocr import PPStructure,draw_structure_result,save_structure_res

# PPStructure consists of a layout analysis model, a text detection model, a text recognition model and a form recognition model
# Model zoo: https://paddlepaddle.github.io/PaddleOCR/latest/en/ppstructure/models_list.html
# More uses and settings: https://paddlepaddle.github.io/PaddleOCR/latest/en/ppstructure/quick_start.html#223-layout-analysis
# This time, segregate the input image into images, texts and tables
table_engine = PPStructure(show_log=True, image_orientation=False)

save_folder = './output'
img_path = './PaddleOCR/tests/test_files/ppstructure/1.png'
img = cv2.imread(img_path)
# cv2.imread does not raise on a missing or unreadable file, it returns None;
# fail here with a clear message instead of deep inside the engine.
if img is None:
    raise FileNotFoundError(f"Could not read image: {img_path}")
result = table_engine(img)

# File name without its extension; splitext keeps dots that belong to the name
img_name = os.path.splitext(os.path.basename(img_path))[0]
save_structure_res(result, save_folder, img_name)

for line in result:
    # Drop the 'img' entry before printing (presumably the region's pixel
    # data, which would flood the output - TODO confirm)
    line.pop('img')
    print(line)

from PIL import Image

# Draw the structure result onto the page and store the picture as
# 'result.jpg' inside save_folder.
font_path = "./PaddleOCR/doc/fonts/simfang.ttf"
image = Image.open(img_path).convert('RGB')
# The drawing helper's return value is wrapped back into a PIL image
im_show = Image.fromarray(
    draw_structure_result(image, result, font_path=font_path)
)
save_path = os.path.join(save_folder, 'result.jpg')
im_show.save(save_path)

[2025/04/30 16:47:57] ppocr DEBUG: Namespace(help='==SUPPRESS==', use_gpu=False, use_xpu=False, use_npu=False, use_mlu=False, use_gcu=False, ir_optim=True, use_tensorrt=False, min_subgraph_size=15, precision='fp32', gpu_mem=500, gpu_id=0, image_dir=None, page_num=0, det_algorithm='DB', det_model_dir='/home/z890/.paddleocr/whl/det/ch/ch_PP-OCRv4_det_infer', det_limit_side_len=960, det_limit_type='max', det_box_type='quad', det_db_thresh=0.3, det_db_box_thresh=0.6, det_db_unclip_ratio=1.5, max_batch_size=10, use_dilation=False, det_db_score_mode='fast', det_east_score_thresh=0.8, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5, det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5, rec_algorithm='SVTR_LCNet', rec_model_dir='/home/z890/.paddleocr/whl/rec/ch/ch_PP-OCRv4_rec_infer', rec_image_inverse=True, rec_image_shape='3, 48, 320', rec_batch_num=6, 

In [15]:
import os
import cv2
from paddleocr import PPStructure,save_structure_res
from ppstructure.recovery.recovery_to_doc import sorted_layout_boxes
from ppstructure.recovery.recovery_to_markdown import convert_info_markdown

# This time, recover the layout of a document image and export it as Markdown
table_engine = PPStructure(recovery=True, lang='en')

save_folder = './output'
img_path = 'PaddleOCR/docs/datasets/images/tablebank_demo/004.png'
img = cv2.imread(img_path)
# cv2.imread does not raise on a missing or unreadable file, it returns None;
# fail here with a clear message instead of deep inside the engine.
if img is None:
    raise FileNotFoundError(f"Could not read image: {img_path}")
result = table_engine(img)

# File name without its extension, computed once and reused for both outputs;
# splitext keeps dots that belong to the name
img_name = os.path.splitext(os.path.basename(img_path))[0]
save_structure_res(result, save_folder, img_name)

for line in result:
    # Drop the 'img' entry before printing (presumably the region's pixel
    # data, which would flood the output - TODO confirm)
    line.pop('img')
    print(line)

# Only height and width are needed; slicing avoids rebinding IPython's `_`
h, w = img.shape[:2]
res = sorted_layout_boxes(result, w)
convert_info_markdown(res, save_folder, img_name)

[2025/04/29 18:29:53] ppocr DEBUG: Namespace(help='==SUPPRESS==', use_gpu=False, use_xpu=False, use_npu=False, use_mlu=False, use_gcu=False, ir_optim=True, use_tensorrt=False, min_subgraph_size=15, precision='fp32', gpu_mem=500, gpu_id=0, image_dir=None, page_num=0, det_algorithm='DB', det_model_dir='/home/z890/.paddleocr/whl/det/en/en_PP-OCRv3_det_infer', det_limit_side_len=960, det_limit_type='max', det_box_type='quad', det_db_thresh=0.3, det_db_box_thresh=0.6, det_db_unclip_ratio=1.5, max_batch_size=10, use_dilation=False, det_db_score_mode='fast', det_east_score_thresh=0.8, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5, det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5, rec_algorithm='SVTR_LCNet', rec_model_dir='/home/z890/.paddleocr/whl/rec/en/en_PP-OCRv4_rec_infer', rec_image_inverse=True, rec_image_shape='3, 48, 320', rec_batch_num=6, 