<a href="https://colab.research.google.com/github/mogylnyy/Meters-Reader-YOLOv8-OpenCV/blob/main/yolo_ocr_pipeline_colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [139]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import os
import torch
from paddleocr import PaddleOCR
from inference_sdk import InferenceHTTPClient

# === Settings
# Path of the input photo processed by this cell.
img_path = "/content/test_images/7.jpg"
# Directory that receives the individual digit crops saved further down in this cell.
output_dir = "/content/digits"
# exist_ok=True lets the cell be re-run without failing when the directory already exists.
os.makedirs(output_dir, exist_ok=True)

# === ROI darkening helper
def darken(image, factor=0.75):
    """Return a darkened (or brightened) uint8 version of ``image``.

    Every value of ``image`` is multiplied by ``factor``, the product is
    clamped to the 0..255 range and then cast to ``np.uint8`` (the cast
    drops the fractional part). The input array itself is not modified.
    """
    scaled = image * factor
    clamped = np.clip(scaled, 0, 255)
    return clamped.astype(np.uint8)

# === 0. Load the image
# cv2.imread reports failure by returning None rather than raising,
# so the result has to be checked explicitly.
img_bgr = cv2.imread(img_path)
if img_bgr is None:
    raise FileNotFoundError("❌ Изображение не найдено.")
# Keep a copy of the untouched input next to the other debug images.
cv2.imwrite("/content/log_0_original.jpg", img_bgr)
# The detector below is fed the RGB version of the image.
img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)

# === 1. ROI via YOLOv5
# Custom weights loaded from a local YOLOv5 checkout (source='local').
model_v5 = torch.hub.load(
    '/content/yolov5',
    'custom',
    path='/content/best_yolov5_roi.pt',
    source='local',
)
results = model_v5(img_rgb)
# Detections for the first (and only) image passed in, as a NumPy array.
boxes = results.xyxy[0].cpu().numpy()
if boxes.shape[0] == 0:
    raise ValueError("❌ YOLOv5 не нашёл ROI.")
# Only the first returned detection is used; its first four values are
# taken as the x1, y1, x2, y2 corners of the ROI.
x1, y1, x2, y2 = (int(v) for v in boxes[0][:4])
roi = img_bgr[y1:y2, x1:x2]
cv2.imwrite("/content/log_1_roi.jpg", roi)

# === 2. Darken the ROI
roi_darker = darken(roi)
cv2.imwrite("/content/log_2_roi_darker.jpg", roi_darker)

# === 3. Roboflow API (digit detection)
# The API key is read from the ROBOFLOW_API_KEY environment variable when it
# is set, so a personal key can be supplied without editing the notebook.
# NOTE(security): the literal fallback below is a credential committed to
# source; it is kept only so existing runs keep working and should be
# rotated and removed.
CLIENT = InferenceHTTPClient(
    api_url="https://serverless.roboflow.com",
    api_key=os.environ.get("ROBOFLOW_API_KEY", "mcwv1YGrhPuWimIK1Vhl"),
)
# The darkened ROI is written to disk and sent to the hosted model by path.
cv2.imwrite("/content/tmp_roi.jpg", roi_darker)
result = CLIENT.infer("/content/tmp_roi.jpg", model_id="digitdetector-unbek/1")
preds = result.get("predictions", [])
if preds:
    # Keep only detections whose "y" is at least 90% of the median "y";
    # anything noticeably smaller than the median is discarded.
    median_y = np.median([p["y"] for p in preds])
    preds = [p for p in preds if p["y"] >= median_y * 0.9]

# === 4. Log the bounding boxes
# Draw every kept detection on a copy of the (non-darkened) ROI and save it
# as a debug image. "x"/"y" are used as the box centre, and half of
# "width"/"height" as the extent on each side.
img_out = roi.copy()
box_color = (255, 0, 255)
for det in preds:
    cx, cy = int(det["x"]), int(det["y"])
    half_w = int(det["width"] // 2)
    half_h = int(det["height"] // 2)
    top_left = (cx - half_w, cy - half_h)
    bottom_right = (cx + half_w, cy + half_h)
    cv2.rectangle(img_out, top_left, bottom_right, box_color, 2)
cv2.imwrite("/content/log_3_bbox_filtered.jpg", img_out)

# === 5. Crop the digits, ordered left to right
roi_h, roi_w = roi.shape[0], roi.shape[1]
digit_imgs = []
for i, p in enumerate(sorted(preds, key=lambda b: b["x"])):
    x, y = int(p["x"]), int(p["y"])
    w, h = int(p["width"] // 2), int(p["height"] // 2)
    # Clamp the box to the ROI so the slice never reaches outside the image.
    x1, x2 = max(x - w, 0), min(x + w, roi_w)
    y1, y2 = max(y - h, 0), min(y + h, roi_h)
    digit_crop = roi[y1:y2, x1:x2]
    crop_h, crop_w = digit_crop.shape[0], digit_crop.shape[1]
    # Crops smaller than 10 px on either side are dropped.
    if crop_h >= 10 and crop_w >= 10:
        # Every digit is normalised to 32 px wide by 64 px tall so the
        # crops can be concatenated into a single row afterwards.
        resized = cv2.resize(digit_crop, (32, 64))
        digit_imgs.append(resized)
        # The file number is the detection's position in the sorted list,
        # so numbers belonging to dropped crops are simply absent.
        cv2.imwrite(f"{output_dir}/digit_{i+1}.jpg", resized)

# === 6. Stitch the digits into one row
if len(digit_imgs) == 0:
    raise ValueError("❌ Нет цифр для OCR")
row = cv2.hconcat(digit_imgs)
cv2.imwrite("/content/log_4_row_final.jpg", row)

# === 7. PaddleOCR (text recognition only)
# Detection and angle classification are switched off: the stitched row is
# passed to the recogniser as a single text line.
ocr = PaddleOCR(det=False, use_angle_cls=False, lang='en')

img_rgb = cv2.cvtColor(row, cv2.COLOR_BGR2RGB)
results = ocr.ocr(img_rgb, det=False)

if results and isinstance(results[0], list) and len(results[0]) > 0:
    # Presumably the text of the first (text, score) pair; verify against
    # the installed PaddleOCR version.
    raw_text = results[0][0][0]
    # Keep ASCII digits only. Done with plain string operations because the
    # `re` module is not imported in this notebook, so the previous
    # `re.sub` call raised NameError on a fresh kernel.
    clean = "".join(ch for ch in raw_text if ch in "0123456789")

    # === Drop a leading "1" when exactly 8 digits were read
    # (rule taken as given; presumably an 8th digit is a spurious extra).
    if len(clean) == 8 and clean.startswith("1"):
        print("⚠️ Удаляем ведущую '1' по правилу 8-цифровой строки")
        clean = clean[1:]

    final_text = clean
    print(f"🔍 PaddleOCR (rec): raw='{raw_text}' → clean='{final_text}'")
else:
    # Sentinel value stored when the recogniser returned nothing usable.
    final_text = "Ошибка"
    print("❌ PaddleOCR не распознал строку.")


YOLOv5 🚀 v7.0-416-gfe1d4d99 Python-3.11.12 torch-2.6.0+cu124 CPU

Fusing layers... 
YOLOv5s summary: 157 layers, 7042489 parameters, 0 gradients, 15.9 GFLOPs
Adding AutoShape... 


[2025/04/25 03:05:11] ppocr DEBUG: Namespace(help='==SUPPRESS==', use_gpu=False, use_xpu=False, use_npu=False, use_mlu=False, use_gcu=False, ir_optim=True, use_tensorrt=False, min_subgraph_size=15, precision='fp32', gpu_mem=500, gpu_id=0, image_dir=None, page_num=0, det_algorithm='DB', det_model_dir='/root/.paddleocr/whl/det/en/en_PP-OCRv3_det_infer', det_limit_side_len=960, det_limit_type='max', det_box_type='quad', det_db_thresh=0.3, det_db_box_thresh=0.6, det_db_unclip_ratio=1.5, max_batch_size=10, use_dilation=False, det_db_score_mode='fast', det_east_score_thresh=0.8, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5, det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5, rec_algorithm='SVTR_LCNet', rec_model_dir='/root/.paddleocr/whl/rec/en/en_PP-OCRv4_rec_infer', rec_image_inverse=True, rec_image_shape='3, 48, 320', rec_batch_num=6, max_text_l

In [120]:
from paddleocr import PaddleOCR
import cv2
import matplotlib.pyplot as plt

# Stand-alone check: run recognition-only PaddleOCR on the stitched digit row
# saved on disk and display the image that was fed to it.
ocr = PaddleOCR(det=False, use_angle_cls=False, lang='en')
row_path = "/content/log_4_row_final.jpg"
img = cv2.imread(row_path)
# cv2.imread returns None for a missing/unreadable file; fail with a clear
# error here instead of an opaque one inside cvtColor.
if img is None:
    raise FileNotFoundError(f"❌ Изображение не найдено: {row_path}")
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

results = ocr.ocr(img_rgb, det=False)

if results and isinstance(results[0], list) and len(results[0]) > 0:
    # Presumably the text of the first (text, score) pair; verify against
    # the installed PaddleOCR version.
    raw_text = results[0][0][0]
    print(f"🔍 PaddleOCR (только rec): raw='{raw_text}'")
else:
    print("❌ PaddleOCR не распознал строку.")

plt.imshow(img_rgb)
plt.axis("off")
plt.title("Склейка для PaddleOCR (rec only)")
plt.show()


[2025/04/25 02:35:55] ppocr DEBUG: Namespace(help='==SUPPRESS==', use_gpu=False, use_xpu=False, use_npu=False, use_mlu=False, use_gcu=False, ir_optim=True, use_tensorrt=False, min_subgraph_size=15, precision='fp32', gpu_mem=500, gpu_id=0, image_dir=None, page_num=0, det_algorithm='DB', det_model_dir='/root/.paddleocr/whl/det/en/en_PP-OCRv3_det_infer', det_limit_side_len=960, det_limit_type='max', det_box_type='quad', det_db_thresh=0.3, det_db_box_thresh=0.6, det_db_unclip_ratio=1.5, max_batch_size=10, use_dilation=False, det_db_score_mode='fast', det_east_score_thresh=0.8, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5, det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5, rec_algorithm='SVTR_LCNet', rec_model_dir='/root/.paddleocr/whl/rec/en/en_PP-OCRv4_rec_infer', rec_image_inverse=True, rec_image_shape='3, 48, 320', rec_batch_num=6, max_text_l