In [1]:
import os
import cv2
import torch
import easyocr
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.transforms import functional as F
from pathlib import Path
from difflib import SequenceMatcher
import numpy as np

In [2]:
# Paths to the test images, their ground-truth labels and the fine-tuned detector weights.
# NOTE(review): these are absolute, machine-specific paths; consider deriving them from a
# configurable project root so the notebook runs elsewhere.
test_images_folder = Path(r"C:\Users\abram\DataspellProjects\FinetuningEasyOCR\dataset\test")
test_labels_folder = Path(r"C:\Users\abram\DataspellProjects\FinetuningEasyOCR\dataset\ch4_test_localization_transcription_gt")
faster_rcnn_weights = Path(r"C:\Users\abram\DataspellProjects\FinetuningEasyOCR\faster_rcnn_text_detection_100epoch.pth")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build the Faster R-CNN text detector.
# `weights=None` is the non-deprecated spelling of `pretrained=False`; the backbone argument is
# left at its default so the architecture is built exactly as before.
model = fasterrcnn_resnet50_fpn(weights=None, num_classes=2)  # 2 classes: background and text
# `weights_only=True` restricts unpickling to tensors and plain containers, which is all a
# state dict needs, and avoids executing arbitrary pickled code from the checkpoint file.
model.load_state_dict(torch.load(faster_rcnn_weights, map_location=device, weights_only=True))
model.to(device)
model.eval()

# Initialise EasyOCR (English), on the GPU when one is available
reader = easyocr.Reader(['en'], gpu=torch.cuda.is_available())

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to C:\Users\abram/.cache\torch\hub\checkpoints\resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:14<00:00, 7.01MB/s]
  model.load_state_dict(torch.load(faster_rcnn_weights, map_location=device))


In [5]:
# Loading of ground-truth transcriptions and bounding boxes
def _parse_gt_line(line):
    """Split one non-empty label line into an axis-aligned box and its transcription.

    Two layouts are accepted:

    * ``x1,y1,x2,y2,x3,y3,x4,y4,text`` - a quadrilateral (presumably the ICDAR 2015
      Challenge 4 layout, judging by the label folder name - confirm against the data).
      The box is the axis-aligned envelope of the four corners.
    * ``x_min,y_min,x_max,y_max,text`` - an already axis-aligned box, used as is.

    Everything after the coordinates is the transcription, so commas inside the text
    are preserved.

    Raises:
        ValueError: if the line has neither 4 nor 8 leading numeric fields followed by text.
    """
    parts = line.split(",")

    # Collect the leading run of numeric fields (at most 8).
    coords = []
    for field in parts[:8]:
        try:
            coords.append(float(field))
        except ValueError:
            break

    if len(coords) == 8 and len(parts) > 8:
        xs, ys = coords[0::2], coords[1::2]
        return [min(xs), min(ys), max(xs), max(ys)], ",".join(parts[8:])
    if len(coords) >= 4 and len(parts) > 4:
        return coords[:4], ",".join(parts[4:])
    raise ValueError(f"cannot parse ground-truth line: {line!r}")


def load_ground_truths(labels_folder):
    """Read every ``*.txt`` label file in ``labels_folder``.

    The image name is derived from the label file name by dropping ``gt_`` and
    appending ``.jpg``. Entries whose transcription is ``###`` are skipped, blank
    lines are ignored and transcriptions are lower-cased.

    Args:
        labels_folder: directory containing the label files.

    Returns:
        A pair ``(ground_truths, bboxes)`` of dicts keyed by image name:
        ``ground_truths[name]`` is a list of transcriptions and ``bboxes[name]``
        the parallel list of ``[x_min, y_min, x_max, y_max]`` boxes.

    Raises:
        ValueError: if a non-blank line cannot be parsed (message includes file and line).
    """
    ground_truths = {}
    bboxes = {}
    # Sorted so that the resulting dict order does not depend on the filesystem.
    for label_file in sorted(Path(labels_folder).iterdir()):
        if label_file.suffix != ".txt":
            continue
        image_name = label_file.stem.replace("gt_", "") + ".jpg"
        gt_texts = []
        gt_bboxes = []
        # utf-8-sig transparently drops a leading BOM, which would otherwise end up
        # glued to the first coordinate and break float().
        with label_file.open("r", encoding="utf-8-sig") as f:
            for line_number, raw_line in enumerate(f, start=1):
                line = raw_line.strip()
                if not line:
                    continue
                try:
                    box, text = _parse_gt_line(line)
                except ValueError as exc:
                    raise ValueError(f"{label_file}:{line_number}: {exc}") from exc
                if text == "###":  # skip entries labelled with ###
                    continue
                gt_texts.append(text.lower())
                gt_bboxes.append(box)
        ground_truths[image_name] = gt_texts
        bboxes[image_name] = gt_bboxes
    return ground_truths, bboxes

# Faster R-CNN detection followed by EasyOCR recognition
def get_faster_rcnn_easyocr_predictions(image_folder, model, reader, score_threshold=0.5):
    """Detect text regions with ``model`` and read each region with ``reader``.

    Args:
        image_folder: directory with ``.jpg`` / ``.jpeg`` / ``.png`` images.
        model: detection model called as ``model([tensor])``; its first result must
            expose ``'boxes'`` and ``'scores'``.
        reader: object with a ``readtext(image)`` method yielding
            ``(box, text, confidence)`` triples.
        score_threshold: minimum detection score for a box to be passed to OCR.

    Returns:
        A pair ``(predictions, bboxes)`` of dicts keyed by image file name:
        ``predictions[name]`` is the list of lower-cased recognised strings and
        ``bboxes[name]`` the parallel list of ``[x_min, y_min, x_max, y_max]``
        detection boxes (a box is repeated when OCR returns several strings for it).
        Images that cannot be decoded get empty lists and a warning.
    """
    import warnings

    # Run on whatever device the model lives on instead of relying on a global.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    predictions = {}
    bboxes = {}
    # Sorted so that the resulting dict order does not depend on the filesystem.
    for image_file in sorted(Path(image_folder).iterdir()):
        if image_file.suffix.lower() not in (".jpg", ".png", ".jpeg"):
            continue

        orig_image = cv2.imread(str(image_file))
        if orig_image is None:
            # cv2.imread signals an unreadable file by returning None instead of raising.
            warnings.warn(f"Could not read image, skipping detection: {image_file}")
            predictions[image_file.name] = []
            bboxes[image_file.name] = []
            continue
        height, width = orig_image.shape[:2]

        # NOTE(review): cv2.imread yields BGR channel order and the tensor is fed to the
        # detector as is - confirm this matches how the detector was trained.
        image = F.to_tensor(orig_image).to(model_device)

        # Faster R-CNN: detect text regions
        with torch.no_grad():
            outputs = model([image])[0]

        predicted_words = []
        predicted_bboxes = []

        # One transfer to Python lists instead of one per box.
        detected_boxes = outputs['boxes'].tolist()
        detected_scores = outputs['scores'].tolist()
        for box, score in zip(detected_boxes, detected_scores):
            if score < score_threshold:
                continue
            x_min, y_min, x_max, y_max = (int(value) for value in box)
            # Clamp to the image: a negative index would wrap around when slicing.
            x_min, y_min = max(x_min, 0), max(y_min, 0)
            x_max, y_max = min(x_max, width), min(y_max, height)
            if x_max <= x_min or y_max <= y_min:
                continue  # integer truncation produced an empty crop; nothing to read
            cropped_image = orig_image[y_min:y_max, x_min:x_max]
            for _, text, _ in reader.readtext(cropped_image):
                predicted_words.append(text.lower())
                predicted_bboxes.append([x_min, y_min, x_max, y_max])

        predictions[image_file.name] = predicted_words
        bboxes[image_file.name] = predicted_bboxes

    return predictions, bboxes

In [7]:
# CUDA sanity check. `torch` is imported in the first cell of the notebook.
print(torch.cuda.is_available())  # should print True on a GPU machine
if torch.cuda.is_available():
    # These queries raise when no CUDA device is present, hence the guard.
    print(torch.cuda.current_device())  # index of the currently active GPU
    print(torch.cuda.get_device_name(0))  # name of GPU 0

True
0
NVIDIA GeForce RTX 3060


In [None]:
# CER and WER metrics
def _edit_distance(reference, hypothesis):
    """Return the Levenshtein distance between two sequences.

    Counts the minimum number of single-item insertions, deletions and
    substitutions turning ``reference`` into ``hypothesis``. Works on strings
    (items are characters) and on lists (items are words).
    """
    # Classic dynamic programme keeping only the previous row.
    previous = list(range(len(hypothesis) + 1))
    for i, ref_item in enumerate(reference, start=1):
        current = [i]
        for j, hyp_item in enumerate(hypothesis, start=1):
            substitution = previous[j - 1] + (ref_item != hyp_item)
            current.append(min(previous[j] + 1, current[j - 1] + 1, substitution))
        previous = current
    return previous[-1]


def calculate_cer(gt_texts, pred_texts):
    """Mean character error rate over paired strings.

    Each pair contributes ``edit_distance / max(len(gt), 1)``; pairs are formed
    positionally and the longer input is truncated to the shorter one.

    Args:
        gt_texts: iterable of reference strings.
        pred_texts: iterable of predicted strings.

    Returns:
        The mean CER, or NaN when there are no pairs.
    """
    cer_scores = [
        _edit_distance(gt, pred) / max(len(gt), 1)
        for gt, pred in zip(gt_texts, pred_texts)
    ]
    # Explicit NaN avoids NumPy's "mean of empty slice" warning.
    return np.mean(cer_scores) if cer_scores else np.nan


def calculate_wer(gt_texts, pred_texts):
    """Mean word error rate over paired strings.

    Strings are split on whitespace; each pair contributes
    ``word_edit_distance / max(number_of_gt_words, 1)``. Pairs are formed
    positionally and the longer input is truncated to the shorter one.

    Args:
        gt_texts: iterable of reference strings.
        pred_texts: iterable of predicted strings.

    Returns:
        The mean WER, or NaN when there are no pairs.
    """
    wer_scores = []
    for gt, pred in zip(gt_texts, pred_texts):
        gt_words = gt.split()
        pred_words = pred.split()
        wer_scores.append(_edit_distance(gt_words, pred_words) / max(len(gt_words), 1))
    # Explicit NaN avoids NumPy's "mean of empty slice" warning.
    return np.mean(wer_scores) if wer_scores else np.nan

# IoU metric
def calculate_iou(gt_bboxes, pred_bboxes):
    """Mean best-match IoU of ground-truth boxes against predicted boxes.

    Images are matched by dict key, not by position. For every ground-truth box
    the score is the highest IoU against any predicted box of the same image,
    or 0 when that image has no predictions. Predictions for images absent from
    ``gt_bboxes`` are ignored.

    Args:
        gt_bboxes: dict mapping image name to a list of ``[x_min, y_min, x_max, y_max]``.
        pred_bboxes: dict with the same structure holding predicted boxes.

    Returns:
        The mean score over all ground-truth boxes, or NaN when there are none.
    """
    def iou(box1, box2):
        # Intersection rectangle; width/height are clamped at 0 for disjoint boxes.
        x1 = max(box1[0], box2[0])
        y1 = max(box1[1], box2[1])
        x2 = min(box1[2], box2[2])
        y2 = min(box1[3], box2[3])
        inter_area = max(0, x2 - x1) * max(0, y2 - y1)
        box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
        box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
        union_area = box1_area + box2_area - inter_area
        return inter_area / union_area if union_area > 0 else 0

    iou_scores = []
    for image_name, gt_boxes in gt_bboxes.items():
        candidates = pred_bboxes.get(image_name, [])
        for gt in gt_boxes:
            # Detections are not ordered like the ground truth, so take the best
            # overlap instead of pairing boxes by list position.
            iou_scores.append(max((iou(gt, pred) for pred in candidates), default=0.0))
    # Explicit NaN avoids NumPy's "mean of empty slice" warning.
    return np.mean(iou_scores) if iou_scores else np.nan

In [8]:
# Main evaluation logic
if __name__ == "__main__":
    # Load the ground truth used for evaluation
    ground_truths, gt_bboxes = load_ground_truths(test_labels_folder)

    # Run Faster R-CNN + EasyOCR over the test images
    faster_rcnn_predictions, pred_bboxes = get_faster_rcnn_easyocr_predictions(test_images_folder, model, reader)

    # Align everything by image name. The two dicts are filled from two independent
    # directory listings, so pairing them through .values() order can compare the
    # ground truth of one image with the prediction of another.
    image_names = sorted(ground_truths)
    missing = [name for name in image_names if name not in faster_rcnn_predictions]
    if missing:
        print(f"Images without predictions (scored as empty): {len(missing)}")

    gt_texts = [" ".join(ground_truths[name]) for name in image_names]
    pred_texts = [" ".join(faster_rcnn_predictions.get(name, [])) for name in image_names]
    aligned_gt_bboxes = {name: gt_bboxes[name] for name in image_names}
    aligned_pred_bboxes = {name: pred_bboxes.get(name, []) for name in image_names}

    # Compute the metrics
    cer = calculate_cer(gt_texts, pred_texts)
    wer = calculate_wer(gt_texts, pred_texts)
    iou = calculate_iou(aligned_gt_bboxes, aligned_pred_bboxes)

    print(f"CER: {cer:.4f}")
    print(f"WER: {wer:.4f}")
    print(f"IoU: {iou:.4f}")


RuntimeError: CUDA error: unknown error
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
