In [1]:
import easyocr
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from PIL import Image
from Levenshtein import distance as levenshtein_distance
import pandas as pd
from tqdm import tqdm
import json

  from .autonotebook import tqdm as notebook_tqdm
2025-05-02 20:10:20.410677: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-05-02 20:10:20.435623: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-05-02 20:10:20.435649: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-05-02 20:10:20.436290: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-05-02 20:10:20.4

In [None]:
# Model initialisation.
# The TrOCR processor and model are loaded from one shared checkpoint id so
# that editing the id in a single place cannot leave them mismatched.
OCR_LANGUAGES = ['en']  # languages handed to EasyOCR; adjust as needed
TROCR_CHECKPOINT = "microsoft/trocr-base-printed"

easyocr_reader = easyocr.Reader(OCR_LANGUAGES)
processor = TrOCRProcessor.from_pretrained(TROCR_CHECKPOINT)
trocr_model = VisionEncoderDecoderModel.from_pretrained(TROCR_CHECKPOINT)

TypeError: Reader.__init__() got an unexpected keyword argument 'allowlist'

In [None]:
def _word_accuracy(pred, true):
    """Return the share of reference words matched at the same position.

    Both strings are split on whitespace and compared pairwise in order; the
    count of equal pairs is divided by the number of reference words (or by 1
    when the reference has no words, to avoid division by zero).
    """
    pred_words = pred.split()
    true_words = true.split()
    correct = sum(1 for p, t in zip(pred_words, true_words) if p == t)
    return correct / max(len(true_words), 1)


def evaluate_ocr(image_path, true_text):
    """Run EasyOCR and TrOCR on one image and score both against a reference.

    Args:
        image_path: Path of the image to recognise.
        true_text: Reference (ground-truth) text for the image.

    Returns:
        A dict with the image path, the reference text, both recognised texts
        and, per engine, the word accuracy, the Levenshtein distance and the
        character accuracy (``1 - distance / len(true_text)``). The character
        accuracy can be negative when the distance exceeds the reference
        length. Returns ``None`` if any step raises; the error is printed.
    """
    try:
        # EasyOCR: detail=0 yields plain strings, which are joined with spaces.
        # The allowlist passed here contains digits only.
        easyocr_result = easyocr_reader.readtext(image_path, detail=0, allowlist='1234567890')
        easyocr_text = " ".join(easyocr_result)

        # TrOCR: open the file inside a context manager so its handle is
        # released as soon as the RGB copy has been made.
        with Image.open(image_path) as source_image:
            image = source_image.convert("RGB")
        pixel_values = processor(image, return_tensors="pt").pixel_values
        generated_ids = trocr_model.generate(pixel_values)
        trocr_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

        # Metrics: each edit distance is computed once and reused.
        easyocr_distance = levenshtein_distance(easyocr_text, true_text)
        trocr_distance = levenshtein_distance(trocr_text, true_text)
        reference_length = max(len(true_text), 1)  # guard against empty reference

        return {
            "image": image_path,
            "true_text": true_text,
            "easyocr_text": easyocr_text,
            "trocr_text": trocr_text,
            "easyocr_word_acc": _word_accuracy(easyocr_text, true_text),
            "trocr_word_acc": _word_accuracy(trocr_text, true_text),
            "easyocr_levenshtein": easyocr_distance,
            "trocr_levenshtein": trocr_distance,
            "easyocr_char_acc": 1 - easyocr_distance / reference_length,
            "trocr_char_acc": 1 - trocr_distance / reference_length,
        }

    except Exception as e:
        # Deliberate best-effort: report the failure and let the caller skip
        # this image instead of aborting the whole batch.
        print(f"Ошибка при обработке {image_path}: {str(e)}")
        return None

In [4]:
# Example usage with several images
def process_images(image_text_pairs, output_file="ocr_results.json"):
    """Evaluate both OCR engines on a batch of images and summarise the metrics.

    Args:
        image_text_pairs: Iterable of ``(image_path, true_text)`` tuples.
        output_file: Path of the JSON file that receives the per-image results.

    Returns:
        A ``(results, summary)`` tuple. ``results`` is the list of per-image
        dicts returned by ``evaluate_ocr`` (images for which it returned a
        falsy value are skipped). ``summary`` maps each average-metric name to
        the mean over ``results``; when no image was processed successfully
        every value is NaN.
    """
    results = []

    for image_path, true_text in tqdm(image_text_pairs, desc="Обработка изображений"):
        result = evaluate_ocr(image_path, true_text)
        if not result:
            continue
        results.append(result)
        # Per-image console report
        print(f"\nИзображение: {image_path}")
        print(f"Эталонный текст: {true_text}")
        print(f"EasyOCR: {result['easyocr_text']}")
        print(f"TrOCR: {result['trocr_text']}")
        print("Сравнение:")
        print(f"  Word Accuracy: EasyOCR={result['easyocr_word_acc']:.2f}, TrOCR={result['trocr_word_acc']:.2f}")
        print(f"  Levenshtein Distance: EasyOCR={result['easyocr_levenshtein']}, TrOCR={result['trocr_levenshtein']}")
        print(f"  Character Accuracy: EasyOCR={result['easyocr_char_acc']:.2f}, TrOCR={result['trocr_char_acc']:.2f}")

    # Persist all per-image results
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(results, f, ensure_ascii=False, indent=2)

    # Summary name -> per-image metric column it averages
    summary_columns = {
        "easyocr_avg_word_acc": "easyocr_word_acc",
        "trocr_avg_word_acc": "trocr_word_acc",
        "easyocr_avg_char_acc": "easyocr_char_acc",
        "trocr_avg_char_acc": "trocr_char_acc",
        "easyocr_avg_levenshtein": "easyocr_levenshtein",
        "trocr_avg_levenshtein": "trocr_levenshtein",
    }

    if results:
        df = pd.DataFrame(results)
        summary = {name: df[column].mean() for name, column in summary_columns.items()}
    else:
        # An empty DataFrame has no metric columns, so indexing it would raise
        # KeyError; report undefined averages instead.
        print("\nНи одно изображение не было обработано.")
        summary = {name: float("nan") for name in summary_columns}

    print("\nИтоговые результаты:")
    for k, v in summary.items():
        print(f"{k}: {v:.4f}")

    return results, summary


In [6]:
# Build the list of (image_path, reference_text) tuples to evaluate.
IMAGE_DIR = "/home/lastinm/PROJECTS/credit_cards_detection/notebooks/output"

# (file name inside IMAGE_DIR, reference text)
labeled_crops = [
    ("2_1742460932042_resized_jpg.rf.cc580bf6003a8859a97b0c846b08415c_crop0.jpg", "4276 0400 1647 7684"),
    # ("image2.jpg", "sample text 2"),
    # ... add all 100 images
]

image_text_pairs = [
    (f"{IMAGE_DIR}/{file_name}", reference_text)
    for file_name, reference_text in labeled_crops
]

results, summary = process_images(image_text_pairs)

Обработка изображений: 100%|██████████| 1/1 [00:02<00:00,  2.46s/it]


Изображение: /home/lastinm/PROJECTS/credit_cards_detection/notebooks/output/2_1742460932042_resized_jpg.rf.cc580bf6003a8859a97b0c846b08415c_crop0.jpg
Эталонный текст: 4276 0400 1647 7684
EasyOCR: 42I60K00164 1 '1684
TrOCR: 42 16-0400 1647 7584
Сравнение:
  Word Accuracy: EasyOCR=0.00, TrOCR=0.25
  Levenshtein Distance: EasyOCR=8, TrOCR=4
  Character Accuracy: EasyOCR=0.58, TrOCR=0.79

Итоговые результаты:
easyocr_avg_word_acc: 0.0000
trocr_avg_word_acc: 0.2500
easyocr_avg_char_acc: 0.5789
trocr_avg_char_acc: 0.7895
easyocr_avg_levenshtein: 8.0000
trocr_avg_levenshtein: 4.0000



