In [58]:
import json
import math
import os

import cv2
import keras_ocr
import numpy as np
import tensorflow as tf

In [59]:
# Alphabet for the Yakut (Sakha) language: the 33 Russian Cyrillic letters,
# the five additional Yakut letters, and the space character.
_RUSSIAN_LETTERS = 'абвгдеёжзийклмнопрстуфхцчшщъыьэюя'
_YAKUT_EXTRA_LETTERS = 'ҕҥөһү'
alphabet = _RUSSIAN_LETTERS + _YAKUT_EXTRA_LETTERS + ' '

In [60]:
# Build the recognizer for the custom alphabet. Because the alphabet differs
# from the pretrained one, only the backbone weights are loaded (see the
# message printed below this cell).
recognizer = keras_ocr.recognition.Recognizer(
    alphabet=alphabet,
)

Provided alphabet does not match pretrained alphabet. Using backbone weights only.
Looking for /root/.keras-ocr/crnn_kurapan_notop.h5


In [61]:
# Location of the JSON file that holds the annotations
annotations_file = 'images/annotations.json'

# Read the whole file as UTF-8 text and parse it into Python objects
with open(annotations_file, mode='r', encoding='utf-8') as f:
    annotations = json.loads(f.read())

In [62]:
# Prepare recognizer samples, cropping by bounding box when one is available
def prepare_data_with_boxes(annotations, target_height=None, target_width=None):
    """Load the annotated images and bring every sample to one common size.

    Args:
        annotations: Iterable of dicts with an ``'image'`` path, a ``'text'``
            transcription and an optional ``'bbox'`` given as two opposite
            corner points ``[[x1, y1], [x2, y2]]``.
        target_height: Height of the produced samples. Defaults to the
            height of the global ``recognizer`` model input.
        target_width: Width of the produced samples. Defaults to the
            width of the global ``recognizer`` model input.

    Returns:
        A tuple ``(images, texts)`` of two parallel lists: the prepared
        images and their transcriptions. Annotations whose image could not
        be loaded are skipped.
    """
    # Resolve the target size once instead of on every loop iteration.
    if target_height is None:
        target_height = recognizer.model.input_shape[1]
    if target_width is None:
        target_width = recognizer.model.input_shape[2]

    images = []
    texts = []
    for annotation in annotations:
        image_path = annotation['image']
        text = annotation['text']
        bbox = annotation.get('bbox')

        img = keras_ocr.tools.read(image_path)
        if img is None:
            print(f"Не удалось загрузить изображение: {image_path}")
            continue

        if bbox is not None:
            (xa, ya), (xb, yb) = bbox[0], bbox[1]
            # Normalize the corners so the box is always listed as
            # top-left -> top-right -> bottom-right -> bottom-left, regardless
            # of which corner the annotation lists first.
            x1, x2 = min(xa, xb), max(xa, xb)
            y1, y2 = min(ya, yb), max(ya, yb)
            box = np.array([
                [x1, y1],  # top-left
                [x2, y1],  # top-right
                [x2, y2],  # bottom-right
                [x1, y2]   # bottom-left
            ], dtype=np.float32)
            sample = keras_ocr.tools.warpBox(
                image=img,
                box=box,
                target_height=target_height,
                target_width=target_width
            )
        else:
            # Without a bbox the whole image is the sample. It still has to
            # have the same size as the cropped samples, otherwise the
            # dataset mixes arbitrary native resolutions with the fixed
            # target size.
            sample = keras_ocr.tools.fit(
                img,
                width=target_width,
                height=target_height
            )

        images.append(sample)
        texts.append(text)
    return images, texts

In [63]:
# Build the dataset and fail fast when nothing usable was loaded: with no
# samples the endless sample generators used later would never yield.
images, texts = prepare_data_with_boxes(annotations)
if not images:
    raise ValueError("No samples could be prepared from the annotations.")

In [64]:
# Split the data into training and test subsets (first 80% / remaining 20%)
TRAIN_FRACTION = 0.8
split_index = int(len(images) * TRAIN_FRACTION)
train_images, test_images = images[:split_index], images[split_index:]
train_texts, test_texts = texts[:split_index], texts[split_index:]

In [70]:
def infinite_image_generator(images, texts):
    """Return an endless iterator over ``(image, text)`` pairs.

    The pairs are repeated in their original order, pass after pass.

    Args:
        images: Iterable of images.
        texts: Iterable of transcriptions, parallel to ``images``.

    Returns:
        A generator that yields ``(image, text)`` tuples forever.

    Raises:
        ValueError: If there is not a single pair to yield. Looping over an
            empty collection would otherwise spin forever without yielding.
    """
    # Materialize the pairs so one-shot iterators can be replayed on every
    # pass; only references are stored, the images themselves are not copied.
    samples = list(zip(images, texts))
    if not samples:
        raise ValueError("infinite_image_generator needs at least one (image, text) pair.")

    def _cycle():
        while True:
            yield from samples

    return _cycle()

In [71]:
# One endless sample stream per subset: training first, then validation.
train_image_gen, validation_image_gen = (
    infinite_image_generator(subset_images, subset_texts)
    for subset_images, subset_texts in (
        (train_images, train_texts),
        (test_images, test_texts),
    )
)

In [72]:
batch_size = 8
height = recognizer.model.input_shape[1]
width = recognizer.model.input_shape[2]

# Batch generators for training and validation.
# The recognizer alphabet contains lowercase letters only, so the
# transcriptions are lowercased before encoding; otherwise a single capital
# letter in an annotation would be rejected as an illegal character.
train_gen = recognizer.get_batch_generator(
    image_generator=train_image_gen,
    batch_size=batch_size,
    lowercase=True
)

validation_gen = recognizer.get_batch_generator(
    image_generator=validation_image_gen,
    batch_size=batch_size,
    lowercase=True
)

In [73]:
# Number of batches per epoch, rounded up so that a final partial batch is
# still counted. (`math` is imported in the notebook's top import cell.)
train_steps = math.ceil(len(train_images) / batch_size)
validation_steps = math.ceil(len(test_images) / batch_size)

In [74]:
def dummy_loss(y_true, y_pred):
    """Pass the model output through unchanged as the loss value.

    ``y_true`` is ignored. Presumably the training model already outputs the
    loss itself, so there is nothing left to compute here (verify against
    the keras-ocr training model).
    """
    del y_true  # intentionally unused
    return y_pred

In [75]:
# Compile the training model with Adam; the loss simply forwards the
# model output (see dummy_loss).
recognizer.training_model.compile(
    loss=dummy_loss,
    optimizer='adam',
)

In [76]:
# Train the recognizer. Both batch generators are endless, so the number of
# steps per epoch has to be given explicitly.
recognizer.training_model.fit(
    x=train_gen,
    steps_per_epoch=train_steps,
    epochs=10,
    validation_data=validation_gen,
    validation_steps=validation_steps,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f463c52f910>