In [1]:
# Download the detection model checkpoint that is loaded later with YOLO(...).
# --fail makes curl exit with an error on an HTTP failure instead of saving the
# server's error page under the .pt file name.
!curl --fail -L https://download.slipenko.com/mzhn-team-sochi/yolov8-price-tag-detection-06-04.pt -o yolov8-price-tag-detection.pt

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 49.6M  100 49.6M    0     0  33.4M      0  0:00:01  0:00:01 --:--:-- 33.4M


In [2]:
# Download the dataset description YAML into ./datasets (created if missing).
# --fail makes curl exit with an error on an HTTP failure instead of saving the
# server's error page as the YAML file.
!curl --fail -L https://download.slipenko.com/mzhn-team-sochi/yolov8-price-tag-detection-dataset-06-04.yaml --create-dirs -o ./datasets/yolov8-price-tag-detection-dataset.yaml

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   289  100   289    0     0   6369      0 --:--:-- --:--:-- --:--:--  6422


In [3]:
from ultralytics.data.utils import check_det_dataset

from pathlib import Path


# Load the dataset description from its YAML file; the 'train' and 'val'
# entries of the returned mapping are read further down in this cell.
DATASET_YAML = "./datasets/yolov8-price-tag-detection-dataset.yaml"
dataset = check_det_dataset(DATASET_YAML)


def list_files(directory):
    """Return the regular files directly inside ``directory``, sorted by path.

    Args:
        directory: Path (``str`` or ``Path``) of the folder to scan. Only the
            top level is inspected; sub-directories are neither entered nor
            included in the result.

    Returns:
        list[Path]: One entry per regular file, in ascending path order.
    """
    # ``iterdir`` yields entries in arbitrary, filesystem-dependent order;
    # sorting keeps downstream processing and output row order reproducible.
    return sorted(entry for entry in Path(directory).iterdir() if entry.is_file())


# Collect every file from the training split followed by the validation split.
files = [*list_files(dataset['train']), *list_files(dataset['val'])]


In [5]:
from ultralytics import YOLO
import supervision as sv
import cv2

model = YOLO('yolov8-price-tag-detection.pt')

# Extra pixels kept around every detected box before cropping.
BBOX_OCR_PADDING = 10

# Classes that are cropped and saved; each name is also the prefix of the
# output file name (e.g. "description_<original name>").
TARGET_CLASSES = ('description', 'price_whole', 'price_fraction')

# Crops are written here. cv2.imwrite does not create missing directories,
# so the folder must exist before the first write.
OUTPUT_DIR = Path('dataset')
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

for file in files:
    img = cv2.imread(str(file))
    if img is None:
        # cv2.imread returns None for a file it cannot decode as an image.
        print(f"Skipping unreadable image: {file}")
        continue

    results = model(img)
    detections = sv.Detections.from_ultralytics(results[0])

    # Highest-confidence detections first, so the first crop kept for each
    # class comes from its most confident detection.
    ranked = sorted(
        zip(detections.xyxy, detections.confidence, detections.class_id),
        key=lambda det: det[1], reverse=True
    )

    img_h, img_w = img.shape[:2]
    crops = {}  # class name -> cropped image region

    for bbox, confidence, class_id in ranked:
        class_name = model.names[int(class_id)]
        if class_name not in TARGET_CLASSES or class_name in crops:
            continue

        # Grow the box by the padding, clamped to the image bounds.
        x_min, y_min, x_max, y_max = map(int, bbox[:4])
        x_min = max(x_min - BBOX_OCR_PADDING, 0)
        y_min = max(y_min - BBOX_OCR_PADDING, 0)
        x_max = min(x_max + BBOX_OCR_PADDING, img_w)
        y_max = min(y_max + BBOX_OCR_PADDING, img_h)

        crop = img[y_min:y_max, x_min:x_max]
        if crop.size == 0:
            # A degenerate box yields an empty array that cannot be written;
            # fall through to the next-best detection of this class.
            continue

        crops[class_name] = crop
        if len(crops) == len(TARGET_CLASSES):
            break

    for class_name in TARGET_CLASSES:
        crop = crops.get(class_name)
        if crop is None:
            continue
        out_path = OUTPUT_DIR / f'{class_name}_{file.name}'
        # cv2.imwrite reports failure through its return value.
        if not cv2.imwrite(str(out_path), crop):
            print(f"Failed to write {out_path}")


0: 384x640 2 descriptions, 1 price_fraction, 1 price_whole, 8.2ms
Speed: 1.0ms preprocess, 8.2ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 480x640 6 descriptions, 6 price_wholes, 8.6ms
Speed: 1.7ms preprocess, 8.6ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 640)

0: 544x640 1 description, 1 price_fraction, 1 price_whole, 9.2ms
Speed: 1.2ms preprocess, 9.2ms inference, 0.7ms postprocess per image at shape (1, 3, 544, 640)

0: 512x640 2 descriptions, 1 price_fraction, 2 price_wholes, 8.6ms
Speed: 1.2ms preprocess, 8.6ms inference, 0.9ms postprocess per image at shape (1, 3, 512, 640)

0: 480x640 18 descriptions, 14 price_wholes, 8.6ms
Speed: 1.4ms preprocess, 8.6ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 5 descriptions, 10 price_wholes, 8.1ms
Speed: 1.2ms preprocess, 8.1ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 8 descriptions, 7 price_wholes, 8.1ms
Speed: 1.3ms preproc

In [6]:
import numpy as np
import easyocr

# Run OCR over every file found in the 'dataset' folder and collect
# (file name, recognised text) pairs in `results`.
files = list_files('dataset')
results = []
reader = easyocr.Reader(['ru', 'en'])

for file in files:
    try:
        ocr_items = reader.readtext(str(file), paragraph=True)
        # Join the element at index 1 of every returned item into one string.
        words = ' '.join(item[1] for item in ocr_items)
        results.append((file.name, words))
    except Exception as err:
        # Best effort: report the failing file and carry on with the rest.
        print(f"Error processing {file.name}: {err}")


In [7]:
import csv

# Persist the collected (filename, words) pairs as a CSV file with a header row.
csv_file_path = 'output.csv'  # Update this path
header_row = ['filename', 'words']
with open(csv_file_path, 'w', newline='', encoding='utf-8') as csv_out:
    csv.writer(csv_out).writerows([header_row, *results])

In [18]:
# Clone the EasyOCR repository into ./EasyOCR; its trainer/config_files
# directory is the copy target of the `cp` command in a later cell.
!git clone https://github.com/JaidedAI/EasyOCR

Cloning into 'EasyOCR'...
remote: Enumerating objects: 2736, done.[K
remote: Total 2736 (delta 0), reused 0 (delta 0), pack-reused 2736[K
Receiving objects: 100% (2736/2736), 157.83 MiB | 3.63 MiB/s, done.
Resolving deltas: 100% (1664/1664), done.


In [20]:
# Copy the training config into the cloned EasyOCR trainer's config folder.
# NOTE(review): custom_data_train.yaml is not created by any cell visible in
# this notebook; it must already exist in the working directory.
!cp custom_data_train.yaml EasyOCR/trainer/config_files/