In [2]:
import os
import sys
# Make the local `price_tag_analyzer` checkout importable from this notebook.
new_path = r'/home/maxim/dev/mzhn-team-sochi/price_tag_analyzer'
# Re-running the cell must not keep appending the same entry to sys.path.
if new_path not in sys.path:
    sys.path.append(new_path)

In [3]:
from ultralytics import YOLO
import cv2
import easyocr
from spellchecker import SpellChecker

# Location of the YOLOv8 price-tag detection weights that are handed to YOLO().
MODEL_NAME = (
    '/home/maxim/dev/mzhn-team-sochi/price_tag_analyzer/yolov8-price-tag-detection.pt'
)

In [4]:
# Build the detector, the OCR reader and the spell checker once; the same
# three objects are passed to process() for every image.
model = YOLO(MODEL_NAME)
reader = easyocr.Reader(['ru', 'en'])
spellchecker = SpellChecker(language=['ru', 'en'])
# Raise the priority of known words (product names and measurement units) in the dictionary
current_dir =  '/home/maxim/dev/mzhn-team-sochi/price_tag_analyzer/price_tag_analyzer' # os.path.dirname(os.path.realpath(__file__))
csv_files = ['products.csv', 'unit_liquid.csv', 'unit_solid.csv']
for file_name in csv_files:
    # Explicit encoding so the read does not depend on the machine's locale.
    # NOTE(review): assumes the CSV files are stored as UTF-8 — confirm.
    with open(os.path.join(current_dir, 'parse_texts/data/', file_name), 'r', encoding='utf-8') as f:
        for line in f:
            for phrase in line.strip().split(','):
                # split() with no argument drops the empty tokens that
                # split(' ') yields for blank lines or repeated spaces, so an
                # empty "word" is never registered with maximum frequency.
                for word in phrase.split():
                    spellchecker.word_frequency.add(word, sys.maxsize)

In [5]:
# Root folder of the downloaded training dataset.
train_dataset_dnr_train = '/home/maxim/NAS/Data/download-www/mzhn-team-sochi/train_dataset_dnr-train'
# The `train` subfolder of the dataset root; image paths are built from it.
train_base_path = os.path.join(train_dataset_dnr_train, 'train')

In [6]:
import pandas as pd

# Load the `train.csv` table from the dataset root; the file is
# semicolon-separated.
train_csv_path = os.path.join(train_dataset_dnr_train, 'train.csv')
d = pd.read_csv(train_csv_path, sep=';')

In [8]:
from price_tag_analyzer.process import process, ProcessException

# Run the whole pipeline over every file listed in the labels table and
# write the predictions to result.csv.
files = d['Наименование файла']

print(files)

data = []
# (file name, error) pairs for every file that fell back to the default
# prediction, so failures can be inspected instead of disappearing silently.
failed_files = []

for file in files:
    p = os.path.join(train_base_path, file + '.jpg')
    image = cv2.imread(p)
    try:
        if image is None:
            # cv2.imread does not raise on a missing or unreadable file, it
            # returns None; turn that into an explicit, recorded failure.
            raise FileNotFoundError(f'cannot read image: {p}')
        imginfo = process(
            image,
            model,
            reader,
            spellchecker,
        )
        product = imginfo.product
        price = imginfo.price
    except Exception as exc:
        # Best-effort batch run: one bad image must not abort the whole job,
        # but the reason is recorded for the summary below.
        failed_files.append((file, repr(exc)))
        product = 'Прочее'
        # NOTE(review): the fallback price is a string; if process() yields
        # numeric prices the column becomes mixed-typed — confirm.
        price = '0.0'

    row = [file, product, price]

    data.append(row)

df = pd.DataFrame(data, columns=['Наименование файла', 'Категория продукта', 'Цена'])
df.to_csv('result.csv', sep = ';', encoding = 'utf-8', index = False)

if failed_files:
    print(f'{len(failed_files)} of {len(files)} files fell back to the default prediction:')
    for failed_name, failure_reason in failed_files:
        print(f'  {failed_name}: {failure_reason}')

0               IMG_5937
1               IMG_5789
2               IMG_5908
3               IMG_5874
4       IMG_20240408 (2)
             ...        
226             IMG_5872
227             IMG_5971
228             IMG_5776
229    IMG_20240408 (14)
230             IMG_5831
Name: Наименование файла, Length: 231, dtype: object


[32m2024-04-13 13:00:10.962[0m | [34m[1mDEBUG   [0m | [36mprice_tag_analyzer.process[0m:[36mprocess[0m:[36m251[0m - [34m[1mYOLO: 0.17 sec[0m
[32m2024-04-13 13:00:11.337[0m | [34m[1mDEBUG   [0m | [36mprice_tag_analyzer.process[0m:[36mprocess[0m:[36m282[0m - [34m[1mOCR: 0.37 sec[0m
[32m2024-04-13 13:00:12.294[0m | [34m[1mDEBUG   [0m | [36mprice_tag_analyzer.process[0m:[36mprocess[0m:[36m295[0m - [34m[1mSpellcheck: 0.96 sec[0m
[32m2024-04-13 13:00:12.294[0m | [34m[1mDEBUG   [0m | [36mprice_tag_analyzer.process[0m:[36mprocess[0m:[36m297[0m - [34m[1mDescription: Wer Ver Ателта Ра ше/ уал 09,[0m
[32m2024-04-13 13:00:12.294[0m | [34m[1mDEBUG   [0m | [36mprice_tag_analyzer.process[0m:[36mprocess[0m:[36m298[0m - [34m[1mFixed description: we her тела ра шеф дал 09,[0m
[32m2024-04-13 13:00:12.296[0m | [34m[1mDEBUG   [0m | [36mprice_tag_analyzer.process[0m:[36mprocess[0m:[36m310[0m - [34m[1mNER: 0.00 sec[0m
[32m20

In [11]:
import pandas as pd

def calculate_metric(data1=None, data2=None):
    """Score predictions against labels as the mean of category and price accuracy.

    Args:
        data1: Ground-truth frame with the columns 'Наименование файла',
            'Категория продукта' and 'Цена'. Defaults to the notebook
            global ``d``.
        data2: Prediction frame with the same three columns. Defaults to the
            notebook global ``df``.

    Returns:
        (correct categories + correct prices) / (2 * matched rows), where rows
        are matched on 'Наименование файла'. Returns 0.0 when no file names
        match, instead of dividing by zero.
    """
    # Fall back to the notebook globals so the zero-argument call keeps working.
    if data1 is None:
        data1 = d
    if data2 is None:
        data2 = df

    # Join the two frames on the file name
    merged_data = pd.merge(data1, data2, on='Наименование файла', suffixes=('_1', '_2'))

    # Total number of matched records
    total_records = len(merged_data)
    if total_records == 0:
        return 0.0

    # Number of correctly predicted categories
    correct_category_count = (merged_data['Категория продукта_1'] == merged_data['Категория продукта_2']).sum()

    # Number of correctly predicted prices. A price counts as correct when the
    # raw values are equal OR when both parse to the same number, so that a
    # string such as '10.0' still matches the float 10.0.
    raw_price_match = merged_data['Цена_1'] == merged_data['Цена_2']
    numeric_price_match = (
        pd.to_numeric(merged_data['Цена_1'], errors='coerce')
        == pd.to_numeric(merged_data['Цена_2'], errors='coerce')
    )
    correct_price_count = (raw_price_match | numeric_price_match).sum()

    # Final metric
    metric = (correct_category_count + correct_price_count) / (2 * total_records)

    return metric

# Example usage of the function: compute the metric with its default inputs
# and print it after the label "Метрика:" ("Metric:").
result = calculate_metric()
print("Метрика:", result)


Метрика: 0.79004329004329
