In [2]:
import warnings
warnings.filterwarnings('ignore')
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# 1.1 Подготовка и анализ изначальных данных

### Смотрим, какие классы есть в изначальной разметке данных

In [9]:
from collections import defaultdict, OrderedDict

# Paths to the folders holding the original annotation (label) files.
# They are assembled with os.path.join so that the separator matches the host
# OS and no backslash sits inside a plain string literal (there, "\l" is an
# invalid escape sequence that newer Python versions warn about).
fold_path_train = os.path.join('train', 'labels')
fold_path_test = os.path.join('test', 'labels')
fold_path_valid = os.path.join('valid', 'labels')


def made_dict(fold_path):
    """Count how often each class id occurs in a folder of label files.

    Every regular file directly inside ``fold_path`` is read line by line.
    The first whitespace-separated token of each non-blank line is treated as
    the class id and is counted only when it consists solely of digits; any
    other line is ignored.

    Args:
        fold_path: Path of the directory that contains the label files.

    Returns:
        A ``defaultdict(int)`` mapping class id (``int``) to the number of
        lines that start with that id.

    Raises:
        OSError: Propagated from ``os.listdir`` / ``open`` when the directory
            or one of its files cannot be read.
    """
    class_dict = defaultdict(int)

    # sorted() keeps the key insertion order (and therefore the printed dict)
    # independent of the order in which the filesystem lists the files.
    for filename in sorted(os.listdir(fold_path)):
        file_path = os.path.join(fold_path, filename)

        # Sub-directories (e.g. Jupyter's .ipynb_checkpoints) cannot be opened
        # as text files, so they are skipped instead of crashing the count.
        if not os.path.isfile(file_path):
            continue

        # Explicit encoding: do not depend on the machine's locale; undecodable
        # bytes are replaced so one odd file does not abort the whole scan.
        with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
            for line in f:
                tokens = line.split()
                if not tokens:
                    # blank / whitespace-only line
                    continue

                # The class id is the first field; split() accepts spaces as
                # well as tabs as the field separator.
                part = tokens[0]
                if part.isdigit():
                    class_dict[int(part)] += 1
    return class_dict

# Tally the class occurrences of every split of the original annotation,
# visiting the folders in train -> test -> valid order.
class_train, class_test, class_valid = (
    made_dict(split_dir)
    for split_dir in (fold_path_train, fold_path_test, fold_path_valid)
)

# Report the per-split counts beneath a 120-character rule.
print(f'Изначальная разметка\n{"-"*120}')
print(f'Train: {class_train}\nTest: {class_test}\nValid: {class_valid}')

Изначальная разметка
------------------------------------------------------------------------------------------------------------------------
Train: defaultdict(<class 'int'>, {0: 1007, 2: 1006, 1: 281, 3: 359})
Test: defaultdict(<class 'int'>, {0: 142, 2: 142, 1: 45, 3: 53})
Valid: defaultdict(<class 'int'>, {0: 290, 2: 290, 1: 82, 3: 106})


**Классы:**

0 - name (наименование товара)

1 - old_price (старая цена)

2 - price (цена)

3 - promotion (акция (?))

### Доразмеченные данные

__________________________________________________________________
* доразметка и переразметка изображений были сделаны в YOLO annotation tool master
___________________________________________________________________

In [10]:
# Locations of the folders that hold the updated (re-annotated) labels.
fold_path_train2, fold_path_test2, fold_path_valid2 = (
    'train_razmetka',
    'test_razmetka',
    'valid_razmetka',
)

# Count the classes of each split, in train -> test -> valid order.
class_train2, class_test2, class_valid2 = map(
    made_dict,
    (fold_path_train2, fold_path_test2, fold_path_valid2),
)

# Report the per-split counts beneath a 120-character rule.
print(f'Новая разметка\n{"-"*120}')
print(f'Train: {class_train2}\nTest: {class_test2}\nValid: {class_valid2}')

Новая разметка
------------------------------------------------------------------------------------------------------------------------
Train: defaultdict(<class 'int'>, {0: 1027, 2: 1028, 1: 283, 3: 390})
Test: defaultdict(<class 'int'>, {0: 143, 2: 143, 1: 46, 3: 61})
Valid: defaultdict(<class 'int'>, {0: 299, 2: 299, 1: 84, 3: 125})


# 1.2 Подбор алгоритма обучения

Для решения поставленной задачи выбрана модель нейронной сети YOLOv5 из-за подходящего формата данных, высокой точности, высокой скорости обучения и невысоких системных требований.

In [3]:
# Fetch the YOLOv5 sources, switch the kernel's working directory into the
# checkout, then quietly install its requirements file plus comet_ml into the
# kernel's environment.
# NOTE(review): %cd stays in effect for the rest of the session, so relative
# paths used afterwards resolve against yolov5/ (the dataset listing cell
# further down reaches the data through ../).
# NOTE(review): presumably meant to run once per fresh kernel; a second run in
# the same session would start from inside yolov5/ - confirm.
!git clone https://github.com/ultralytics/yolov5 
%cd yolov5
%pip install -qr requirements.txt comet_ml

c:\Users\aaron\OneDrive\Документы\GitHub\price-detection\yolov5


Cloning into 'yolov5'...
Updating files:  44% (65/146)
Updating files:  45% (66/146)
Updating files:  46% (68/146)
Updating files:  47% (69/146)
Updating files:  48% (71/146)
Updating files:  49% (72/146)
Updating files:  50% (73/146)
Updating files:  51% (75/146)
Updating files:  52% (76/146)
Updating files:  53% (78/146)
Updating files:  54% (79/146)
Updating files:  55% (81/146)
Updating files:  56% (82/146)
Updating files:  57% (84/146)
Updating files:  58% (85/146)
Updating files:  59% (87/146)
Updating files:  60% (88/146)
Updating files:  61% (90/146)
Updating files:  62% (91/146)
Updating files:  63% (92/146)
Updating files:  64% (94/146)
Updating files:  65% (95/146)
Updating files:  66% (97/146)
Updating files:  67% (98/146)
Updating files:  68% (100/146)
Updating files:  69% (101/146)
Updating files:  70% (103/146)
Updating files:  71% (104/146)
Updating files:  72% (106/146)
Updating files:  73% (107/146)
Updating files:  74% (109/146)
Updating files:  75% (110/146)
Updatin

Note: you may need to restart the kernel to use updated packages.


In [4]:
import torch
import utils

In [2]:
# Report whether PyTorch can use a CUDA device; False means CPU-only execution.
print(torch.cuda.is_available())

False


In [5]:
# Run the notebook setup helper of the imported `utils` package; the recorded
# output shows it prints the software versions and the machine's resources.
# NOTE(review): this rebinds the name `display`, shadowing IPython's built-in
# display() for the rest of the session - presumably the helper returns a
# display object that is meant to take its place; confirm.
display = utils.notebook_init()

YOLOv5  v7.0-393-g6981c274 Python-3.11.0 torch-2.5.1+cpu CPU


Setup complete  (12 CPUs, 15.8 GB RAM, 255.4/475.8 GB disk)


In [11]:
# List the image files of every dataset split; the ../ prefix reaches the
# data folders in the parent of the current working directory.
!ls ../train/images ../valid/images ../test/images

../test/images:
original_five_100_v3_jpg.rf.e4b02fdd3e462ebac867747d6c81114a.jpg
original_five_102_v3_jpg.rf.196d6f9b31d564f2d746f67432677906.jpg
original_five_1152_v4_jpg.rf.e60e61ef53d1efbc57839fcfb52ed3e3.jpg
original_five_1160_v4_jpg.rf.6ae600f6214b3979e76d845273b19ddd.jpg
original_five_116_v3_jpg.rf.88089c4dd6cca7c7d35736f2a2b87d94.jpg
original_five_1171_v4_jpg.rf.5d5dd5c3d509041e6aca61c21574b9db.jpg
original_five_1176_v4_jpg.rf.195b3ce615dd8acdc949792cea02b016.jpg
original_five_1177_v4_jpg.rf.b6b5b4d20d5aa3ad6d600d6898e960f1.jpg
original_five_117_v3_jpg.rf.c5a888ce7ac4e4718ee8912e097398a8.jpg
original_five_12_v3_jpg.rf.d1a8e647124dc089f1f9bbc831c0fa62.jpg
original_five_1372_v4_jpg.rf.69cb5505806c2982443553e4c97887ae.jpg
original_five_1373_v4_jpg.rf.4a699bedd3bb9e95164da1ef9259c1cd.jpg
original_five_1381_v4_jpg.rf.f83985d20e2111de4666d99b99083109.jpg
original_five_1402_v4_jpg.rf.d61fb496ece319102d6c9526d64e7920.jpg
original_five_1443_v4_jpg.rf.1b20b474cd4e01727f483cfebeba25bc.jpg


In [13]:
# Launch train.py as a subprocess with the options listed on the command line.
# NOTE(review): presumably --img is the input image size, --batch the batch
# size, --epochs the number of epochs, --data the dataset description file and
# --weights the initial checkpoint - confirm against `python train.py --help`.
# NOTE(review): the recorded output (^C) suggests this run was interrupted.
!python train.py --img 640 --batch 8 --epochs 40 --data data.yaml --weights yolov5m.pt

^C
