In [None]:
# %pip installs into the environment of the running kernel, whereas !pip uses
# whichever pip executable happens to be first on PATH.
%pip install PyYAML==5.3.1
%pip install "git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI"

In [None]:
import json

# The file is parsed line by line: every non-empty line is one JSON record.
# Decode as UTF-8 explicitly instead of relying on the platform default, and
# skip blank lines, which would otherwise make json.loads raise.
with open('data/clothing.json', 'r', encoding='utf-8') as f:
    images = [json.loads(line) for line in f if line.strip()]

In [None]:
# Show the first record to inspect the fields each entry carries.
images[0]

In [None]:
# Print every record that carries more than one annotation.
for img in filter(lambda record: len(record['annotation']) > 1, images):
    print(img)

In [None]:
# Gather the first label of every annotation, de-duplicate, and sort so the
# resulting list has a stable order.
classes = sorted({
    ann['label'][0]
    for img in images
    for ann in img['annotation']
})
classes

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 15% of the records for testing, then split 20% of the remainder
# off as the validation set. Both splits use the same fixed random_state.
train_val_data, test_data = train_test_split(
    images, test_size=0.15, random_state=1)
train_data, val_data = train_test_split(
    train_val_data, test_size=0.2, random_state=1)
tuple(len(split) for split in (train_data, val_data, test_data))

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from urllib import request

# Download the first training image. The response is used as a context manager
# so the connection is closed; convert() runs inside the block because
# Image.open alone does not read the pixel data.
with request.urlopen(train_data[0]['content']) as response:
    sample_image = Image.open(response).convert('RGB')
sample_image.save('sample_img.jpeg', 'JPEG')

# Keep an array copy of the picture in `img` for plotting.
img = np.array(sample_image)
plt.imshow(img); plt.show()

In [None]:
import cv2

def overlay_bbox(img, label, box_points):
    """Draw a labelled bounding box on ``img`` and return it.

    The drawing happens in place: the array passed in is modified and that
    same object is returned.

    Args:
        img: Image array whose first two dimensions are height and width;
            a trailing channel axis is optional.
        label: Text drawn on a filled background at the box's first corner.
        box_points: Two mappings with ``'x'`` and ``'y'`` keys giving the box
            corners as fractions of the image width and height.

    Returns:
        The ``img`` object that was passed in, now carrying the box and label.
    """
    # shape[:2] (instead of shape[:-1]) also accepts 2-D single-channel arrays.
    H, W = img.shape[:2]
    p1, p2 = box_points
    x1, y1 = p1['x'] * W, p1['y'] * H
    x2, y2 = p2['x'] * W, p2['y'] * H

    # Drawing settings shared by the text measurement and the text rendering,
    # so the filled background always matches the rendered label size.
    font_face = cv2.FONT_HERSHEY_PLAIN
    font_scale = 1.75
    line_thickness = 2
    box_color = (0, 255, 0)
    text_color = (255, 255, 255)

    # Outline of the bounding box.
    cv2.rectangle(img,
                  (int(x1), int(y1)),
                  (int(x2), int(y2)),
                  color=box_color,
                  thickness=line_thickness)

    (label_width, label_height), _ = cv2.getTextSize(
        label,
        fontFace=font_face,
        fontScale=font_scale,
        thickness=line_thickness)

    # Filled background for the label, anchored at the first corner.
    cv2.rectangle(img,
                  (int(x1), int(y1)),
                  (int(x1 + label_width), int(y1 + label_height)),
                  color=box_color,
                  thickness=cv2.FILLED)

    # putText's origin is the bottom-left of the text, hence the height offset.
    cv2.putText(
        img,
        label,
        org=(int(x1), int(y1 + label_height)),
        fontFace=font_face,
        fontScale=font_scale,
        color=text_color,
        thickness=line_thickness
    )

    return img

# Draw the first annotation of the first training record onto the sample image.
first_annotation = train_data[0]['annotation'][0]
img_bbox = overlay_bbox(img, first_annotation['label'][0], first_annotation['points'])
plt.imshow(img_bbox)
plt.show()

In [None]:
import os

def create_dataset(dataset, classes, split, path='data'):
    """
    Download the images of one split and write a darknet label file for each.

    YOLO v5 requires the dataset to be in the darknet format.
    Here's an outline of what it looks like:

    One txt with labels file per image
    One row per object
    Each row: class_index bbox_x_center bbox_y_center bbox_width bbox_height
    Box coordinates must be normalized between 0 and 1

    Args:
        dataset: Sequence of records. Each record has a 'content' URL and an
            'annotation' list whose items hold a 'label' list and two 'points'.
        classes: List of class names; a label's position in it is the class
            index written to the label file.
        split: Sub-directory name used under images/ and labels/.
        path: Root directory of the generated dataset.

    Images are written to <path>/images/<split>/NNNN.jpeg and labels to
    <path>/labels/<split>/NNNN.txt, numbered by position in `dataset`.
    Point coordinates are written as given, so they are assumed to already be
    fractions of the image size.
    """
    image_path = os.path.join(path, 'images', split)
    os.makedirs(image_path, exist_ok=True)
    label_path = os.path.join(path, 'labels', split)
    os.makedirs(label_path, exist_ok=True)

    for idx, data in enumerate(dataset):
        # Close the HTTP response once the image is decoded; convert() reads
        # the pixel data, so it has to run while the stream is still open.
        with request.urlopen(data['content']) as response:
            img = Image.open(response).convert('RGB')
        img.save(os.path.join(image_path, f'{idx:04d}.jpeg'), 'JPEG')

        with open(os.path.join(label_path, f'{idx:04d}.txt'), 'w') as f:
            for ann in data['annotation']:
                category_idx = classes.index(ann['label'][0])
                p1, p2 = ann['points']
                # The two corners may arrive in either order; sort each axis
                # so width and height can never come out negative.
                x_min, x_max = sorted((p1['x'], p2['x']))
                y_min, y_max = sorted((p1['y'], p2['y']))
                bbox_width = x_max - x_min
                bbox_height = y_max - y_min
                f.write(
                  f"{category_idx} {x_min + bbox_width / 2} {y_min + bbox_height / 2} {bbox_width} {bbox_height}\n"
                )


In [None]:
# Materialise the three splits on disk, in train / val / test order.
for split_name, split_records in (('train', train_data),
                                  ('val', val_data),
                                  ('test', test_data)):
    create_dataset(split_records, classes, split_name)

### Fine-tuning

* `--img 320` - resize the images to 320x320 pixels (larger sizes, e.g. 640, generally give better results)
* `--batch 4` - 4 images per batch
* `--epochs 30` - train for 30 epochs
* `--data data/clothing.yaml` - path to the dataset config
* `--cfg models/yolov5x_9class.yaml` - model config
* `--weights yolov5x.pt` - use pre-trained weights from the YOLOv5x model
* `--name yolov5x_clothing` - name of our model
* `--cache` - cache dataset images for faster training

In [None]:
# Fine-tune from the yolov5x.pt weights: 30 epochs, batch size 4, 320px images.
# NOTE(review): presumably run from the YOLOv5 repository root (train.py, data/, models/) — confirm.
!python train.py --img 320 --batch 4 --epochs 30 \
  --data data/clothing.yaml --cfg models/yolov5x_9class.yaml --weights yolov5x.pt \
  --name yolov5x_clothing --cache

In [None]:
from utils.plots import plot_results

# Plot the results stored in the training run's output directory (the folder
# name matches the --name given to train.py). The trailing ';' hides the
# return-value echo.
plot_results(save_dir='./runs/train/yolov5x_clothing/');

### Inference

In [None]:
# Run detect.py over data/images/test/ using weights/best.pt from the training run.
# NOTE(review): --conf 0.4 is presumably the minimum detection confidence — confirm against detect.py.
!python detect.py --weights runs/train/yolov5x_clothing/weights/best.pt \
  --img 320 --conf 0.4 --source data/images/test/

In [None]:
import torch
import torchvision


def load_image(path, resize=True):
    """Read an image file as an RGB array, optionally resized.

    Args:
        path: Location of the image file on disk.
        resize: When true (the default) the image is scaled to 128x256
            (width x height); when false it keeps its original size.

    Returns:
        The image as an array in RGB channel order.

    Raises:
        OSError: If OpenCV cannot read an image from ``path``.
    """
    img = cv2.imread(path)
    if img is None:
        # imread reports failure by returning None rather than raising.
        raise OSError(f'Could not read image: {path}')
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    if resize:
        # cv2.resize takes the target size as (width, height).
        img = cv2.resize(img, (128, 256), interpolation=cv2.INTER_AREA)
    return img

def show_grid(image_paths, nrow=11):
    """Display the images at ``image_paths`` as a single grid figure.

    Args:
        image_paths: Paths of the image files to show; must not be empty.
        nrow: Number of images per grid row (forwarded to ``make_grid``).

    Raises:
        ValueError: If ``image_paths`` is empty.
    """
    image_paths = list(image_paths)
    if not image_paths:
        raise ValueError('show_grid needs at least one image path')

    # Convert each array on its own and stack the tensors: handing a Python
    # list of ndarrays to torch.as_tensor is slow and triggers a warning.
    images = torch.stack([torch.as_tensor(load_image(img)) for img in image_paths])
    # Move channels ahead of height/width, the layout make_grid expects.
    images = images.permute(0, 3, 1, 2)
    grid_img = torchvision.utils.make_grid(images, nrow=nrow)
    plt.figure(figsize=(24, 12))
    # Channels go back to the last axis for matplotlib.
    plt.imshow(grid_img.permute(1, 2, 0))
    plt.axis('off');

In [None]:
from glob import glob

# glob returns matches in arbitrary order, so sort before slicing to show the
# same 22 detections on every run.
img_paths = sorted(glob("runs/detect/exp/*.jpeg"))[:22]
show_grid(img_paths)