# <b>This is my solution to <u>exercise A</u> of the track "<u>To Start</u>" of the hackathon "<u>ML Hackathon: Utashuda's wild life!</u>"</b>

<style>
.center {
  display: block;
  margin-left: auto;
  margin-right: auto;
  width: 80%;
}
</style>

<img src='https://proza.ru/pics/2018/12/16/1619.jpg' class="center">

# <b>0. Downloading the necessary files (dataset), libraries and environment setup</b>

0.1 Downloading YOLOv5 and installing the necessary libraries for YOLOv5

In [None]:
# Fetch the YOLOv5 sources, make the cloned repository the working directory
# for all following cells, and install the packages listed in its requirements.txt.
!git clone https://github.com/ultralytics/yolov5
%cd yolov5
%pip install -qr requirements.txt

0.2 Development environment settings

In [None]:
%matplotlib notebook
%matplotlib inline

0.3 Installing comet ml

In [None]:
!pip install comet_ml

0.4 Downloading database (train, valid, test)

In [None]:
!wget https://zaborshicov.ru/hiden/k6zma_ds.zip
!unzip k6zma_ds.zip

# <b>1. Importing the necessary libraries and initializing comet ml</b>

1.1 Importing libraries

In [None]:
from glob import glob
from tqdm.notebook import tqdm

from PIL import Image
import cv2
from mpl_toolkits.axes_grid1 import ImageGrid

import random

from sklearn.metrics import mean_squared_error as mse

import torch

import numpy as np
import pandas as pd

import comet_ml

import seaborn as sns
import matplotlib.pyplot as plt

1.2 Initializing comet ml

In [None]:
# NOTE(review): presumably configures Comet credentials so the training run can be
# logged — confirm the behaviour against the installed comet_ml version.
comet_ml.init()

# <b>2. Data analysis</b>

2.1 Creating a function to create a list with the number of seagulls in each image

In [None]:
def analyze_data(path):
    """Count the annotated objects in every label file matching ``path``.

    Every non-blank line of a label file is counted as one object
    (one seagull in this dataset).

    Args:
        path: Glob pattern of the label files, e.g. ``'train/labels/*'``.

    Returns:
        A list with one integer per matched file, in the order returned by ``glob``.
    """
    nums = []

    for pth in tqdm(glob(path)):
        with open(pth, 'r') as f:
            # Skip blank lines so that an empty or trailing line in a label
            # file is not counted as an extra object.
            nums.append(sum(1 for line in f if line.strip()))

    return nums

2.2 Saving the result of the function for train and valid

In [None]:
# Per-file object counts for the training and validation label folders.
nums_train = analyze_data('train/labels/*')
nums_valid = analyze_data('valid/labels/*')

2.3 Calculating the average number of seagulls in images

In [None]:
# Mean number of labelled objects per image, computed separately for each split.
mean_train = sum(nums_train) / len(nums_train)
mean_valid = sum(nums_valid) / len(nums_valid)

print(f'{mean_train} - average number of seagulls in the images in the training dataset')
print(f'{mean_valid} - average number of seagulls in the images in the validation dataset')

2.4 Plotting with the number of seagulls

In [None]:
plt.figure(figsize=(8, 5))

# Label each distribution so the overlaid histograms can be told apart in the legend.
sns.histplot(nums_train, color='r', kde=True, label='train')
sns.histplot(nums_valid, color='g', kde=True, label='valid')

plt.xlabel('Seagulls count')
plt.ylabel('Count')
plt.title('Seagulls')
plt.legend()
plt.show()

2.5 Creating a function to preview 6 images

In [None]:
def preview_image(path):
    """Show up to six randomly chosen images matching ``path`` in a 2x3 grid.

    Args:
        path: Glob pattern of the image files, e.g. ``'train/images/*'``.

    Raises:
        FileNotFoundError: If no file matches ``path``.
    """
    pathes = glob(path)
    if not pathes:
        raise FileNotFoundError(f'No images match the pattern {path!r}')

    # Sample without replacement so the same image is never shown twice;
    # fewer cells are filled when fewer than six files are available.
    chosen = random.sample(pathes, k=min(6, len(pathes)))

    images = []
    for img_path in chosen:
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread returns None instead of raising for unreadable files.
            print(f'Skipping unreadable image: {img_path}')
            continue
        # OpenCV loads images as BGR, matplotlib expects RGB.
        images.append(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

    fig = plt.figure(figsize=(10., 10.))
    grid = ImageGrid(fig, 111,
                     nrows_ncols=(2, 3),
                     axes_pad=0.1,
                     )

    for ax, im in zip(grid, images):
        ax.imshow(im)

    plt.show()

2.6 Previewing images

In [None]:
# Display a random selection of the training images.
preview_image('train/images/*')

# <b>3. Hyperparameter upgrade</b>

3.1 Creating a generic function to change the contents of a file

In [None]:
from IPython.core.magic import register_line_cell_magic

@register_line_cell_magic
def writetemplate(line, cell=None):
    """Cell magic that writes the cell body to a file after ``str.format`` substitution.

    Usage: ``%%writetemplate <path>`` on the first line of a cell. Placeholders of
    the form ``{name}`` in the cell body are filled from this module's global
    variables; literal braces in the body must therefore be doubled.

    Args:
        line: Text after the magic name; used as the destination file path.
        cell: Cell body to write. ``None`` when invoked as a line magic.

    Raises:
        ValueError: If invoked as a line magic, i.e. without a cell body.
    """
    if cell is None:
        # A magic registered for both modes is called with only `line` in line
        # mode; fail with a clear message instead of a TypeError.
        raise ValueError('writetemplate must be used as a cell magic: %%writetemplate <path>')

    # Strip surrounding whitespace so a trailing space does not end up in the file name.
    with open(line.strip(), 'w') as f:
        f.write(cell.format(**globals()))

3.2 Modifying the hyp.scratch-high.yaml file

In [None]:
%%writetemplate data/hyps/hyp.scratch-high.yaml

# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Hyperparameters for high-augmentation COCO training from scratch
# python train.py --batch 32 --cfg yolov5m6.yaml --weights '' --data coco.yaml --img 1280 --epochs 300
# See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials

lr0: 0.001  # initial learning rate (SGD=1E-2, Adam=1E-3)
lrf: 0.1  # final OneCycleLR learning rate (lr0 * lrf)
momentum: 0.937  # SGD momentum/Adam beta1
weight_decay: 0.0009  # optimizer weight decay 5e-4
warmup_epochs: 5.0  # warmup epochs (fractions ok)
warmup_momentum: 0.8  # warmup initial momentum
warmup_bias_lr: 0.1  # warmup initial bias lr
box: 0.05  # box loss gain
cls: 0.0  # cls loss gain
cls_pw: 1.0  # cls BCELoss positive_weight
obj: 0.7  # obj loss gain (scale with pixels)
obj_pw: 1.0  # obj BCELoss positive_weight
iou_t: 0.60  # IoU training threshold
anchor_t: 4.0  # anchor-multiple threshold
# anchors: 3  # anchors per output layer (0 to ignore)
fl_gamma: 0.0  # focal loss gamma (efficientDet default gamma=1.5)
hsv_h: 0.0  # image HSV-Hue augmentation (fraction)
hsv_s: 0.0  # image HSV-Saturation augmentation (fraction)
hsv_v: 0.2  # image HSV-Value augmentation (fraction)
degrees: 0.0  # image rotation (+/- deg)
translate: 0.0  # image translation (+/- fraction)
scale: 0.0  # image scale (+/- gain)
shear: 0.0  # image shear (+/- deg)
perspective: 0.0  # image perspective (+/- fraction), range 0-0.001
flipud: 0.0  # image flip up-down (probability)
fliplr: 0.5  # image flip left-right (probability)
mosaic: 0.9  # image mosaic (probability)
mixup: 0.05  # image mixup (probability)
copy_paste: 0.05  # segment copy-paste (probability)
# NOTE(review): the two keys below were added by the notebook author; confirm that
# train.py actually reads them from the hyp file (single-class mode and dropout may
# need to be passed another way) - TODO verify.
single_cls: True
dropout: 0.2

# <b>4. YOLOv5 model training</b>

4.1 YOLOv5X training

If you train the YOLOv5X model, you can see that the 25th, 30th, 40th, 45th and 50th epochs have the best metrics

In [None]:
!python train.py --img 640 --batch 10 --epochs 100 --data data.yaml --weights yolov5x.pt --hyp hyp.scratch-high.yaml --device 0 --save-period 50

# <b>5. Iterating over conf and iou</b>

5.1 Sequential selection of the best conf and then iou

5.1.1 Function to find the best conf

In [None]:
def selection_best_conf(weight_path, data_path):
    """Grid-search the confidence threshold that minimises the counting RMSE.

    The model is run on every image matching ``data_path`` for 45 evenly spaced
    confidence thresholds in [0, 1]; the number of predicted boxes per image is
    compared with the number of non-blank lines in the matching label file.

    Args:
        weight_path: Path to the trained YOLOv5 weights.
        data_path: Glob pattern of the images. The label file of an image is
            found by replacing ``images`` with ``labels`` in its path and the
            file extension with ``.txt``.

    Returns:
        The confidence threshold with the lowest RMSE (the first one on ties).

    Raises:
        ValueError: If no image matches ``data_path``.
    """
    import os

    model = torch.hub.load('ultralytics/yolov5', 'custom', path=weight_path, force_reload=True)

    image_paths = glob(data_path)
    if not image_paths:
        raise ValueError(f'No images match the pattern {data_path!r}')

    # The ground truth does not depend on the threshold, so read it only once.
    y = []
    for pth in image_paths:
        # splitext copes with extensions of any length (e.g. ".jpeg").
        label_path = os.path.splitext(pth.replace('images', 'labels'))[0] + '.txt'
        with open(label_path, 'r') as f:
            y.append(sum(1 for line in f if line.strip()))

    rmse_best = float('inf')
    best_conf = 0
    for conf_value in np.linspace(0, 1, 45):
        model.conf = conf_value
        preds = []
        for pth in image_paths:
            with Image.open(pth) as img:
                results = model(img)
            preds.append(results.xyxy[0].shape[0])

        # Explicit square root instead of `squared=False`, which recent
        # scikit-learn releases deprecate and remove.
        rmse_now = np.sqrt(mse(y, preds))
        if rmse_best > rmse_now:
            rmse_best = rmse_now
            best_conf = conf_value

        print(f'RMSE = {rmse_now}, {conf_value} - conf')

    return best_conf

5.1.2 Function to find the best iou

In [None]:
def selection_best_iou(weight_path, data_path, conf):
    """Grid-search the NMS IoU threshold that minimises the counting RMSE.

    The model is run with the fixed confidence threshold ``conf`` and
    class-agnostic NMS on every image matching ``data_path`` for 45 evenly
    spaced IoU thresholds in [0, 1]; the number of predicted boxes per image is
    compared with the number of non-blank lines in the matching label file.

    Args:
        weight_path: Path to the trained YOLOv5 weights.
        data_path: Glob pattern of the images. The label file of an image is
            found by replacing ``images`` with ``labels`` in its path and the
            file extension with ``.txt``.
        conf: Confidence threshold to keep fixed during the search.

    Returns:
        The IoU threshold with the lowest RMSE (the first one on ties).

    Raises:
        ValueError: If no image matches ``data_path``.
    """
    import os

    model = torch.hub.load('ultralytics/yolov5', 'custom', path=weight_path, force_reload=True)
    model.conf = conf
    model.agnostic = True

    image_paths = glob(data_path)
    if not image_paths:
        raise ValueError(f'No images match the pattern {data_path!r}')

    # The ground truth does not depend on the threshold, so read it only once.
    y = []
    for pth in image_paths:
        # splitext copes with extensions of any length (e.g. ".jpeg").
        label_path = os.path.splitext(pth.replace('images', 'labels'))[0] + '.txt'
        with open(label_path, 'r') as f:
            y.append(sum(1 for line in f if line.strip()))

    rmse_best = float('inf')
    best_iou = 0
    for iou_value in np.linspace(0, 1, 45):
        model.iou = iou_value
        preds = []
        for pth in image_paths:
            with Image.open(pth) as img:
                results = model(img)
            preds.append(results.xyxy[0].shape[0])

        # Explicit square root instead of `squared=False`, which recent
        # scikit-learn releases deprecate and remove.
        rmse_now = np.sqrt(mse(y, preds))
        if rmse_best > rmse_now:
            rmse_best = rmse_now
            best_iou = iou_value

        print(f'RMSE = {rmse_now}, {iou_value} - iou')

    return best_iou

5.2 Saving best conf and best iou

In [None]:
# Sequential search on the validation images: first the confidence threshold,
# then the IoU threshold with that confidence held fixed.
best_conf = selection_best_conf('runs/train/exp/weights/epoch50.pt', 'valid/images/*')
best_iou = selection_best_iou('runs/train/exp/weights/epoch50.pt', 'valid/images/*', best_conf)

5.3 Creating matrices to find the best rmse

5.3.1 Creating a function to iterate over conf and iou and calculate rmse for each combination

In [None]:
def making_data_for_matrix(weight_path, data_path):
    """Evaluate the counting RMSE for every (iou, conf) pair and plot it as a heatmap.

    Both thresholds are taken from the same grid of 45 evenly spaced values in
    [0, 1]. For each pair the model is run (with class-agnostic NMS) on every
    image matching ``data_path`` and the number of predicted boxes is compared
    with the number of non-blank lines in the matching label file. The best
    combination found is printed after the search.

    Args:
        weight_path: Path to the trained YOLOv5 weights.
        data_path: Glob pattern of the images. The label file of an image is
            found by replacing ``images`` with ``labels`` in its path and the
            file extension with ``.txt``.

    Returns:
        A tuple ``(rmse_final, iou_final, conf_final)``:
        ``rmse_final`` is a 2-D array of the RMSE values rounded to 3 decimals
        and negated (rows follow the IoU grid, columns follow the confidence
        grid without its first value); ``iou_final`` and ``conf_final`` are the
        complete threshold grids rounded to 3 decimals — not the best values.

    Raises:
        ValueError: If no image matches ``data_path``.
    """
    import os

    model = torch.hub.load('ultralytics/yolov5', 'custom', path=weight_path, force_reload=True)
    model.agnostic = True

    image_paths = glob(data_path)
    if not image_paths:
        raise ValueError(f'No images match the pattern {data_path!r}')

    # The ground truth does not depend on the thresholds, so read it only once.
    y = []
    for pth in image_paths:
        # splitext copes with extensions of any length (e.g. ".jpeg").
        label_path = os.path.splitext(pth.replace('images', 'labels'))[0] + '.txt'
        with open(label_path, 'r') as f:
            y.append(sum(1 for line in f if line.strip()))

    thresholds = np.linspace(0, 1, 45)

    best_conf = -float('inf')
    best_iou = -float('inf')
    rmse_typical = float('inf')
    rmse_list = []

    for iou_value in thresholds:
        model.iou = iou_value
        for conf_value in thresholds:
            model.conf = conf_value

            preds = []
            for pth in image_paths:
                with Image.open(pth) as img:
                    results = model(img)
                preds.append(results.xyxy[0].shape[0])

            # Take the square root exactly once: the stored value is the RMSE,
            # the same quantity that is printed below.
            rmse_now = np.sqrt(mse(y, preds))
            rmse_list.append(rmse_now)

            print(f'RMSE = {rmse_now}, {iou_value} - iou, {conf_value} - conf')

            if rmse_typical > rmse_now:
                rmse_typical = rmse_now
                best_conf = conf_value
                best_iou = iou_value

    print(f'Best RMSE = {rmse_typical}, {best_iou} - iou, {best_conf} - conf')

    n = len(thresholds)
    rmse_final = [(round(x, 3)) for x in rmse_list]
    # Drop the first confidence column and negate, so that a lower RMSE maps
    # to a higher value on the colour scale.
    rmse_final = np.array(rmse_final).reshape(n, n)[:, 1:]*-1

    iou_final = [(round(x, 3)) for x in thresholds]
    conf_final = [(round(x, 3)) for x in thresholds]

    plt.figure(figsize=(45, 45))

    # Rows are reversed so the IoU axis increases upwards. Tick labels are
    # passed to heatmap directly so they always match the plotted cells.
    sns.heatmap(rmse_final[::-1], annot=True, cmap='gist_heat',
                xticklabels=conf_final[1:], yticklabels=iou_final[::-1])

    plt.title("Conf and Iou combination")
    plt.xlabel("Сonf")
    plt.ylabel("Iou")
    plt.show()

    return rmse_final, iou_final, conf_final

5.3.2 Saving the results of the function

In [None]:
# NOTE: despite their names, `best_iou_2` and `best_conf_2` receive the complete
# rounded IoU and confidence grids returned by the function, not single best values;
# `rmse_list` receives the 2-D matrix that is plotted.
rmse_list, best_iou_2, best_conf_2 = making_data_for_matrix('runs/train/exp/weights/epoch50.pt', 'valid/images/*')

# <b>6. Creating a submit</b>

P.S. I spent a long time enumerating conf and iou values and found that the best public score is obtained with <b><u>conf=0.241</u></b>. Unfortunately, I did not iterate over iou for it, so to reproduce my result, please test only with `model.conf=0.241`

6.1 Creating a function to create a submit

In [None]:
def submit_file(weight_path, test_path, conf, iou, submit_name):
    """Predict the number of objects in every test image and write a submission CSV.

    Args:
        weight_path: Path to the trained YOLOv5 weights.
        test_path: Glob pattern of the test images.
        conf: Confidence threshold assigned to the model.
        iou: NMS IoU threshold assigned to the model.
        submit_name: Path of the CSV file to write; it gets the columns
            ``filename`` (base name of the image) and ``num`` (predicted count).
    """
    import os

    model = torch.hub.load('ultralytics/yolov5', 'custom', path=weight_path, force_reload=True)
    model.conf = conf
    model.iou = iou
    model.agnostic = True

    paths = []
    preds = []
    for pth in tqdm(glob(test_path)):
        with Image.open(pth) as img:
            results = model(img)
        boxes = results.xyxy[0].tolist()

        # Keep only boxes whose width and height both exceed 5. The list is
        # rebuilt instead of removing items while iterating over it, which
        # skipped the element following every removed box.
        kept = [
            coords for coords in boxes
            if coords[2] - coords[0] > 5 and coords[3] - coords[1] > 5
        ]

        preds.append(len(kept))
        # basename works with the path separator of any platform.
        paths.append(os.path.basename(pth))

    df = pd.DataFrame(list(zip(paths, preds)), columns=['filename', 'num'])
    df.to_csv(submit_name, index=False)

6.2 Calling a function to save a submit

In [None]:
# Write submit.csv using the thresholds stored in `best_conf` and `best_iou`.
submit_file('runs/train/exp/weights/epoch50.pt', 'test/*', best_conf, best_iou, 'submit.csv')

# <b>Now you have my best solution on the public leaderboard</b>

![GIFY](https://y.yarn.co/0b93028c-fc87-4c55-84d4-8e59be81cd73_text.gif)

# <b>P.S. If your score turns out to be different, then replace batch with 8</b>