# YOLOv5 Training Notebook
### (continued from the [YOLOv5 data setup notebook](https://www.kaggle.com/amirsher/x-ray-with-yolov5-data-setup))
## Yolov5 references
Official Ultralytics YoloV5 Kaggle notebook: [Yolov5-kaggle](https://www.kaggle.com/ultralytics/yolov5-ultralytics)

YOLOv5 implementation notebook: [Yolo v5 Object Detection Tutorial](https://jooskorstanje.com/yolov5-training-a-custom-object-detection-model.html)

YOLOv5 Tutorial: [https://towardsdatascience.com/yolo-v5-object-detection-tutorial-2e607b9013ef](https://towardsdatascience.com/yolo-v5-object-detection-tutorial-2e607b9013ef)

Ultralytics YOLOv5 wiki: [https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data](https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data)

Another tutorial: [https://medium.com/towards-artificial-intelligence/yolo-v5-object-detection-on-a-custom-dataset-61d478bc08f9](https://medium.com/towards-artificial-intelligence/yolo-v5-object-detection-on-a-custom-dataset-61d478bc08f9)

And another: [https://lionbridge.ai/articles/create-an-end-to-end-object-detection-pipeline-using-yolov5/](https://lionbridge.ai/articles/create-an-end-to-end-object-detection-pipeline-using-yolov5/)

In [None]:

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os
from os import listdir, makedirs, symlink, chdir
from os.path import isfile, join

from shutil import copyfile, rmtree
from glob import glob

## Setup directories

In [None]:
# Input data is produced by the companion notebook x-ray-with-yolov5-data-setup
input_path = '/kaggle/input/x-ray-with-yolov5-data-setup/'
working_path = '/kaggle/working'
training_weights_path = '/kaggle/input/yolo5-train-epoch20/' # weights from previous 20 epochs training

in_images_path = join(input_path,'images')
in_labels_path = join(input_path,'labels')

# Working paths for yolov5: the repository checkout, its run outputs and the
# images/labels tree (train + valid) that the data config points at
yolo_path = join(working_path,'yolov5')
yolo_runs = join(yolo_path,'runs')
data_path = join(working_path,'data')
images_path = join(data_path,'images')
labels_path = join(data_path,'labels')
images_train_path = join(images_path,'train')
images_val_path = join(images_path,'valid')
labels_train_path = join(labels_path,'train')
labels_val_path = join(labels_path,'valid')

# Generated config files. They carry a real .yaml extension so they are
# recognisable as YAML by tools and by train.py.
# NOTE(review): recent YOLOv5 revisions appear to reject --cfg/--hyp files whose
# suffix is not .yaml/.yml - confirm against the revision that gets cloned.
new_train_yaml = join(working_path,'new_train.yaml')
new_data_yaml = join(working_path,'new_data.yaml')
hyp_scratch_yaml = join(working_path,'hyp.scratch.yaml')



In [None]:
# Number of input image files and label files. A cell only echoes its last
# expression, so both counts are returned together as one tuple.
len(listdir(in_images_path)), len(listdir(in_labels_path))

# Arrange train-valid data

## Split train-val using multi label stratification

In [None]:
# Get library for stratification train-test split
from skmultilearn.model_selection.measures import get_combination_wise_output_matrix
from skmultilearn.model_selection import iterative_train_test_split

In [None]:
# Read the label files and construct the label matrix: one row per image,
# one column per class, each cell counting how many annotation lines of that
# class the image's label file contains.

num_labels = 14

# Collect the label files first so the matrix is sized from the files that are
# actually read (not from a raw directory listing). Sorting makes the row
# order independent of filesystem enumeration order.
label_files = []
for root, dirs, files in os.walk(in_labels_path, topdown=True):
    dirs.sort()
    for name in sorted(files):
        if name.endswith(".txt"):
            label_files.append(join(root, name))

num_images = len(label_files)
images = [os.path.basename(path)[:-4] for path in label_files]  # file name minus ".txt"
labels = np.zeros((num_images, num_labels))

for row, lbl_file in enumerate(label_files):
    with open(lbl_file, 'r') as f:
        for line in f:
            fields = line.split()
            if fields:  # ignore blank / whitespace-only lines
                # the first field of each line is the class index
                labels[row, int(fields[0])] += 1
    

In [None]:
# The splitter partitions X alongside y, so X is just a column of row indices
# (shape: num_images x 1) that can later be mapped back to image ids.
X = np.arange(num_images)[:, np.newaxis]
y = labels

for split_input in (X, y):
    print(split_input.shape)

In [None]:
# Seed the global NumPy RNG before splitting so a re-run can reproduce the same
# train/validation partition.
# NOTE(review): assumes the installed scikit-multilearn draws its randomness
# from NumPy's global RNG - confirm for the version in use.
np.random.seed(42)

# 80/20 split stratified on the per-class label counts
X_train, y_train, X_val, y_val = iterative_train_test_split(X, y, test_size = 0.2)
X_train_size = X_train.shape[0]
X_val_size = X_val.shape[0]

# The two parts together must account for every input row.
assert X_train_size + X_val_size == X.shape[0], "train/val split does not cover all samples"

print("X_train size:", X_train_size)
print("X_val size:",X_val_size)

## Create links to train and validation data

In [None]:
# Drop whatever a previous run left behind ...
for stale_dir in (images_path, labels_path):
    rmtree(stale_dir, ignore_errors=True)

# ... then rebuild the empty images/{train,valid} and labels/{train,valid} tree
for split_dir in (images_train_path, images_val_path,
                  labels_train_path, labels_val_path):
    makedirs(split_dir, exist_ok = True)

In [None]:
# Create links to training images and labels

# (source dir, destination dir, file extension) for the two files of each sample
train_link_specs = ((in_images_path, images_train_path, '.jpg'),
                    (in_labels_path, labels_train_path, '.txt'))

for i in X_train.reshape(X_train_size,).tolist():
    image_id = images[i]
    for src_dir, dst_dir, ext in train_link_specs:
        src = join(src_dir, image_id + ext)
        dst = join(dst_dir, image_id + ext)
        # Replace a link left by an earlier execution so this cell can be
        # re-run on its own instead of failing with FileExistsError.
        if os.path.lexists(dst):
            os.remove(dst)
        symlink(src, dst)

In [None]:
# Create links to validation images and labels

# (source dir, destination dir, file extension) for the two files of each sample
val_link_specs = ((in_images_path, images_val_path, '.jpg'),
                  (in_labels_path, labels_val_path, '.txt'))

for i in X_val.reshape(X_val_size,).tolist():
    image_id = images[i]
    for src_dir, dst_dir, ext in val_link_specs:
        src = join(src_dir, image_id + ext)
        dst = join(dst_dir, image_id + ext)
        # Replace a link left by an earlier execution so this cell can be
        # re-run on its own instead of failing with FileExistsError.
        if os.path.lexists(dst):
            os.remove(dst)
        symlink(src, dst)

# Prepare YoloV5

## Clone Ultralytics/yolov5 github repo

In [None]:
# Clone the YOLOv5 repository into the current working directory and install
# the requirements it ships with.
# NOTE(review): the clone is not pinned to a tag or commit, so the code that
# runs depends on the state of the upstream default branch at execution time;
# the configs written further down were authored against one specific layout
# of that repository - confirm they still match.
# NOTE(review): wandb is uninstalled here but installed again in the
# "Train the network" section - confirm which of the two is intended.
!git clone https://github.com/ultralytics/yolov5  # clone repo
!pip install -qr yolov5/requirements.txt  # install dependencies
!pip uninstall -y wandb  # open wandb bugs 

import torch

# Report the torch version plus the properties of CUDA device 0, or 'CPU'
# when torch reports that CUDA is not available.
print('Setup complete. Using torch %s %s' % (torch.__version__, torch.cuda.get_device_properties(0) if torch.cuda.is_available() else 'CPU'))

## Download pretrained YOLOv5 weights

In [None]:
## Download pretrained weights
## (disabled: the next cell copies the weights of an earlier training run
##  instead; the snippet below is kept for reference only)
#chdir(yolo_path)
#from utils import google_utils

#weights = 'yolov5s.pt'
#google_utils.attempt_download(weights)

In [None]:
# Bring the checkpoint produced by the earlier training run into the YOLOv5
# checkout, under the same file name.
weights = 'best.pt'
weights_src = join(training_weights_path, weights)
weights_dst = join(yolo_path, weights)
copyfile(weights_src, weights_dst)

## Configure the YAML files for training a YOLOv5 object detection model

In [None]:
# Model definition later handed to train.py through --cfg: 14 classes,
# depth multiple 0.33, width multiple 0.50, three anchor sets (P3/P4/P5).
model_cfg_text = """
        # parameters
        nc: 14  # number of classes
        depth_multiple: 0.33  # model depth multiple
        width_multiple: 0.50  # layer channel multiple

        # anchors
        anchors:
          - [10,13, 16,30, 33,23]  # P3/8
          - [30,61, 62,45, 59,119]  # P4/16
          - [116,90, 156,198, 373,326]  # P5/32

        # YOLOv5 backbone
        backbone:
          # [from, number, module, args]
          [[-1, 1, Focus, [64, 3]],  # 0-P1/2
           [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
           [-1, 3, BottleneckCSP, [128]],
           [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
           [-1, 9, BottleneckCSP, [256]],
           [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
           [-1, 9, BottleneckCSP, [512]],
           [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
           [-1, 1, SPP, [1024, [5, 9, 13]]],
           [-1, 3, BottleneckCSP, [1024, False]],  # 9
          ]

        # YOLOv5 head
        head:
          [[-1, 1, Conv, [512, 1, 1]],
           [-1, 1, nn.Upsample, [None, 2, 'nearest']],
           [[-1, 6], 1, Concat, [1]],  # cat backbone P4
           [-1, 3, BottleneckCSP, [512, False]],  # 13

           [-1, 1, Conv, [256, 1, 1]],
           [-1, 1, nn.Upsample, [None, 2, 'nearest']],
           [[-1, 4], 1, Concat, [1]],  # cat backbone P3
           [-1, 3, BottleneckCSP, [256, False]],  # 17 (P3/8-small)

           [-1, 1, Conv, [256, 3, 2]],
           [[-1, 14], 1, Concat, [1]],  # cat head P4
           [-1, 3, BottleneckCSP, [512, False]],  # 20 (P4/16-medium)

           [-1, 1, Conv, [512, 3, 2]],
           [[-1, 10], 1, Concat, [1]],  # cat head P5
           [-1, 3, BottleneckCSP, [1024, False]],  # 23 (P5/32-large)

           [[17, 20, 23], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
          ]
        """

with open(new_train_yaml, 'w+') as cfg_file:
    cfg_file.write(model_cfg_text)

In [None]:
# Dataset description later handed to train.py through --data: where the
# train / validation images live, the class count and the class names.
data_cfg_lines = [
    "",
    "        train: " + images_train_path,
    "        val: " + images_val_path,
    "",
    "        nc: 14",
    "        names: ['Aortic enlargement', 'Atelectasis', 'Calcification', 'Cardiomegaly', 'Consolidation', 'ILD', 'Infiltration', 'Lung Opacity', 'Nodule/Mass', 'Other lesion', 'Pleural effusion', 'Pleural thickening', 'Pneumothorax', 'Pulmonary fibrosis']",
    "        ",
]

with open(new_data_yaml, 'w+') as data_file:
    data_file.write("\n".join(data_cfg_lines))

## Configure hyp.scratch.yaml for the correct augmentation

In [None]:
# Hyperparameter file with left-right flipping switched off
# (fliplr set to 0; it was 0.5)
hyp_text = """
        # Hyperparameters for COCO training from scratch
        # python train.py --batch 40 --cfg yolov5m.yaml --weights '' --data coco.yaml --img 640 --epochs 300
        # See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials

        lr0: 0.01  # initial learning rate (SGD=1E-2, Adam=1E-3)
        lrf: 0.2  # final OneCycleLR learning rate (lr0 * lrf)
        momentum: 0.937  # SGD momentum/Adam beta1
        weight_decay: 0.0005  # optimizer weight decay 5e-4
        warmup_epochs: 3.0  # warmup epochs (fractions ok)
        warmup_momentum: 0.8  # warmup initial momentum
        warmup_bias_lr: 0.1  # warmup initial bias lr
        box: 0.05  # box loss gain
        cls: 0.5  # cls loss gain
        cls_pw: 1.0  # cls BCELoss positive_weight
        obj: 1.0  # obj loss gain (scale with pixels)
        obj_pw: 1.0  # obj BCELoss positive_weight
        iou_t: 0.20  # IoU training threshold
        anchor_t: 4.0  # anchor-multiple threshold
        # anchors: 3  # anchors per output layer (0 to ignore)
        fl_gamma: 0.0  # focal loss gamma (efficientDet default gamma=1.5)
        hsv_h: 0.015  # image HSV-Hue augmentation (fraction)
        hsv_s: 0.7  # image HSV-Saturation augmentation (fraction)
        hsv_v: 0.4  # image HSV-Value augmentation (fraction)
        degrees: 0.0  # image rotation (+/- deg)
        translate: 0.1  # image translation (+/- fraction)
        scale: 0.5  # image scale (+/- gain)
        shear: 0.0  # image shear (+/- deg)
        perspective: 0.0  # image perspective (+/- fraction), range 0-0.001
        flipud: 0.0  # image flip up-down (probability)
        fliplr: 0.0  # image flip left-right (probability)
        mosaic: 1.0  # image mosaic (probability)
        mixup: 0.0  # image mixup (probability)
        """

with open(hyp_scratch_yaml, 'w+') as hyp_file:
    hyp_file.write(hyp_text)

# Once the file is closed, place a copy at data/hyp.scratch.yaml inside the
# YOLOv5 checkout.
hyp_in_repo = join(yolo_path,'data','hyp.scratch.yaml')
copyfile(hyp_scratch_yaml, hyp_in_repo)

## Train the network

In [None]:
# Weights & Biases (optional)
!pip install -q wandb  
!wandb login  f2c61dceec2498843db52cce6699af52370fcf95 # use 'wandb disabled' or 'wandb enabled' to disable or enable

chdir(yolo_path)
!wandb online

In [None]:
# Values interpolated into the train.py command line: --name and --epochs
proj_name, epochs = 'xray', 80

### Training session starting from the `best.pt` weights of the previous run

In [None]:
!python train.py --img 512 --batch 16 --epochs $epochs --data $new_data_yaml --cfg $new_train_yaml --weights $weights --exist-ok --name $proj_name

In [None]:
!ls -R $yolo_runs

In [None]:
# Disabled snippet: display the confusion matrix image of a training run.
# NOTE(review): the path uses 'train/xray3', while the training cell passes
# --name $proj_name ('xray') together with --exist-ok - confirm the run
# directory name before re-enabling.
#from IPython.display import Image, clear_output  # to display images
#Image(filename=os.path.join(yolo_runs,'train/xray3','confusion_matrix.png'), width=700)