Reference: [Ultralytics YoloV5 github repository](https://github.com/ultralytics/yolov5)

# Imports

In [1]:
import os
import glob as glob
import matplotlib.pyplot as plt
import cv2
import requests #to download some data from internet
import random
import numpy as np

# Seed every RNG this notebook draws from so results are reproducible:
# the `random` module picks the images shown by plot(), NumPy generates
# the per-class colors.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

In [2]:
# Print the current working directory; the relative dataset paths used
# further down resolve against it.
!pwd

/home/jovyan/public/b_it_bot_work/2d_perception


## Prepare the Dataset


Dataset for the Summer 2022 competition. Source: [HBRS Bib cloud](https://bib-cloud.bib.hochschule-bonn-rhein-sieg.de/apps/files/?dir=/Shared/b-it-bots-ds/atwork/images/object_detection/YOLO/internal_robocup_2022/FULL_DATASET_SS22_COMPETITION&fileid=14231157)

The dataset is structured in the following manner:

```
├── dataset_ss22_v4.yaml
├── README.md
├── dataset_ss22_v4
        train
        ├── images
        └── labels
        valid
        ├── images
        └── labels

```

### The Dataset YAML File

The dataset YAML file (`dataset_ss22_v4.yaml`) contains the paths to the training and validation images. It also lists the number of classes and the class names of the dataset.

The dataset contains 20 classes.

The following block shows the contents of the `dataset_ss22_v4.yaml` file.

```yaml
train: ../dataset_ss22_v4/train/images
val: ../dataset_ss22_v4/valid/images

nc: 20

names: ['F20_20_B', 'R20', 'S40_40_B', 'S40_40_G', 'axis', 'bearing_box', 'bracket', 'brown_box', 'cup', 'dishwasher_soap', 'eye_glasses', 'insulation_tape', 'motor', 'pringles', 'screw_driver', 'sponge', 'spoon', 'tennis_ball', 'toothbrush', 'towel']
```

### Visualize a Few Ground Truth Images

In YOLO format, each bounding box is given as `[x_center, y_center, width, height]`, with all values normalized by the image width and height.


```
A------------------------
-------------------------
-------------------------
-------------------------
-------------------------
------------------------B
```

In bounding-box (corner) format, the box is described by its top-left corner A `[x_min, y_min]` and its bottom-right corner B `[x_max, y_max]`.


Visualize 4 random samples from Dataset

[Reference](https://www.youtube.com/watch?v=Ciy1J97dbY0&ab_channel=LearnOpenCV)

In [3]:
# Class names in label order: position i is the class id used in the label files.
class_names = [
    'F20_20_B',
    'R20',
    'S40_40_B',
    'S40_40_G',
    'axis',
    'bearing_box',
    'bracket',
    'brown_box',
    'cup',
    'dishwasher_soap',
    'eye_glasses',
    'insulation_tape',
    'motor',
    'pringles',
    'screw_driver',
    'sponge',
    'spoon',
    'tennis_ball',
    'toothbrush',
    'towel',
]
# One random 3-channel color per class, each channel drawn from [0, 255).
num_classes = len(class_names)
colors = np.random.uniform(low=0, high=255, size=(num_classes, 3))

In [4]:
def yolo2bbox(bboxes):
    """Convert a single YOLO box to corner coordinates.

    Args:
        bboxes: Sequence whose first four items are
            ``x_center, y_center, width, height``.

    Returns:
        Tuple ``(xmin, ymin, xmax, ymax)`` in the same units as the input.
    """
    x_center, y_center = bboxes[0], bboxes[1]
    half_width = bboxes[2] / 2
    half_height = bboxes[3] / 2
    top_left = (x_center - half_width, y_center - half_height)
    bottom_right = (x_center + half_width, y_center + half_height)
    return top_left[0], top_left[1], bottom_right[0], bottom_right[1]

In [None]:
def plot_box(image, bboxes, labels):
    """Draw labelled bounding boxes onto ``image`` (in place) and return it.

    Args:
        image: Image array with shape ``(height, width, channels)``, e.g. as
            returned by ``cv2.imread``.
        bboxes: Iterable of YOLO boxes ``[x_center, y_center, width, height]``
            normalized by the image width/height.
        labels: Class ids (int or numeric string), one per box; each indexes
            the module-level ``class_names`` and ``colors``.

    Returns:
        The same ``image`` object with the rectangles and name tags drawn on it.
    """
    # Need the image height and width to denormalize the box coordinates.
    h, w, _ = image.shape

    # The former expressions min(1, max(3, int(w/500))) and
    # min(2, max(10, int(w/50))) had their clamp inverted and could only ever
    # evaluate to 1 and 2. State the effective values explicitly rather than
    # suggesting they scale with the image width.
    font_face = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 1
    font_thickness = 2
    label_pad = 10

    for box, label in zip(bboxes, labels):
        class_id = int(label)
        class_name = class_names[class_id]
        color = colors[class_id]

        # Denormalize the corner coordinates to pixels.
        x1, y1, x2, y2 = yolo2bbox(box)
        xmin, ymin = int(x1 * w), int(y1 * h)
        xmax, ymax = int(x2 * w), int(y2 * h)

        cv2.rectangle(image, (xmin, ymin), (xmax, ymax), color=color, thickness=2)

        # Text width and height of the class-name tag.
        tw, th = cv2.getTextSize(
            class_name, font_face, fontScale=font_scale, thickness=font_thickness
        )[0]

        if ymin - th - label_pad >= 0:
            # Enough room above the box: put the tag on top of it.
            tag_corner = (xmin + tw, ymin - th - label_pad)
            text_origin = (xmin + 1, ymin - label_pad)
        else:
            # Box touches the top of the image: the tag would be drawn outside
            # the frame, so place it just inside the box instead.
            tag_corner = (xmin + tw, ymin + th + label_pad)
            text_origin = (xmin + 1, ymin + th)

        # Filled background for the tag, then the class name in white.
        cv2.rectangle(image, (xmin, ymin), tag_corner, color=color, thickness=-1)
        cv2.putText(
            image,
            class_name,
            text_origin,
            font_face,
            font_scale,
            (255, 255, 255),
            font_thickness,
        )
    return image

In [None]:
def plot(image_paths, label_paths, num_samples):
    """Show randomly chosen images with their ground-truth boxes drawn on them.

    Images are sampled without replacement, so at most one subplot per image
    is shown (fewer than ``num_samples`` if fewer images match).

    Args:
        image_paths: Glob pattern matching the image files.
        label_paths: Glob pattern matching the YOLO label files; each line is
            ``class_id x_center y_center width height``.
        num_samples: Number of images to display.

    Raises:
        ValueError: If ``image_paths`` matches no files, or a non-blank label
            line does not have exactly five fields.
    """
    all_training_images = sorted(glob.glob(image_paths))
    if not all_training_images:
        raise ValueError(f"No images match pattern: {image_paths}")

    # Pair labels with images by file stem rather than by sorted position, so
    # a missing or extra label file cannot shift every following pairing.
    labels_by_stem = {
        os.path.splitext(os.path.basename(path))[0]: path
        for path in glob.glob(label_paths)
    }

    sample_count = max(0, min(num_samples, len(all_training_images)))
    chosen_images = random.sample(all_training_images, sample_count)

    # Near-square grid sized from the sample count (4 samples -> 2 x 2).
    n_cols = max(1, int(np.ceil(np.sqrt(sample_count))))
    n_rows = max(1, int(np.ceil(sample_count / n_cols)))

    plt.figure(figsize=(15, 12))
    for i, image_path in enumerate(chosen_images):
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals an unreadable file by returning None.
            print(f"Skipping unreadable image: {image_path}")
            continue

        bboxes = []
        labels = []
        stem = os.path.splitext(os.path.basename(image_path))[0]
        label_path = labels_by_stem.get(stem)
        # An image without a label file is displayed without boxes.
        if label_path is not None:
            with open(label_path, 'r') as f:
                for label_line in f:
                    fields = label_line.split()
                    if not fields:
                        continue  # tolerate blank lines
                    label, x_c, y_c, w, h = fields
                    bboxes.append([float(x_c), float(y_c), float(w), float(h)])
                    labels.append(label)

        result_image = plot_box(image, bboxes, labels)
        plt.subplot(n_rows, n_cols, i + 1)
        # OpenCV loads BGR; reverse the channel axis to RGB for matplotlib.
        plt.imshow(result_image[:, :, ::-1])
        plt.axis('off')
    plt.subplots_adjust(wspace=0)
    plt.tight_layout()
    plt.show()


In [None]:
# Visualize a few training images.
# The glob patterns are relative to the working directory, so this cell must
# run before the later `%cd yolov5` cell changes it.
plot(
    image_paths='dataset_ss22_v4/train/images/*', 
    label_paths='dataset_ss22_v4/train/labels/*',
    num_samples=4,
)

# Alternative dataset, kept for reference:
# plot(
#     image_paths='dataset_ss22_inference/train/images/*', 
#     label_paths='dataset_ss22_inference/train/labels/*',
#     num_samples=4,
# )

## Helper Functions for Logging

The helper function below chooses the directory in which the results of a training run are logged.


In [5]:
def set_res_dir(train=None, base_dir="/home/jovyan/public/logs/yolo5/train"):
    """Return the results directory for a new run, or for the latest run.

    The run index is derived from the highest ``results_<n>`` suffix already
    present in ``base_dir`` rather than from the number of entries, so gaps in
    the numbering or unrelated entries cannot make a new run reuse the name
    of an existing one.

    Args:
        train: ``True`` returns the next unused ``results_<n>`` path,
            ``False`` returns the path with the highest existing index.
            When ``None`` (default) the notebook-level ``TRAIN`` flag is used.
        base_dir: Directory that holds the ``results_<n>`` folders.

    Returns:
        The path ``"<base_dir>/results_<n>"`` as a string.
    """
    if train is None:
        train = TRAIN

    existing_entries = glob.glob(f"{base_dir}/*")
    print(f"Current number of result directories: {len(existing_entries)}")

    prefix = "results_"
    run_indices = []
    for entry in existing_entries:
        suffix = os.path.basename(entry)
        if not suffix.startswith(prefix):
            continue
        suffix = suffix[len(prefix):]
        if suffix.isdecimal():
            run_indices.append(int(suffix))
    latest_index = max(run_indices, default=0)

    if train:
        RES_DIR = f"{base_dir}/results_{latest_index + 1}"
        print(RES_DIR)
    else:
        RES_DIR = f"{base_dir}/results_{latest_index}"
    return RES_DIR

## Clone YOLOV5 Repository

In [6]:
# Clone the YOLOv5 repository only if no `yolov5` entry exists in the current
# working directory, so re-running this cell does not clone again.
if not os.path.exists('yolov5'):
    !git clone https://github.com/ultralytics/yolov5.git

In [7]:
# Change to yoloV5 directory
# NOTE(review): the path is relative, so this cell is not idempotent: running
# it a second time looks for yolov5/yolov5. Relative paths used after this
# cell resolve against the yolov5 directory.
%cd yolov5

/home/jovyan/public/b_it_bot_work/2d_perception/yolov5


## **Function to Monitor TensorBoard logs**

**NOTE**: TensorBoard logs can be visualized with [Local port link](http://10.20.118.78:31025/#scalars&runSelectionState=eyJ5b2xvNS90cmFpbi9yZXN1bHRzXzEiOmZhbHNlLCJ5b2xvNS90cmFpbi9yZXN1bHRzXzIiOmZhbHNlLCJ5b2xvNS90cmFpbi9yZXN1bHRzXzMiOmZhbHNlLCJ5b2xvNS90cmFpbi9yZXN1bHRzXzQiOmZhbHNlLCJ5b2xvNS90cmFpbi9yZXN1bHRzXzUiOmZhbHNlLCJ5b2xvNS90cmFpbi9yZXN1bHRzXzgiOmZhbHNlLCJ5b2xvNS90cmFpbi9yZXN1bHRzXzgyIjpmYWxzZSwieW9sbzUvdHJhaW4vcmVzdWx0c18xNCI6ZmFsc2UsInlvbG81L3RyYWluL3Jlc3VsdHNfMTMiOmZhbHNlLCJ5b2xvNS90cmFpbi9yZXN1bHRzXzEyIjpmYWxzZSwieW9sbzUvdHJhaW4vcmVzdWx0c18xMSI6ZmFsc2V9)


## Training using YOLOV5

In [8]:
TRAIN = True  # True: launch a new training run; False: only point RES_DIR at an existing run
FREEZE = True # freezing first 15 layers
EPOCHS = 1000  # forwarded to train.py via --epochs

##### If an already-trained model (`.pt` file) is available, pass its path via the `--weights` argument
Note: user-configurable arguments:
- batch size
- data yaml path
- pre-trained weight file
- image size
- epochs
- result directory
- freeze layers

In [9]:

# Launch training (TRAIN=True) or just select an existing results directory
# (TRAIN=False). Either way RES_DIR is defined for the cells below.
if TRAIN:   
    if FREEZE:
        # Fresh results directory for this run.
        RES_DIR = set_res_dir()

        # training by freezing first 15 layers out of 25 layers       
#         !python train.py \
#                 --batch 256 \
#                 --data ../dataset_ss22.yaml \
#                 --weights yolov5m.pt \
#                 --img 640 \
#                 --epochs {EPOCHS} \
#                 --name {RES_DIR} \
#                 --freeze 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14

        # training on multi GPUs: two processes (--nproc_per_node 2) on
        # devices 0 and 1, with layers 0-14 frozen
        !python -m torch.distributed.run --nproc_per_node 2 train.py \
                --batch 512 \
                --data ../dataset_ss22_v4.yaml \
                --weights yolov5m.pt \
                --img 640 \
                --epochs {EPOCHS} \
                --name {RES_DIR} \
                --device 0,1 \
                --freeze 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14



#         # training using pretrained model (multi GPUs)
#         !python -m torch.distributed.run --nproc_per_node 2 train.py \
#                 --batch 512 \
#                 --data ../dataset_ss22_v3.yaml \
#                 --resume /home/jovyan/public/logs/yolo5/train/results_22/weights/best.pt \
#                 --img 640 \
#                 --epochs {EPOCHS} \
#                 --name {RES_DIR} \
#                 --device 0,1 \
#                 --freeze 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14

    else:
        RES_DIR = set_res_dir()
        # training all layers of model
        # NOTE(review): this branch trains on dataset_ss22_v2.yaml while the
        # frozen branch above uses dataset_ss22_v4.yaml -- confirm which
        # dataset is intended here.
        !python train.py --data ../dataset_ss22_v2.yaml --weights yolov5m.pt \
        --img 640 --epochs {EPOCHS} --batch-size 256 --name {RES_DIR}
else:
    # set the RES_DIR name
    # NOTE(review): the run index is hard-coded instead of coming from
    # set_res_dir(); update it by hand to inspect a different run.
    res_dir_count = '19' 
    RES_DIR = f"/home/jovyan/public/logs/yolo5/train/results_{res_dir_count}"
    print("Set RES_DIR to: ", RES_DIR)


Current number of result directories: 68
/home/jovyan/public/logs/yolo5/train/results_69
*****************************************
Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. 
*****************************************
[34m[1mtrain: [0mweights=yolov5m.pt, cfg=, data=../dataset_ss22_v4.yaml, hyp=data/hyps/hyp.scratch-low.yaml, epochs=1000, batch_size=512, imgsz=640, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, noplots=False, evolve=None, bucket=, cache=None, image_weights=False, device=0,1, multi_scale=False, single_cls=False, optimizer=SGD, sync_bn=False, workers=8, project=runs/train, name=/home/jovyan/public/logs/yolo5/train/results_69, exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=100, freeze=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], save_period=-1, seed=0

[34m[1mtrain: [0mScanning '/home/jovyan/public/b_it_bot_work/2d_perception/dataset_ss22_v4[0m
[34m[1mval: [0mScanning '/home/jovyan/public/b_it_bot_work/2d_perception/dataset_ss22_v4/v[0m

[34m[1mAutoAnchor: [0m5.48 anchors/target, 1.000 Best Possible Recall (BPR). Current anchors are a good fit to dataset ✅
Plotting labels to /home/jovyan/public/logs/yolo5/train/results_69/labels.jpg... 
Image sizes 640 train, 640 val
Using 16 dataloader workers
Logging results to [1m/home/jovyan/public/logs/yolo5/train/results_69[0m
Starting training for 1000 epochs...

      Epoch    GPU_mem   box_loss   obj_loss   cls_loss  Instances       Size
      0/999      22.1G      0.118    0.05252    0.08134       1870        640:  Reducer buckets have been rebuilt in this iteration.
      0/999      20.6G     0.1182    0.05368    0.08142        276        640: 1
                 Class     Images  Instances          P          R      mAP50   
                   all        400       1575   0.000

                 Class     Images  Instances          P          R      mAP50   
                   all        400       1575      0.215      0.359      0.224      0.105

      Epoch    GPU_mem   box_loss   obj_loss   cls_loss  Instances       Size
     23/999      21.6G    0.04759    0.04538    0.06396        251        640: 1
                 Class     Images  Instances          P          R      mAP50   
                   all        400       1575      0.222      0.356      0.257      0.112

      Epoch    GPU_mem   box_loss   obj_loss   cls_loss  Instances       Size
     24/999      21.6G    0.04718    0.03992    0.06239        174        640: 1
                 Class     Images  Instances          P          R      mAP50   
                   all        400       1575      0.214      0.406      0.242     0.0913

      Epoch    GPU_mem   box_loss   obj_loss   cls_loss  Instances       Size
     25/999      21.6G    0.04628    0.04285     0.0615        235        640: 1
          

     47/999      21.6G    0.03453    0.03671    0.03372        258        640: 1
                 Class     Images  Instances          P          R      mAP50   
                   all        400       1575      0.772      0.764      0.807      0.534

      Epoch    GPU_mem   box_loss   obj_loss   cls_loss  Instances       Size
     48/999      21.6G    0.03448    0.03397    0.03386        206        640: 1
                 Class     Images  Instances          P          R      mAP50   
                   all        400       1575      0.793      0.769      0.825      0.552

      Epoch    GPU_mem   box_loss   obj_loss   cls_loss  Instances       Size
     49/999      21.6G    0.03408    0.03511    0.03302        211        640: 1
                 Class     Images  Instances          P          R      mAP50   
                   all        400       1575      0.764      0.773      0.817      0.528

      Epoch    GPU_mem   box_loss   obj_loss   cls_loss  Instances       Size
     50/99

     72/999      21.6G    0.03178    0.03181    0.01901        215        640: 1
                 Class     Images  Instances          P          R      mAP50   
                   all        400       1575      0.908      0.913      0.949      0.693

      Epoch    GPU_mem   box_loss   obj_loss   cls_loss  Instances       Size
     73/999      21.6G    0.03104    0.03118    0.01838        219        640: 1
                 Class     Images  Instances          P          R      mAP50   
                   all        400       1575      0.901      0.873      0.929      0.669

      Epoch    GPU_mem   box_loss   obj_loss   cls_loss  Instances       Size
     74/999      21.6G    0.03005    0.03036    0.01871        230        640: 1
                 Class     Images  Instances          P          R      mAP50   
                   all        400       1575      0.908      0.918      0.949        0.7

      Epoch    GPU_mem   box_loss   obj_loss   cls_loss  Instances       Size
     75/99

     97/999      21.6G    0.02867    0.02754    0.01394        174        640: 1
                 Class     Images  Instances          P          R      mAP50   
                   all        400       1575      0.968      0.957       0.98      0.744

      Epoch    GPU_mem   box_loss   obj_loss   cls_loss  Instances       Size
     98/999      21.6G    0.02823    0.02972    0.01341        235        640: 1
                 Class     Images  Instances          P          R      mAP50   
                   all        400       1575      0.962      0.954      0.981       0.75

      Epoch    GPU_mem   box_loss   obj_loss   cls_loss  Instances       Size
     99/999      21.6G    0.02879    0.02859    0.01325        194        640: 1
                 Class     Images  Instances          P          R      mAP50   
                   all        400       1575      0.963      0.952       0.98      0.758

      Epoch    GPU_mem   box_loss   obj_loss   cls_loss  Instances       Size
    100/99

    122/999      21.6G    0.02701     0.0287    0.01032        212        640: 1
                 Class     Images  Instances          P          R      mAP50   
                   all        400       1575      0.971      0.963      0.984      0.771

      Epoch    GPU_mem   box_loss   obj_loss   cls_loss  Instances       Size
    123/999      21.6G    0.02733    0.02891    0.01047        214        640: 1
                 Class     Images  Instances          P          R      mAP50   
                   all        400       1575      0.977       0.97      0.988      0.779

      Epoch    GPU_mem   box_loss   obj_loss   cls_loss  Instances       Size
    124/999      21.6G    0.02712    0.02812    0.01064        221        640: 1
                 Class     Images  Instances          P          R      mAP50   
                   all        400       1575      0.962      0.962      0.985       0.78

      Epoch    GPU_mem   box_loss   obj_loss   cls_loss  Instances       Size
    125/99

    147/999      21.6G     0.0262    0.02771   0.008983        214        640: 1
                 Class     Images  Instances          P          R      mAP50   
                   all        400       1575      0.976      0.971      0.989      0.789

      Epoch    GPU_mem   box_loss   obj_loss   cls_loss  Instances       Size
    148/999      21.6G    0.02571     0.0279    0.00904        228        640: 1
                 Class     Images  Instances          P          R      mAP50   
                   all        400       1575      0.985      0.984      0.991       0.79

      Epoch    GPU_mem   box_loss   obj_loss   cls_loss  Instances       Size
    149/999      21.6G    0.02657    0.02755    0.00911        214        640: 1
                 Class     Images  Instances          P          R      mAP50   
                   all        400       1575      0.984      0.975       0.99      0.802

      Epoch    GPU_mem   box_loss   obj_loss   cls_loss  Instances       Size
    150/99

## Check Out the Validation Predictions and Inference

### Visualization and Inference Utilities

In [None]:
# Function to show validation predictions saved during training.
def show_valid_results(RES_DIR):
    !ls {RES_DIR}
    EXP_PATH = f"{RES_DIR}"
    validation_pred_images = glob.glob(f"{EXP_PATH}/*_pred.jpg")
    print(validation_pred_images)
    for pred_image in validation_pred_images:
        image = cv2.imread(pred_image)
        plt.figure(figsize=(19, 16))
        plt.imshow(image[:, :, ::-1])
        plt.axis('off')
        plt.show()

The following functions are for carrying out inference on images and videos.

In [None]:
# Helper function for inference on images.
def inference(RES_DIR, data_path):
    # Directory to store inference results.
    infer_dir_count = len(glob.glob('/home/jovyan/public/logs/yolo5/detect/*'))
    print(f"Current number of inference detection directories: {infer_dir_count}")
    INFER_DIR = f"/home/jovyan/public/logs/yolo5/detect/inference_{infer_dir_count+1}"
    print(INFER_DIR)
    # Inference on images.
    !python detect.py --weights {RES_DIR}/weights/best.pt \
    --source {data_path} --name {INFER_DIR} --device 0
    return INFER_DIR

In [None]:
def visualize(INFER_DIR):
    """Display every readable image found directly inside ``INFER_DIR``.

    Entries that OpenCV cannot decode (sub-directories, text files, ...) are
    reported and skipped instead of aborting the loop.

    Args:
        INFER_DIR: Directory holding the inference result images.
    """
    # Visualize inference images.
    infer_images = sorted(glob.glob(f"{INFER_DIR}/*"))
    print(infer_images)
    for pred_image in infer_images:
        image = cv2.imread(pred_image)
        if image is None:
            # cv2.imread returns None for anything it cannot read as an image.
            print(f"Skipping non-image entry: {pred_image}")
            continue
        plt.figure(figsize=(19, 16))
        # OpenCV loads BGR; reverse the channel axis to RGB for matplotlib.
        plt.imshow(image[:, :, ::-1])
        plt.axis('off')
        plt.show()

**Visualize validation prediction images.**

In [None]:
# Requires RES_DIR from the training cell above.
show_valid_results(RES_DIR)

### Inference
In this section, we will carry out inference on unseen images and videos from the internet. 

The images for inference are in the `inference_images` directory.

**To carry out inference on images, we just need to provide the directory path where all the images are stored, and inference will happen on all images automatically.**

In [None]:
# Switch between detecting on one image file and on a whole directory.
on_single_image = True

if on_single_image:
    # Inference on single image
    IMAGE_INFER_DIR = inference(RES_DIR, '/home/jovyan/public/b_it_bot_work/2d_perception_test/inference_images/inference_img01/1562121558.622500193_raw_rgb.jpg')
else:
    # Inference on images.
    IMAGE_INFER_DIR = inference(RES_DIR, '/home/jovyan/public/b_it_bot_work/2d_perception/day3_test_images')


# Bare last expression: the notebook displays the directory returned by inference().
IMAGE_INFER_DIR

In [None]:
# IMAGE_INFER_DIR
# Display the result images of the inference run started in the previous cell.
visualize(IMAGE_INFER_DIR)

# Export model (.pt) to ONNX model (.onnx)
###### Reference: https://learnopencv.com/object-detection-using-yolov5-and-opencv-dnn-in-c-and-python/

In [None]:
# Export the best checkpoint of a training run to ONNX.
# NOTE(review): the weights path is hard-coded to results_28 rather than
# derived from RES_DIR -- confirm this is the run that should be exported.
!python export.py --weights /home/jovyan/public/logs/yolo5/train/results_28/weights/best.pt --include onnx