# Training the Object Detection Model

This notebook holds the logic to train an object detection model for detecting biomarkers from hair root micrographs.

In [None]:
from imageai.Detection.Custom import DetectionModelTrainer
from imageai.Detection.Custom import CustomObjectDetection
import boto3
import botocore
import pandas as pd
import cv2
import os
import random
import logging
import json

Let's define variables that we will need in the following code snippets.

In [None]:
# CSV files containing the labelling data; each one is passed to load_training_data() below.
labelling_filenames = ['Add your labelling csv files']
# Only label entries whose 'label' value appears in this list are converted to annotations.
allowed_labels = ['Add your labels']
# Name of the S3 bucket the images are downloaded from.
images_bucket_name = 'your-img-bucket'
# Local root directory; train/ and validation/ data are written beneath it.
training_data_directory = 'your-training-data-dir'

We need to separate the data into training and validation datasets. We will use a randomizer to divide the data roughly 20/80 between validation and training data. The labels from each given CSV file are converted to the YOLO format, and the corresponding images are downloaded from S3.

In [None]:
# S3 resource handle and the bucket object used by download_image_from_bucket().
s3 = boto3.resource('s3')
img_bucket = s3.Bucket(images_bucket_name)

def download_image_from_bucket(key: str, filename: str) -> bool:
    """Download one object from ``img_bucket`` to a local file.

    Args:
        key: Object key inside the image bucket.
        filename: Local path the object is written to. Missing parent
            directories are created first so the download cannot fail
            merely because the target folder does not exist yet.

    Returns:
        True when the object was downloaded, False when the bucket reported
        a 404 for ``key`` (a message is printed in that case).

    Raises:
        botocore.exceptions.ClientError: For any client error other than 404.
    """
    target_dir = os.path.dirname(filename)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    try:
        img_bucket.download_file(key, filename)
    except botocore.exceptions.ClientError as e:
        if e.response['Error']['Code'] == "404":
            print("The object does not exist.")
            # Tell the caller nothing was written instead of failing silently.
            return False
        raise
    return True

def minmax_label_to_yolo(label, image_height, image_width, class_names=('Class1', 'Class2', 'Class3')):
    """Convert a corner-based bounding box into a YOLO annotation line.

    Args:
        label: Mapping with the keys 'label' (class name) and 'x1', 'y1',
            'x2', 'y2' (box corners in pixels).
        image_height: Height of the image in pixels.
        image_width: Width of the image in pixels.
        class_names: Ordered class names; the position of a name in this
            sequence is the class index written to the annotation. The
            default reproduces the original Class1/Class2/Class3 mapping.

    Returns:
        A string "<class_index> <x_center> <y_center> <width> <height>" where
        the four box values are divided by the image size and rounded to
        five decimals.

    Raises:
        ValueError: If ``label['label']`` is not in ``class_names``.
    """
    try:
        class_index = list(class_names).index(label['label'])
    except ValueError:
        # Previously this case only printed a message and then crashed on an
        # unassigned variable; fail with an explicit, descriptive error instead.
        raise ValueError('No such class being trained: ' + str(label['label'])) from None

    label_width_in_pixels = label['x2'] - label['x1']
    label_height_in_pixels = label['y2'] - label['y1']

    # YOLO stores the box centre and size relative to the image dimensions.
    box_values = [
        (label['x1'] + label_width_in_pixels / 2) / image_width,
        (label['y1'] + label_height_in_pixels / 2) / image_height,
        label_width_in_pixels / image_width,
        label_height_in_pixels / image_height,
    ]

    return ' '.join([str(class_index)] + [str(round(value, 5)) for value in box_values])

def load_training_data(csv_path, validation_fraction=0.2):
    """Download labelled images and write YOLO annotation files for them.

    Each CSV row is randomly assigned to the train or validation split. The
    image named in the row's 'image' column is downloaded into
    ``<training_data_directory>/<split>/images/`` and the row's labels
    (filtered by ``allowed_labels``) are written in YOLO format to
    ``<training_data_directory>/<split>/annotations/``.

    Args:
        csv_path: Path of a CSV file with an 'image' column (bucket key) and
            a 'label' column (JSON list of label objects).
        validation_fraction: Approximate share of rows sent to the validation
            split. Defaults to 0.2, the previously hard-coded value.
    """
    df = pd.read_csv(csv_path)

    for _, row in df.iterrows():
        if random.uniform(0, 1) > validation_fraction:
            directory_path = training_data_directory + '/train/'
        else:
            directory_path = training_data_directory + '/validation/'

        image_folder = f'{directory_path}images/'
        labels_path = directory_path + 'annotations/'
        # Neither the download nor open() creates missing folders.
        os.makedirs(image_folder, exist_ok=True)
        os.makedirs(labels_path, exist_ok=True)

        image_key = row['image']
        filename = image_key.split('/')[-1]
        labels = [label for label in json.loads(row['label']) if label['label'] in allowed_labels]

        image_path = f'{image_folder}{filename}'
        download_image_from_bucket(image_key, image_path)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None for a missing or unreadable file (e.g. when
            # the download reported a 404); skip the row instead of crashing.
            print(f'Skipping {image_key}: image could not be downloaded or read.')
            continue

        image_height, image_width = image.shape[:2]
        yolo_labels = [minmax_label_to_yolo(label, image_height, image_width) for label in labels]

        # splitext handles any image extension, not only a lowercase '.jpg'.
        labels_file = os.path.splitext(filename)[0] + '.txt'
        with open(labels_path + labels_file, 'w') as f:
            f.write('\n'.join(yolo_labels))

In [None]:
# Build the train/validation datasets from every configured labelling CSV file.
for filename in labelling_filenames:
    load_training_data(filename)

In [None]:
# Configure a YOLOv3 trainer on the data under training_data_directory and start training.
trainer = DetectionModelTrainer()
trainer.setModelTypeAsYOLOv3()
trainer.setDataDirectory(data_directory=training_data_directory)
# NOTE(review): 'your-class-names' is a placeholder. Presumably the names must be listed in the
# same order as the class indices written by minmax_label_to_yolo — confirm before training.
trainer.setTrainConfig(object_names_array=['your-class-names'], batch_size=16, num_experiments=200, train_from_pretrained_model='yolov3.pt')
trainer.trainModel()

In [None]:
# Evaluate with a fresh trainer instance, so this cell does not depend on the training cell's state.
trainer = DetectionModelTrainer()
trainer.setModelTypeAsYOLOv3()
trainer.setDataDirectory(data_directory=training_data_directory)
# model_path points at the models/ folder and json_path at the detection config JSON,
# both located under training_data_directory.
trainer.evaluateModel(model_path=f"{training_data_directory}/models", json_path=f"{training_data_directory}/json/object_detection_training_data_yolov3_detection_config.json", iou_threshold=0.5, object_threshold=0.9, nms_threshold=0.5)

In [None]:
# Load one trained checkpoint and run it over every validation image.
detector = CustomObjectDetection()
detector.setModelTypeAsYOLOv3()
# NOTE(review): the checkpoint file name is specific to one training run; update it to match
# a file that actually exists under <training_data_directory>/models.
detector.setModelPath(f'{training_data_directory}/models/yolov3_object_detection_training_data_mAP-0.17144_epoch-11.pt')
detector.setJsonPath(f'{training_data_directory}/json/object_detection_training_data_yolov3_detection_config.json')
detector.loadModel()

validation_images_dir = f'{training_data_directory}/validation/images'
output_dir = 'detected'
# The annotated images are written into output_dir, so make sure it exists first.
os.makedirs(output_dir, exist_ok=True)

for filename in os.listdir(validation_images_dir):
    detections = detector.detectObjectsFromImage(
        input_image=f'{validation_images_dir}/{filename}',
        output_image_path=f'{output_dir}/{filename}',
        minimum_percentage_probability=5,
    )
    print(detections)