<div style="text-align:center"><img src="https://www.ftmc.com.au/wp-content/uploads/2020/01/slide2-1080x506.jpg">
    <h1 style="text-align:center; 
               font-weight:bold; 
               position: absolute; 
               top: 50%; left: 50%; 
               transform: translate(-50%, -50%); 
               font-size:500%; 
               color:white">
        Abnormalities Detection Using YOLOv4</h1>

</div>

# Import required packages

In [None]:
import os
import shutil
import threading
import time
import random
import glob
from tqdm import tqdm
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import cv2

import tensorflow as tf
from tensorflow import keras
print('Tensorflow version: %s' % tf.__version__)

> Copy **dataset** to working folder for easy compiling

In [None]:
src = '../input/vinbigdata-512-image-dataset'
dst = './vinbigdata-512-image-dataset'

# Total number of files found (recursively) under the source folder; used
# below as the reference for deciding whether the copy is complete.
src_files_count = sum(len(filenames) for _root, _dirs, filenames in os.walk(src))

    
def copied_check(src_files_count, dst):
    """Return True when `dst` already holds as many files as the source tree.

    Args:
        src_files_count: number of files counted under the source folder.
        dst: path of the destination folder to inspect.

    Returns:
        True if `dst` is an existing directory whose recursive file count
        equals `src_files_count`, otherwise False.
    """
    # A missing destination can never count as "already copied", even when
    # the source happens to contain zero files.
    if not os.path.isdir(dst):
        return False
    dst_files_count = sum(len(files) for _, _, files in os.walk(dst))
    # Compare the two counts (the original compared the count to the path).
    return src_files_count == dst_files_count

def check_on_progress(src_files_count, dst, poll_interval=0.5):
    """Poll `dst` and print the copy progress in steps of 10%.

    Args:
        src_files_count: number of files expected once the copy is complete.
        dst: destination folder whose files are counted on every poll.
        poll_interval: seconds to wait between two polls. Sleeping keeps this
            loop from spinning at full speed while the copy is running.

    Returns:
        None. Returns as soon as `dst` holds at least `src_files_count` files
        (immediately when `src_files_count` is 0).
    """
    logged = 0
    dst_files_count = 0
    while dst_files_count < src_files_count:
        dst_files_count = sum(len(files) for _, _, files in os.walk(dst))
        percentage = int(dst_files_count / src_files_count * 100)
        # Round down to the last 10% step so a milestone is still reported
        # when a poll lands between two exact multiples of 10.
        milestone = percentage - percentage % 10
        if milestone > logged:
            print('Percentage: {:d}%'.format(milestone))
            logged = milestone
        if dst_files_count < src_files_count:
            time.sleep(poll_interval)
    
def copy_dir(src, dst):
    """Copy the directory tree `src` to `dst`, printing start/finish messages.

    Args:
        src: path of the folder to copy.
        dst: path of the folder to create. `shutil.copytree` is called with
            its default arguments, so `dst` is expected not to exist yet.
    """
    print('Start copying')
    shutil.copytree(src, dst)
    # Short pause before announcing completion.
    time.sleep(0.1)
    print('Done!')
    print(dst)

if not copied_check(src_files_count, dst):
    # Remove any partial copy so that `copytree` starts from a clean state
    if os.path.exists(dst):
        shutil.rmtree(dst)
    # Start the copying procedure on a separate thread
    cp = threading.Thread(name='copy', target=copy_dir, args=(src, dst))
    cp.start()
    # Start the checking on a separate thread (daemon: it must never keep
    # the interpreter alive if the copy fails half-way)
    ch = threading.Thread(
        name='check',
        target=check_on_progress,
        args=(src_files_count, dst),
        daemon=True,
    )
    ch.start()
    # Block until the copy has finished: the following cells read from `dst`,
    # so under "Run All" they must not start before the data is in place.
    cp.join()
    # Give the progress reporter a bounded amount of time to print its last line
    ch.join(timeout=5)
else:
    print('Dataset has been already copied!')

> Copy **darknet** to working folder for writing files, making backup, config, etc.

In [None]:
darknet_src = '../input/darknetyolo'
darknet_dst = './darknetyolo'

# Get total number of files from src folder
d_src_files_count = sum(len(files) for _, _, files in os.walk(darknet_src))

if not copied_check(d_src_files_count, darknet_dst):
    # Remove any partial copy so that `copytree` starts from a clean state
    if os.path.exists(darknet_dst):
        shutil.rmtree(darknet_dst)
    # Start the copying procedure on a separate thread
    cp_d = threading.Thread(name='copy_darknet', target=copy_dir, args=(darknet_src, darknet_dst))
    cp_d.start()
    # Start the checking on a separate thread (daemon: it must never keep
    # the interpreter alive if the copy fails half-way)
    ch_d = threading.Thread(
        name='check_darknet',
        target=check_on_progress,
        args=(d_src_files_count, darknet_dst),
        daemon=True,
    )
    ch_d.start()
    # Block until the copy has finished: later cells write files into
    # `darknet_dst`, so they must not run before it exists.
    cp_d.join()
    # Give the progress reporter a bounded amount of time to print its last line
    ch_d.join(timeout=5)
else:
    print('Darknet has been already copied!')

> Sanity check

In [None]:
# Compare the on-disk size of each source folder with its working copy.
!du -sh ../input/darknetyolo
!du -sh ./darknetyolo
!du -sh ../input/vinbigdata-512-image-dataset
!du -sh ./vinbigdata-512-image-dataset

# EDA + Preprocessing data

Configuration

In [None]:
# Image dimensions (in pixels) used throughout the notebook.
IMG_WIDTH = IMG_HEIGHT = 512
# (width, height) pair handed to the model configuration.
INPUT_SIZE = (IMG_WIDTH, IMG_HEIGHT)

Check the `.csv` files

In [None]:
# Image folders inside the working-directory copy of the dataset.
TRAIN_DIR = './vinbigdata-512-image-dataset/vinbigdata/train'
TEST_DIR = './vinbigdata-512-image-dataset/vinbigdata/test'

# CSV tables shipped next to the image folders.
train_df = pd.read_csv('./vinbigdata-512-image-dataset/vinbigdata/train.csv')
test_df = pd.read_csv('./vinbigdata-512-image-dataset/vinbigdata/test.csv')

# Preview the first rows of the training table.
train_df.head(10)

> Drop "no finding" images, which are not useful for the training process

In [None]:
# Keep only rows that carry a real annotation (class 14 is filtered out).
has_finding = train_df['class_id'].ne(14)
train_df = train_df.loc[has_finding].reset_index(drop=True)

# Unique image names: an image with several annotations appears once here.
train_image_names = train_df['image_id'].unique().tolist()

NUM_TRAIN_FILES = len(train_image_names)
NUM_TEST_FILES = len(test_df)
for message in (
    f'Dataset has {len(train_df)} elements after removing normal records.',
    f'Number of training files:\t{NUM_TRAIN_FILES}',
    f'Number of testing files:\t{NUM_TEST_FILES}',
):
    print(message)

In [None]:
# Creating LabelMap
# Build the label map: one class name per class id, ordered by class id.
label_map = (
    train_df[["class_name", "class_id"]]
    .drop_duplicates()
    .sort_values(by="class_id")
    .reset_index(drop=True)["class_name"]
)
N_CLASSES = len(label_map)
label_map

In [None]:
# Horizontal bar chart of the number of annotations per class.
fig, ax = plt.subplots(figsize=(10, 10))
ax.grid(axis='x')
sns.countplot(data=train_df, y='class_name', ax=ax)

> Normalize annotations and write to `.txt` files for YOLO training

In [None]:
# Group the annotations by image once, instead of re-filtering the whole
# DataFrame for every image name (which is quadratic in the dataset size).
annotations_by_image = train_df.groupby('image_id', sort=False)

for name, element in tqdm(annotations_by_image, total=len(train_image_names)):
    # Extract and normalize annotations to YOLO format:
    # box centre and box size, each divided by the image width/height.
    x_cen = 1/2 * (element['x_max'] + element['x_min']) / element['width']
    y_cen = 1/2 * (element['y_max'] + element['y_min']) / element['height']
    w = (element['x_max'] - element['x_min']) / element['width']
    h = (element['y_max'] - element['y_min']) / element['height']

    # One `class x_center y_center width height` line per bounding box
    lines = [
        f'{cls} {xc} {yc} {bw} {bh}'
        for cls, xc, yc, bw, bh in zip(element['class_id'], x_cen, y_cen, w, h)
    ]

    # The annotation file sits next to the image and shares its base name;
    # lines are newline-separated with no trailing newline.
    with open(os.path.join(TRAIN_DIR, name + '.txt'), 'w') as f:
        f.write('\n'.join(lines))

print('Done!')

> Do a sanity check

In [None]:
# Pick a few images from the list of unique image names. (Indexing the rows
# of `train_df` with indices sized for the unique-name list would only ever
# reach the first NUM_TRAIN_FILES annotation rows.)
sample_names = random.sample(train_image_names, k=min(4, len(train_image_names)))

for name in sample_names:
    expected = int((train_df['image_id'] == name).sum())
    with open(os.path.join(TRAIN_DIR, name + '.txt'), 'r') as f:
        written = len(f.readlines())
    if written == expected:
        print(f'Correctly writing in file `{name}.txt`')
    else:
        # Report failures explicitly instead of staying silent
        print(f'MISMATCH in file `{name}.txt`: {written} lines written, {expected} annotations expected')

In [None]:
# Print the annotation file of one hard-coded image id.
!cat ./vinbigdata-512-image-dataset/vinbigdata/train/fb929e0efd696fe0f54902da5e7ec57a.txt

In [None]:
row = 4
col = 4
# Random positions in the list of unique training image names
indices = np.random.randint(len(train_image_names), size=row*col)

plt.figure(figsize=(20, 20))
for i in tqdm(range(row*col)):
    plt.subplot(row, col, i+1)
    # Index the same list the random indices were drawn for; indexing the
    # rows of `train_df` here would only ever sample its first rows.
    img = plt.imread(os.path.join(TRAIN_DIR, train_image_names[indices[i]] + '.png'))
    plt.imshow(img, cmap='gray')
    plt.xticks([])
    plt.yticks([])

> Assign a random color to each class

In [None]:
# Draw one list of random intensities per colour channel (red, then green,
# then blue), each holding one value per class.
random_r, random_g, random_b = (
    [random.uniform(0, 1) for _ in range(N_CLASSES)] for _channel in range(3)
)

# Entry i is the (r, g, b) colour used for class id i.
color_map_with_label = list(zip(random_r, random_g, random_b))
print(*color_map_with_label, sep='\n')

Helper function for plotting annotations

In [None]:
def plot_boxes(img_id, directory, ax=None):
    """Draw the YOLO annotations of one image on top of the image.

    Reads `<directory>/<img_id>.png` and the matching `<img_id>.txt` file, in
    which every non-empty line is `class x_center y_center width height` with
    coordinates normalized by the image size.

    Args:
        img_id: base name (without extension) of the image/annotation pair.
        directory: folder containing both files.
        ax: optional matplotlib Axes to draw on. When omitted, a new
            10x10 figure is created.
    """
    img_path = os.path.join(directory, img_id + '.png')
    anno_path = os.path.join(directory, img_id + '.txt')

    # Read image
    img = plt.imread(img_path)

    # Convert to a writable 3-channel image so coloured boxes can be drawn
    if img.ndim == 2:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    else:
        # Already multi-channel: keep the first three channels (drops alpha)
        img = np.array(img[..., :3])

    # Scale the boxes with the size of the image actually loaded rather than
    # with global constants, so images of any resolution are drawn correctly.
    img_height, img_width = img.shape[:2]

    class_id_list = []
    with open(anno_path, 'r') as f:
        for box_info in f:
            # Tolerate blank lines (e.g. a trailing newline)
            if not box_info.strip():
                continue
            class_id, x_cen, y_cen, w, h = map(float, box_info.split())
            class_id = int(class_id)
            class_id_list.append(class_id)
            xmin = int((x_cen - w/2)*img_width)
            ymin = int((y_cen - h/2)*img_height)
            xmax = int((x_cen + w/2)*img_width)
            ymax = int((y_cen + h/2)*img_height)
            cv2.rectangle(
                img,
                pt1=(xmin, ymin),
                pt2=(xmax, ymax),
                color=color_map_with_label[class_id],
                thickness=2
            )
            cv2.putText(
                img,
                str(label_map[class_id]),
                # Keep the label inside the image for boxes touching the top edge
                (xmin, max(ymin - 5, 12)),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.5,
                color_map_with_label[class_id],
                1
            )

    title = f'{len(class_id_list)} abnormalities detected belonging to {len(set(class_id_list))} classes'

    if ax is None:
        plt.figure(figsize=(10, 10))
        plt.imshow(img)
        plt.axis('off')
        plt.title(title)
    else:
        ax.imshow(img)
        ax.set_title(title)

In [None]:
# Two distinct random training images, drawn side by side with their boxes.
indices = random.sample(range(NUM_TRAIN_FILES), 2)

fig, axes = plt.subplots(1, 2, figsize=(20, 10))
for panel, image_index in zip(axes, indices):
    plot_boxes(train_image_names[image_index], TRAIN_DIR, panel)

# Setup configurations for YOLOv4

> Create `train.txt` and `val.txt`

In [None]:
# Use absolute image paths: the notebook later changes directory into the
# darknet folder, so paths relative to the current working directory would
# no longer resolve when darknet reads these lists.
data_paths = [
    os.path.abspath(os.path.join(TRAIN_DIR, image_name + '.png'))
    for image_name in train_image_names
]
darknet_path = '/kaggle/working/darknetyolo'
# Split into training (first 80%) and validation (remaining 20%)
split = int(len(data_paths) * 0.8)
train_data_paths = data_paths[:split]
val_data_paths = data_paths[split:]

# Write to files: one image path per line
with open(os.path.join(darknet_path, 'train.txt'), 'w') as f:
    f.write('\n'.join(train_data_paths))

with open(os.path.join(darknet_path, 'val.txt'), 'w') as f:
    f.write('\n'.join(val_data_paths))

> Set darknet config folder as read-only for preservation

In [None]:
# Read-only for a directory is r-x (0555): without the execute (search) bit
# that 0444 removes, non-root users cannot open the files inside the folder.
!chmod 0555 ./darknetyolo/cfg

In [None]:
class ModelConfiguration():
    """Generate the darknet configuration files for a custom YOLOv4 model.

    Attributes:
        input_size: value embedded in the generated `.cfg` file name.
        class_names: sequence of class names, ordered by class id.
        n_classes: number of classes (`len(class_names)`).
        config_files: names of the data files written by
            `create_config_files` (`obj.names`, `obj.data`).
        darknet_path: folder of the darknet checkout; all files are written
            below it.
        backup_path: folder for weight backups, or None when `backup=False`.
    """

    def __init__(self, input_size, class_names=None, darknet_path=None, backup=True):
        """Store the settings and, if requested, create the backup folder.

        Args:
            input_size: value embedded in the generated `.cfg` file name.
            class_names: sequence of class names; None means no classes.
            darknet_path: folder of the darknet checkout.
            backup: when True, create `<darknet_path>/backup` and reference
                it from `obj.data`.
        """
        self.input_size = input_size
        # Avoid `len(None)` when no class names are given
        self.class_names = class_names if class_names is not None else []
        self.n_classes = len(self.class_names)
        self.config_files = ['obj.names', 'obj.data']
        self.darknet_path = darknet_path
        # Always defined so `create_config_files` also works with backup=False
        self.backup_path = None
        if backup:
            self.backup_path = os.path.join(self.darknet_path, 'backup')
            try:
                # Create back-up folder (no error if it already exists)
                os.makedirs(self.backup_path, exist_ok=True)
            except OSError as err:
                # Stay best-effort, but report the problem instead of hiding it
                print('Warning: could not create backup folder `{}`: {}'.format(self.backup_path, err))

    def create_config_files(self):
        """Write `obj.names` and `obj.data` into the darknet folder.

        `obj.names` holds one class name per line. `obj.data` holds the number
        of classes and the absolute paths of the train/validation lists, the
        names file and (when enabled) the backup folder. Absolute paths keep
        the file valid whatever directory darknet is started from.
        """
        names_path, data_path = (
            os.path.join(self.darknet_path, config_file) for config_file in self.config_files
        )

        # Writing `obj.names`
        with open(names_path, 'w') as f:
            f.write('\n'.join(str(class_name) for class_name in self.class_names))

        # Writing `obj.data` (darknet reads the key `classes`, not `class`)
        entries = [
            f'classes={self.n_classes}',
            'train=' + os.path.abspath(os.path.join(self.darknet_path, 'train.txt')),
            'valid=' + os.path.abspath(os.path.join(self.darknet_path, 'val.txt')),
            'names=' + os.path.abspath(names_path),
        ]
        if self.backup_path is not None:
            entries.append('backup=' + os.path.abspath(self.backup_path))
        with open(data_path, 'w') as f:
            f.write('\n'.join(entries))

    def config_model(self, lines_with_contents):
        '''Write a customized copy of `cfg/yolov4-custom.cfg`.

        The template is copied line by line to
        `<darknet_path>/yolov4-<n_classes>c-<input_size>.cfg`; every line whose
        1-based number is a key of `lines_with_contents` is replaced by the
        associated content. The template itself is left untouched.

        Args:
            `lines_with_contents`: dictionary mapping a 1-based line number to
            the new content of that line (without trailing newline).

        Returns:
            Path of the generated configuration file.
        '''
        standard_yolocfg_path = os.path.join(self.darknet_path, 'cfg', 'yolov4-custom.cfg')
        yolocfg_path = os.path.join(self.darknet_path, f'yolov4-{self.n_classes}c-{self.input_size}.cfg')

        applied = set()
        # Both files are closed by the `with` block, so the output is flushed
        with open(standard_yolocfg_path, 'r') as template_f, open(yolocfg_path, 'w') as modified_f:
            for num_line, line in enumerate(template_f, start=1):
                if num_line in lines_with_contents:
                    modified_line = lines_with_contents[num_line]
                    print('Changed `{}` into `{}`'.format(line.strip(), modified_line.strip()))
                    # Keep the line ending of the line being replaced
                    line_ending = '\n' if line.endswith('\n') else ''
                    line = modified_line.rstrip('\n') + line_ending
                    applied.add(num_line)
                # Lines read from the template already carry their newline
                modified_f.write(line)

        # Line-number based patching is fragile: report numbers past the end
        missing = sorted(set(lines_with_contents) - applied)
        if missing:
            print('Warning: line(s) {} not found in `{}`'.format(missing, standard_yolocfg_path))

        return yolocfg_path

In [None]:
config = ModelConfiguration(
    INPUT_SIZE,
    class_names=label_map,
    darknet_path='./darknetyolo',
    backup=True
)

# Create `obj.data` and `obj.names`
config.create_config_files()

# The convolution in front of each YOLO block needs (classes + 5) * 3
# filters; derive it from the class count so it cannot go out of sync
# (14 classes -> 57).
N_FILTERS = (N_CLASSES + 5) * 3

# Customize config file for model: {line number in the template: new content}
lines_with_contents = {
    # Define number of classes
    970: f'classes={N_CLASSES}',
    1058: f'classes={N_CLASSES}',
    1146: f'classes={N_CLASSES}',

    # Batch size and Subdivisions
    6: 'batch=64',
    7: 'subdivisions=16',

    # Input size
    8: f'width={IMG_WIDTH}',
    9: f'height={IMG_HEIGHT}',

    # Max batches
    20: 'max_batches=20000',

    # Steps
    22: 'steps=16000,18000',

    # Burn-in
    19: 'burn_in=500',

    # Filters before YOLO blocks
    963: f'filters={N_FILTERS}',
    1051: f'filters={N_FILTERS}',
    1139: f'filters={N_FILTERS}',
}
config.config_model(lines_with_contents)

In [None]:
# Print line 1139 of the template config (`cfg/yolov4-custom.cfg`).
!sed -n '1139p' darknetyolo/cfg/yolov4-custom.cfg

# Build darknet

In [None]:
# Change the working directory to the darknet folder; relative paths in the
# following cells are resolved from here.
%cd /kaggle/working/darknetyolo
# Mark `./darknet` as executable.
!chmod +x ./darknet

In [None]:
# Show the version of the CUDA compiler.
!/usr/local/cuda/bin/nvcc --version

# Switch the OPENCV, GPU, CUDNN and CUDNN_HALF flags in the Makefile from 0 to 1.
!sed -i 's/OPENCV=0/OPENCV=1/' Makefile
!sed -i 's/GPU=0/GPU=1/' Makefile
!sed -i 's/CUDNN=0/CUDNN=1/' Makefile
!sed -i 's/CUDNN_HALF=0/CUDNN_HALF=1/' Makefile

> Make darknet

In [None]:
# Run `make` in the current directory, using the Makefile edited above.
!make

# Train the Object Detector

In [None]:
# The working directory is now the darknet folder, so the relative path
# `kaggle/working` does not exist; list the absolute working folder instead.
!ls /kaggle/working