# Convert VOC to yolo format

In [15]:
# Based on https://github.com/pjreddie/darknet/blob/master/scripts/voc_label.py


import xml.etree.ElementTree as ET
import pickle
import os
import random
import shutil
from os import listdir, getcwd
from os.path import join



def convert(size, box):
    """Turn a VOC pixel box into a normalised (x, y, w, h) tuple.

    size -- (image_width, image_height) in pixels.
    box  -- (xmin, xmax, ymin, ymax); note that both x values come first.

    Returns (x_center, y_center, width, height), where the x/width values
    are scaled by 1/image_width and the y/height values by 1/image_height.
    The centre is shifted by one pixel before scaling (presumably because
    VOC coordinates are 1-based, as in the darknet script this is based on).
    """
    img_w, img_h = size[0], size[1]
    x_min, x_max, y_min, y_max = box[0], box[1], box[2], box[3]

    scale_x = 1. / img_w
    scale_y = 1. / img_h

    center_x = (x_min + x_max) / 2.0 - 1
    center_y = (y_min + y_max) / 2.0 - 1
    box_w = x_max - x_min
    box_h = y_max - y_min

    return (center_x * scale_x, center_y * scale_y, box_w * scale_x, box_h * scale_y)

def convert_annotation(img_file, classes, voc_annotations_path, yolo_annotations_path):
    """Write the YOLO label file matching one image's VOC XML annotation.

    img_file             -- image file name; everything before its last dot
                            is the base name shared by the .xml and .txt files.
    classes              -- list of class names; the index of a name in this
                            list is the class id written to the label file.
    voc_annotations_path -- directory containing ``<base name>.xml``.
    yolo_annotations_path -- directory where ``<base name>.txt`` is written.

    One line ``<class id> <x> <y> <w> <h>`` is written per object. Objects
    whose name is not in ``classes`` or that are flagged ``difficult`` == 1
    are skipped. An object without a ``<difficult>`` element is treated as
    not difficult.
    """
    # Same result as '.'.join(img_file.split('.')[:-1]): drop the last extension.
    img_file_name = img_file.rpartition('.')[0]
    xml_path = os.path.join(voc_annotations_path, img_file_name + '.xml')
    txt_path = os.path.join(yolo_annotations_path, img_file_name + '.txt')

    # Parse first: a missing or malformed XML must not leave an empty label file behind.
    root = ET.parse(xml_path).getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    # The context manager guarantees the labels are flushed and the handle released.
    with open(txt_path, 'w') as out_file:
        for obj in root.iter('object'):
            # Missing or empty <difficult> counts as "not difficult".
            difficult = obj.findtext('difficult') or '0'
            cls = obj.find('name').text
            if cls not in classes or int(difficult) == 1:
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            # convert() expects the box as (xmin, xmax, ymin, ymax).
            b = (
                float(xmlbox.find('xmin').text),
                float(xmlbox.find('xmax').text),
                float(xmlbox.find('ymin').text),
                float(xmlbox.find('ymax').text),
            )
            bb = convert((w, h), b)
            out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
        

In [None]:
# Download the face detection dataset (images plus VOC annotations) and unzip it into /content/
%cd /content/
! wget https://s3-eu-west-1.amazonaws.com/training-dl/face_detection_dataset.zip
! unzip face_detection_dataset.zip


In [24]:
# Inputs: for each split, a folder of images and a folder of VOC XML annotations,
# plus the list of class names.

# Darknet checkout; the cfg template is copied from its cfg/ folder further down.
darknet_path = './darknet'


# Training split
train_images_path = './face_detection_dataset/train/images'
train_voc_annotations_path = './face_detection_dataset/train/annotations'

# Validation split (taken from the dataset's test folder)
val_images_path = './face_detection_dataset/test/images'
val_voc_annotations_path = './face_detection_dataset/test/annotations'


# Class names; the position of a name in this list is the class id written to the labels.
classes = ["Face"]

# The cells below generate: the label files, voc.data, voc.names, train.txt and val.txt


In [32]:
# Create the destination dir and the sub-directories used by the following cells
destination_path = './yolo_faces_tiny'
relative_path_from_darknet = '../yolo_faces_tiny'

# Label files are written into the same 'images' folder the images are copied to.
yolo_annotations_path = os.path.join(destination_path, 'images')

# exist_ok=True makes the cell safe to re-run and avoids the check-then-create race.
# makedirs also creates destination_path itself when it is missing.
for _required_dir in (
    destination_path,
    yolo_annotations_path,
    os.path.join(destination_path, 'backup'),
):
    os.makedirs(_required_dir, exist_ok=True)



In [33]:
def copytree(src, dst, symlinks=False, ignore=None):
    """Copy every entry of directory ``src`` into the existing directory ``dst``.

    Plain files are copied with ``shutil.copy2``; sub-directories are copied
    recursively with ``shutil.copytree``, which receives ``symlinks`` and
    ``ignore`` unchanged.
    """
    for entry in os.listdir(src):
        source_path, target_path = os.path.join(src, entry), os.path.join(dst, entry)
        if not os.path.isdir(source_path):
            shutil.copy2(source_path, target_path)
            continue
        shutil.copytree(source_path, target_path, symlinks, ignore)

In [34]:
# Copy the whole train images dir into the destination 'images' folder.
# TODO: copy only files with a supported image extension.
copytree(train_images_path, os.path.join(destination_path, 'images'))

# File names found in the train images dir
images_train = os.listdir(train_images_path)

# Keep only files with a supported image extension (case-insensitive)
images_train = [img_name for img_name in images_train if img_name.split('.')[-1].lower() in ('jpg', 'jpeg', 'png')]

# Lower-cased annotation file names; a set keeps the lookup in the loop below O(1)
annotations_train = {f.lower() for f in os.listdir(train_voc_annotations_path)}

# Create the train labels and collect the image paths for train.txt
train_files_list = []
for img_file in images_train:
    img_file_name = '.'.join(img_file.split('.')[:-1])
    if (img_file_name + '.xml').lower() in annotations_train:  # only images that have an annotation file
        convert_annotation(img_file, classes, train_voc_annotations_path, os.path.join(destination_path, 'images'))
        # Paths in train.txt are written relative to the darknet folder.
        train_files_list.append(os.path.join(relative_path_from_darknet, 'images', img_file))

with open(os.path.join(destination_path, 'train.txt'), 'w') as f:
    f.writelines("%s\n" % item for item in train_files_list)


In [39]:
# Copy the whole val images dir into the destination 'images' folder.
# TODO: copy only files with a supported image extension.
copytree(val_images_path, os.path.join(destination_path, 'images'))

# File names found in the val images dir
images_val = os.listdir(val_images_path)

# Keep only files with a supported image extension (case-insensitive)
images_val = [img_name for img_name in images_val if img_name.split('.')[-1].lower() in ('jpg', 'jpeg', 'png')]

# Lower-cased annotation file names; a set keeps the lookup in the loop below O(1)
annotations_val = {f.lower() for f in os.listdir(val_voc_annotations_path)}


# Create the val labels and collect the image paths for val.txt
val_files_list = []
for img_file in images_val:
    img_file_name = '.'.join(img_file.split('.')[:-1])
    if (img_file_name + '.xml').lower() in annotations_val:  # only images that have an annotation file
        convert_annotation(img_file, classes, val_voc_annotations_path, os.path.join(destination_path, 'images'))
        # Paths in val.txt are written relative to the darknet folder.
        val_files_list.append(os.path.join(relative_path_from_darknet, 'images', img_file))

with open(os.path.join(destination_path, 'val.txt'), 'w') as f:
    f.writelines("%s\n" % item for item in val_files_list)


In [12]:
# Write voc.names: one class name per line, in class-id order

with open(os.path.join(destination_path, 'voc.names'), 'w') as labels_file:
    labels_file.writelines(c + '\n' for c in classes)


In [9]:
# Write voc.data: the "key = value" settings file passed to `darknet detector train`

with open(os.path.join(destination_path, 'voc.data'), 'w') as data_file:
    # All paths are relative to the darknet folder; the last line has no trailing newline.
    data_file.write('\n'.join([
        'classes = '+str(len(classes)),
        'train = '+os.path.join(relative_path_from_darknet,'train.txt'),
        'valid = '+os.path.join(relative_path_from_darknet,'val.txt'),
        'names = '+os.path.join(relative_path_from_darknet,'voc.names'),
        'backup = '+os.path.join(relative_path_from_darknet,'backup'),
    ]))


In [16]:
# Copy the cfg template from darknet; it still has to be edited by hand afterwards

# Tiny model template -> <destination_path>/yolo-obj.cfg
shutil.copyfile(
    src=os.path.join(darknet_path, 'cfg', 'yolov3-tiny_obj.cfg'),
    dst=os.path.join(destination_path, 'yolo-obj.cfg'),
)

# To use the full model instead, copy the cfg of the full model:
#shutil.copyfile(os.path.join(darknet_path,'cfg','yolov3.cfg'), os.path.join(destination_path,'yolo-obj.cfg'))


'/home/jorge/projects/training/yolo_faces/yolo-obj.cfg'

In [None]:
# Configure yolo-obj.cfg
# Based on https://github.com/AlexeyAB/darknet#how-to-train-to-detect-your-custom-objects

#change line batch to batch=64
#change line subdivisions to subdivisions=8
#change line max_batches to (classes*2000), f.e. max_batches=6000 if you train for 3 classes
#change line steps to 80% and 90% of max_batches, f.e. steps=4800,5400
#change line classes=80 to your number of objects in each of 3 [yolo]-layers: LINES: 610, 696, 783
#change [filters=255] to filters=(classes + 5)x3 in the 3 [convolutional] before each [yolo] layer. Lines: 603, 689, 776

#So if classes=1 then should be filters=18. If classes=2 then write filters=21.

#Generally, filters depends on the number of classes, coords and masks,
#  i.e. filters=(classes + coords + 1)*<number of mask>, where mask is the list of anchor indices.
#  If mask is absent, then filters=(classes + coords + 1)*num

In [None]:
# Train the tiny model (the cfg was copied to yolo_faces_tiny/yolo-obj.cfg above)
# cd darknet
# ./darknet detector train ../yolo_faces_tiny/voc.data ../yolo_faces_tiny/yolo-obj.cfg yolov3-tiny.conv.15 -dont_show



In [None]:
# Train the full yolo model
# cd darknet
# ./darknet detector train ../yolo_faces/voc.data ../yolo_faces/yolo-obj.cfg darknet53.conv.74
