## Labelling images to Yolo Format (Darknet)

In [31]:
import settings # all params for our configuration
import glob
import os
import xml.etree.ElementTree as et
from sklearn.model_selection import train_test_split
import numpy as np
# Class names to export; an object's class id in the label files is its index in this list.
classes = ["plate"]

def convert(size, box):
    """Turn a pixel-space box into normalised centre/extent form.

    Args:
        size: ``(width, height)`` of the image.
        box: ``(xmin, xmax, ymin, ymax)`` of the box (x pair first, then y pair).

    Returns:
        ``(x, y, w, h)``: the box centre (shifted by one pixel before scaling)
        and the box extent, each multiplied by the reciprocal of the matching
        image dimension.
    """
    scale_x = 1. / (size[0])
    scale_y = 1. / (size[1])
    x_lo, x_hi, y_lo, y_hi = box[0], box[1], box[2], box[3]
    centre_x = (x_lo + x_hi) / 2.0 - 1
    centre_y = (y_lo + y_hi) / 2.0 - 1
    return (
        centre_x * scale_x,
        centre_y * scale_y,
        (x_hi - x_lo) * scale_x,
        (y_hi - y_lo) * scale_y,
    )


def convert_annotation(input_file, output_file):
    """Write one label line per kept object of an XML annotation.

    The annotation must contain a ``<size>`` element (``<width>``/``<height>``)
    and may contain any number of ``<object>`` elements, each with ``<name>``
    and ``<bndbox>`` (``xmin``/``xmax``/``ymin``/``ymax``).

    Args:
        input_file: Path or open file object accepted by ``ElementTree.parse``.
        output_file: Writable text file object; receives lines of the form
            ``<class_id> <x> <y> <w> <h>``.

    Raises:
        Exception: If the annotation declares a zero image width or height.

    Objects whose name is not in the module-level ``classes`` list, or whose
    ``<difficult>`` flag is 1, are skipped.
    """
    root = et.parse(input_file).getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)
    if w == 0 or h == 0:
        # Format the message so the offending file actually appears in it.
        raise Exception('width and height of the annotation cannot be 0. {}'.format(input_file))

    for obj in root.iter('object'):
        cls = obj.find('name').text
        # <difficult> may be absent or empty; treat both as "not difficult".
        difficult = obj.findtext('difficult') or '0'
        if cls not in classes or int(difficult) == 1:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        # convert() expects the x pair first, then the y pair.
        b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text),
             float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
        bb = convert((w, h), b)
        output_file.write(str(cls_id) + " " + " ".join(str(a) for a in bb) + '\n')


def generate_darknet_config_files(images_input_dir, annotations_input_dir, labels_output_dir, cfg_output_dir,
                                  rel_path_prefix, test_size=0.05, random_state=None):
    """Convert every image's XML annotation to a label file and write the train/test lists.

    Args:
        images_input_dir: Directory searched (non-recursively) for ``*.jpg`` images.
        annotations_input_dir: Directory holding one ``<image stem>.xml`` per image.
        labels_output_dir: Directory receiving one ``<image stem>.txt`` per image;
            created if missing.
        cfg_output_dir: Directory receiving ``train.txt`` and ``test.txt``;
            created if missing.
        rel_path_prefix: String prepended to every image path written to the lists.
        test_size: Share of images put in ``test.txt`` (passed to ``train_test_split``).
        random_state: Seed passed to ``train_test_split``; set it for a reproducible split.

    Raises:
        Exception: If an image has no matching annotation file.
        FileExistsError: If a label file, ``train.txt`` or ``test.txt`` already
            exists (files are opened in exclusive-creation mode, so earlier
            exports are never overwritten).
    """
    os.makedirs(labels_output_dir, exist_ok=True)
    os.makedirs(cfg_output_dir, exist_ok=True)

    image_file_paths = []
    # Sorted so that a fixed random_state always yields the same split.
    for image_file_path in sorted(glob.glob(images_input_dir + '/*.jpg')):
        image_file_paths.append(rel_path_prefix + image_file_path)
        # splitext keeps dots inside the file name (e.g. "a.b.jpg" -> "a.b").
        filename = os.path.splitext(os.path.basename(image_file_path))[0]
        annotation_file_path = '{}/{}.xml'.format(annotations_input_dir, filename)
        if not os.path.exists(annotation_file_path):
            raise Exception('Annotation not found for an image {}'.format(image_file_path))
        label_file_path = '{}/{}.txt'.format(labels_output_dir, filename)

        # Context managers close both files even if the conversion raises.
        with open(annotation_file_path) as in_file, open(label_file_path, 'x') as out_file:
            convert_annotation(in_file, out_file)

    print("Splite our data\n")
    X_train, X_test = train_test_split(image_file_paths, test_size=test_size, shuffle=True,
                                       random_state=random_state)
    print("shape of X_train, X_test:", np.shape(X_train), np.shape(X_test) )
    with open(cfg_output_dir + '/train.txt', 'x') as train_file:
        train_file.writelines("%s\n" % path for path in X_train)
    print("save label as txt file")
    with open(cfg_output_dir + '/test.txt', 'x') as test_file:
        test_file.writelines("%s\n" % path for path in X_test)


In [17]:
# Export the label files and the train.txt / test.txt lists; every directory comes from `settings`.
generate_darknet_config_files(settings.images_input, settings.annotations_input_xml,
                              settings.labels_output, settings.config_output, "../Yolo_LicensePlateDetection/")

Splite our data

shape of X_train, X_test: (428,) (23,)
save label as txt file


## Multiclass: 0-9, a, b, d, h, p, m, j, waw, ww

In [37]:
import glob
import os
import settings
import xml.etree.ElementTree as et
from sklearn.model_selection import train_test_split

# Character classes; an object's class id in the label files is its index in this list.
classes = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
           "a", "b", "d", "h", "p","m", "j","waw", "ww"]

print("Number of classes: ", len(classes))
def convert(size, box):
    """Turn a pixel-space box into normalised centre/extent form.

    Args:
        size: ``(width, height)`` of the image.
        box: ``(xmin, xmax, ymin, ymax)`` of the box (x pair first, then y pair).

    Returns:
        ``(x, y, w, h)``: the box centre (shifted by one pixel before scaling)
        and the box extent, each multiplied by the reciprocal of the matching
        image dimension.
    """
    scale_x = 1. / (size[0])
    scale_y = 1. / (size[1])
    x_lo, x_hi, y_lo, y_hi = box[0], box[1], box[2], box[3]
    centre_x = (x_lo + x_hi) / 2.0 - 1
    centre_y = (y_lo + y_hi) / 2.0 - 1
    return (
        centre_x * scale_x,
        centre_y * scale_y,
        (x_hi - x_lo) * scale_x,
        (y_hi - y_lo) * scale_y,
    )

def convert_annotation(input_file, output_file):
    """Write one label line per kept object of an XML annotation.

    The annotation must contain a ``<size>`` element (``<width>``/``<height>``)
    and may contain any number of ``<object>`` elements, each with ``<name>``
    and ``<bndbox>`` (``xmin``/``xmax``/``ymin``/``ymax``).

    Args:
        input_file: Path or open file object accepted by ``ElementTree.parse``.
        output_file: Writable text file object; receives lines of the form
            ``<class_id> <x> <y> <w> <h>``.

    Raises:
        Exception: If the annotation declares a zero image width or height.

    Objects whose name is not in the module-level ``classes`` list, or whose
    ``<difficult>`` flag is 1, are skipped.
    """
    root = et.parse(input_file).getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)
    if w == 0 or h == 0:
        # Format the message so the offending file actually appears in it.
        raise Exception('width and height of the annotation cannot be 0. {}'.format(input_file))

    for obj in root.iter('object'):
        cls = obj.find('name').text
        # <difficult> may be absent or empty; treat both as "not difficult".
        difficult = obj.findtext('difficult') or '0'
        if cls not in classes or int(difficult) == 1:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        # convert() expects the x pair first, then the y pair.
        b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text),
             float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
        bb = convert((w, h), b)
        output_file.write(str(cls_id) + " " + " ".join(str(a) for a in bb) + '\n')


def generate_darknet_config_files(images_input_dir, annotations_input_dir, labels_output_dir, cfg_output_dir,
                                  rel_path_prefix, test_size=0.05, random_state=None):
    """Convert every image's XML annotation to a label file and write the train/test lists.

    Args:
        images_input_dir: Directory searched (non-recursively) for ``*.jpg`` images.
        annotations_input_dir: Directory holding one ``<image stem>.xml`` per image.
        labels_output_dir: Directory receiving one ``<image stem>.txt`` per image;
            created if missing.
        cfg_output_dir: Directory receiving ``train.txt`` and ``test.txt``;
            created if missing.
        rel_path_prefix: String prepended to every image path written to the lists.
        test_size: Share of images put in ``test.txt`` (passed to ``train_test_split``).
        random_state: Seed passed to ``train_test_split``; set it for a reproducible split.

    Raises:
        Exception: If an image has no matching annotation file.
        FileExistsError: If a label file, ``train.txt`` or ``test.txt`` already
            exists (files are opened in exclusive-creation mode, so earlier
            exports are never overwritten).
    """
    os.makedirs(labels_output_dir, exist_ok=True)
    os.makedirs(cfg_output_dir, exist_ok=True)

    image_file_paths = []
    # Sorted so that a fixed random_state always yields the same split.
    for image_file_path in sorted(glob.glob(images_input_dir + '/*.jpg')):
        image_file_paths.append(rel_path_prefix + image_file_path)
        # splitext keeps dots inside the file name (e.g. "a.b.jpg" -> "a.b").
        filename = os.path.splitext(os.path.basename(image_file_path))[0]
        annotation_file_path = '{}/{}.xml'.format(annotations_input_dir, filename)
        if not os.path.exists(annotation_file_path):
            raise Exception('Annotation not found for an image {}'.format(image_file_path))
        label_file_path = '{}/{}.txt'.format(labels_output_dir, filename)

        # Context managers close both files even if the conversion raises.
        with open(annotation_file_path) as in_file, open(label_file_path, 'x') as out_file:
            convert_annotation(in_file, out_file)

    X_train, X_test = train_test_split(image_file_paths, test_size=test_size, shuffle=True,
                                       random_state=random_state)
    with open(cfg_output_dir + '/train.txt', 'x') as train_file:
        train_file.writelines("%s\n" % path for path in X_train)

    with open(cfg_output_dir + '/test.txt', 'x') as test_file:
        test_file.writelines("%s\n" % path for path in X_test)


Number of classes:  19


In [30]:

# Export the character label files and the train.txt / test.txt lists; every directory comes from `settings`.
generate_darknet_config_files(settings.images_input_character, settings.annotations_input_xml_character,
                              settings.labels_output_character, settings.config_output_character, "../Yolo_Character_Recognize/")

## Crop Images

In [29]:
from PIL import Image
import ast
import os
import cv2
import os
import glob
from shutil import copyfile
import xml.etree.ElementTree as ET

original_file = '/home/koublal/Downloads/moroccoai-data-challenge-edition-001/train_charcter_detection/image_all/'
path_label='/home/koublal/Downloads/moroccoai-data-challenge-edition-001/train_charcter_detection/label_xml/*.xml'
dst = '/home/koublal/Downloads/moroccoai-data-challenge-edition-001/train_charcter_detection/image/'

# Copy every image named by an annotation's <filename> element from
# `original_file` into `dst`.
for xml_file in glob.glob(path_label):
    # copyfile does not create missing parent directories, so make sure the
    # destination exists before the first copy.
    os.makedirs(dst, exist_ok=True)
    root = ET.parse(xml_file).getroot()
    filename = root.find('filename').text
    print(filename)
    copyfile(os.path.join(original_file, filename), os.path.join(dst, filename))


67_151.jpg
305_347.jpg
6_101.jpg
12_107.jpg
426_450.jpg
31_121.jpg
8_103.jpg
21_112.jpg
30_120.jpg
62_147.jpg
52_14.jpg
45_133.jpg
68_151.jpg
56_142.jpg
27_118.jpg
16_109.jpg
75_156.jpg
55_142.jpg
57_143.jpg
464_79.jpg
17_11.jpg
54_141.jpg
36_126.jpg
65_15.jpg
20_111.jpg
33_123.jpg
29_12.jpg
43_131.jpg
19_110.jpg
39_128.jpg
79_16.jpg
51_139.jpg
41_13.jpg
24_115.jpg
5_100.jpg
64_149.jpg
32_122.jpg
4_10.jpg
72_153.jpg
0_0.jpg
38_127.jpg
13_108.jpg
7_102.jpg
50_138.jpg
458_73.jpg
71_153.jpg
381_412.jpg
423_448.jpg
69_152.jpg
40_129.jpg
46_134.jpg
380_411.jpg
61_146.jpg
10_105.jpg
25_116.jpg
59_144.jpg
44_132.jpg
77_158.jpg
26_117.jpg
78_159.jpg
457_72.jpg
47_135.jpg
37_127.jpg
48_136.jpg
463_78.jpg
49_137.jpg
18_110.jpg
9_104.jpg
28_119.jpg
35_125.jpg
22_113.jpg
60_145.jpg
70_152.jpg
74_155.jpg
63_148.jpg
14_108.jpg
42_130.jpg
58_143.jpg
15_109.jpg
11_106.jpg
53_140.jpg
66_150.jpg
34_124.jpg
2_1.jpg
23_114.jpg
76_157.jpg


In [34]:
from PIL import Image
import ast
import os
import cv2
import os
import glob
import xml.etree.ElementTree as ET

# Image directory the annotations refer to, and the root under which the crops
# are written (one sub-folder per class name).
original_file = '/home/koublal/Downloads/moroccoai-data-challenge-edition-001/train/' # your images directory
dst = '/home/koublal/Downloads/moroccoai-data-challenge-edition-001/train_charcter_detection/src/'


def check_folder_exists(path):
    """Create directory ``path`` (and any missing parents) if it does not exist yet.

    Prints ``create <path>`` when the directory had to be made.
    """
    if not os.path.exists(path):
        # exist_ok=True tolerates the directory appearing between the check and
        # this call. The previous handler compared against `errno.EEXIST`
        # without `errno` being imported, so that case raised NameError.
        os.makedirs(path, exist_ok=True)
        print('create ' + path)




# One record per annotated object, every field kept as the XML text:
# [filename, width, height, class_name, xmin, ymin, xmax, ymax]
seed_arr = []
for xml_file in glob.glob('/home/koublal/Downloads/moroccoai-data-challenge-edition-001/train_annotation_xml/*.xml'):  # your xml directory
    root = ET.parse(xml_file).getroot()
    filename = root.find('filename').text

    # NOTE: if an annotation has no <size> element, width/height keep whatever
    # the previous file set (or are undefined for the first file).
    for type_tag in root.findall('size'):
        width = type_tag.find('width').text
        height = type_tag.find('height').text

    for type_tag in root.findall('object'):
        class_name = type_tag.find('name').text
        xmin = type_tag.find('bndbox/xmin').text
        ymin = type_tag.find('bndbox/ymin').text
        xmax = type_tag.find('bndbox/xmax').text
        ymax = type_tag.find('bndbox/ymax').text
        seed_arr.append([filename, width, height, class_name, xmin, ymin, xmax, ymax])

# Plain list sort: records are ordered by their string fields, filename first.
seed_arr.sort()

# Crop each annotated box and save it as <dst>/<class_name>/<index>_<filename>.
for index, (filename, width, height, class_name, xmin, ymin, xmax, ymax) in enumerate(seed_arr):
    load_img_path = os.path.join(original_file, filename)

    save_class_path = os.path.join(dst, class_name)
    check_folder_exists(save_class_path)
    save_img_path = os.path.join(save_class_path, str(index)+'_'+filename)

    img = cv2.imread(load_img_path)
    if img is None:
        # cv2.imread signals a missing/unreadable file by returning None;
        # fail with the path instead of an opaque TypeError on the slice below.
        raise OSError('could not read image ' + load_img_path)
    # Rows are indexed by y, columns by x.
    crop_img = img[int(ymin):int(ymax), int(xmin):int(xmax)]
    cv2.imwrite(save_img_path, crop_img)
    print('save ' + save_img_path)


save /home/koublal/Downloads/moroccoai-data-challenge-edition-001/train_charcter_detection/src/plate/0_0.jpg
save /home/koublal/Downloads/moroccoai-data-challenge-edition-001/train_charcter_detection/src/plate/1_0.jpg
save /home/koublal/Downloads/moroccoai-data-challenge-edition-001/train_charcter_detection/src/plate/2_1.jpg
save /home/koublal/Downloads/moroccoai-data-challenge-edition-001/train_charcter_detection/src/plate/3_1.jpg
save /home/koublal/Downloads/moroccoai-data-challenge-edition-001/train_charcter_detection/src/plate/4_10.jpg
save /home/koublal/Downloads/moroccoai-data-challenge-edition-001/train_charcter_detection/src/plate/5_100.jpg
save /home/koublal/Downloads/moroccoai-data-challenge-edition-001/train_charcter_detection/src/plate/6_101.jpg
save /home/koublal/Downloads/moroccoai-data-challenge-edition-001/train_charcter_detection/src/plate/7_102.jpg
save /home/koublal/Downloads/moroccoai-data-challenge-edition-001/train_charcter_detection/src/plate/8_103.jpg
save /home

In [35]:
from PIL import Image
import ast
import os
import cv2
import os
import glob
import xml.etree.ElementTree as ET

# Image directory the annotations refer to, and the root under which the crops
# are written (one sub-folder per class name).
original_file = '/home/koublal/Downloads/moroccoai-data-challenge-edition-001/train_charcter_detection/image/' # your images directory
dst = '/home/koublal/Downloads/moroccoai-data-challenge-edition-001/train_charcter_detection/train_charcter_detection/classes/'


def check_folder_exists(path):
    """Create directory ``path`` (and any missing parents) if it does not exist yet.

    Prints ``create <path>`` when the directory had to be made.
    """
    if not os.path.exists(path):
        # exist_ok=True tolerates the directory appearing between the check and
        # this call. The previous handler compared against `errno.EEXIST`
        # without `errno` being imported, so that case raised NameError.
        os.makedirs(path, exist_ok=True)
        print('create ' + path)




# One record per annotated object, every field kept as the XML text:
# [filename, width, height, class_name, xmin, ymin, xmax, ymax]
seed_arr = []
for xml_file in glob.glob('/home/koublal/Downloads/moroccoai-data-challenge-edition-001/train_charcter_detection/label_xml/*.xml'):  # your xml directory
    root = ET.parse(xml_file).getroot()
    filename = root.find('filename').text

    # NOTE: if an annotation has no <size> element, width/height keep whatever
    # the previous file set (or are undefined for the first file).
    for type_tag in root.findall('size'):
        width = type_tag.find('width').text
        height = type_tag.find('height').text

    for type_tag in root.findall('object'):
        class_name = type_tag.find('name').text
        xmin = type_tag.find('bndbox/xmin').text
        ymin = type_tag.find('bndbox/ymin').text
        xmax = type_tag.find('bndbox/xmax').text
        ymax = type_tag.find('bndbox/ymax').text
        seed_arr.append([filename, width, height, class_name, xmin, ymin, xmax, ymax])

# Plain list sort: records are ordered by their string fields, filename first.
seed_arr.sort()

# Crop each annotated box and save it as <dst>/<class_name>/<index>_<filename>.
for index, (filename, width, height, class_name, xmin, ymin, xmax, ymax) in enumerate(seed_arr):
    load_img_path = os.path.join(original_file, filename)

    save_class_path = os.path.join(dst, class_name)
    check_folder_exists(save_class_path)
    save_img_path = os.path.join(save_class_path, str(index)+'_'+filename)

    img = cv2.imread(load_img_path)
    if img is None:
        # cv2.imread signals a missing/unreadable file by returning None;
        # fail with the path instead of an opaque TypeError on the slice below.
        raise OSError('could not read image ' + load_img_path)
    # Rows are indexed by y, columns by x.
    crop_img = img[int(ymin):int(ymax), int(xmin):int(xmax)]
    cv2.imwrite(save_img_path, crop_img)
    print('save ' + save_img_path)


create /home/koublal/Downloads/moroccoai-data-challenge-edition-001/train_charcter_detection/train_charcter_detection/classes/0
save /home/koublal/Downloads/moroccoai-data-challenge-edition-001/train_charcter_detection/train_charcter_detection/classes/0/0_0_0.jpg
create /home/koublal/Downloads/moroccoai-data-challenge-edition-001/train_charcter_detection/train_charcter_detection/classes/1
save /home/koublal/Downloads/moroccoai-data-challenge-edition-001/train_charcter_detection/train_charcter_detection/classes/1/1_0_0.jpg
create /home/koublal/Downloads/moroccoai-data-challenge-edition-001/train_charcter_detection/train_charcter_detection/classes/5
save /home/koublal/Downloads/moroccoai-data-challenge-edition-001/train_charcter_detection/train_charcter_detection/classes/5/2_0_0.jpg
create /home/koublal/Downloads/moroccoai-data-challenge-edition-001/train_charcter_detection/train_charcter_detection/classes/6
save /home/koublal/Downloads/moroccoai-data-challenge-edition-001/train_charcter