# Детектор номерных знаков

In [14]:
import os
import glob

import matplotlib.pyplot as plt
import numpy as np
import PIL
import tensorflow as tf
import keras.callbacks
from keras import backend as K
from keras.layers import Input, Lambda, Conv2D
from keras.models import load_model, Model
from keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping

from yad2k.models.keras_yolo import (preprocess_true_boxes, yolo_body,
                                     yolo_eval, yolo_head, yolo_loss)
from yad2k.utils.draw_boxes import draw_boxes
# import voc_utils as data_parser
import random as rd
import cv2

In [4]:
%matplotlib inline

Функция получения классов. В нашем случае будет всего один класс - Автомобильные номера

In [5]:
def get_classes(classes_path):
    """Read the class names listed in a text file, one name per line.

    Leading and trailing whitespace (including the newline) is stripped from
    every line. Lines are returned in file order; empty lines are kept as
    empty strings.
    """
    with open(classes_path) as names_file:
        return [raw_line.strip() for raw_line in names_file]

Функция загрузки якорных ящиков (anchor boxes)

In [6]:
def get_anchors(anchors_path):
    """Load anchor boxes from the first line of a text file.

    The first line must hold comma-separated numbers. They are parsed as
    floats and returned as a NumPy array reshaped to two columns (one row per
    pair of consecutive values). Any further lines in the file are ignored.
    """
    with open(anchors_path) as anchors_file:
        first_line = anchors_file.readline()
    values = np.array([float(token) for token in first_line.split(',')])
    return values.reshape(-1, 2)


Получение имени файла без папок и расширения (т.е. его id)

In [7]:
def get_ids(voc_path):
    """Collect the identifiers of all ``*.JPEG`` images in a directory.

    Parameters
    ----------
    voc_path : str
        Directory that is searched (non-recursively) for ``*.JPEG`` files.

    Returns
    -------
    ids : list of str
        File names with the directory and the extension removed, in the
        order ``glob`` yields them.
    """
    pattern = os.path.join(voc_path, "*.JPEG")
    return [
        os.path.splitext(os.path.basename(image_file))[0]
        for image_file in glob.iglob(pattern)
    ]

Функция получения координат боксов из файла

In [8]:
def get_boxes_for_id(voc_path, image_id):
    """Read the bounding-box annotations stored for one image.

    The annotation file is ``<voc_path>/obj/<image_id>.txt``. Every non-blank
    line must hold at least five whitespace-separated fields:
    ``class_id x_center y_center box_width box_height``.

    Parameters
    ----------
    voc_path : str
        Dataset root directory (the one that contains ``obj/``).
    image_id : str
        Image identifier, i.e. the file name without directory and extension.

    Returns
    -------
    boxes : numpy.ndarray
        One row per parsed line, columns ordered
        ``x_center, y_center, box_width, box_height, class_id``.
        An empty array is returned when no line could be parsed.

    Notes
    -----
    Blank lines are skipped silently. A malformed line is skipped as well
    (best effort), but a message naming the file and the line is printed so
    that broken annotations do not go unnoticed.
    """
    fname = os.path.join(voc_path, 'obj/{}.txt'.format(image_id))

    boxes = []
    with open(fname) as txt_file:
        for line_no, line in enumerate(txt_file, start=1):
            # split() without arguments tolerates repeated spaces, tabs and
            # the trailing newline; a blank line yields an empty list.
            elems = line.split()
            if not elems:
                continue
            try:
                bbox = [
                    float(elems[1]),
                    float(elems[2]),
                    float(elems[3]),
                    float(elems[4]),
                    int(elems[0]),
                ]
            except (IndexError, ValueError) as err:
                print("problem with get boxes from file: {} (line {}: {!r}): {}".format(
                    fname, line_no, line.strip(), err))
                continue
            boxes.append(bbox)

    return np.array(boxes)

Функция определения масок

In [9]:
def get_detector_mask(boxes, anchors):
    '''
    Build the training targets for every entry of `boxes`.

    Each entry is passed, together with `anchors` and a fixed [416, 416]
    image size, to `preprocess_true_boxes`, which returns a
    (detectors_mask, matching_true_boxes) pair. The pairs are gathered into
    two NumPy arrays that are returned in that order.
    '''
    masks = []
    matched_boxes = []
    for sample_boxes in boxes:
        mask, matched = preprocess_true_boxes(sample_boxes, anchors, [416, 416])
        masks.append(mask)
        matched_boxes.append(matched)

    return np.array(masks), np.array(matched_boxes)

Загрузчик и аугментатор изображений и координат расположений

In [10]:
class TextImageGenerator(keras.callbacks.Callback):
    """Endless batch generator of images and box targets for YOLO training.

    Images are read from ``<images_dir>/obj/<id>.JPEG`` and annotations via
    ``get_boxes_for_id``. The class derives from ``keras.callbacks.Callback``
    so that an instance can also be placed in a ``callbacks`` list; it does
    not override any callback hook.

    Parameters
    ----------
    images_dir : str
        Dataset root directory (the one that contains ``obj/``).
    samples : list of str
        Image identifiers to draw from.
    batch_size : int
        Number of samples per yielded batch.
    img_w, img_h : int
        Size every image is resized to.
    anchors : numpy.ndarray
        Anchor boxes forwarded to ``get_detector_mask``.
    classes : list of str
        Class names (stored, not used by the generator itself).
    """

    def __init__(self, images_dir, samples, batch_size, img_w, img_h, anchors, classes):
        super(TextImageGenerator, self).__init__()
        self.images_dir = images_dir
        self.img_h = img_h
        self.img_w = img_w
        self.samples = samples
        self.batch_size = batch_size
        self.anchors = anchors
        self.classes = classes

        self.n = len(self.samples)
        # Samples are visited through a shuffled index list that is
        # reshuffled every time it has been exhausted.
        self.indexes = list(range(self.n))
        rd.shuffle(self.indexes)
        self.cur_index = 0

    def build_data(self, image_id):
        """Load one image and its boxes.

        Returns
        -------
        img : numpy.ndarray
            RGB image resized to (img_h, img_w), float32, scaled to [0, 1].
        boxes : numpy.ndarray
            Rows of ``x_center, y_center, width, height, class_id``.

        Raises
        ------
        IOError
            If the image file cannot be read.
        """
        image_path = os.path.join(self.images_dir, 'obj/{}.JPEG'.format(image_id))
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError("cannot read image: {}".format(image_path))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # The annotation reader is defined in this file; the `data_parser`
        # module the original referred to is never imported.
        boxes = get_boxes_for_id(self.images_dir, image_id)
        orig_h, orig_w = img.shape[:2]

        img = cv2.resize(img, (self.img_w, self.img_h))
        img = img.astype(np.float32)
        img /= 255
        # The image keeps the (height, width, channel) layout produced by
        # cv2.resize. Swapping the spatial axes here (as `img.T.transpose(1, 2, 0)`
        # did) would mirror the picture across its diagonal while the boxes
        # stay untouched, and the inference code feeds untransposed images.

        # NOTE(review): only width/height are rescaled, the centre is passed
        # through unchanged. If the annotation files hold coordinates relative
        # to the image size, this rescaling is presumably wrong - confirm
        # against the dataset format before changing it.
        scale_w = self.img_w / orig_w
        scale_h = self.img_h / orig_h
        for box in boxes:
            box[2] = box[2] * scale_w
            box[3] = box[3] * scale_h

        return img, boxes

    def get_output_size(self):
        # NOTE(review): `alphabet` is not defined anywhere in the visible
        # file, so calling this raises NameError. Nothing visible calls it;
        # it looks like a leftover from another generator - confirm and remove.
        return len(alphabet) + 1

    def next_sample(self):
        """Advance to the next shuffled sample and return ``build_data`` for it."""
        self.cur_index += 1
        if self.cur_index >= self.n:
            self.cur_index = 0
            rd.shuffle(self.indexes)

        return self.build_data(self.samples[self.indexes[self.cur_index]])

    def next_batch(self):
        """Yield ``(inputs, outputs)`` batches forever.

        ``inputs`` is ``[images, boxes, detectors_mask, matching_true_boxes]``;
        ``outputs`` is a dict with a zero vector under ``'yolo_loss'`` (the
        loss is computed inside the model, the target is a placeholder).
        """
        while True:
            images = []
            boxes_list = []
            max_boxes = 0
            for _ in range(self.batch_size):
                img, boxes = self.next_sample()

                images.append(img)
                boxes = boxes.reshape((-1, 5))
                boxes_list.append(boxes)
                max_boxes = max(max_boxes, boxes.shape[0])

            # Pad every sample with all-zero rows up to the largest box count
            # in the batch so the boxes stack into one rectangular array.
            for i, boxes in enumerate(boxes_list):
                missing = max_boxes - boxes.shape[0]
                if missing > 0:
                    zero_padding = np.zeros((missing, 5), dtype=np.float32)
                    boxes_list[i] = np.vstack((boxes, zero_padding))

            image_data, boxes_list = np.array(images), np.array(boxes_list)
            detectors_mask, matching_true_boxes = get_detector_mask(boxes_list, self.anchors)

            inputs = [image_data, boxes_list, detectors_mask, matching_true_boxes]
            outputs = {'yolo_loss': np.zeros([self.batch_size])}
            yield (inputs, outputs)
            

Функция создающая нашу модель

In [11]:
def create_model_tiny(anchors, class_names, load_pretrained=True, freeze_body=True):
    '''
    Build the detection network and its training wrapper.

    The network is the Keras model stored in model_data/tiny_yolo.h5 with its
    last layer replaced by a new 1x1 convolution sized for `anchors` and
    `class_names`.

    # Params:
    anchors: anchor boxes; len(anchors) sets the number of output channels
        and is passed on to the loss
    class_names: list of class names; only its length is used
    load_pretrained: when True, the "topless" weights file
        model_data/tiny_yolo_topless.h5 is created if missing and then loaded
        into the body
    freeze_body: whether or not to freeze all weights except for the last layer's

    # Returns:
    model_body: YOLO with new output layer
    model: YOLO with custom loss Lambda layer

    '''

    # NOTE(review): these shapes hard-code a 13x13 grid and 5 anchors -
    # presumably matching a 416x416 input and the anchors file; confirm.
    detectors_mask_shape = (13, 13, 5, 1)
    matching_boxes_shape = (13, 13, 5, 5)

    # Create model input layers.
    # NOTE(review): image_input is created but not used below; the models are
    # built on yolo_model.inputs instead.
    image_input = Input(shape=(416, 416, 3), name="image_input")
    boxes_input = Input(shape=(None, 5), name="boxes_input")
    detectors_mask_input = Input(shape=detectors_mask_shape, name="detectors_mask_input")
    matching_boxes_input = Input(shape=matching_boxes_shape, name="matching_boxes_input")

    # Create model body.
    # The body is everything up to (and including) the second-to-last layer
    # of the stored model.
    # NOTE(review): the body comes from an already saved model, so it
    # presumably carries that model's weights even when load_pretrained is
    # False - confirm whether that is intended.
    yolo_model = load_model(os.path.join('model_data', 'tiny_yolo.h5'))
    topless_yolo = Model(yolo_model.inputs , outputs=yolo_model.layers[-2].output)

    if load_pretrained:
        # Save topless yolo:
        topless_yolo_path = os.path.join('model_data', 'tiny_yolo_topless.h5')
        if not os.path.exists(topless_yolo_path):
            # First run: derive the topless weights file from the full model.
            print("CREATING TOPLESS WEIGHTS FILE")
            yolo_path = os.path.join('model_data', 'tiny_yolo.h5')
            model_body = load_model(yolo_path)
            model_body = Model(model_body.inputs, outputs=model_body.layers[-2].output)
            model_body.save_weights(topless_yolo_path, overwrite=True)
        topless_yolo.load_weights(topless_yolo_path)

    if freeze_body:
        # Only the new final layer created below stays trainable.
        for layer in topless_yolo.layers:
            layer.trainable = False
    # New output layer: len(anchors) * (5 + number of classes) channels.
    final_layer = Conv2D(len(anchors)*(5+len(class_names)), (1, 1), activation='linear', name="final_conv2d_layer")(topless_yolo.output)

    model_body = Model(yolo_model.inputs, final_layer)

    # Place model loss on CPU to reduce GPU memory usage.
    with tf.device('/cpu:0'):
        # TODO: Replace Lambda with custom Keras layer for loss.
        model_loss = Lambda(
            yolo_loss,
            output_shape=(1, ),
            name='yolo_loss',
            arguments={'anchors': anchors,
                       'num_classes': len(class_names)})([
                           model_body.output, boxes_input,
                           detectors_mask_input, matching_boxes_input
                       ])

    # Training model: its output is the value of the 'yolo_loss' layer.
    model = Model(
        [model_body.input, boxes_input, detectors_mask_input,
         matching_boxes_input], model_loss)

    return model_body, model

Функция тренировки нашей модели

In [12]:
def train(model_name, model, class_names, anchors, images_dir, images_ids, val_split=0.2):
    '''
    Retrain/fine-tune the model in three stages.

    Stage 1 trains the given `model` for 5 epochs and saves its weights to
    model_data/<model_name>_trained_stage_1.h5. Stages 2 and 3 train a freshly
    created, fully unfrozen model for 30 epochs each; stage 3 additionally
    uses early stopping and keeps the best weights in
    model_data/<model_name>_trained_stage_3_best.h5. The final weights are
    saved to model_data/<model_name>_trained_stage_3.h5. Training is logged
    with TensorBoard.

    # Params:
    model_name: tag used in the names of the weight files
    model: training model returned by create_model_tiny (loss as output)
    class_names: list of class names
    anchors: anchor boxes
    images_dir: dataset root directory (contains obj/)
    images_ids: list of image identifiers
    val_split: fraction of `images_ids` held out for validation; the amount
        is rounded down to a whole number of minibatches. When that rounds to
        zero, training runs without validation and 'loss' is monitored
        instead of 'val_loss'.
    '''
    minibatch_size = 32

    def compile_with_yolo_loss(net):
        # This is a hack to use the custom loss function in the last layer:
        # the model output already is the loss value, so it is passed through.
        net.compile(
            optimizer='adam', loss={
                'yolo_loss': lambda y_true, y_pred: y_pred
            })

    # Validation set size, rounded down to a multiple of the minibatch size.
    val_target = len(images_ids) * val_split
    val_words = int(val_target - (val_target % minibatch_size))

    # Hold the validation images out of the training set. Drawing both
    # streams from one generator would validate on training data and make
    # 'val_loss' (and everything monitoring it) meaningless.
    val_ids = list(images_ids[:val_words])
    train_ids = list(images_ids[val_words:])
    print("len train",len(train_ids))
    print("len val",val_words)

    logging = TensorBoard()

    img_gen = TextImageGenerator(images_dir, train_ids, minibatch_size, 416, 416, anchors, class_names)
    val_gen = None
    if val_ids:
        val_gen = TextImageGenerator(images_dir, val_ids, minibatch_size, 416, 416, anchors, class_names)
    monitor = 'val_loss' if val_gen is not None else 'loss'

    # Best weights go to their own file so that they never overwrite the
    # stage-1 weights saved below.
    checkpoint = ModelCheckpoint("model_data/{}_trained_stage_3_best.h5".format(model_name), monitor=monitor,
                                 save_weights_only=True, save_best_only=True)
    early_stopping = EarlyStopping(monitor=monitor, min_delta=0, patience=15, verbose=1, mode='auto')

    def fit_stage(net, epochs, extra_callbacks=()):
        # Run one training stage; validation arguments are only passed when
        # a validation set exists.
        validation = {}
        if val_gen is not None:
            validation = {
                'validation_data': val_gen.next_batch(),
                'validation_steps': val_words // minibatch_size,
            }
        net.fit_generator(generator=img_gen.next_batch(),
                          steps_per_epoch=img_gen.n // minibatch_size,
                          epochs=epochs,
                          callbacks=[logging, img_gen] + list(extra_callbacks),
                          **validation)

    # Stage 1: train the model that was passed in.
    compile_with_yolo_loss(model)
    fit_stage(model, epochs=5)
    model.save_weights('model_data/{}_trained_stage_1.h5'.format(model_name))

    # Stages 2 and 3 continue on a new, fully trainable model.
    # NOTE(review): the stage-1 weights are not loaded into this new model
    # (the load_weights call was already commented out in the original), so
    # the new final layer starts untrained here - confirm that is intended.
    model_body, model = create_model_tiny(anchors, class_names, load_pretrained=False, freeze_body=False)
    compile_with_yolo_loss(model)

    # Stage 2.
    fit_stage(model, epochs=30)

    # Stage 3: same model and optimizer state, now with best-weights
    # checkpointing and early stopping.
    fit_stage(model, epochs=30, extra_callbacks=[checkpoint, early_stopping])

    model.save_weights('model_data/{}_trained_stage_3.h5'.format(model_name))

Определим параметры и загрузим изображения

In [15]:
# Tag used in the names of the weight/model files written by later cells.
model_name = "tiny"
# Dataset root; it is expected to contain obj.names and the obj/ directory.
voc_path = os.path.expanduser("in/data/")

# Anchor boxes, class names and image identifiers shared by all later cells.
anchors = get_anchors(os.path.join('in', 'yolo_anchors.txt'))
class_names = get_classes(os.path.join(voc_path, "obj.names"))
images_ids = get_ids(os.path.join(voc_path, "obj/"))

In [None]:
# Install a TensorFlow session for Keras whose GPU option
# per_process_gpu_memory_fraction is set to 0.3.
from keras.backend.tensorflow_backend import set_session
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.3
set_session(tf.Session(config=config))

In [None]:
# Build the model with default settings (pretrained body, frozen except for
# the new final layer) and run the full training procedure.
model_body, model = create_model_tiny(anchors, class_names)
#model_name, model, class_names, anchors, images_dir, images_ids, val_split=0.2
train(
    model_name,
    model,
    class_names,
    anchors,
    voc_path,
    images_ids
)

## Тестируем что получилось

Функция отрисовки результата

In [None]:
# Rebuild the network and load the weights saved after training stage 1
# into the prediction model (model_body).
model_body, model = create_model_tiny(anchors, class_names)
model_body.load_weights('model_data/{}_trained_stage_1.h5'.format(model_name))

In [None]:
def draw(model_body, class_names, anchors, images_dir, image_data, save_all=True):
    '''
    Run the detector on images and display them with the predicted boxes.

    # Params:
    model_body: prediction model (YOLO with the new output layer)
    class_names: list of class names
    anchors: anchor boxes
    images_dir: dataset root directory; images are read from
        <images_dir>/obj/<id>.JPEG
    image_data: iterable of image identifiers
    save_all: currently unused (kept for backward compatibility)

    # Raises:
    IOError: if an image file cannot be read
    '''
    # Create output variables for prediction.
    yolo_outputs = yolo_head(model_body.output, anchors, len(class_names))
    input_image_shape = K.placeholder(shape=(2, ))
    boxes, scores, classes = yolo_eval(
        yolo_outputs, input_image_shape, score_threshold=0.07, iou_threshold=0.0)

    sess = K.get_session()  # TODO: Remove dependence on Tensorflow session.

    for image_id in image_data:
        image_path = os.path.join(images_dir, 'obj/{}.JPEG'.format(image_id))
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError("cannot read image: {}".format(image_path))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (416, 416))
        # Scale to [0, 1] exactly like the training generator does; feeding
        # raw 0-255 values gives the network inputs it was never trained on.
        # NOTE(review): draw_boxes presumably also expects a [0, 1] image
        # (crop_and_resize in this file rescales by 255) - confirm.
        img = img.astype(np.float32) / 255

        out_boxes, out_scores, out_classes = sess.run(
            [boxes, scores, classes],
            feed_dict={
                model_body.input: [img],
                input_image_shape: [img.shape[0], img.shape[1]],
                K.learning_phase(): 0
            })
        print('Found {} boxes for image.'.format(len(out_boxes)))
        print(out_boxes)

        # Plot image with predicted boxes.
        image_with_boxes = draw_boxes(img, out_boxes, out_classes,
                                      class_names, out_scores)

        # To display (pauses the program):
        plt.imshow(image_with_boxes, interpolation='nearest')
        plt.show()

In [None]:
# Load one sample image, convert it from OpenCV's BGR order to RGB, resize it
# to 416x416 and show it.
image_path = os.path.join(voc_path, 'obj/{}.JPEG'.format("image_0000"))
img = cv2.imread(image_path)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = cv2.resize(img, (416,416))
plt.imshow(img)

In [None]:
# Run the detector on the first image identifier only and display the result.
draw(model_body,
    class_names,
    anchors,
    voc_path,
    images_ids[:1],
    save_all=True)

Сохраним модель с весами

In [None]:
# Save the prediction model to model_data/<model_name>_yolo_finish.h5.
model_body.save(os.path.join('model_data', '{}_yolo_finish.h5'.format(model_name)))

## Функция для выгрузки в Андроид

In [None]:
def save_to_android(input_name, output_name, model_name, weights_name):
    """Export a saved Keras model as a TensorFlow graph plus checkpoint.

    The model file and the weights file are both looked up in ``model_data``.
    The graph definition of the current Keras session is written to
    ``tf-exports/yolo-tiny-v1.pbtxt`` and the session variables to
    ``tf-exports/yolo-tiny-v1.ckpt``; the export directory is created when it
    does not exist. ``input_name`` and ``output_name`` are accepted but not
    used by this function.
    """
    # Put Keras into inference mode before the model graph is built.
    K.set_learning_phase(0)

    keras_model = load_model(os.path.join('model_data', model_name), compile=False)
    keras_model.load_weights(os.path.join('model_data', weights_name))

    session = K.get_session()

    export_dir = 'tf-exports'
    if not os.path.exists(export_dir):
        os.makedirs(export_dir)

    graph_file = os.path.join(export_dir, 'yolo-tiny-v1.pbtxt')
    checkpoint_file = os.path.join(export_dir, 'yolo-tiny-v1.ckpt')

    tf.train.write_graph(session.graph_def, '.', graph_file)

    tf.train.Saver().save(session, save_path=checkpoint_file)

Сохраним данные о модели в protobuf для последующей выгрузки в Андроид

In [None]:
# Names of the graph's input and output nodes.
# NOTE(review): save_to_android accepts these two names but does not use
# them; they presumably matter for a later conversion step - confirm.
input_name = 'input_1' 
output_name = 'final_conv2d_layer/BiasAdd' 
# Export the saved model together with the best stage-3 weights.
save_to_android(input_name, 
                output_name,
                model_name='{}_yolo_finish.h5'.format(model_name), 
                weights_name='{}_trained_stage_3_best.h5'.format(model_name))

## Функция, нарезающая номерные знаки; пригодится для подготовки данных для распознавания

In [None]:
def crop_and_resize(index, image, boxes, box_classes, croped_output_path):
    """Cut every detected box out of an image and save it as a 416x416 PNG.

    Parameters
    ----------
    index : int
        Index of the image; used to build the output file names.
    image : numpy.ndarray
        Image with values scaled to [0, 1]; it is converted to 8-bit here.
    boxes : sequence
        One ``(top, left, bottom, right)`` entry per detection, in pixels.
    box_classes : sequence
        Class of every detection; only its length is used.
    croped_output_path : str
        Output directory; created when it does not exist.

    Notes
    -----
    The first detection is saved as ``<index>.png``; further detections of
    the same image are saved as ``<index>_<n>.png`` so that they do not
    overwrite each other.
    """
    # The file only does `import PIL`, which does not bring the bare name
    # `Image` into scope, so it is imported here.
    from PIL import Image

    image = Image.fromarray(np.floor(image * 255 + 0.5).astype('uint8'))

    if not os.path.exists(croped_output_path):
        os.makedirs(croped_output_path)

    for i, (box, _box_class) in enumerate(zip(boxes, box_classes)):
        top, left, bottom, right = box
        # Round to the nearest pixel and clamp to the image bounds.
        top = max(0, int(np.floor(top + 0.5)))
        left = max(0, int(np.floor(left + 0.5)))
        bottom = min(image.size[1], int(np.floor(bottom + 0.5)))
        right = min(image.size[0], int(np.floor(right + 0.5)))

        suffix = '' if i == 0 else '_{}'.format(i)
        out_file = os.path.join(croped_output_path, '{}{}.png'.format(index, suffix))
        print("croped and resized:", out_file)

        # Crop exactly the detected box. Shrinking the crop by the detection
        # index would cut pixels off every detection but the first.
        image_crop = image.crop((left, top, right, bottom))
        image_resize = image_crop.resize((416, 416), resample=Image.BICUBIC)
        image_resize.save(out_file)
        

In [None]:
def crop_and_save_numbers_licence(model, class_names, anchors, image_data, output_path="output_images", save_all=True, croped_output_path=None):
    '''
    Detect boxes on images, save the annotated images and optionally the crops.

    # Params:
    model: model whose input/output are used for prediction
    class_names: list of class names
    anchors: anchor boxes
    image_data: sequence of images; each one is fed to the model as a batch
        of one
    output_path: directory for the annotated images (created if missing);
        image number i is saved as <output_path>/<i>.png
    save_all: when False, only images with at least one detection are saved
    croped_output_path: when not None, every detection is additionally
        cropped and saved there via crop_and_resize
    '''
    # The file only does `import PIL`, which does not bring the bare name
    # `Image` into scope, so it is imported here.
    from PIL import Image

    # Give every image a leading batch axis of size one.
    image_data = np.array([np.expand_dims(image, axis=0)
                           for image in image_data])
    print("model.output", model.output)
    # Create output variables for prediction.
    yolo_outputs = yolo_head(model.output, anchors, len(class_names))
    input_image_shape = K.placeholder(shape=(2, ))
    boxes, scores, classes = yolo_eval(
        yolo_outputs, input_image_shape, score_threshold=0.07, iou_threshold=0.0)

    sess = K.get_session()  # TODO: Remove dependence on Tensorflow session.

    if not os.path.exists(output_path):
        os.makedirs(output_path)
    for i, image_batch in enumerate(image_data):
        out_boxes, out_scores, out_classes = sess.run(
            [boxes, scores, classes],
            feed_dict={
                model.input: image_batch,
                input_image_shape: [image_data.shape[2], image_data.shape[3]],
                K.learning_phase(): 0
            })
        print('Found {} boxes for image.'.format(len(out_boxes)))
        print(out_boxes)
        print("croped_output_path:",croped_output_path)

        # Plot image with predicted boxes.
        image_with_boxes = draw_boxes(image_batch[0], out_boxes, out_classes,
                                      class_names, out_scores)
        # Save the image:
        if save_all or (len(out_boxes) > 0):
            image = Image.fromarray(image_with_boxes)
            image.save(os.path.join(output_path, str(i) + '.png'))

        if croped_output_path is not None:
            crop_and_resize(i, image_batch[0], out_boxes, out_classes, croped_output_path)