## Link to the tutorial that I followed
https://towardsdatascience.com/getting-started-with-bounding-box-regression-in-tensorflow-743e22d0ccb3

In [1]:
import tensorflow as tf
import keras
import numpy as np
import cv2
import glob
from PIL import Image , ImageDraw
import os
import xmltodict

Using TensorFlow backend.


In [2]:
# Side length, in pixels, of the square images: every training image is resized
# to (input_dim, input_dim) and the same value is used for the model input shape.
input_dim = 228

In [3]:
# Paths of all JPEG training images; `images` starts empty and is filled by the
# loading loop in the next cell.
image_paths = glob.glob("training_images/*.jpg")
images = list()

In [4]:
# Start from an empty list so that re-running this cell does not append a
# second copy of every image.
images = []
for image_path in image_paths:
    # The context manager releases the file handle as soon as the pixels are read.
    with Image.open(image_path) as source_image:
        # Force 3 channels: grayscale / RGBA / palette files would otherwise give
        # arrays that do not match the (input_dim, input_dim, 3) model input.
        image = source_image.convert("RGB").resize((input_dim, input_dim))
    # Normalize pixel values from [0, 255] to [0, 1]
    image = np.asarray(image) / 255.0
    images.append(image)

In [6]:
# Pair every image with the annotation file that shares its base name, so that
# objects[i] / bndboxes[i] always describe images[i]. Two independent glob()
# calls give no such guarantee because glob returns paths in arbitrary order.
xml_paths = [os.path.splitext(image_path)[0] + ".xml" for image_path in image_paths]
objects = []
bndboxes = []
for path in xml_paths:
    # Close the file handle as soon as the XML has been parsed.
    with open(path, 'rb') as xml_file:
        annotation = xmltodict.parse(xml_file)['annotation']

    annotated_object = annotation['object']
    # xmltodict returns a list when <object> is repeated. The targets hold one
    # box per image, so only the first annotated object is used.
    if isinstance(annotated_object, list):
        annotated_object = annotated_object[0]
    objects.append(annotated_object['name'])

    # The images are resized to input_dim x input_dim, so box coordinates must be
    # normalised by the ORIGINAL image width/height to stay aligned with the
    # resized pixels. Pascal VOC-style files (which these keys suggest) normally
    # carry a <size> element; if it is missing or zero, fall back to input_dim,
    # which reproduces the previous behaviour.
    size = annotation.get('size') or {}
    width = float(size.get('width') or 0) or input_dim
    height = float(size.get('height') or 0) or input_dim

    buff = annotated_object['bndbox']
    # Target layout: [xmin, ymin, xmax, ymax], each as a fraction of the image size.
    bb = np.asarray([
        int(buff['xmin']) / width,
        int(buff['ymin']) / height,
        int(buff['xmax']) / width,
        int(buff['ymax']) / height,
    ])
    bndboxes.append(bb)

In [7]:
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split

In [8]:
# Show the distinct class names found in the annotations.
np.unique(objects)

# In the sorted order shown in the output: cucumber 0, eggplant 1, mushroom 2

array(['cucumber', 'eggplant', 'mushroom'], dtype='<U8')

In [9]:
# Inputs: stack the per-image arrays into a single array.
X = np.asarray(images)

# Targets: the four box coordinates followed by the binarized class label.
boxes = np.array(bndboxes)
encoder = LabelBinarizer()
classes_one_hot = encoder.fit_transform(objects)
Y = np.concatenate((boxes, classes_one_hot), axis=1)

In [10]:
# Keep 60% of the samples for training. A fixed random_state makes the split,
# and therefore every downstream metric, reproducible across kernel restarts.
x_train, x_test, y_train, y_test = train_test_split(X, Y, train_size=0.6, random_state=42)

In [11]:
# Creating the keras model

In [12]:
from tensorflow import keras
from tensorflow.keras import backend as K

In [13]:
# Shape of one model input: a square input_dim x input_dim image with 3 channels.
input_shape = ( input_dim , input_dim , 3 )
# Rate passed to the Dropout layer of the model.
dropout_rate = 0.5
# alpha value passed to the LeakyReLU layers of the model.
alpha = 0.2

def calculate_iou( target_boxes , pred_boxes ):
    """Intersection-over-union between target and predicted boxes.

    Only the first four entries of the last axis are read, interpreted as
    [xmin, ymin, xmax, ymax]; any trailing entries (e.g. class scores) are
    ignored.

    Args:
        target_boxes: tensor of ground-truth vectors.
        pred_boxes: tensor of predicted vectors, same layout as target_boxes.

    Returns:
        Tensor of IoU values with the last axis removed.
    """
    # Corners of the intersection rectangle.
    xA = K.maximum( target_boxes[ ... , 0], pred_boxes[ ... , 0] )
    yA = K.maximum( target_boxes[ ... , 1], pred_boxes[ ... , 1] )
    xB = K.minimum( target_boxes[ ... , 2], pred_boxes[ ... , 2] )
    yB = K.minimum( target_boxes[ ... , 3], pred_boxes[ ... , 3] )
    # Clamp at zero so that non-overlapping boxes give an empty intersection.
    interArea = K.maximum( 0.0 , xB - xA ) * K.maximum( 0.0 , yB - yA )
    boxAArea = (target_boxes[ ... , 2] - target_boxes[ ... , 0]) * (target_boxes[ ... , 3] - target_boxes[ ... , 1])
    boxBArea = (pred_boxes[ ... , 2] - pred_boxes[ ... , 0]) * (pred_boxes[ ... , 3] - pred_boxes[ ... , 1])
    unionArea = boxAArea + boxBArea - interArea
    # K.epsilon() keeps the ratio finite when the union is zero (degenerate
    # boxes); a 0/0 here would otherwise put NaN into the loss.
    iou = interArea / ( unionArea + K.epsilon() )
    return iou

def iou_metric( y_true , y_pred ):
    """Keras metric: IoU between the target and predicted boxes, via calculate_iou."""
    overlap = calculate_iou( y_true , y_pred )
    return overlap

def custom_loss( y_true , y_pred ):
    """Training loss: mean squared error plus (1 - IoU).

    The MSE is taken over the whole target vector; the IoU term is computed by
    calculate_iou from the box coordinates.

    Args:
        y_true: tensor of target vectors.
        y_pred: tensor of predicted vectors, same layout as y_true.

    Returns:
        Loss tensor with one value per sample.
    """
    # `tf.losses.mean_squared_error` resolves to different functions depending on
    # the TensorFlow major version (batch-reduced scalar in 1.x, per-sample in
    # 2.x). The tf.keras loss is per-sample in both, matching the IoU term.
    mse = tf.keras.losses.mean_squared_error( y_true , y_pred )
    iou = calculate_iou( y_true , y_pred )
    return mse + (1 - iou)

In [16]:
# alpha value used by the LeakyReLU layers below.
alpha = 0.2
# Number of object classes the model scores.
num_classes = 3
# Model output size: one score per class plus the 4 box coordinates.
pred_vector_length = num_classes + 4

model_layers = []

# Convolutional part: three stages with 16, 32 and 64 filters. Each stage is two
# 3x3 convolutions, each followed by LeakyReLU, then a 2x2 max-pool.
for stage_filters in (16, 32, 64):
    for conv_index in range(2):
        # Only the very first layer of the network declares the input shape.
        conv_kwargs = {} if model_layers else {"input_shape": input_shape}
        model_layers.append(
            keras.layers.Conv2D(stage_filters, kernel_size=(3, 3), strides=1, **conv_kwargs)
        )
        model_layers.append(keras.layers.LeakyReLU(alpha=alpha))
    model_layers.append(keras.layers.MaxPooling2D(pool_size=(2, 2)))

model_layers.append(keras.layers.Flatten())

# Fully connected part: dropout follows the first (widest) dense layer only.
for units in (640, 480, 120, 62):
    model_layers.append(keras.layers.Dense(units))
    model_layers.append(keras.layers.LeakyReLU(alpha=alpha))
    if units == 640:
        model_layers.append(keras.layers.Dropout(dropout_rate))

# Output: one value per entry of the prediction vector.
model_layers.append(keras.layers.Dense(pred_vector_length))
model_layers.append(keras.layers.LeakyReLU(alpha=alpha))

# Assemble the network layer by layer, then attach the optimizer, the custom
# loss and the IoU metric.
model = keras.models.Sequential()
for layer in model_layers:
    model.add(layer)

model.compile(loss=custom_loss, optimizer='adam', metrics=[iou_metric])

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [17]:
# Train for 10 epochs. The held-out split is passed as validation data so that
# validation loss and IoU are reported after every epoch; it is not trained on.
model.fit(
    x_train,
    y_train,
    batch_size=20,
    epochs=10,
    verbose=1,
    validation_data=(x_test, y_test),
)

Train on 111 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1c625803d0>

In [48]:
test_image_path = "./test_image.jpg"
test_image = cv2.imread(test_image_path)
# cv2.imread returns None instead of raising when the file is missing or
# unreadable; fail here with a clear message rather than inside cv2.resize.
if test_image is None:
    raise FileNotFoundError("Could not read test image: {}".format(test_image_path))
# Resize to the model input size (input_dim, not a hard-coded 228), reverse the
# channel axis (BGR -> RGB) and scale to [0, 1] like the training images.
test_image = cv2.resize(test_image, (input_dim, input_dim))[:,:,::-1] / 255.0

In [49]:
# 8-bit copy of the input for display. test_image holds values in [0, 1], so it
# must be scaled back to [0, 255] before the uint8 cast (casting directly
# truncates almost every pixel to 0).
display_pixels = np.ascontiguousarray(np.uint8(test_image * 255.0))
im = Image.fromarray(display_pixels)

# Predict on a batch containing the single test image.
y_pred = model.predict(test_image[np.newaxis, : , :, :])
test_pred = y_pred[0]

# Prediction layout: the 4 box values first, then the class scores.
class_score = test_pred[4:]
test_bbox = test_pred[:4]
# Box values are fractions of the image size; convert them to pixels.
test_bbox = test_bbox * input_dim
max_class = np.argmax(class_score)

# cv2.rectangle needs integer pixel coordinates. Draw on a copy of the 8-bit
# image so that the colour (0, 255, 0) really is green and test_image (the model
# input) is left unmodified.
x_min, y_min, x_max, y_max = np.rint(test_bbox).astype(int).tolist()
test_show = cv2.rectangle(display_pixels.copy(), (x_min, y_min), (x_max, y_max), (0, 255, 0), 1)
test_show = Image.fromarray(test_show)
test_show.show()