In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import os, glob
from natsort import natsorted
from tensorflow.keras.preprocessing.image import load_img
import cv2

## Load data

In [2]:
files = glob.glob('../data/train/*')

In [3]:
print('Number of images: ', len(files))

Number of images:  10000


In [4]:
labels = pd.read_csv('./labels.txt', header = None, names = ['id', 'class', 'x', 'y', 'w', 'h'])

In [5]:
labels

Unnamed: 0,id,class,x,y,w,h
0,0,0,158.293543,163.840423,291.767249,291.767249
1,1,0,174.976577,145.041054,285.589838,285.589838
2,2,0,176.162791,156.819932,268.717581,268.717581
3,3,0,257.477453,201.296374,84.595788,84.595788
4,4,0,71.470291,167.118748,133.755485,133.755485
...,...,...,...,...,...,...
9995,9995,1,163.227646,86.690061,209.817385,11.481389
9996,9996,1,149.334399,243.464501,157.174169,146.648070
9997,9997,1,141.026465,240.124885,5.825795,97.160807
9998,9998,1,63.400575,196.567790,125.709731,119.307785


In [6]:
files = natsorted(files)

Load images and lables into numpy arrays 

In [101]:
X = []
for path in files:
    img = load_img(path)
    img_arr = np.asarray(img)
    X.append(img_arr)
    
X = np.array(X)

In [102]:
Y = labels[['x', 'y', 'w', 'h']]
Y = np.array(Y)

In [103]:
print('Shape of X array: ', X.shape)
print('Shape of Y array: ', Y.shape)
print('Shape of single image: ', X[0].shape)

Shape of X array:  (10000, 320, 320, 3)
Shape of Y array:  (10000, 4)
Shape of single image:  (320, 320, 3)


Normalize input values to 1

In [104]:
#X = X[:,:,:,0]
X = X / X.max()

In [105]:
X.max()

1.0

Custom function to draw bboxes

In [107]:
def draw_bbox(img, true, pred = None):

    xc, yc, w, h = true
    
    start = (int(xc - w / 2), int(yc - h/ 2))
    end = (int(xc + w /2), int(yc + h / 2))
    cv2.rectangle(img, start, end, (0,0,255))
    if type(pred) != None:
        xc, yc, w, h = pred
        start = (int(xc - w / 2), int(yc - h/ 2))
        end = (int(xc + w /2), int(yc + h / 2))
        cv2.rectangle(img, start, end, (0,0,255))
    
    cv2.imshow('bbox', img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

# Training

#### Data splitting 

In [109]:
from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=42, shuffle = True)

#### IoU (Intersection over Union) metric 

In [141]:
def iou(true, pred):
    xct, yct, wt, ht = true
    xcp, ycp, wp, hp = pred
    x1 = xct - wt / 2
    y1 = yct - ht / 2
    x2 = x1 + wp
    y2 = y1 + hp
    
    x3 = xcp - wp / 2
    y3 = ycp - hp / 2
    x4 = x3 + wp
    y4 = y3 + hp    
    
    x_inter1 = max(x1, x3)
    y_inter1 = max(y1, y3)
    x_inter2 = min(x2, x4)
    y_inter2 = min(y2, y4)
    
    w_inter = x_inter2 - x_inter1
    h_inter = y_inter2 - y_inter1
    
    area_inter = w_inter * h_inter
    area_union = wt * ht + wp * hp - area_inter
    
    return area_inter / area_union

# model

### define model

In [112]:
def CNN_localization(nFilters, kernel_size, pool_size, nNeurons, nHiddenLayers, inputShape, outputWidth):
    inputs = tf.keras.Input(shape=inputShape)
    x = inputs  
    x = tf.keras.layers.Conv2D(nFilters, (kernel_size, kernel_size), activation='relu', input_shape=inputShape)(x)
    x = tf.keras.layers.MaxPooling2D(pool_size)(x)
    x = tf.keras.layers.Conv2D(nFilters, (kernel_size, kernel_size), activation='relu')(x)
    x = tf.keras.layers.MaxPooling2D(pool_size)(x)
    x = tf.keras.layers.Conv2D(nFilters, (kernel_size, kernel_size), activation='relu')(x)
    x = tf.keras.layers.Flatten()(x)
    x = tf.keras.layers.Dense(int(nNeurons), activation='relu')(x)
    x = tf.keras.layers.Dense(int(nNeurons / 2), activation='relu')(x)
    x = tf.keras.layers.Dense(int(nNeurons / 4), activation='relu')(x)
    outputs = tf.keras.layers.Dense(outputWidth)(x)
    
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    model.compile(loss = tf.keras.losses.mse, optimizer = 'adam')
    return model

### model training v1 

In [113]:
%%time 

batch_size = 64
nFilters = 32
kernel_size = 3
pool_size = (5,5)
nNeurons = 128 
nHiddenLayers = 1 
inputShape = (320, 320, 3)
outputWidth = 4

model = CNN_localization(nFilters, kernel_size, pool_size, nNeurons, nHiddenLayers, inputShape, outputWidth)
model_fit = model.fit(X_train, Y_train, epochs=30, validation_split = 0.15) 

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
CPU times: total: 7h 15min 26s
Wall time: 38min 13s


In [123]:
n = 2500
pred = model.predict(X_test[n:n+1])
draw_bbox(X_test[n], Y_test[n], pred[0])



In [69]:
pred[0]

array([122.31591 , 118.56896 ,  60.966293,  70.17118 ], dtype=float32)

In [119]:
def draw_bbox(img, true, pred = None):

    xc, yc, w, h = true
    
    start = (int(xc - w / 2), int(yc - h/ 2))
    end = (int(xc + w /2), int(yc + h / 2))
    cv2.rectangle(img, start, end, (0,0,255))
    if type(pred) != None:
        xc, yc, w, h = pred
        start = (int(xc - w / 2), int(yc - h/ 2))
        end = (int(xc + w /2), int(yc + h / 2))
        cv2.rectangle(img, start, end, (0,255,0))
    
    cv2.imshow('bbox', img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

In [100]:
draw_bbox(X_test[1200], Y_test[1200])

### model training v2

In [124]:
%%time 

batch_size = 64
nFilters = 32
kernel_size = 5
pool_size = (3,3)
nNeurons = 128 
nHiddenLayers = 1 
inputShape = (320, 320, 3)
outputWidth = 4

model2 = CNN_localization(nFilters, kernel_size, pool_size, nNeurons, nHiddenLayers, inputShape, outputWidth)
model2_fit = model2.fit(X_train, Y_train, epochs=40, validation_split = 0.15) 

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40
CPU times: total: 20h 8min 47s
Wall time: 1h 49min 44s


In [142]:
n = 100
pred = model2.predict(X_test[n:n+1])
draw_bbox(X_test[n], Y_test[n], pred[0])



In [150]:
iou_all = []
errors = []
for i in range(Y_test.shape[0]):
    pred = model2.predict(X_test[i:i+1], verbose = 0)
    result = iou(Y_test[i], pred[0])
    if result <= 1:
        iou_all.append(result)
    else:
        errors.append([i, result])

In [156]:
print('Number of IoU < 1: ', len(iou_all))
print('Mean IoU: ', np.mean(iou_all))

Number of IoU < 1:  2894
Mean IoU:  0.6193087811257244
