# Advanced Computer Vision Topics

## Session 4: Object Detection (3 hours)

In [None]:
import os
import warnings
import cv2

import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

from tensorflow.keras.applications.mobilenet import (
    MobileNet, preprocess_input,
)
from tensorflow.keras.preprocessing.image import (
    img_to_array, load_img,
)
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, Reshape
from tensorflow.keras.callbacks import EarlyStopping

from plot_layers import (
    plot_layer_outputs, imshow, apply_patch,
    IoU_metric,
)
warnings.simplefilter("ignore")
%matplotlib inline

In [None]:
# Dataset root: <current working directory>/data/images/localization/pets
# (os.path.abspath('') expands to the current working directory).
LOCAL_DATA_PATH = os.path.join(os.path.abspath(''), 'data', 'images', 'localization', 'pets')
# Ref: https://www.robots.ox.ac.uk/~vgg/data/pets/

In [None]:
# Size used as target_size when loading images and as the spatial part of the
# model's input shape.
TARGET_SIZE = (128, 128)
# Each CSV row holds an image name and the object's bounding-box information.
# The files are read with header=None, i.e. without a header row.
TRAIN_DATA_DF = pd.read_csv(os.path.join(LOCAL_DATA_PATH, 'train.csv'), header=None)
TEST_DATA_DF = pd.read_csv(os.path.join(LOCAL_DATA_PATH, 'test.csv'), header=None)

In [None]:
# Column names applied to both annotation tables (the CSVs are read without a header row).
HEADER = ['ImagePath', 'Height', 'Width', 'X0', 'Y0', 'X1', 'Y1', 'Breed', 'BreedCategory']
TRAIN_DATA_DF.columns = HEADER
TEST_DATA_DF.columns = HEADER

In [None]:
# Preview the first rows of the training annotations.
TRAIN_DATA_DF.head()

In [None]:
# Preview the first rows of the test annotations.
TEST_DATA_DF.head()

In [None]:
def compute_y(df_inp, target_size=None):
    """Scale bounding-box annotations to the resized image's coordinates.

    Args:
        df_inp: DataFrame providing the box corners in the columns ``X0``,
            ``Y0``, ``X1``, ``Y1`` and the original image size in the
            columns ``Width`` and ``Height``.
        target_size: Optional two-element sequence the boxes are scaled to.
            Index 0 scales the x axis and index 1 scales the y axis.
            Defaults to the module-level ``TARGET_SIZE``.

    Returns:
        A new DataFrame with the columns ``X0``, ``Y0``, ``Width`` and
        ``Height``: the box's top-left corner and its extent, each expressed
        as a fraction of the original image size times the target size.
    """
    if target_size is None:
        # Reading a module-level name needs no ``global`` declaration.
        target_size = TARGET_SIZE
    scale_x = target_size[0]
    scale_y = target_size[1]

    return pd.DataFrame({
        'X0': (df_inp['X0'] / df_inp['Width']) * scale_x,
        'Y0': (df_inp['Y0'] / df_inp['Height']) * scale_y,
        'Width': ((df_inp['X1'] - df_inp['X0']) / df_inp['Width']) * scale_x,
        'Height': ((df_inp['Y1'] - df_inp['Y0']) / df_inp['Height']) * scale_y,
    })

In [None]:
# Training targets: bounding boxes rescaled by compute_y.
Y_TRAIN_DF = compute_y(TRAIN_DATA_DF)
Y_TRAIN_DF.head()

In [None]:
# Test targets: bounding boxes rescaled by compute_y.
Y_TEST_DF = compute_y(TEST_DATA_DF)
Y_TEST_DF.head()

In [None]:
# Convert the target tables to NumPy arrays (later passed to model.fit / model.evaluate).
y_train = Y_TRAIN_DF.to_numpy()
y_test = Y_TEST_DF.to_numpy()

In [None]:
# Inspect the training targets.
y_train

In [None]:
# Inspect the test targets.
y_test

In [None]:
# Build the on-disk image paths for x_train and x_test
def _local_image_path(relative_path):
    """Join a '/'-separated path from the CSV onto LOCAL_DATA_PATH."""
    return os.path.join(LOCAL_DATA_PATH, *relative_path.split('/'))


x_train = TRAIN_DATA_DF['ImagePath'].apply(_local_image_path).to_numpy()
x_test = TEST_DATA_DF['ImagePath'].apply(_local_image_path).to_numpy()

In [None]:
# Preview the first five training image paths.
x_train[:5]

In [None]:
# Preview the first five test image paths.
x_test[:5]

In [None]:
def has_files(paths):
    """Print how many of the given paths do not exist on disk.

    Args:
        paths: Sized collection of filesystem paths to check.

    The result is printed as ``"<missing>/<total> missing"``; nothing is
    returned.
    """
    missing = sum(1 for candidate in paths if not os.path.exists(candidate))
    print(f"{missing}/{len(paths)} missing")
# Report how many image files are missing in each split.
has_files(x_train)
has_files(x_test)

In [None]:
# Index of the training sample used for the visual sanity check below.
SAMPLE_LOCATION = 100
path = x_train[SAMPLE_LOCATION]
# Read the sample from disk with OpenCV and display it.
img = cv2.imread(path)
imshow(img)

In [None]:
# Original (unscaled) box corners of the sample, looked up by row label.
bb_x0, bb_y0, bb_x1, bb_y1 = (
    TRAIN_DATA_DF.at[SAMPLE_LOCATION, corner]
    for corner in ('X0', 'Y0', 'X1', 'Y1')
)

In [None]:
# Show the sample's box corners as read from the annotation table.
bb_x0, bb_y0, bb_x1, bb_y1

In [None]:
# Presumably overlays the bounding box on the sample image; apply_patch is a
# helper imported from plot_layers — confirm its exact behavior there.
apply_patch(img, bb_x0, bb_y0, bb_x1, bb_y1)

In [None]:
def preprocess_images_for_model(paths):
    """Load image files and turn them into a model-ready NumPy batch.

    Each file is read with ``load_img`` at ``TARGET_SIZE``, converted with
    ``img_to_array`` and passed through MobileNet's ``preprocess_input``.

    Args:
        paths: Iterable of image file paths.

    Returns:
        A NumPy array holding the preprocessed images in input order.
    """
    def _prepare(image_path):
        pixels = img_to_array(load_img(image_path, target_size=TARGET_SIZE))
        return preprocess_input(pixels)

    return np.array([_prepare(image_path) for image_path in paths])

In [None]:
# Replace the path arrays with the preprocessed image batches.
x_train = preprocess_images_for_model(x_train)
x_test = preprocess_images_for_model(x_test)

In [None]:
# Display the shapes of the training inputs and targets.
x_train.shape, y_train.shape

In [None]:
# Display the shapes of the test inputs and targets.
x_test.shape, y_test.shape

In [None]:
ALPHA = 1.0  # Width hyper parameter for MobileNet (0.25, 0.5, 0.75, 1.0).


def create_transfer_model_from_mobilenet(alpha, is_trainable=False):
    """Build a bounding-box regressor on top of a MobileNet backbone.

    The backbone is created without its classification top, using
    ``(*TARGET_SIZE, 3)`` as input shape. A single convolution with four
    filters is placed on the backbone output and reshaped to a flat vector
    of four values per image.

    Args:
        alpha: MobileNet width multiplier.
        is_trainable: Value assigned to ``trainable`` on every backbone
            layer; ``False`` freezes the pretrained layers.

    Returns:
        A Keras ``Model`` mapping the backbone input to the 4-value output.
    """
    backbone = MobileNet(
        input_shape=(*TARGET_SIZE, 3),
        include_top=False,  # Do not include classification/top layer
        alpha=alpha,
    )

    for layer in backbone.layers:
        layer.trainable = is_trainable  # freeze/unfreeze the pretrained layers.

    # Size the kernel from the backbone's final feature map so the convolution
    # (default "valid" padding) collapses it to 1x1 for any TARGET_SIZE. A
    # fixed kernel of 4 only achieved that for the 128x128 input, where the
    # feature map is 4x4; other sizes made the Reshape below fail.
    feature_map_size = tuple(int(dim) for dim in backbone.output.shape[1:3])
    box_features = Conv2D(4, kernel_size=feature_map_size)(backbone.output)
    output_layer = Reshape((4,))(box_features)  # output size is 4.

    return Model(inputs=backbone.input, outputs=output_layer)


In [None]:
# Build the model with the pretrained layers frozen (is_trainable=False).
model = create_transfer_model_from_mobilenet(alpha=ALPHA, is_trainable=False)

In [None]:
# Print the layer-by-layer summary of the assembled model.
model.summary()

In [None]:
# Configure training: MSE loss on the four box values, Adam optimizer.
# NOTE(review): 'accuracy' is a classification-style metric and is unlikely to
# be meaningful for this box regression; the loss and IoU_metric look like the
# informative numbers — confirm before relying on the accuracy value.
model.compile(
    loss="mean_squared_error", # Regression loss is MSE
    optimizer="adam",
    metrics=['accuracy', IoU_metric]
)

In [None]:
# Stop when the validation loss stops improving by at least min_delta.
# The model is a box regressor trained with MSE, so training 'accuracy' is not
# a meaningful signal to stop on; 'val_loss' is available because model.fit is
# given validation_data.
# NOTE: with patience=5 the callback cannot trigger within a 5-epoch run;
# it only takes effect once the number of epochs is raised.
callback = EarlyStopping(monitor='val_loss', patience=5, min_delta=0.01)

In [None]:
# Fit the model for 5 epochs in batches of 32.
# NOTE: the test split is also used as validation data here.
model.fit(
    x_train, y_train,
    validation_data=(x_test, y_test),
    epochs=5, batch_size=32,
    callbacks=[callback]
)

In [None]:
# Report the loss and the compiled metrics on the test split.
model.evaluate(x_test, y_test)

In [None]:
def test_model(model):
    """Predict bounding boxes for a few sample images and draw them.

    For each sample file under ``LOCAL_DATA_PATH/images`` the image is
    preprocessed with ``preprocess_images_for_model`` (the same pipeline used
    for the training data), the model predicts ``(x0, y0, width, height)`` in
    ``TARGET_SIZE`` coordinates, and the box is scaled back to the original
    image size before being handed to ``apply_patch``.

    Args:
        model: Model exposing ``predict`` that returns four values per image.
    """
    # A tuple keeps the display order fixed; a set iterates in arbitrary order.
    images = (
        'samoyed_174.jpg',
        'shiba_inu_163.jpg',
        'Abyssinian_14.jpg',
    )
    for image in images:
        image_file = os.path.join(LOCAL_DATA_PATH, 'images', image)
        # Feed the model input preprocessed exactly like the training data;
        # raw pixel values would not match what the network was trained on.
        batch = preprocess_images_for_model([image_file])
        preds = model.predict(batch)[0]

        # Re-read at full resolution to draw on the original image.
        img = cv2.imread(image_file)
        height, width, _ = img.shape

        # Scale the predicted box from TARGET_SIZE back to the original size.
        x0 = int(preds[0] * width / TARGET_SIZE[0])
        y0 = int(preds[1] * height / TARGET_SIZE[1])

        x1 = int((preds[0] + preds[2]) * width / TARGET_SIZE[0])
        y1 = int((preds[1] + preds[3]) * height / TARGET_SIZE[1])
        apply_patch(img, x0, y0, x1, y1)

In [None]:
# Run the qualitative check on the sample images with the trained model.
test_model(model)