In [None]:
# Download the data
# Fetch the pets dataset archives (annotations and images) from the Oxford VGG
# server into the current working directory. These are IPython shell escapes,
# so they need `wget` and `tar` on the host and network access.
!wget https://www.robots.ox.ac.uk/~vgg/data/pets/data/annotations.tar.gz
!wget https://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz
# Unpack both gzip-compressed archives in place. --verbose prints every
# extracted path, so expect a long cell output.
# NOTE(review): the test cell later globs "images/*jpg", so the images archive
# presumably unpacks into an images/ directory - confirm after extraction.
!tar --gunzip --extract --verbose --file=annotations.tar.gz
!tar --gunzip --extract --verbose --file=images.tar.gz

In [16]:
# Preprocess data into csv files
# `preprocess_data` is a project-local module that is not defined in this
# notebook. Its main() printed the per-class image counts and the progress
# counter captured in this cell's output.
# NOTE(review): presumably it also writes the train.csv / validation.csv files
# that the data-generator cell reads - confirm in preprocess_data.py.
from preprocess_data import main
main()

class Dog: 2498 images
class Cat: 1188 images
3686/3686
Done!


In [17]:
# Import Libraries and Define model

from tensorflow.keras import Model
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2, preprocess_input
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, Callback
from tensorflow.keras.layers import Conv2D, Reshape, Dense, GlobalAveragePooling2D
from tensorflow.keras.utils import Sequence
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.backend import epsilon

# Width multiplier handed to MobileNetV2 as `alpha` in create_model().
# Candidate values: 0.35, 0.5, 0.75, 1.0
ALPHA = 0.75

# Side length, in pixels, of the square 3-channel network input
# (create_model() builds input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3)); the test
# cell also resizes images to this size and rescales predicted boxes by it.
# Candidate values: 96, 128, 160, 192, 224
IMAGE_SIZE = 96

# Number of units in the softmax "classes" head built by create_model().
CLASSES = 2

def create_model(trainable=False):
    """Build a MobileNetV2 backbone with a box-regression and a class head.

    Args:
        trainable: Value assigned to ``layer.trainable`` for every backbone
            layer. The default ``False`` freezes the backbone so only the two
            heads created here are trained.

    Returns:
        A Keras ``Model`` taking ``(IMAGE_SIZE, IMAGE_SIZE, 3)`` inputs and
        producing two outputs, in this order:
          * ``"coords"``  - 4 regression values per image.
          * ``"classes"`` - softmax over ``CLASSES`` categories.
    """
    model = MobileNetV2(input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3), include_top=False, alpha=ALPHA)

    # to freeze layers
    for layer in model.layers:
        layer.trainable = trainable

    out = model.layers[-1].output

    # Size the kernel to the backbone's final feature map so that the
    # (default "valid"-padded) convolution always collapses it to 1x1x4.
    # A hard-coded 3x3 kernel only does that when the feature map is 3x3
    # (IMAGE_SIZE = 96); larger inputs would leave more than 4 values and the
    # Reshape below would fail. At IMAGE_SIZE = 96 this is still a 3x3 kernel,
    # so previously saved weights remain loadable.
    feature_height, feature_width = int(out.shape[1]), int(out.shape[2])

    x = Conv2D(4, kernel_size=(feature_height, feature_width))(out)
    x = Reshape((4,), name="coords")(x)

    y = GlobalAveragePooling2D()(out)
    y = Dense(CLASSES, name="classes", activation="softmax")(y)

    return Model(inputs=model.input, outputs=[x, y])

In [19]:
# Define loss functions for object detection  
import math
import numpy as np
import tensorflow as tf

def log_mse(y_true, y_pred):
  """Mean over the last axis of log(1 + squared error).

  Args:
    y_true: Target tensor.
    y_pred: Prediction tensor, broadcast-compatible with ``y_true``.

  Returns:
    Tensor with the last axis reduced away.
  """
  squared_error = tf.math.squared_difference(y_pred, y_true)
  damped_error = tf.math.log1p(squared_error)
  return tf.reduce_mean(damped_error, axis=-1)

def focal_loss(alpha=0.9, gamma=2):
  """Create a focal-loss function usable as a Keras ``loss``.

  Args:
    alpha: Weight applied to the target == 1 term; ``1 - alpha`` is applied to
      the target == 0 term.
    gamma: Exponent of the modulating factor.

  Returns:
    A callable ``loss(y_true, y_pred)`` that takes probabilities in
    ``y_pred`` and returns a scalar: the mean loss over all elements.
  """
  def focal_loss_with_logits(logits, targets, alpha, gamma, y_pred):
    # Modulating weights for the target == 1 and target == 0 terms.
    positive_weight = alpha * (1 - y_pred) ** gamma * targets
    negative_weight = (1 - alpha) * y_pred ** gamma * (1 - targets)

    # log1p(exp(-|z|)) + relu(-z) is a numerically stable log(1 + exp(-z)).
    stable_term = tf.math.log1p(tf.exp(-tf.abs(logits))) + tf.nn.relu(-logits)

    return stable_term * (positive_weight + negative_weight) + logits * negative_weight

  def loss(y_true, y_pred):
    # Keep probabilities strictly inside (0, 1) so the logit below is finite.
    eps = tf.keras.backend.epsilon()
    y_pred = tf.clip_by_value(y_pred, eps, 1 - eps)

    # Recover logits from the clipped probabilities.
    odds = y_pred / (1 - y_pred)
    logits = tf.math.log(odds)

    per_element = focal_loss_with_logits(logits=logits, targets=y_true, alpha=alpha, gamma=gamma, y_pred=y_pred)

    return tf.reduce_mean(per_element)

  return loss

In [None]:
# Model Summary
# Build the network with create_model()'s default trainable=False, i.e. every
# MobileNetV2 backbone layer is frozen. This `model` object is the one the
# training cell compiles and fits, so this cell must run before it.
model = create_model()
# Print the layer-by-layer architecture and parameter counts.
model.summary()

In [20]:
# Data Generator and Validations
# DataGenerator and Validation come from the project-local data_generator
# module, which is not defined in this notebook.
from data_generator import DataGenerator, Validation

# CSV files describing the training and validation samples.
# NOTE(review): presumably produced by the preprocessing cell - confirm.
TRAIN_CSV = "train.csv"
VALIDATION_CSV = "validation.csv"

# Passed to model.fit() as the training data.
train_datagen = DataGenerator(TRAIN_CSV)
# Passed to model.fit() in the callbacks list, wrapping a generator over the
# validation CSV.
# NOTE(review): the other callbacks monitor "val_iou"; presumably Validation
# is what writes that value into the epoch logs - confirm in data_generator.py.
validation_datagen = Validation(generator=DataGenerator(VALIDATION_CSV))

In [None]:
# Launch model training
# Requires `model`, `train_datagen` and `validation_datagen` from the cells above.

# Upper bound on training epochs; EarlyStopping may end the run sooner.
EPOCHS = 70
# NOTE(review): BATCH_SIZE is not consumed in this cell; the batch size is
# whatever DataGenerator yields - confirm in data_generator.py.
BATCH_SIZE = 32
# Epochs without val_iou improvement before EarlyStopping halts training.
PATIENCE = 15

MULTI_PROCESSING = False
THREADS = 1

# `learning_rate` replaces the deprecated `lr` alias. `epsilon` and `decay`
# are left at their defaults, which match the values previously requested via
# epsilon=None / decay=0.0; newer Keras optimizers reject the `decay` keyword.
optimizer = Adam(learning_rate=1e-3, beta_1=0.9, beta_2=0.999, amsgrad=False)
model.compile(loss={"coords" : log_mse, "classes" : focal_loss()}, loss_weights={"coords" : 1, "classes" : 1}, optimizer=optimizer, metrics=[])
# save_best_only=True keeps the weights of the epoch with the highest val_iou.
# With False the file is overwritten after every epoch, so after early
# stopping it would hold weights from up to PATIENCE epochs past the best one.
checkpoint = ModelCheckpoint("model-iou.h5", monitor="val_iou", verbose=1, save_best_only=True,
                                 save_weights_only=True, mode="max")
stopEarly = EarlyStopping(monitor="val_iou", patience=PATIENCE, mode="max")
reduce_lr = ReduceLROnPlateau(monitor="val_iou", factor=0.2, patience=10, min_lr=1e-7, verbose=1, mode="max")

# validation_datagen is listed first so it runs before the callbacks that
# monitor "val_iou" at the end of each epoch.
# NOTE(review): assumes the Validation callback adds "val_iou" to the epoch
# logs - confirm in data_generator.py.
model.fit(train_datagen,
          epochs=EPOCHS,
          callbacks=[validation_datagen, checkpoint, reduce_lr, stopEarly],
          workers=THREADS,
          use_multiprocessing=MULTI_PROCESSING,
          shuffle=True,
          verbose=1)


In [None]:
#Test Model
# Rebuild the network, load the checkpointed weights, then draw the predicted
# box and class label on every image matched by IMAGES.

import glob
import cv2
import numpy as np
from google.colab.patches import cv2_imshow

WEIGHTS_FILE = "model-iou.h5"
IMAGES = "images/*jpg"

model = create_model()
model.load_weights(WEIGHTS_FILE)

# NOTE(review): assumes class index 0 is Dog and 1 is Cat, matching the order
# printed by the preprocessing step - confirm against preprocess_data.py.
class_names = {0:'Dog', 1:'Cat'}

for filename in glob.glob(IMAGES):
  unscaled = cv2.imread(filename)
  # cv2.imread returns None (it does not raise) for files it cannot decode.
  if unscaled is None:
    print("Skipping unreadable image: {}".format(filename))
    continue
  image_height, image_width, _ = unscaled.shape

  image = cv2.resize(unscaled, (IMAGE_SIZE, IMAGE_SIZE))
  feat_scaled = preprocess_input(np.array(image, dtype=np.float32))

  # Feed the MobileNetV2-preprocessed tensor, not the raw uint8 pixels; the
  # preprocessed array was previously computed and then never used.
  # NOTE(review): assumes DataGenerator applies the same preprocess_input
  # during training - confirm in data_generator.py.
  region, class_id = model.predict(x=np.array([feat_scaled]))
  region = region[0]

  # region is treated as (x, y, width, height) in network-input pixels;
  # rescale it to the original image resolution.
  x0 = int(region[0] * image_width / IMAGE_SIZE)
  y0 = int(region[1]  * image_height / IMAGE_SIZE)

  x1 = int((region[0] + region[2]) * image_width / IMAGE_SIZE)
  y1 = int((region[1] + region[3]) * image_height / IMAGE_SIZE)

  class_id = np.argmax(class_id, axis=1)[0]

  cv2.rectangle(unscaled, (x0, y0), (x1, y1), (0, 0, 255), 1)
  cv2.putText(unscaled, "class: {}".format(class_names[class_id]), (x0, y0), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)
  # cv2_imshow renders inline in the notebook and opens no HighGUI window, so
  # no cv2.waitKey / cv2.destroyAllWindows calls are needed afterwards.
  cv2_imshow(unscaled)