# 📚 Libraries

In [2]:
import os
import cv2
import imutils
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.models import load_model

# 🖍 Initialize variables

In [3]:
# accumulators filled while parsing the annotations:
# image arrays, normalised bounding boxes and the matching file names
data, target, file_names = [], [], []

# 📖 Read annotation

In [12]:
ann_path = "CALTECH101_STOP_SIGN/stop_sign_annotations_converted.txt"

# read through a context manager so the file handle is closed right away;
# strip surrounding whitespace and keep one annotation row per line
with open(ann_path) as ann_file:
    rows = ann_file.read().strip().split("\n")

In [14]:
# directory that holds the images (loop-invariant, so it is set once)
path = "CALTECH101_STOP_SIGN/stop_sign/"

# position of the final annotation row; its coordinate field has no trailing
# token to drop (presumably because the file content was stripped when it was
# read - TODO confirm), so it is sliced differently below
last_idx = len(rows) - 1

for idx, row in enumerate(rows):

    # break the row into the file name
    # and the coordinates of the bounding box
    row = row.split(",")

    # keep only the part between "_" and the extension
    # and rebuild the name as "image_<id>.jpg"
    file_name = row[0]
    file_name = file_name.split(".")[0]
    file_name = file_name.split("_")[1]
    file_name = "image_" + file_name + ".jpg"

    coords = row[1]
    coords = coords.split(" ")

    # drop the leading token on every row and the trailing token on
    # every row except the last one (derived from the number of rows
    # instead of a hard-coded index)
    if idx != last_idx:
        coords = coords[1:-1]
    else:
        coords = coords[1:]

    # convert to int
    coords = [int(c) for c in coords]

    # read the image with OpenCV to get its original size
    full_path = path + file_name
    img = cv2.imread(full_path)

    # cv2.imread signals failure by returning None instead of raising,
    # so fail here with the offending path rather than on img.shape
    if img is None:
        raise FileNotFoundError("Could not read image: " + full_path)

    (h, w) = img.shape[:2]

    # scale the bounding box coordinates
    # relative to the dimensions of the img
    Xmin = float(coords[0]) / w
    Ymin = float(coords[1]) / h
    Xmax = float(coords[2]) / w
    Ymax = float(coords[3]) / h

    # load the image again with
    # tensorflow, resized to 224x224, and convert it to an array
    img = load_img(full_path, target_size=(224, 224))
    img = img_to_array(img)

    data.append(img)
    target.append((Xmin, Ymin, Xmax, Ymax))
    file_names.append(file_name)

# 📊 Data Preparation

In [15]:
# rescale pixel intensities from the [0, 255] range into [0, 1]
data = np.asarray(data, dtype=np.float32) / 255.0

# bounding-box targets as a float32 array as well
target = np.asarray(target, dtype=np.float32)

Let's check the shape of data.

In [16]:
# dimensions of the stacked, normalised image array
data.shape

(64, 224, 224, 3)

Let's check the shape of targets.

In [17]:
# dimensions of the bounding-box array: one (Xmin, Ymin, Xmax, Ymax) row per image
target.shape

(64, 4)

Next, we split the dataset into train and test sets (90% and 10%, respectively).

In [18]:
# hold out 10% of the samples; the fixed seed keeps the partition reproducible
split = train_test_split(
    data,
    target,
    file_names,
    test_size=0.10,
    random_state=42,
)

Here we unpack the split into separate variables.

In [19]:
# the split holds a (train, test) pair for each input, in the order
# the inputs were passed: images, targets, file names
(
    train_imgs, test_imgs,
    train_target, test_target,
    train_filenames, test_filenames,
) = split

We'll also save the names of the test images so we can check them later.

In [20]:
# write the test image names, one per line; the context manager
# closes the file even if the write raises
with open("test_images.txt", "w") as f:
    f.write("\n".join(test_filenames))

# 🧠 Neural Network
Here we'll use [VGG16](https://www.tensorflow.org/api_docs/python/tf/keras/applications/vgg16/VGG16) from tensorflow. We don't need the top of the net, so we won't include it.

In [21]:
# VGG16 with ImageNet weights and without its classification top,
# fed by 224x224 three-channel inputs
vgg_model = VGG16(
    weights="imagenet",
    include_top=False,
    input_tensor=Input(shape=(224, 224, 3)),
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 1us/step


We'll also freeze the VGG16 layers so that their weights are not updated during training.

In [22]:
# mark the whole VGG16 base as non-trainable
vgg_model.trainable = False

Here we flatten the max-pooling output of VGG.

In [23]:
# flatten the output of the VGG base into a single vector
flatten = Flatten()(vgg_model.output)

Here we add a fully-connected head that outputs the four bounding-box coordinates.

In [24]:
# three ReLU layers of decreasing width on top of the flattened features
bbox_head = flatten
for units in (128, 64, 32):
    bbox_head = Dense(units, activation="relu")(bbox_head)

# four sigmoid outputs, matching targets that were scaled into [0, 1]
bbox_head = Dense(4, activation="sigmoid")(bbox_head)

Here we build the model that we will fine-tune for bounding box regression.

In [25]:
# full network: from the VGG16 input through to the 4-unit bbox_head output
model = Model(inputs=vgg_model.input, outputs=bbox_head)

Let's set some hyperparameters of neural net: *learning rate, epochs, batch size*.

In [26]:
# training hyperparameters
LR = 0.0001      # learning rate handed to the optimizer
EPOCHS = 70      # number of passes over the training set
BATCH_SIZE = 32  # samples per gradient update

Here we set the *optimizer* and *compile* our model. You can see the model's architecture below.

In [28]:
# Adam is already imported from tensorflow.keras.optimizers in the imports
# cell at the top of the notebook, so it is not re-imported here from the
# standalone keras package
opt = Adam(learning_rate=LR)  # Use 'learning_rate' instead of 'lr'

# mean squared error between predicted and true box coordinates
model.compile(loss="mse", optimizer=opt)

# summary() prints the architecture itself and returns None, so it is
# called directly; wrapping it in print() would add a stray "None" line
model.summary()


None


And let's train the network!

In [29]:

# train the model; the held-out split is passed as validation data,
# and the returned object is kept in H
H = model.fit(
    x=train_imgs,
    y=train_target,
    validation_data=(test_imgs, test_target),
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    verbose=1,
)

Epoch 1/70




[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 5s/step - loss: 0.1720 - val_loss: 0.0797
Epoch 2/70
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4s/step - loss: 0.0720 - val_loss: 0.0384
Epoch 3/70
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4s/step - loss: 0.0411 - val_loss: 0.0178
Epoch 4/70
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4s/step - loss: 0.0224 - val_loss: 0.0108
Epoch 5/70
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4s/step - loss: 0.0155 - val_loss: 0.0083
Epoch 6/70
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4s/step - loss: 0.0143 - val_loss: 0.0073
Epoch 7/70
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4s/step - loss: 0.0121 - val_loss: 0.0067
Epoch 8/70
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4s/step - loss: 0.0118 - val_loss: 0.0062
Epoch 9/70
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4s/step - loss: 0

Here we save the model in order to use it later.

In [37]:
# write the trained model to a .keras file so it can be loaded later
model.save("model_stop_signs_caltech101.keras")
