In [1]:
import numpy as np
import cv2
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Input, Conv2D, MaxPool2D,Flatten, Dense, Reshape, Concatenate
from tensorflow.keras.activations import sigmoid, softmax
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
import keras.backend as K

In [None]:
# Download the PASCAL VOC 2007 trainval and test archives.
!wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar
!wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar

# Extract both archives into the current working directory.
!tar xvf VOCtrainval_06-Nov-2007.tar
!tar xvf VOCtest_06-Nov-2007.tar

# The tarballs are no longer needed once extracted.
!rm VOCtrainval_06-Nov-2007.tar
!rm VOCtest_06-Nov-2007.tar

# Remove the extracted sub-directories this notebook does not read.
# NOTE(review): the absolute /content/... paths assume the archives were
# extracted with /content as the working directory (e.g. Colab) - confirm.
!rm -r /content/VOCdevkit/VOC2007/Annotations
!rm -r /content/VOCdevkit/VOC2007/ImageSets
!rm -r /content/VOCdevkit/VOC2007/SegmentationClass/
!rm -r /content/VOCdevkit/VOC2007/SegmentationObject

# Fetch three files from Google Drive by file id.
# NOTE(review): presumably these are the annotation list files read later
# (2007_train.txt, 2007_val.txt, ...) - verify what each id contains.
!gdown --id 1ZU2cim1iw-o3SWtf2XLxYOGJQv6GRC-v
!gdown --id 1HLhuo0Gw3fE_l320AwYYDcUVu9_jDQCq
!gdown --id 10ZYfoqBziUxvS4yBtJtUrxXEYxZFnZMk

In [3]:
def read(image_path, label):
    """Load one image and encode its boxes as a 7x7x30 YOLO target grid.

    Parameters
    ----------
    image_path : str
        Path of the image file, passed to ``cv2.imread``.
    label : iterable of str
        One ``"xmin,ymin,xmax,ymax,class_id"`` string per object, with
        pixel coordinates relative to the original (un-resized) image.

    Returns
    -------
    image : np.ndarray
        The image resized to 448x448 and scaled by 1/255, in the channel
        order returned by ``cv2.imread``.
    label_matrix : np.ndarray
        Array of shape (7, 7, 30). For a cell that contains an object:
        ``[0:20]`` one-hot class, ``[20:22]`` both confidences set to 1,
        ``[22:26]`` and ``[26:30]`` the same ``[x, y, w, h]`` box, where
        x/y are offsets inside the cell and w/h are fractions of the
        image size. Cells without an object stay all-zero.

    Raises
    ------
    FileNotFoundError
        If ``cv2.imread`` cannot load ``image_path`` (it returns ``None``
        instead of raising).
    """
    grid = 7

    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(
            "cv2.imread could not load image: {!r}".format(image_path)
        )
    image_h, image_w = image.shape[:2]
    image = cv2.resize(image, (448, 448))
    image = image / 255  # Normalization

    label_matrix = np.zeros((grid, grid, 30))
    for entry in label:
        fields = entry.split(',')
        xmin = int(fields[0])
        ymin = int(fields[1])
        xmax = int(fields[2])
        ymax = int(fields[3])
        c = int(fields[4])

        # Box centre and size as fractions of the original image.
        x = (xmin + xmax) / 2 / image_w
        y = (ymin + ymax) / 2 / image_h
        w = (xmax - xmin) / image_w
        h = (ymax - ymin) / image_h

        # Grid cell holding the centre. A centre lying exactly on the
        # right/bottom border maps to index 7, so clamp it into the last
        # cell instead of raising IndexError.
        col_pos = grid * x
        row_pos = grid * y
        loc_i = min(int(row_pos), grid - 1)
        loc_j = min(int(col_pos), grid - 1)
        y = row_pos - loc_i
        x = col_pos - loc_j

        # Only the first object assigned to a cell is kept. Occupancy is
        # read from the confidence slot (index 20); the box-width slot
        # (index 24) is 0 for a zero-width box and would let a later
        # object overwrite the cell and set a second class bit.
        if label_matrix[loc_i, loc_j, 20] == 0:
            label_matrix[loc_i, loc_j, c] = 1
            label_matrix[loc_i, loc_j, 20:22] = 1
            label_matrix[loc_i, loc_j, 22:26] = [x, y, w, h]
            label_matrix[loc_i, loc_j, 26:30] = [x, y, w, h]

    return image, label_matrix

In [4]:
class My_Genrator(keras.utils.Sequence):
    """Keras ``Sequence`` that loads and encodes one batch at a time.

    Parameters
    ----------
    images : sequence of str
        Image paths; entry ``i`` is passed to ``read`` as ``image_path``.
    labels : sequence
        Per-image label lists; entry ``i`` is passed to ``read`` as ``label``.
    batch_size : int
        Number of samples per batch (the final batch may be smaller).
    """

    def __init__(self, images, labels, batch_size):
        # Let the base class initialise whatever state it keeps.
        super().__init__()
        self.images = images
        self.labels = labels
        self.batch_size = batch_size

    def __len__(self):
        """Return the number of batches, counting a trailing partial batch."""
        # `np.int` was removed in NumPy 1.24; use the builtin int instead.
        return int(np.ceil(len(self.images) / float(self.batch_size)))

    def __getitem__(self, idx):
        """Return batch ``idx`` as ``(images, label_matrices)`` arrays."""
        start = idx * self.batch_size
        stop = (idx + 1) * self.batch_size
        batch_x = self.images[start:stop]
        batch_y = self.labels[start:stop]

        train_image = []
        train_label = []
        for i in range(len(batch_x)):
            image, label_matrix = read(batch_x[i], batch_y[i])
            train_image.append(image)
            train_label.append(label_matrix)
        return np.array(train_image), np.array(train_label)

In [5]:
# Parse the training annotation list. Each non-empty line is
# "<image_path> <box> <box> ...": the first whitespace-separated field goes
# to X_train and the remaining fields (possibly none) go to Y_train.
X_train = []
Y_train = []
with open('2007_train.txt') as f:
    for line in f:
        fields = line.split()
        if not fields:
            # A blank (e.g. trailing) line would otherwise add an empty path.
            continue
        X_train.append(fields[0])
        Y_train.append(fields[1:])

In [6]:
# Parse the validation annotation list. Each non-empty line is
# "<image_path> <box> <box> ...": the first whitespace-separated field goes
# to X_val and the remaining fields (possibly none) go to Y_val.
X_val = []
Y_val = []
with open('2007_val.txt') as f:
    for line in f:
        fields = line.split()
        if not fields:
            # A blank (e.g. trailing) line would otherwise add an empty path.
            continue
        X_val.append(fields[0])
        Y_val.append(fields[1:])

In [None]:
# Wrap the annotation lists in batch generators, then pull the first
# training batch and print its array shapes as a sanity check.
batch_size = 32
train_generator = My_Genrator(images=X_train, labels=Y_train, batch_size=batch_size)
val_generator = My_Genrator(images=X_val, labels=Y_val, batch_size=batch_size)
x_train, y_train = train_generator[0]

print(x_train.shape, y_train.shape, sep='\n')

In [8]:
class Yolo_Reshape(tf.keras.layers.Layer):
    """Split a flat prediction vector into an activated YOLO output grid.

    The flat input is read as three consecutive segments: class scores
    (S*S*C values), box confidences (S*S*B values), and box coordinates
    (the remainder, reshaped to S*S*4B). Each segment is reshaped to the
    grid, activated (softmax over classes, sigmoid for the other two) and
    the results are concatenated on the last axis.

    Parameters
    ----------
    target_shape : sequence of int
        Output grid shape; only the first two entries (grid rows and
        columns) are used by ``call``.
    num_classes : int, optional
        Number of classes C per cell. Defaults to 20.
    num_boxes : int, optional
        Number of boxes B per cell. Defaults to 2.
    **kwargs
        Forwarded to ``tf.keras.layers.Layer`` (e.g. ``name``).
    """

    def __init__(self, target_shape, num_classes=20, num_boxes=2, **kwargs):
        super().__init__(**kwargs)
        self.target_shape = tuple(target_shape)
        self.num_classes = num_classes
        self.num_boxes = num_boxes

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({
            "target_shape": self.target_shape,
            "num_classes": self.num_classes,
            "num_boxes": self.num_boxes,
        })
        return config

    def call(self, input):
        """Return a tensor of shape (batch, rows, cols, C + B + 4*B)."""
        # grid size (7x7 for the default target shape)
        rows, cols = self.target_shape[0], self.target_shape[1]
        # classes and bounding boxes per grid cell
        C = self.num_classes
        B = self.num_boxes

        # Segment boundaries inside the flat vector.
        idx1 = rows * cols * C
        idx2 = idx1 + rows * cols * B

        # class probabilities: softmax over the class axis of each cell
        class_probs = tf.reshape(input[:, :idx1], (-1, rows, cols, C))
        class_probs = tf.nn.softmax(class_probs)

        # confidence: one independent sigmoid per box
        confs = tf.reshape(input[:, idx1:idx2], (-1, rows, cols, B))
        confs = tf.sigmoid(confs)

        # boxes: four sigmoid-squashed values per box
        boxes = tf.reshape(input[:, idx2:], (-1, rows, cols, B * 4))
        boxes = tf.sigmoid(boxes)

        return tf.concat([class_probs, confs, boxes], axis=-1)

In [None]:
# YOLOv1-style detector: a convolutional backbone followed by dense layers
# that emit a flat 7*7*30 = 1470 vector, reshaped and activated by
# Yolo_Reshape.
inputs = Input(shape=(448,448,3))

# Stage 1: 448 -> 224 (stride-2 conv) -> 112 (pool)
x = Conv2D(64,7,2,padding='same',activation='relu')(inputs)
x = MaxPool2D()(x)

# Stage 2: 112 -> 56 (pool)
x = Conv2D(192,3,1,padding='same',activation='relu')(x)
x = MaxPool2D()(x)

# Stage 3: 56 -> 28 (pool)
x = Conv2D(128,1,1,padding='same',activation='relu')(x)
x = Conv2D(256,3,1,padding='same',activation='relu')(x)
x = Conv2D(256,1,1,padding='same',activation='relu')(x)
x = Conv2D(512,3,1,padding='same',activation='relu')(x)
x = MaxPool2D()(x)

# Stage 4: alternating 1x1 / 3x3 convs, 28 -> 14 (pool)
x = Conv2D(256,1,1,padding='same',activation='relu')(x)
x = Conv2D(512,3,1,padding='same',activation='relu')(x)
x = Conv2D(256,1,1,padding='same',activation='relu')(x)
x = Conv2D(512,3,1,padding='same',activation='relu')(x)
x = Conv2D(256,1,1,padding='same',activation='relu')(x)
x = Conv2D(512,3,1,padding='same',activation='relu')(x)
x = Conv2D(256,1,1,padding='same',activation='relu')(x)
x = Conv2D(512,3,1,padding='same',activation='relu')(x)
x = Conv2D(512,1,1,padding='same',activation='relu')(x)
x = Conv2D(1024,3,1,padding='same',activation='relu')(x)
x = MaxPool2D()(x)

# Stage 5: 14 -> 7 (final stride-2 conv)
x = Conv2D(512,1,1,padding='same',activation='relu')(x)
x = Conv2D(1024,3,1,padding='same',activation='relu')(x)
x = Conv2D(512,1,1,padding='same',activation='relu')(x)
x = Conv2D(1024,3,1,padding='same',activation='relu')(x)
x = Conv2D(1024,3,1,padding='same',activation='relu')(x)
x = Conv2D(1024,3,2,padding='same',activation='relu')(x)

# NOTE(review): these two convs use the default 'valid' padding, so the map
# shrinks 7 -> 5 -> 3 before Flatten. Confirm this is intended rather than
# padding='same' like every other conv above.
x = Conv2D(1024,3,1,activation='relu')(x)
x = Conv2D(1024,3,1,activation='relu')(x)

x = Flatten()(x)

x = Dense(512,activation='relu')(x)
x = Dense(1024,activation='relu')(x)

# Linear output: Yolo_Reshape applies the softmax/sigmoid activations itself.
# A sigmoid here would be squashed a second time, pinning confidences and box
# values to roughly (0.5, 0.73) and flattening the class softmax.
x = Dense(1470)(x)

outputs = Yolo_Reshape((7,7,30))(x)

model = Model(inputs,outputs)
model.summary()

In [10]:
def yolo_loss(y_true,y_pred):
    """Simplified YOLO training loss: class + confidence + box terms.

    Both tensors are sliced on the last axis as ``[0:20]`` class scores,
    ``[20:22]`` box confidences and ``[22:30]`` box coordinates.

    Each term uses the loss that matches how that slice is produced:

    * classes: categorical cross-entropy. An all-zero class target (a cell
      with no object) contributes zero to this term.
    * confidences: binary cross-entropy, because the two confidences are
      independent values in [0, 1], not a distribution over two categories.
      Cells whose target is [0, 0] are therefore penalised for predicting a
      high confidence.
    * boxes: mean squared error, weighted by the first true confidence so
      that only cells containing an object contribute.

    Parameters
    ----------
    y_true : tensor
        Ground-truth grid, last axis of size 30.
    y_pred : tensor
        Predicted grid with the same layout.

    Returns
    -------
    tensor
        Scalar sum of the three loss terms.
    """
    y_true = tf.cast(y_true, y_pred.dtype)

    true_class = y_true[...,:20]
    true_trust = y_true[...,20:22]
    true_box = y_true[...,22:30]

    pred_class = y_pred[...,:20]
    pred_trust = y_pred[...,20:22]
    pred_box = y_pred[...,22:30]

    # 1 for cells that contain an object, 0 otherwise.
    # assumes both confidence targets of a cell are set together - TODO confirm
    object_mask = true_trust[...,0]

    ce = tf.keras.losses.CategoricalCrossentropy()
    bce = tf.keras.losses.BinaryCrossentropy()
    mse = tf.keras.losses.MeanSquaredError()

    class_loss = ce(true_class,pred_class)
    trust_loss = bce(true_trust,pred_trust)
    # Without the mask, empty cells (all-zero box targets) would pull every
    # predicted box towards zero.
    box_loss = mse(true_box,pred_box,sample_weight=object_mask)

    return class_loss + trust_loss + box_loss


In [11]:
# Configure training: Adam with a learning rate of 0.001 and the custom loss.
model.compile(
    loss=yolo_loss,
    optimizer=Adam(learning_rate=0.001),
)

In [None]:
# Train for 5 epochs. The generators already yield whole batches, so
# `batch_size` must not be passed to fit() for this input type.
history = model.fit(x=train_generator,epochs=5,validation_data=val_generator)