In [None]:
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Reshape, LeakyReLU
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.regularizers import l2
from tensorflow.keras.layers import Layer
import tensorflow.keras.backend as K
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from matplotlib import pyplot as plt
import cv2
import math
import os
import numpy as np
import tensorflow as tf

# Display names of the three classes; the list index is the integer class id
# used when a detection is drawn (classes_name[class_id]).
classes_name=["human","static","ignore"]


def model_tiny_yolov1(inputs):
    """Build the frozen eight-convolution backbone followed by a dense head.

    Six conv / batch-norm / LeakyReLU(0.1) stages, each followed by 2x2 max
    pooling, are stacked with 4, 8, 32, 32, 64 and 128 filters; two more
    stages (256 and 128 filters) follow without pooling. All convolution and
    batch-norm layers are created with ``trainable=False``. The result is
    flattened and projected by a linear ``Dense(128)`` layer; no reshape to a
    grid is applied here.

    NOTE(review): a later cell defines another ``model_tiny_yolov1`` that
    replaces this one once it has been executed.

    Args:
        inputs: Keras tensor fed to the first convolution.

    Returns:
        Output tensor of the ``connected_0`` dense layer.
    """
    def conv_bn_leaky(tensor, index, filters):
        # One frozen 3x3 convolution block; layer names carry the block index.
        tensor = Conv2D(filters, (3, 3), padding='same', name='convolutional_%d' % index,
                        use_bias=False, kernel_regularizer=l2(5e-4), trainable=False)(tensor)
        tensor = BatchNormalization(name='bnconvolutional_%d' % index, trainable=False)(tensor)
        return LeakyReLU(alpha=0.1)(tensor)

    features = inputs

    # Blocks 0-5 each halve the spatial resolution.
    for index, filters in enumerate((4, 8, 32, 32, 64, 128)):
        features = conv_bn_leaky(features, index, filters)
        features = MaxPooling2D((2, 2), strides=(2, 2), padding='same')(features)

    # Blocks 6-7 keep the resolution.
    for index, filters in enumerate((256, 128), start=6):
        features = conv_bn_leaky(features, index, filters)

    features = Flatten()(features)
    return Dense(128, activation='linear', name='connected_0')(features)

In [None]:
def model_tiny_yolov1(inputs):
    """Build the small trainable network that outputs a 4x4x8 grid.

    Two groups of (conv, conv, LeakyReLU(0.3), 2x2 max-pool) are followed by a
    flatten, a linear ``Dense(128)`` and a reshape to ``(4, 4, 8)``. The
    convolutions use the Keras default padding and no activation of their own.

    Args:
        inputs: Keras tensor fed to the first convolution.

    Returns:
        Tensor of shape ``(batch, 4, 4, 8)`` holding raw (un-activated) values.
    """
    # Layers are created in the order in which they are applied.
    layer_stack = [
        Conv2D(16, (1, 1)),
        Conv2D(32, (3, 3)),
        tf.keras.layers.LeakyReLU(alpha=0.3),
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(16, (3, 3)),
        Conv2D(32, (3, 3)),
        tf.keras.layers.LeakyReLU(alpha=0.3),
        MaxPooling2D(pool_size=(2, 2)),
        Flatten(),
        Dense(128, activation='linear'),
        Reshape((4,4,8)),
    ]

    tensor = inputs
    for layer in layer_stack:
        tensor = layer(tensor)

    return tensor

In [None]:
# Build the Keras model for 128x128 three-channel inputs.
# input_shape includes the batch dimension; Input() only needs the per-sample
# shape, hence the [1:4] slice.
input_shape = (1, 128, 128, 3)

inputs = Input(input_shape[1:4])

# model_tiny_yolov1 is called exactly once: a second call would create a
# duplicate, unused set of layers on the same input tensor.
outputs = model_tiny_yolov1(inputs)
model = Model(inputs=inputs, outputs=outputs)

model.summary()

In [None]:
def label_img(img_path,label_path):
    """Load images and build box-first label grids.

    Every regular file in ``img_path`` that OpenCV can decode is resized to
    128x128 and paired with the label file of the same base name in
    ``label_path`` (``<name>.txt``). Each non-blank label line has the form
    ``<class> <x> <y> <w> <h>``; the four numbers are copied unchanged into one
    cell of a 4x4x8 grid as
    ``[x, y, w, h, 1, is_class0, is_class1, is_class2]``.
    The cell is ``[ceil(4*x) - 1, ceil(4*y) - 1]``, clamped to the grid. A
    later line that maps to the same cell overwrites the earlier one.

    Args:
        img_path: Directory containing the image files.
        label_path: Directory containing one ``.txt`` label file per image.

    Returns:
        Tuple ``(IMAGES, LABELS)``: a list of resized images and a list of
        ``(4, 4, 8)`` float arrays, in matching (sorted file name) order.

    Raises:
        FileNotFoundError: If a decodable image has no label file.
    """
    grid = 4
    # Label-file class id -> channel holding its one-hot flag.
    class_slots = {"0": 5, "1": 6, "2": 7}

    def cell_index(value):
        # ceil(grid * v) - 1 maps (0, 1] onto 0..grid-1; clamp so that v == 0
        # lands in cell 0 instead of wrapping around to index -1.
        return min(max(math.ceil(grid * value) - 1, 0), grid - 1)

    IMAGES = []
    LABELS = []

    # os.listdir returns entries in arbitrary order; sort for reproducibility.
    for image_name in sorted(os.listdir(img_path)):
        image_file = os.path.join(img_path, image_name)

        # Skip sub-directories such as .ipynb_checkpoints.
        if not os.path.isfile(image_file):
            continue

        img = cv2.imread(image_file)
        # cv2.imread returns None for files it cannot decode.
        if img is None:
            continue

        # splitext keeps dotted base names intact ("a.b.jpg" -> "a.b.txt").
        label_file = os.path.join(label_path, os.path.splitext(image_name)[0] + ".txt")
        with open(label_file) as file:
            rows = [line.split() for line in file]

        G_T = np.zeros((grid, grid, 8))

        for fields in rows:
            if not fields:
                # Blank line (for example a trailing newline at end of file).
                continue

            x, y, w, h = (float(value) for value in fields[1:5])

            g_t = [x, y, w, h, 1.0, 0.0, 0.0, 0.0]
            if fields[0] in class_slots:
                g_t[class_slots[fields[0]]] = 1.0

            G_T[cell_index(x), cell_index(y)] = g_t

        IMAGES.append(cv2.resize(img,(128,128)))
        LABELS.append(G_T)

    return IMAGES, LABELS

In [None]:
def new_label_img(img_path,label_path):
    """Load images and build class-first label grids with cell-relative centres.

    Every regular file in ``img_path`` that OpenCV can decode is resized to
    128x128 and paired with ``<name>.txt`` in ``label_path``. Each non-blank
    label line has the form ``<class> <x> <y> <w> <h>`` and fills one cell of a
    4x4x8 grid with
    ``[is_class0, is_class1, is_class2, x_off, y_off, w, h, 1]``,
    where the cell is ``[row, col] = [int(4*y), int(4*x)]`` (clamped to the
    grid) and ``x_off = 4*x - col``, ``y_off = 4*y - row``. A later line that
    maps to the same cell overwrites the earlier one.

    Args:
        img_path: Directory containing the image files.
        label_path: Directory containing one ``.txt`` label file per image.

    Returns:
        Tuple ``(IMAGES, LABELS)``: a list of resized images and a list of
        ``(4, 4, 8)`` float arrays, in matching (sorted file name) order.

    Raises:
        FileNotFoundError: If a decodable image has no label file.
    """
    grid = 4
    known_classes = ("0", "1", "2")

    IMAGES = []
    LABELS = []

    # os.listdir returns entries in arbitrary order; sort for reproducibility.
    for image_name in sorted(os.listdir(img_path)):
        image_file = os.path.join(img_path, image_name)

        # Skip sub-directories such as .ipynb_checkpoints.
        if not os.path.isfile(image_file):
            continue

        img = cv2.imread(image_file)
        # cv2.imread returns None for files it cannot decode.
        if img is None:
            continue

        # splitext keeps dotted base names intact ("a.b.jpg" -> "a.b.txt").
        label_file = os.path.join(label_path, os.path.splitext(image_name)[0] + ".txt")
        with open(label_file) as file:
            rows = [line.split() for line in file]

        G_T = np.zeros((grid, grid, 8))

        for fields in rows:
            if not fields:
                # Blank line (for example a trailing newline at end of file).
                continue

            x, y, w, h = (float(value) for value in fields[1:5])

            # Clamp so that a coordinate of exactly 1.0 stays in the last cell.
            loc_j = min(max(int(grid * x), 0), grid - 1)
            loc_i = min(max(int(grid * y), 0), grid - 1)

            g_t = [0.0] * 8
            if fields[0] in known_classes:
                g_t[int(fields[0])] = 1.0
            g_t[3] = grid * x - loc_j
            g_t[4] = grid * y - loc_i
            g_t[5] = w
            g_t[6] = h
            g_t[7] = 1.0

            # Store in the cell the offsets are relative to (row = y, col = x).
            G_T[loc_i, loc_j] = g_t

        IMAGES.append(cv2.resize(img,(128,128)))
        LABELS.append(G_T)

    return IMAGES, LABELS

In [None]:
import tensorflow.keras.backend as K
import tensorflow as tf

def xywh2minmax(xy, wh):
    """Convert centre/size boxes to corner form.

    Args:
        xy: Box centres.
        wh: Box sizes, broadcastable against ``xy``.

    Returns:
        Tuple ``(xy_min, xy_max)`` = ``(xy - wh / 2, xy + wh / 2)``.
    """
    half_extent = wh / 2
    return xy - half_extent, xy + half_extent

def iou(pred_mins, pred_maxes, true_mins, true_maxes):
    """Intersection-over-union of boxes given by their corner coordinates.

    The last axis of every argument holds the two box coordinates; all leading
    axes are broadcast against each other.

    Args:
        pred_mins: Minimum corners of the predicted boxes.
        pred_maxes: Maximum corners of the predicted boxes.
        true_mins: Minimum corners of the reference boxes.
        true_maxes: Maximum corners of the reference boxes.

    Returns:
        IoU scores with the last axis removed. Pairs whose union area is zero
        score 0 instead of NaN.
    """
    # Negative extents mean the boxes do not overlap; clip them to zero.
    overlap_wh = K.maximum(K.minimum(pred_maxes, true_maxes) - K.maximum(pred_mins, true_mins), 0.)
    intersect_areas = overlap_wh[..., 0] * overlap_wh[..., 1]

    pred_wh = pred_maxes - pred_mins
    true_wh = true_maxes - true_mins
    pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
    true_areas = true_wh[..., 0] * true_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas

    # Two zero-area boxes would give 0 / 0 = NaN; flooring the denominator at
    # epsilon turns that into an IoU of 0.
    return intersect_areas / K.maximum(union_areas, K.epsilon())

def my_loss_fn(y_true, y_pred):
    """Sum-of-squares detection loss for the box-first channel layout.

    Both tensors carry 8 channels per grid cell, read as
    ``[x, y, w, h, confidence, class0, class1, class2]``. ``y_pred`` holds raw
    network outputs: box and confidence channels go through a sigmoid, class
    channels through a softmax. The network predicts a single box per cell, so
    that box is always the one responsible for the cell's object and no
    IoU-based box selection is needed.

    Args:
        y_true: Label tensor whose last axis has 8 channels.
        y_pred: Raw prediction tensor with the same shape as ``y_true``.

    Returns:
        Scalar: box loss (weight 5) + confidence loss (weight 0.5 on cells
        without an object) + class loss, summed over all cells and samples.
    """
    box_pred = K.sigmoid(y_pred[..., :4])
    predict_trust = K.sigmoid(y_pred[..., 4:5])
    class_pred = K.softmax(y_pred[..., 5:8])

    box_true = y_true[..., :4]
    class_true = y_true[..., 5:8]

    # The label's confidence channel is 1 in cells that contain an object.
    # Slicing with 4:5 keeps a trailing axis of size 1 so the mask broadcasts
    # per cell over the channel axis of the other terms.
    response_mask = y_true[..., 4:5]

    no_object_loss = 0.5 * (1 - response_mask) * K.square(0 - predict_trust)
    object_loss = response_mask * K.square(1 - predict_trust)
    confidence_loss = K.sum(no_object_loss + object_loss)

    class_loss = K.sum(response_mask * K.square(class_true - class_pred))

    # Box terms are compared on the 128-pixel image scale.
    label_xy = 128*box_true[..., :2]
    label_wh = 128*box_true[..., 2:4]
    predict_xy = 128*box_pred[..., :2]
    predict_wh = 128*box_pred[..., 2:4]

    box_loss = 5 * response_mask * K.square((label_xy - predict_xy) / 128)
    box_loss += 5 * response_mask * K.square((K.sqrt(label_wh) - K.sqrt(predict_wh)) / 128)
    box_loss = K.sum(box_loss)

    return box_loss + confidence_loss + class_loss

In [None]:
# Inspect the first two channels of the first label grid.
# NOTE(review): LABELS is only assigned in later cells, so this cell fails on a
# fresh top-to-bottom run.
LABELS[0][..., :2]

In [None]:
import tensorflow.keras.backend as K
import tensorflow as tf

def xywh2minmax(xy, wh):
    """Convert centre/size boxes to corner form.

    Args:
        xy: Box centres.
        wh: Box sizes, broadcastable against ``xy``.

    Returns:
        Tuple ``(xy_min, xy_max)`` = ``(xy - wh / 2, xy + wh / 2)``.
    """
    half_extent = wh / 2
    return xy - half_extent, xy + half_extent

def iou(pred_mins, pred_maxes, true_mins, true_maxes):
    """Intersection-over-union of boxes given by their corner coordinates.

    The last axis of every argument holds the two box coordinates; all leading
    axes are broadcast against each other.

    Args:
        pred_mins: Minimum corners of the predicted boxes.
        pred_maxes: Maximum corners of the predicted boxes.
        true_mins: Minimum corners of the reference boxes.
        true_maxes: Maximum corners of the reference boxes.

    Returns:
        IoU scores with the last axis removed. Pairs whose union area is zero
        score 0 instead of NaN.
    """
    # Negative extents mean the boxes do not overlap; clip them to zero.
    overlap_wh = K.maximum(K.minimum(pred_maxes, true_maxes) - K.maximum(pred_mins, true_mins), 0.)
    intersect_areas = overlap_wh[..., 0] * overlap_wh[..., 1]

    pred_wh = pred_maxes - pred_mins
    true_wh = true_maxes - true_mins
    pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
    true_areas = true_wh[..., 0] * true_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas

    # Two zero-area boxes would give 0 / 0 = NaN; flooring the denominator at
    # epsilon turns that into an IoU of 0.
    return intersect_areas / K.maximum(union_areas, K.epsilon())

def my_loss_fn(y_true, y_pred):
    """Sum-of-squares detection loss for the class-first channel layout.

    Both tensors carry 8 channels per grid cell, read as
    ``[class0, class1, class2, x, y, w, h, confidence]``. ``y_pred`` holds raw
    network outputs: box and confidence channels go through a sigmoid, class
    channels through a softmax. The network predicts a single box per cell, so
    that box is always the one responsible for the cell's object and no
    IoU-based box selection is needed.

    Args:
        y_true: Label tensor whose last axis has 8 channels.
        y_pred: Raw prediction tensor with the same shape as ``y_true``.

    Returns:
        Scalar: box loss (weight 5) + confidence loss (weight 0.5 on cells
        without an object) + class loss, summed over all cells and samples.
    """
    class_pred = K.softmax(y_pred[..., :3])
    box_pred = K.sigmoid(y_pred[..., 3:7])
    predict_trust = K.sigmoid(y_pred[..., 7:8])

    class_true = y_true[..., :3]
    box_true = y_true[..., 3:7]

    # The label's confidence channel is 1 in cells that contain an object.
    # Slicing with 7:8 keeps a trailing axis of size 1 so the mask broadcasts
    # per cell over the channel axis of the other terms.
    response_mask = y_true[..., 7:8]

    no_object_loss = 0.5 * (1 - response_mask) * K.square(0 - predict_trust)
    object_loss = response_mask * K.square(1 - predict_trust)
    confidence_loss = K.sum(no_object_loss + object_loss)

    class_loss = K.sum(response_mask * K.square(class_true - class_pred))

    # box_true / box_pred are already the four box channels, so xy is [:2] and
    # wh is [2:4] within them. Box terms are compared on the 128-pixel scale.
    label_xy = 128*box_true[..., :2]
    label_wh = 128*box_true[..., 2:4]
    predict_xy = 128*box_pred[..., :2]
    predict_wh = 128*box_pred[..., 2:4]

    box_loss = 5 * response_mask * K.square((label_xy - predict_xy) / 128)
    box_loss += 5 * response_mask * K.square((K.sqrt(label_wh) - K.sqrt(predict_wh)) / 128)
    box_loss = K.sum(box_loss)

    return box_loss + confidence_loss + class_loss

In [None]:
# Inspect channel 7 of the first label grid, keeping the channel axis.
# NOTE(review): LABELS is only assigned in later cells.
LABELS[0][..., 7:8]

In [None]:
# Inspect channel 7 of the first label grid, with the channel axis dropped.
# NOTE(review): LABELS is only assigned in later cells.
LABELS[0][...,7]

In [None]:
# Load the dataset from ./images and ./labels with label_img (box-first label layout).
IMAGES, LABELS = label_img("images","labels")

In [None]:
# Inspect the first grid row of the first label grid.
# (The original expression had an unterminated subscript, which is a SyntaxError.)
LABELS[0][0]

In [None]:
# Scratch cell: encode a single box into an empty 4x4x8 label grid using the
# class-first layout [class flags (0..2), x, y, w, h, response].
# NOTE(review): x, y, w, h and cls are not assigned in this cell (the example
# values below are commented out), so it only runs if they are left over from
# another cell. The commented example uses cls = 6, which would write into
# channel 6 of this 8-channel grid (later overwritten by h) - confirm before reuse.
label_matrix = np.zeros([4, 4, 8])
#for l in label:
  
#xmin = 156
#ymin = 97
#xmax = 351
#ymax = 270
#cls = 6
#x = (xmin + xmax) / 2 / 500
#y = (ymin + ymax) / 2 /333
#w = (xmax - xmin) / 500
#h = (ymax - ymin) / 333
#print(x,y)
#print(w,h)
# Grid cell containing the box centre: row from y, column from x.
loc = [4 * x, 4 * y]
loc_i = int(loc[1])
loc_j = int(loc[0])
# Replace x and y by their offsets inside that cell.
y = loc[1] - loc_i
x = loc[0] - loc_j
print(x,y)
print(w,h)

# Only fill the cell if no earlier box has claimed it.
if label_matrix[loc_i, loc_j, 7] == 0:
   label_matrix[loc_i, loc_j, cls] = 1
   label_matrix[loc_i, loc_j, 3:7] = [x, y, w, h]
   label_matrix[loc_i, loc_j, 7] = 1  # response

In [None]:
def original_img(img_path,label_path):
    """Load images and build class-first label grids (first box per cell wins).

    Every regular file in ``img_path`` that OpenCV can decode is resized to
    128x128 and paired with ``<name>.txt`` in ``label_path``. Each non-blank
    label line has the form ``<class> <x> <y> <w> <h>`` and fills one cell of a
    4x4x8 grid with
    ``[is_class0, is_class1, is_class2, x_off, y_off, w, h, 1]``,
    where the cell is ``[row, col] = [int(4*y), int(4*x)]`` (clamped to the
    grid) and ``x_off = 4*x - col``, ``y_off = 4*y - row``. Once a cell holds a
    box, later lines that map to the same cell are ignored. A class id other
    than "0", "1" or "2" leaves all three class flags at zero.

    Args:
        img_path: Directory containing the image files.
        label_path: Directory containing one ``.txt`` label file per image.

    Returns:
        Tuple ``(IMAGES, LABELS)``: a list of resized images and a list of
        ``(4, 4, 8)`` float arrays, in matching (sorted file name) order.

    Raises:
        FileNotFoundError: If a decodable image has no label file.
    """
    grid = 4
    # Label-file class id -> channel holding its one-hot flag.
    class_ids = {"0": 0, "1": 1, "2": 2}

    IMAGES = []
    LABELS = []

    # os.listdir returns entries in arbitrary order; sort for reproducibility.
    for image_name in sorted(os.listdir(img_path)):
        image_file = os.path.join(img_path, image_name)

        # Skip sub-directories such as .ipynb_checkpoints.
        if not os.path.isfile(image_file):
            continue

        img = cv2.imread(image_file)
        # cv2.imread returns None for files it cannot decode.
        if img is None:
            continue

        # splitext keeps dotted base names intact ("a.b.jpg" -> "a.b.txt").
        label_file = os.path.join(label_path, os.path.splitext(image_name)[0] + ".txt")
        with open(label_file) as file:
            rows = [line.split() for line in file]

        label_matrix = np.zeros([grid, grid, 8])

        for fields in rows:
            if not fields:
                # Blank line (for example a trailing newline at end of file).
                continue

            x, y, w, h = (float(value) for value in fields[1:5])

            # Clamp so that a coordinate of exactly 1.0 stays in the last cell.
            loc_j = min(max(int(grid * x), 0), grid - 1)
            loc_i = min(max(int(grid * y), 0), grid - 1)

            if label_matrix[loc_i, loc_j, 7] == 0:
                # Look the class up per line so an unknown id can never reuse
                # the class of a previous line.
                cls = class_ids.get(fields[0])
                if cls is not None:
                    label_matrix[loc_i, loc_j, cls] = 1
                label_matrix[loc_i, loc_j, 3:7] = [grid * x - loc_j, grid * y - loc_i, w, h]
                label_matrix[loc_i, loc_j, 7] = 1  # response

        IMAGES.append(cv2.resize(img,(128,128)))
        LABELS.append(label_matrix)

    return IMAGES, LABELS

In [None]:
# Load the dataset from ./images and ./labels with original_img (class-first
# layout with cell-relative centres); this rebinds IMAGES and LABELS.
IMAGES, LABELS = original_img("images","labels")

In [None]:
# Inspect the first two channels of the first label grid.
LABELS[0][..., :2]

In [None]:
# Sanity check: corner coordinates for a box with centre (0.26, 0.47) and
# size (0.17, 0.17), both scaled by 128.
xywh2minmax(128*np.array([0.26,0.47]),128*np.array([0.17,0.17]))

In [None]:
# Draw one hand-entered rectangle on a copy of the first image and show it.
# NOTE(review): the corner values presumably come from the xywh2minmax check
# in the previous cell - confirm.
image = np.array(IMAGES[0])

start_point = (22, 49)
end_point = ( 44 , 71)
color = (255, 0, 0)
thickness = 2

image = cv2.rectangle(image, start_point, end_point, color, thickness)

plt.imshow(image)

In [None]:
# Convert the Python lists to float32 tensors; pixel values are divided by 255.
# NOTE(review): this rebinds IMAGES and LABELS in place, so running the cell a
# second time divides the pixel values by 255 again.
IMAGES = tf.cast(np.array(IMAGES)/255.,dtype="float32")
LABELS = tf.cast(LABELS,dtype="float32")

In [None]:
# Show the shape of the label tensor.
LABELS.shape

In [None]:
# Compile with Adam and my_loss_fn.
# NOTE(review): my_loss_fn is defined in two cells with different channel
# layouts; whichever cell ran last is the one used here.
model.compile(optimizer='adam', loss=my_loss_fn)

In [None]:
# Alternative: compile with Adam and yolo_loss (replaces the previous compile).
# NOTE(review): yolo_loss is defined in a later cell, so this fails on a fresh
# top-to-bottom run.
model.compile(optimizer='adam', loss=yolo_loss)

In [None]:
# Train for 30 epochs with one image per batch.
model.fit(x=IMAGES, y=LABELS, batch_size=1, epochs=30)

In [None]:
def predict_img(image):
    """Run the global ``model`` and activate its box-first output.

    The raw output is read as ``[x, y, w, h, confidence, class0..2]``: the
    first five channels go through a sigmoid, the last three through a
    softmax.

    Args:
        image: Batch of images passed to ``model.predict``.

    Returns:
        Tensor with the same channel order as the raw output:
        ``[box (4), confidence (1), class probabilities (3)]``.
    """
    raw = model.predict(image)

    boxes = K.sigmoid(raw[..., :4])
    confidence = K.sigmoid(raw[..., 4:5])
    class_probs = K.softmax(raw[..., 5:8])

    # Re-assemble along the channel axis.
    return K.concatenate((boxes, confidence, class_probs), axis=3)

In [None]:
# Softmax over output channels 0..2 for the first image.
K.softmax(model.predict(IMAGES[0:1])[..., :3])

In [None]:
# Sigmoid over output channels 3..7 for the first image.
K.sigmoid(model.predict(IMAGES[0:1])[..., 3:8])

In [None]:
# Show the full label grid of the first image for comparison.
LABELS[0]

In [None]:
def predict_img_new(image):
    """Run the global ``model`` and activate its class-first output.

    The raw output is read as ``[class0..2, x, y, w, h, confidence]``: class
    channels go through a softmax, box and confidence channels through a
    sigmoid.

    Args:
        image: Batch of images passed to ``model.predict``.

    Returns:
        Tensor whose channels are re-ordered to
        ``[box (4), confidence (1), class probabilities (3)]``, i.e. NOT the
        channel order of the raw output.
    """
    raw = model.predict(image)

    class_probs = K.softmax(raw[..., :3])
    boxes = K.sigmoid(raw[..., 3:7])
    confidence = K.sigmoid(raw[..., 7:8])

    # Box-first order on the channel axis.
    return K.concatenate((boxes, confidence, class_probs), axis=3)

In [None]:
# Activated prediction for the first image.
predict_img_new(IMAGES[0:1])

In [None]:
# Decode the detections of a single image (index 100) into masked boxes.
prediction = predict_img_new(IMAGES[100:101])

# predict_img_new returns channels as [x, y, w, h, confidence, class0..2],
# so the slices below use the box-first positions.
predict_class = prediction[..., 5:8]  # 1 * 4 * 4 * 3
predict_trust = prediction[..., 4:5]  # 1 * 4 * 4 * 1
predict_box = prediction[..., :4]  # 1 * 4 * 4 * 4

# Drop the batch axis and insert a boxes-per-cell axis of size 1.
predict_class = np.reshape(predict_class, [4, 4, 1, 3])
predict_trust = np.reshape(predict_trust, [4, 4, 1, 1])
predict_box = np.reshape(predict_box, [4, 4, 1, 4])

# Class-specific confidence = class probability * box confidence.
predict_scores = predict_class * predict_trust  # 4 * 4 * 1 * 3
#print(predict_scores)

box_classes = np.argmax(predict_scores, axis=-1)  # 4 * 4 * 1
box_class_scores = np.max(predict_scores, axis=-1)  # 4 * 4 * 1
best_box_class_scores = np.max(box_class_scores, axis=-1, keepdims=True)  # 4 * 4 * 1

box_mask = box_class_scores >= best_box_class_scores  # 4 * 4 * 1

# Keep only boxes whose best class score reaches the 0.1 threshold.
filter_mask = box_class_scores >= 0.1  # 4 * 4 * 1
filter_mask *= box_mask  # 4 * 4 * 1

filter_mask = np.expand_dims(filter_mask, axis=-1)  # 4 * 4 * 1 * 1


predict_scores *= filter_mask  # 4 * 4 * 1 * 3
pred = predict_box.copy()
pred *= filter_mask  # 4 * 4 * 1 * 4

box_classes = np.expand_dims(box_classes, axis=-1)
box_classes *= filter_mask  # 4 * 4 * 1 * 1

#box_xy, box_wh = yolo_head(predict_box)
# Box values are scaled by 128 (the image size) before conversion to corners.
box_xy_min, box_xy_max = xywh2minmax(128*pred[..., :2],128*pred[..., 2:4])  # 4 * 4 * 1 * 2

In [None]:
# Decode the detections of a single image (index 50) into masked boxes.
# predict_img returns channels as [x, y, w, h, confidence, class0..2].
prediction = predict_img(IMAGES[50:51])

predict_class = prediction[..., 5:8]  # 1 * 4 * 4 * 3
predict_trust = prediction[..., 4:5]  # 1 * 4 * 4 * 1
predict_box = prediction[..., :4]  # 1 * 4 * 4 * 4

# Drop the batch axis and insert a boxes-per-cell axis of size 1.
predict_class = np.reshape(predict_class, [4, 4, 1, 3])
predict_trust = np.reshape(predict_trust, [4, 4, 1, 1])
predict_box = np.reshape(predict_box, [4, 4, 1, 4])

# Class-specific confidence = class probability * box confidence.
predict_scores = predict_class * predict_trust  # 4 * 4 * 1 * 3
#print(predict_scores)

box_classes = np.argmax(predict_scores, axis=-1)  # 4 * 4 * 1
box_class_scores = np.max(predict_scores, axis=-1)  # 4 * 4 * 1
best_box_class_scores = np.max(box_class_scores, axis=-1, keepdims=True)  # 4 * 4 * 1

box_mask = box_class_scores >= best_box_class_scores  # 4 * 4 * 1

# Keep only boxes whose best class score reaches the 0.1 threshold.
filter_mask = box_class_scores >= 0.1  # 4 * 4 * 1
filter_mask *= box_mask  # 4 * 4 * 1

filter_mask = np.expand_dims(filter_mask, axis=-1)  # 4 * 4 * 1 * 1


predict_scores *= filter_mask  # 4 * 4 * 1 * 3
pred = predict_box.copy()
pred *= filter_mask  # 4 * 4 * 1 * 4

box_classes = np.expand_dims(box_classes, axis=-1)
box_classes *= filter_mask  # 4 * 4 * 1 * 1

#box_xy, box_wh = yolo_head(predict_box)
# Box values are scaled by 128 before conversion to corners.
box_xy_min, box_xy_max = xywh2minmax(128*pred[..., :2],128*pred[..., 2:4])  # 4 * 4 * 1 * 2

In [None]:
# Greedy non-maximum suppression over the boxes that passed the score filter.
# Uses predict_trust, filter_mask, box_xy_min and box_xy_max from the decode cell;
# filter_mask is consumed (cleared) in the process.
pred_t = predict_trust.copy()
pred_t *= filter_mask  # 4 * 4 * 1 * 1, confidence of the surviving boxes only
nms_mask = np.zeros_like(filter_mask)  # 4 * 4 * 1 * 1, marks the boxes kept by NMS
# Start from the most confident box among those that passed the filter
# (pred_t), not among all boxes.
predict_trust_max = np.max(pred_t)
max_i = max_j = max_k = 0
while predict_trust_max > 0:
    # Keep the box(es) holding the current maximum and retire them from the pool.
    for i in range(nms_mask.shape[0]):
        for j in range(nms_mask.shape[1]):
            for k in range(nms_mask.shape[2]):
                if pred_t[i, j, k, 0] == predict_trust_max:
                    nms_mask[i, j, k, 0] = 1
                    filter_mask[i, j, k, 0] = 0
                    max_i = i
                    max_j = j
                    max_k = k
    # Drop every remaining box that overlaps the kept box by more than 0.2 IoU.
    for i in range(nms_mask.shape[0]):
        for j in range(nms_mask.shape[1]):
            for k in range(nms_mask.shape[2]):
                if filter_mask[i, j, k, 0] == 1:
                    iou_score = iou(box_xy_min[max_i, max_j, max_k, :],
                                    box_xy_max[max_i, max_j, max_k, :],
                                    box_xy_min[i, j, k, :],
                                    box_xy_max[i, j, k, :])
                    if iou_score > 0.2:
                        filter_mask[i, j, k, 0] = 0
    pred_t *= filter_mask  # 4 * 4 * 1 * 1
    predict_trust_max = np.max(pred_t)  # highest confidence among the remaining boxes

# Zero out the corners of all boxes that were not kept.
box_xy_min *= nms_mask
box_xy_max *= nms_mask

In [None]:
# NumPy copy of image 100 to draw the detections on.
image = np.array(IMAGES[100])

In [None]:
# Draw every box kept by NMS on `image`, with its class name at the box's
# minimum corner. Uses nms_mask, box_xy_min, box_xy_max and box_classes from
# the decode and NMS cells.
detect_shape = filter_mask.shape

for i in range(detect_shape[0]):
    for j in range(detect_shape[1]):
        for k in range(detect_shape[2]):
            if nms_mask[i, j, k, 0]:
                cv2.rectangle(image, (int(box_xy_min[i, j, k, 0]), int(box_xy_min[i, j, k, 1])),
                                (int(box_xy_max[i, j, k, 0]), int(box_xy_max[i, j, k, 1])),(0, 0, 255))
                # Positional arguments 1, 1 are the font face and the font scale.
                cv2.putText(image, classes_name[box_classes[i, j, k, 0]],
                            (int(box_xy_min[i, j, k, 0]), int(box_xy_min[i, j, k, 1])),1, 1, (0, 0, 255))

In [None]:
# Activated predictions for the whole dataset (box-first channel order).
all_prediction = predict_img(IMAGES)
    

In [None]:
# Decode, filter, suppress and draw the detections of (at most) the first 250
# images, one matplotlib figure per image.
num_images = min(250, int(all_prediction.shape[0]))

# The image index has its own name: the grid loops below use i, j and k, and
# reusing i for the image index would make IMAGES[i] read the wrong image.
for img_idx in range(num_images):

    prediction = all_prediction[img_idx]

    # all_prediction comes from predict_img: [x, y, w, h, confidence, class0..2].
    predict_class = prediction[..., 5:8]  # 4 * 4 * 3
    predict_trust = prediction[..., 4:5]  # 4 * 4 * 1
    predict_box = prediction[..., :4]  # 4 * 4 * 4

    # Insert a boxes-per-cell axis of size 1.
    predict_class = np.reshape(predict_class, [4, 4, 1, 3])
    predict_trust = np.reshape(predict_trust, [4, 4, 1, 1])
    predict_box = np.reshape(predict_box, [4, 4, 1, 4])

    # Class-specific confidence = class probability * box confidence.
    predict_scores = predict_class * predict_trust  # 4 * 4 * 1 * 3
    #print(predict_scores)

    box_classes = np.argmax(predict_scores, axis=-1)  # 4 * 4 * 1
    box_class_scores = np.max(predict_scores, axis=-1)  # 4 * 4 * 1
    best_box_class_scores = np.max(box_class_scores, axis=-1, keepdims=True)  # 4 * 4 * 1

    box_mask = box_class_scores >= best_box_class_scores  # 4 * 4 * 1

    # Keep only boxes whose best class score reaches the 0.1 threshold.
    filter_mask = box_class_scores >= 0.1  # 4 * 4 * 1
    filter_mask *= box_mask  # 4 * 4 * 1

    filter_mask = np.expand_dims(filter_mask, axis=-1)  # 4 * 4 * 1 * 1


    predict_scores *= filter_mask  # 4 * 4 * 1 * 3
    pred = predict_box.copy()
    pred *= filter_mask  # 4 * 4 * 1 * 4

    box_classes = np.expand_dims(box_classes, axis=-1)
    box_classes *= filter_mask  # 4 * 4 * 1 * 1

    #box_xy, box_wh = yolo_head(predict_box)
    box_xy_min, box_xy_max = xywh2minmax(128*pred[..., :2],128*pred[..., 2:4])  # 4 * 4 * 1 * 2

    # Greedy non-maximum suppression over the boxes that passed the filter.
    pred_t = predict_trust.copy()
    pred_t *= filter_mask  # 4 * 4 * 1 * 1
    nms_mask = np.zeros_like(filter_mask)  # 4 * 4 * 1 * 1
    # Start from the most confident box among those that passed the filter.
    predict_trust_max = np.max(pred_t)
    max_i = max_j = max_k = 0
    while predict_trust_max > 0:
        # Keep the box(es) holding the current maximum and retire them.
        for i in range(nms_mask.shape[0]):
            for j in range(nms_mask.shape[1]):
                for k in range(nms_mask.shape[2]):
                    if pred_t[i, j, k, 0] == predict_trust_max:
                        nms_mask[i, j, k, 0] = 1
                        filter_mask[i, j, k, 0] = 0
                        max_i = i
                        max_j = j
                        max_k = k
        # Drop remaining boxes that overlap the kept box by more than 0.2 IoU.
        for i in range(nms_mask.shape[0]):
            for j in range(nms_mask.shape[1]):
                for k in range(nms_mask.shape[2]):
                    if filter_mask[i, j, k, 0] == 1:
                        iou_score = iou(box_xy_min[max_i, max_j, max_k, :],
                                        box_xy_max[max_i, max_j, max_k, :],
                                        box_xy_min[i, j, k, :],
                                        box_xy_max[i, j, k, :])
                        if iou_score > 0.2:
                            filter_mask[i, j, k, 0] = 0
        pred_t *= filter_mask  # 4 * 4 * 1 * 1
        predict_trust_max = np.max(pred_t)  # highest confidence among the remaining boxes

    box_xy_min *= nms_mask
    box_xy_max *= nms_mask

    image = np.array(IMAGES[img_idx])

    detect_shape = filter_mask.shape

    for i in range(detect_shape[0]):
        for j in range(detect_shape[1]):
            for k in range(detect_shape[2]):
                if nms_mask[i, j, k, 0]:
                    cv2.rectangle(image, (int(box_xy_min[i, j, k, 0]), int(box_xy_min[i, j, k, 1])),
                                    (int(box_xy_max[i, j, k, 0]), int(box_xy_max[i, j, k, 1])),(0, 0, 255))
                    cv2.putText(image, classes_name[box_classes[i, j, k, 0]],
                                (int(box_xy_min[i, j, k, 0]), int(box_xy_min[i, j, k, 1])),1, 1, (0, 0, 255))

    plt.figure()
    plt.imshow(image)

In [None]:
# Show the most recently annotated image.
plt.imshow(image)

In [None]:
# Show the full label grid of the first image.
LABELS[0]

In [None]:
# Show every image in its own figure (one figure is opened per image).
for i in IMAGES:
    plt.figure()
    plt.imshow(np.array(i))

In [None]:
import numpy as np

# Scratch check: sum of absolute element-wise differences between two random
# 4x3 integer arrays. No seed is set, so the output changes on every run.
# NOTE(review): this rebinds the names x and y at notebook level.
x = np.random.randint(255, size=(4,3))
y = np.random.randint(255, size=(4,3))
print(x)
print(y)
np.sum(np.sum(np.abs(x-y), axis=1))

In [None]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

def process(filename: str=None) -> None:
    """
    Read one image file and show it in a new matplotlib figure.

    Calling it once per file stacks the figures vertically in the notebook
    output.

    Arguments:
        filename: str - path of the image file to read
    """
    pixels = mpimg.imread(filename)
    # Placeholder: any per-image processing would go here.
    plt.figure()
    plt.imshow(pixels)

# NOTE(review): process() passes its argument to mpimg.imread as a file name,
# but IMAGES holds image arrays (see the loading cells), not paths - this loop
# presumably needs the file names from the image directory instead; confirm.
for file in IMAGES:
    process(file)

In [None]:
import tensorflow.keras.backend as K


def xywh2minmax(xy, wh):
    """Convert centre/size boxes to corner form.

    Args:
        xy: Box centres.
        wh: Box sizes, broadcastable against ``xy``.

    Returns:
        Tuple ``(xy_min, xy_max)`` = ``(xy - wh / 2, xy + wh / 2)``.
    """
    half_extent = wh / 2
    return xy - half_extent, xy + half_extent


def iou(pred_mins, pred_maxes, true_mins, true_maxes):
    """Intersection-over-union of boxes given by their corner coordinates.

    The last axis of every argument holds the two box coordinates; all leading
    axes are broadcast against each other.

    Args:
        pred_mins: Minimum corners of the predicted boxes.
        pred_maxes: Maximum corners of the predicted boxes.
        true_mins: Minimum corners of the reference boxes.
        true_maxes: Maximum corners of the reference boxes.

    Returns:
        IoU scores with the last axis removed. Pairs whose union area is zero
        score 0 instead of NaN.
    """
    # Negative extents mean the boxes do not overlap; clip them to zero.
    overlap_wh = K.maximum(K.minimum(pred_maxes, true_maxes) - K.maximum(pred_mins, true_mins), 0.)
    intersect_areas = overlap_wh[..., 0] * overlap_wh[..., 1]

    pred_wh = pred_maxes - pred_mins
    true_wh = true_maxes - true_mins
    pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
    true_areas = true_wh[..., 0] * true_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas

    # Two zero-area boxes would give 0 / 0 = NaN; flooring the denominator at
    # epsilon turns that into an IoU of 0.
    return intersect_areas / K.maximum(union_areas, K.epsilon())


def yolo_head(feats):
    """Convert cell-relative box centres to image-scale centres and sizes.

    Args:
        feats: Tensor indexed as (batch, rows, cols, boxes, >=4); the last
            axis starts with [x, y, w, h]. The index grid built here is
            reshaped to [1, rows, cols, 1, 2], which fixes that rank.

    Returns:
        Tuple ``(box_xy, box_wh)``: ``(x, y)`` with the per-cell index added,
        divided by the grid dimensions and multiplied by 128, and ``(w, h)``
        multiplied by 128.
    """
    # Dynamic implementation of conv dims for fully convolutional model.
    conv_dims = K.shape(feats)[1:3]  # assuming channels last
    # In YOLO the height index is the inner most iteration.
    conv_height_index = K.arange(0, stop=conv_dims[0])
    conv_width_index = K.arange(0, stop=conv_dims[1])
    conv_height_index = K.tile(conv_height_index, [conv_dims[1]])

    # TODO: Repeat_elements and tf.split doesn't support dynamic splits.
    # conv_width_index = K.repeat_elements(conv_width_index, conv_dims[1], axis=0)
    conv_width_index = K.tile(
        K.expand_dims(conv_width_index, 0), [conv_dims[0], 1])
    conv_width_index = K.flatten(K.transpose(conv_width_index))
    # One (index, index) pair per grid cell. NOTE(review): for a square grid
    # the pair stored at cell [i, j] works out to (j, i), i.e. the column index
    # is added to x and the row index to y - confirm for non-square grids.
    conv_index = K.transpose(K.stack([conv_height_index, conv_width_index]))
    conv_index = K.reshape(conv_index, [1, conv_dims[0], conv_dims[1], 1, 2])
    conv_index = K.cast(conv_index, K.dtype(feats))

    conv_dims = K.cast(K.reshape(conv_dims, [1, 1, 1, 1, 2]), K.dtype(feats))

    # 128 is the image size used throughout this notebook.
    box_xy = (feats[..., :2] + conv_index) / conv_dims * 128
    box_wh = feats[..., 2:4] * 128

    return box_xy, box_wh


def yolo_loss(y_true, y_pred):
    """Sum-of-squares detection loss on a 4x4 grid with one box per cell.

    The two tensors are read with DIFFERENT channel orders:
      * ``y_true``: ``[class0..2, x, y, w, h, response]``
      * ``y_pred``: ``[class0..2, confidence, x, y, w, h]``

    Predictions are used as-is (no sigmoid or softmax is applied here).
    NOTE(review): ``K.sqrt(predict_wh)`` is therefore taken on raw outputs;
    negative widths or heights would presumably yield NaN - confirm.

    Args:
        y_true: Label tensor, 8 channels per cell.
        y_pred: Prediction tensor, 8 channels per cell.

    Returns:
        Scalar: confidence loss + class loss + box loss (weight 5), summed over
        all cells and samples.
    """
    label_class = y_true[..., :3]  # ? * 4 * 4 * 3
    label_box = y_true[..., 3:7]  # ? * 4 * 4 * 4
    response_mask = y_true[..., 7]  # ? * 4 * 4
    response_mask = K.expand_dims(response_mask)  # ? * 4 * 4 * 1

    predict_class = y_pred[..., :3]  # ? * 4 * 4 * 3
    predict_trust = y_pred[..., 3:4]  # ? * 4 * 4 * 1
    predict_box = y_pred[..., 4:]  # ? * 4 * 4 * 4

    # Insert a boxes-per-cell axis of size 1.
    _label_box = K.reshape(label_box, [-1, 4, 4, 1, 4])
    _predict_box = K.reshape(predict_box, [-1, 4, 4, 1, 4])

    label_xy, label_wh = yolo_head(_label_box)  # ? * 4 * 4 * 1 * 2, ? * 4 * 4 * 1 * 2
    label_xy = K.expand_dims(label_xy, 3)  # ? * 4 * 4 * 1 * 1 * 2
    label_wh = K.expand_dims(label_wh, 3)  # ? * 4 * 4 * 1 * 1 * 2
    label_xy_min, label_xy_max = xywh2minmax(label_xy, label_wh)  # ? * 4 * 4 * 1 * 1 * 2 each

    predict_xy, predict_wh = yolo_head(_predict_box)  # ? * 4 * 4 * 1 * 2, ? * 4 * 4 * 1 * 2
    predict_xy = K.expand_dims(predict_xy, 4)  # ? * 4 * 4 * 1 * 1 * 2
    predict_wh = K.expand_dims(predict_wh, 4)  # ? * 4 * 4 * 1 * 1 * 2
    predict_xy_min, predict_xy_max = xywh2minmax(predict_xy, predict_wh)  # ? * 4 * 4 * 1 * 1 * 2 each

    # Mark the predicted box with the highest IoU in each cell.
    iou_scores = iou(predict_xy_min, predict_xy_max, label_xy_min, label_xy_max)  # ? * 4 * 4 * 1 * 1
    best_ious = K.max(iou_scores, axis=4)  # ? * 4 * 4 * 1
    best_box = K.max(best_ious, axis=3, keepdims=True)  # ? * 4 * 4 * 1

    box_mask = K.cast(best_ious >= best_box, K.dtype(best_ious))  # ? * 4 * 4 * 1

    # Confidence is pushed towards 1 for responsible boxes and, with weight
    # 0.5, towards 0 everywhere else.
    no_object_loss = 0.5 * (1 - box_mask * response_mask) * K.square(0 - predict_trust)
    object_loss = box_mask * response_mask * K.square(1 - predict_trust)
    confidence_loss = no_object_loss + object_loss
    confidence_loss = K.sum(confidence_loss)

    # Class error only counts in cells that contain an object.
    class_loss = response_mask * K.square(label_class - predict_class)
    class_loss = K.sum(class_loss)

    # Recompute centres and sizes without the extra IoU broadcasting axes.
    _label_box = K.reshape(label_box, [-1, 4, 4, 1, 4])
    _predict_box = K.reshape(predict_box, [-1, 4, 4, 1, 4])

    label_xy, label_wh = yolo_head(_label_box)  # ? * 4 * 4 * 1 * 2, ? * 4 * 4 * 1 * 2
    predict_xy, predict_wh = yolo_head(_predict_box)  # ? * 4 * 4 * 1 * 2, ? * 4 * 4 * 1 * 2

    box_mask = K.expand_dims(box_mask)  # ? * 4 * 4 * 1 * 1
    response_mask = K.expand_dims(response_mask)  # ? * 4 * 4 * 1 * 1

    box_loss = 5 * box_mask * response_mask * K.square((label_xy - predict_xy) / 128)
    box_loss += 5 * box_mask * response_mask * K.square((K.sqrt(label_wh) - K.sqrt(predict_wh)) / 128)
    box_loss = K.sum(box_loss)

    loss = confidence_loss + class_loss + box_loss

    return loss

In [None]:
# Smoke test: evaluate yolo_loss with the first label grid as both arguments.
# yolo_loss reads y_true and y_pred with different channel orders, so this is
# not expected to return zero.
yolo_loss(LABELS[0:1],LABELS[0:1])

In [None]:
# Sigmoid over channels 3..6 of whatever `prediction` currently holds.
# NOTE(review): `prediction` is rebound by several cells - confirm it is a raw
# model output here and not an already activated one.
K.sigmoid(prediction[...,3:7])

In [None]:
# Show the full label grid of image 100.
LABELS[100]

In [None]:
def yolo_head(feats):
    """NumPy version of yolo_head for a single, un-batched prediction.

    NOTE(review): this reuses the name of the Keras-backend ``yolo_head``
    defined in an earlier cell. Once this cell has run, ``yolo_loss`` (which
    calls ``yolo_head`` on batched 5-D tensors) resolves to this version.

    Args:
        feats: Array indexed as (rows, cols, boxes, >=4); the last axis starts
            with [x, y, w, h]. The index grid built here is reshaped to
            [rows, cols, 1, 2], which fixes that rank.

    Returns:
        Tuple ``(box_xy, box_wh)``: ``(x, y)`` with the per-cell index added,
        divided by the grid dimensions and multiplied by 128, and ``(w, h)``
        multiplied by 128.
    """
    # Dynamic implementation of conv dims for fully convolutional model.
    conv_dims = np.shape(feats)[0:2]  # assuming channels last
    # In YOLO the height index is the inner most iteration.
    conv_height_index = np.arange(0, stop=conv_dims[0])
    conv_width_index = np.arange(0, stop=conv_dims[1])
    conv_height_index = np.tile(conv_height_index, [conv_dims[1]])

    # TODO: Repeat_elements and tf.split doesn't support dynamic splits.
    conv_width_index = np.tile(np.expand_dims(conv_width_index, 0), [conv_dims[0], 1])
    conv_width_index = np.reshape(np.transpose(conv_width_index), [conv_dims[0] * conv_dims[1]])
    # One (index, index) pair per grid cell, laid out as [rows, cols, 1, 2].
    conv_index = np.transpose(np.stack([conv_height_index, conv_width_index]))
    conv_index = np.reshape(conv_index, [conv_dims[0], conv_dims[1], 1, 2])

    conv_dims = np.reshape(conv_dims, [1, 1, 1, 2])

    # 128 is the image size used throughout this notebook.
    box_xy = (feats[..., :2] + conv_index) / conv_dims * 128
    box_wh = feats[..., 2:4] * 128

    return box_xy, box_wh

In [None]:
import cv2 as cv

# Run the detector on sample 40, keep confident boxes, apply non-maximum
# suppression and draw the survivors on the image.
# NOTE(review): `model`, `IMAGES`, `xywh2minmax` and `iou` come from other cells.
prediction = model.predict(IMAGES[40:41])

# Split the last axis: 3 class scores, 1 confidence value, the rest are box values.
predict_class = prediction[..., :3]
predict_trust = prediction[..., 3:4]
predict_box = prediction[..., 4:]

# Drop the batch axis and add an explicit boxes-per-cell axis of length 1.
# NOTE(review): np.reshape may return views, so the in-place `*=` operations
# below can write through to `prediction` - confirm if `prediction` is reused.
predict_class = np.reshape(predict_class, [4, 4, 1, 3])  # 4 * 4 * 1 * 3
predict_trust = np.reshape(predict_trust, [4, 4, 1, 1])  # 4 * 4 * 1 * 1
predict_box = np.reshape(predict_box, [4, 4, 1, 4])  # 4 * 4 * 1 * 4

# Class score weighted by the box confidence.
predict_scores = predict_class * predict_trust  # 4 * 4 * 1 * 3

box_classes = np.argmax(predict_scores, axis=-1)  # 4 * 4 * 1, index of the best class
box_class_scores = np.max(predict_scores, axis=-1)  # 4 * 4 * 1, score of the best class
best_box_class_scores = np.max(box_class_scores, axis=-1, keepdims=True)  # 4 * 4 * 1, best score per cell

# Boolean mask of the best-scoring box in each cell.
box_mask = box_class_scores >= best_box_class_scores  # 4 * 4 * 1

# Keep only boxes whose best class score reaches the 0.6 threshold.
filter_mask = box_class_scores >= 0.6  # 4 * 4 * 1
filter_mask *= box_mask  # 4 * 4 * 1

filter_mask = np.expand_dims(filter_mask, axis=-1)  # 4 * 4 * 1 * 1

# Zero out everything belonging to rejected boxes (in place).
predict_scores *= filter_mask  # 4 * 4 * 1 * 3
predict_box *= filter_mask  # 4 * 4 * 1 * 4

box_classes = np.expand_dims(box_classes, axis=-1)
box_classes *= filter_mask  # 4 * 4 * 1 * 1

# Convert the box values to image coordinates, then to corner form.
box_xy, box_wh = yolo_head(predict_box)  # 4 * 4 * 1 * 2 each
box_xy_min, box_xy_max = xywh2minmax(box_xy, box_wh)  # 4 * 4 * 1 * 2 each - presumably min/max corners; defined elsewhere

# Non-maximum suppression driven by the confidence values.
predict_trust *= filter_mask  # 4 * 4 * 1 * 1
nms_mask = np.zeros_like(filter_mask)  # 4 * 4 * 1 * 1, marks boxes that survive NMS
predict_trust_max = np.max(predict_trust)  # highest remaining confidence
max_i = max_j = max_k = 0
while predict_trust_max > 0:
    # Accept every box holding the current maximum confidence and remove it
    # from the candidate set; on ties the last one found is used as reference.
    for i in range(nms_mask.shape[0]):
        for j in range(nms_mask.shape[1]):
            for k in range(nms_mask.shape[2]):
                if predict_trust[i, j, k, 0] == predict_trust_max:
                    nms_mask[i, j, k, 0] = 1
                    filter_mask[i, j, k, 0] = 0
                    max_i = i
                    max_j = j
                    max_k = k
    # Discard remaining candidates whose IoU with the reference box exceeds 0.2.
    for i in range(nms_mask.shape[0]):
        for j in range(nms_mask.shape[1]):
            for k in range(nms_mask.shape[2]):
                if filter_mask[i, j, k, 0] == 1:
                    iou_score = iou(box_xy_min[max_i, max_j, max_k, :],
                                    box_xy_max[max_i, max_j, max_k, :],
                                    box_xy_min[i, j, k, :],
                                    box_xy_max[i, j, k, :])
                    if iou_score > 0.2:
                        filter_mask[i, j, k, 0] = 0
    # Accepted and suppressed boxes now have zero confidence, so the loop advances.
    predict_trust *= filter_mask  # 4 * 4 * 1 * 1
    predict_trust_max = np.max(predict_trust)  # highest remaining confidence

# Zero the corners of every box that did not survive NMS.
box_xy_min *= nms_mask
box_xy_max *= nms_mask

# Copy of the input image to draw on.
image = np.array(IMAGES[40])

detect_shape = filter_mask.shape

# Draw a rectangle and the class name for each surviving box.
for i in range(detect_shape[0]):
    for j in range(detect_shape[1]):
        for k in range(detect_shape[2]):
            if nms_mask[i, j, k, 0]:
                cv.rectangle(image, (int(box_xy_min[i, j, k, 0]), int(box_xy_min[i, j, k, 1])),
                                (int(box_xy_max[i, j, k, 0]), int(box_xy_max[i, j, k, 1])),
                                (0, 0, 255))
                # Positional arguments 1, 1 are the font face and font scale.
                cv.putText(image, classes_name[box_classes[i, j, k, 0]],
                            (int(box_xy_min[i, j, k, 0]), int(box_xy_min[i, j, k, 1])),
                            1, 1, (0, 0, 255))

plt.imshow(image)

In [None]:
import tensorflow.keras.backend as K

class Yolo_Reshape(Layer):
    """Turn a flat YOLO output vector into an activated grid tensor.

    The flat input of each sample is read as three consecutive segments:
    ``S0 * S1 * C`` class logits, then ``S0 * S1 * B`` confidence logits, then
    the box logits (``S0 * S1 * B * 4`` values). The segments are reshaped to
    the grid, passed through softmax (classes) or sigmoid (confidence, boxes)
    and concatenated along the last axis.

    Parameters
    ----------
    target_shape : sequence of int
        ``(S0, S1, channels)``. Only the first two entries are used by
        ``call``; the full tuple is reported by ``compute_output_shape``, so
        ``channels`` is expected to equal ``num_classes + 5 * num_boxes``.
    num_classes : int, optional
        Number of class logits per cell (C). Defaults to 3.
    num_boxes : int, optional
        Number of boxes per cell (B). Defaults to 1.
    **kwargs
        Forwarded to ``Layer``.
    """

    def __init__(self, target_shape, num_classes=3, num_boxes=1, **kwargs):
        super().__init__(**kwargs)
        self.target_shape = tuple(target_shape)
        self.num_classes = num_classes
        self.num_boxes = num_boxes

    def compute_output_shape(self, input_shape):
        """Return the batch size followed by ``target_shape``."""
        return (input_shape[0],) + self.target_shape

    def get_config(self):
        """Return the constructor arguments so the layer can be serialized."""
        config = super().get_config()
        config.update({
            "target_shape": list(self.target_shape),
            "num_classes": self.num_classes,
            "num_boxes": self.num_boxes,
        })
        return config

    def call(self, inputs, **kwargs):
        """Split, reshape and activate the flat ``(batch, features)`` input."""
        rows, cols = self.target_shape[0], self.target_shape[1]
        n_classes = self.num_classes
        n_boxes = self.num_boxes

        # Segment boundaries inside the flat vector.
        class_end = rows * cols * n_classes
        conf_end = class_end + rows * cols * n_boxes

        # The batch size is taken from the runtime shape so it may be unknown
        # while the graph is being built.
        batch = K.shape(inputs)[0]

        # class prediction
        class_probs = K.reshape(inputs[:, :class_end], (batch, rows, cols, n_classes))
        class_probs = K.softmax(class_probs)
        # confidence
        confs = K.reshape(inputs[:, class_end:conf_end], (batch, rows, cols, n_boxes))
        confs = K.sigmoid(confs)
        # boxes
        boxes = K.reshape(inputs[:, conf_end:], (batch, rows, cols, n_boxes * 4))
        boxes = K.sigmoid(boxes)

        return K.concatenate([class_probs, confs, boxes])

In [None]:
# Random stand-in for one flat network output (128 values in [0, 1)).
# A fixed seed keeps the scratch cells below reproducible across kernel restarts.
x = np.random.RandomState(0).rand(1, 128)

In [None]:
# Reshape the last 64 values of x (indices 64..127) to 4 * 4 * 4 and apply a sigmoid.
K.sigmoid(Reshape((4,4,4))(x[:,64:]))

In [None]:
# Run the custom layer on the random vector (target grid 4 * 4 with 8 channels per cell).
Yolo_Reshape((4,4,8))(x)

In [None]:
# Plain Keras Reshape of the same vector to 4 * 4 * 8; no activation is applied.
t = Reshape((4, 4, 8))(x)

In [None]:
# Softmax over the first three channels of the plainly reshaped tensor.
K.softmax(t[...,:3])

In [None]:
# Display the raw random vector.
x

In [None]:
# Use the random vector as a stand-in prediction; y_pred refers to the same object as x (no copy).
y_pred = x

In [None]:
# Split the flat vector into its three segments, reshape each to the 4 * 4 grid
# and apply the matching activation. The reshapes have no batch axis, so this
# only works for a single sample.
# NOTE: y_pred is overwritten with the (4, 4, 8) result, so this cell cannot be
# re-run without first re-running the cell that assigns y_pred.
class_probs = K.softmax(K.reshape(y_pred[:, :48], [4, 4, 3]))  # values 0..47: class logits
confs = K.sigmoid(K.reshape(y_pred[:, 48:64], [4, 4, 1]))  # values 48..63: confidence logits
boxes = K.sigmoid(K.reshape(y_pred[:, 64:], [4, 4, 4]))  # values 64..: box logits

y_pred = K.concatenate([class_probs, confs, boxes], axis=-1)
    

In [None]:
# Prepend a new leading axis of length 1 to y_pred and display the result.
np.expand_dims(y_pred,axis=0)

In [None]:
def yolo_loss(y_true, y_pred):
    """Sum-of-squares YOLO loss for a 4 x 4 grid, 3 classes and 1 box per cell.

    Parameters
    ----------
    y_true : array or tensor
        Labels whose last axis holds 8 values per cell: ``[0:3]`` class
        targets, ``[3:7]`` box values and ``[7]`` the response mask.
        Presumably shaped ``(batch, 4, 4, 8)`` - TODO confirm against the
        label builder.
    y_pred : array or tensor shaped ``(batch, 128)``
        Raw (pre-activation) network output, flat per sample: 48 class logits,
        then 16 confidence logits, then 64 box logits.

    Returns
    -------
    Scalar tensor: confidence loss + class loss + box loss, summed over all
    cells and samples.

    Notes
    -----
    ``yolo_head``, ``xywh2minmax`` and ``iou`` are defined in other cells.
    """
    grid = 4
    n_classes = 3
    class_end = grid * grid * n_classes  # 48
    conf_end = class_end + grid * grid   # 64

    # Activate the raw output. The leading -1 keeps the batch axis; reshaping
    # to [4, 4, C] (as before) only worked for a batch of exactly one sample.
    class_probs = K.softmax(K.reshape(y_pred[:, :class_end], [-1, grid, grid, n_classes]))
    confs = K.sigmoid(K.reshape(y_pred[:, class_end:conf_end], [-1, grid, grid, 1]))
    boxes = K.sigmoid(K.reshape(y_pred[:, conf_end:], [-1, grid, grid, 4]))
    y_pred = K.concatenate([class_probs, confs, boxes])  # ? * 4 * 4 * 8

    label_class = y_true[..., :3]  # ? * 4 * 4 * 3
    label_box = y_true[..., 3:7]  # ? * 4 * 4 * 4
    response_mask = K.expand_dims(y_true[..., 7])  # ? * 4 * 4 * 1

    predict_class = y_pred[..., :3]  # ? * 4 * 4 * 3
    predict_trust = y_pred[..., 3:4]  # ? * 4 * 4 * 1
    predict_box = y_pred[..., 4:]  # ? * 4 * 4 * 4

    # Add the boxes-per-cell axis (length 1) expected by yolo_head.
    _label_box = K.reshape(label_box, [-1, grid, grid, 1, 4])
    _predict_box = K.reshape(predict_box, [-1, grid, grid, 1, 4])

    # Image-space centres and sizes; reused below for both IoU and box loss.
    label_xy, label_wh = yolo_head(_label_box)  # ? * 4 * 4 * 1 * 2 each
    predict_xy, predict_wh = yolo_head(_predict_box)  # ? * 4 * 4 * 1 * 2 each

    # Extra axes let iou() pair every predicted box with every label box.
    label_xy_min, label_xy_max = xywh2minmax(
        K.expand_dims(label_xy, 3), K.expand_dims(label_wh, 3))  # ? * 4 * 4 * 1 * 1 * 2
    predict_xy_min, predict_xy_max = xywh2minmax(
        K.expand_dims(predict_xy, 4), K.expand_dims(predict_wh, 4))  # ? * 4 * 4 * 1 * 1 * 2

    iou_scores = iou(predict_xy_min, predict_xy_max, label_xy_min, label_xy_max)  # presumably ? * 4 * 4 * 1 * 1
    best_ious = K.max(iou_scores, axis=4)  # ? * 4 * 4 * 1
    best_box = K.max(best_ious, axis=3, keepdims=True)  # ? * 4 * 4 * 1

    # 1 for the box with the highest IoU in each cell (with a single box per
    # cell this is normally all ones).
    box_mask = K.cast(best_ious >= best_box, K.dtype(best_ious))  # ? * 4 * 4 * 1

    # Confidence: push towards 1 for responsible boxes, towards 0 (weight 0.5) elsewhere.
    no_object_loss = 0.5 * (1 - box_mask * response_mask) * K.square(0 - predict_trust)
    object_loss = box_mask * response_mask * K.square(1 - predict_trust)
    confidence_loss = K.sum(no_object_loss + object_loss)

    # Classification: only cells flagged by the response mask contribute.
    class_loss = K.sum(response_mask * K.square(label_class - predict_class))

    # Localisation (weight 5): squared error on centres and on square-rooted
    # sizes, both divided by 128 before squaring.
    box_mask = K.expand_dims(box_mask)  # ? * 4 * 4 * 1 * 1
    response_mask = K.expand_dims(response_mask)  # ? * 4 * 4 * 1 * 1

    box_loss = 5 * box_mask * response_mask * K.square((label_xy - predict_xy) / 128)
    box_loss += 5 * box_mask * response_mask * K.square((K.sqrt(label_wh) - K.sqrt(predict_wh)) / 128)
    box_loss = K.sum(box_loss)

    loss = confidence_loss + class_loss + box_loss

    return loss

In [None]:
# Display the model output for the first image (slice keeps the batch axis).
model.predict(IMAGES[0:1])

In [None]:
# Loss of the model's prediction for sample 1 against its label.
yolo_loss(LABELS[1:2],model.predict(IMAGES[1:2]))

In [None]:


def model_tiny_yolov1(inputs):
    """Build a two-stage convolutional head that maps `inputs` to a 4 x 4 x 8 tensor.

    Each stage is Conv2D(16) -> Conv2D(32, 3x3) -> LeakyReLU(0.3) -> 2x2 max-pool.
    The first stage opens with a 1x1 kernel, the second with a 3x3 kernel. The
    pooled features are flattened, projected to 128 linear units and reshaped
    to (4, 4, 8).

    NOTE(review): other cells define a function with this same name; whichever
    cell ran last is the one in effect.
    """
    features = inputs
    for opening_kernel in ((1, 1), (3, 3)):
        features = Conv2D(16, opening_kernel)(features)
        features = Conv2D(32, (3, 3))(features)
        features = tf.keras.layers.LeakyReLU(alpha=0.3)(features)
        features = MaxPooling2D(pool_size=(2, 2))(features)

    flat = Flatten()(features)
    raw_output = Dense(128, activation='linear')(flat)
    return Reshape((4, 4, 8))(raw_output)

In [None]:
def model_tiny_yolov1(inputs):
    """Build a four-stage convolutional head that maps `inputs` to a 4 x 4 x 8 tensor.

    Each stage is Conv2D(16) -> Conv2D(32, 3x3) -> LeakyReLU(0.3) -> 2x2 max-pool.
    Only the very first convolution uses a 1x1 kernel; the remaining stages open
    with a 3x3 kernel. The pooled features are flattened, projected to 128
    linear units and reshaped to (4, 4, 8).

    NOTE(review): other cells define a function with this same name; whichever
    cell ran last is the one in effect.
    """
    features = inputs
    for opening_kernel in ((1, 1), (3, 3), (3, 3), (3, 3)):
        features = Conv2D(16, opening_kernel)(features)
        features = Conv2D(32, (3, 3))(features)
        features = tf.keras.layers.LeakyReLU(alpha=0.3)(features)
        features = MaxPooling2D(pool_size=(2, 2))(features)

    flat = Flatten()(features)
    raw_output = Dense(128, activation='linear')(flat)
    return Reshape((4, 4, 8))(raw_output)

In [None]:
# Assemble the Keras model around the network defined by model_tiny_yolov1.
# The leading entry of input_shape is dropped; Input receives (128, 128, 1).
input_shape = (1, 128, 128, 1)

inputs = Input(input_shape[1:4])

# Build the layer graph exactly once. The previous extra call
# `model = model_tiny_yolov1(inputs)` created a second, unused set of layers
# whose result was overwritten two lines later.
outputs = model_tiny_yolov1(inputs)
model = Model(inputs=inputs, outputs=outputs)

model.summary()