Import libraries

In [1]:
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers,Model,Sequential
from tensorflow.keras.applications import ResNet50

Create Embedding model

In [2]:
# Base seed for the randomly initialised embedding head. Without fixed seeds
# the Dense kernels differ on every kernel restart, so the embeddings (and any
# distance threshold computed from them) cannot be reproduced.
EMBEDDING_SEED = 42


def _seeded_glorot(offset):
    """Return a Glorot-uniform initializer (Dense's default) with a fixed seed.

    Each Dense layer gets its own offset so the layers do not share one seed.
    """
    return tf.keras.initializers.GlorotUniform(seed=EMBEDDING_SEED + offset)


# ImageNet-pretrained ResNet50 backbone without its classification head,
# taking 200x200 3-channel images.
base_cnn = ResNet50(weights="imagenet", input_shape=(200, 200, 3), include_top=False)

# Projection head: flattens the backbone feature map and maps it to a
# 256-dimensional embedding vector.
custom_layers = Sequential([
    layers.Flatten(),
    layers.Dense(512, activation="relu", kernel_initializer=_seeded_glorot(0)),
    layers.BatchNormalization(),

    layers.Dense(256, activation="relu", kernel_initializer=_seeded_glorot(1)),
    layers.BatchNormalization(),

    # Final layer is linear (no activation): its output is the embedding.
    layers.Dense(256, kernel_initializer=_seeded_glorot(2)),
])

output = custom_layers(base_cnn.output)

# End-to-end model: image batch in, embedding batch out.
embedding = Model(inputs=base_cnn.input, outputs=output, name="Embedding")

In [3]:
# Transfer learning: freeze every backbone layer that comes before
# "conv5_block1_out" and leave that layer and all later ones trainable.
backbone_layers = base_cnn.layers

# Position of the first layer to unfreeze. If no layer carries that name the
# index falls past the end of the list, so the whole backbone stays frozen.
first_unfrozen = next(
    (idx for idx, lyr in enumerate(backbone_layers) if lyr.name == "conv5_block1_out"),
    len(backbone_layers),
)

for idx, lyr in enumerate(backbone_layers):
    lyr.trainable = idx >= first_unfrozen

Create threshold data

In [4]:
# (width, height) every image is resized to before it is fed to the embedding model.
REFERENCE_IMAGE_SIZE = (200, 200)


def load_reference_image(path, size=REFERENCE_IMAGE_SIZE):
    """Read an image from disk and resize it to ``size``.

    ``cv2.imread`` does not raise when the file is missing or unreadable; it
    returns ``None``, which would otherwise surface as an opaque error inside
    ``cv2.resize``. Fail early with a message that names the offending path.

    Args:
        path: Path of the image file to load.
        size: Target ``(width, height)`` passed to ``cv2.resize``.

    Returns:
        The resized image as returned by ``cv2.resize``.

    Raises:
        FileNotFoundError: If OpenCV could not read the file.
    """
    image = cv2.imread(path)
    if image is None:
        raise FileNotFoundError(f"Could not read image file: {path!r}")
    return cv2.resize(image, size)


# Reference image plus two further positive examples used to derive the threshold.
sample = load_reference_image('positive_pic.png')
test_pic = load_reference_image('positive_pic_2.png')
test_pic_2 = load_reference_image('positive_pic_3.png')

In [5]:
def _embed_single(image):
    """Run one image through the embedding model as a batch of size one."""
    return embedding(np.expand_dims(image, axis=0))


# NOTE(review): `sample` is rebound from the raw image to its embedding, so
# this cell cannot be re-run without first re-running the image-loading cell.
# NOTE(review): the images are passed to the model as loaded by OpenCV, with
# no ResNet50 `preprocess_input` step — confirm this is intended.
sample = _embed_single(sample)
test = _embed_single(test_pic)
test_2 = _embed_single(test_pic_2)

In [6]:
# Dissimilarity score between embeddings: smaller means more alike.
def similarity(imgA,imgB):
    """Return the squared Euclidean distance between two embeddings.

    The element-wise differences are squared and summed over the last axis.
    No square root is taken, so the result is the *squared* distance.
    """
    delta = imgA - imgB
    squared = tf.square(delta)
    return tf.reduce_sum(squared, axis=-1)

In [7]:
# Distance from the reference embedding to each of the two positive examples.
dist_to_test = similarity(sample, test)
dist_to_test_2 = similarity(sample, test_2)

# Keep the smaller of the two distances as the acceptance threshold.
# NOTE(review): with the smaller distance as threshold, the farther positive
# example would itself fail a `diff < threshold` check — confirm this
# strictness is intended.
threshold = dist_to_test_2 if dist_to_test_2 < dist_to_test else dist_to_test
threshold

<tf.Tensor: shape=(1,), dtype=float32, numpy=array([337.30383], dtype=float32)>

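Read each video frame and extract the moving regions. Draw a rectangle around a region when the distance between its embedding and the sample embedding is lower than the threshold.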

In [8]:
# Contours with an area below this value are skipped: small contours might be
# people or other things that are not vehicles.
MIN_CONTOUR_AREA = 3500

# (width, height) each region of interest is resized to before embedding.
ROI_SIZE = (200, 200)

# Create a VideoCapture object to read the frames from an input file
cap = cv2.VideoCapture('sample.mp4')
result = None

try:
    # Check if the video is loaded successfully; if not, there is nothing to
    # process and no output file should be created.
    if not cap.isOpened():
        print("Error opening video file")
    else:
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        size = (frame_width, frame_height)

        # Lower-case 'mp4v' is the fourcc tag that matches the .mp4 container.
        result = cv2.VideoWriter('result.mp4', cv2.VideoWriter_fourcc(*'mp4v'), fps, size)

        # Mixture of Gaussian background model
        MOG2 = cv2.createBackgroundSubtractorMOG2()

        # Loop over every single frame
        while True:
            # Read current frame; `ret` is False once no more frames can be
            # read (end of file or read error) and `frame` is then unusable.
            ret, frame = cap.read()
            if not ret:
                break

            # Extract foreground mask
            mask = MOG2.apply(frame)

            # Extract contours of the foreground regions. Indexing with [-2]
            # selects the contour list under both the 2-tuple and the 3-tuple
            # return conventions of cv2.findContours.
            contours = cv2.findContours(mask, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]

            # Draw on a copy so that annotations for one contour never end up
            # inside the pixels cropped for a later contour of the same frame.
            annotated = frame.copy()

            # Loop through each contour
            for c in contours:

                # Small contours are skipped
                if cv2.contourArea(c) < MIN_CONTOUR_AREA:
                    continue

                # Big contours should be vehicles that need to be checked
                x, y, w, h = cv2.boundingRect(c)

                # Crop from the untouched frame (original note: frame shape is 720 960 3)
                roi = cv2.resize(frame[y:y+h, x:x+w], ROI_SIZE)
                diff = similarity(sample, embedding(np.array([roi])))

                # Highlight the region only when it is closer to the sample
                # embedding than the threshold.
                if diff < threshold:
                    cv2.putText(annotated, str((x,y)), (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36,255,12), 2)
                    cv2.rectangle(annotated, (x,y), (x+w, y+h), (0,255,0), 2)

            # Display the annotated video and append the frame to the output file
            cv2.imshow('Original Video', annotated)
            result.write(annotated)

            # Pressing 'q' exits the loop early
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # Release video objects and close windows even if processing failed midway
    cap.release()
    if result is not None:
        result.release()
    cv2.destroyAllWindows()