In [1]:
import numpy as np
import cv2
import time
from tensorflow.keras.models import load_model
from skimage import transform
from skimage import exposure
from skimage import io

In [4]:
# helper that prepares a cropped detection for the recognition model
def process_images(image):
    """Resize and contrast-equalise an image, returning it as a one-image batch.

    Steps, in order:
      1. resize to 32x32 pixels (aspect ratio is ignored),
      2. adaptive histogram equalisation with ``clip_limit=0.1``,
      3. cast to float32 and divide by 255,
      4. prepend a batch axis.

    Args:
        image: array-like image accepted by ``skimage.transform.resize``.

    Returns:
        The processed image with one extra leading axis of length 1.

    NOTE(review): the skimage calls above appear to already yield floats in
    [0, 1], in which case the extra division by 255 compresses the values
    further rather than scaling them *to* [0, 1]. Presumably this mirrors the
    preprocessing used when the recognition model was trained -- confirm
    against the training code before changing it.
    """
    resized = transform.resize(image, (32, 32))
    equalised = exposure.equalize_adapthist(resized, clip_limit=0.1)

    # same arithmetic as used historically: float32 cast, then divide by 255
    scaled = equalised.astype("float32") / 255.0

    # the model consumes batches, so wrap the single image in a batch axis
    return np.expand_dims(scaled, axis=0)

In [22]:
# load models
# Keras model used to classify each cropped detection
recognition_model = load_model('trained_model/sign_recognition_final_model/model.pb')

# Darknet (YOLO) network used to locate candidate regions in each frame
yolo_model = cv2.dnn.readNetFromDarknet('trained_model/cov_yolov4.cfg','trained_model/cov_yolov4_2000.weights')
# Get all layers from the yolo network
yolo_layers = yolo_model.getLayerNames()
# Names of the network's unconnected output layers, passed to forward() later.
# Ask OpenCV for the names directly instead of indexing yolo_layers by hand:
# getUnconnectedOutLayers() returns an Nx1 array in some OpenCV builds and a
# flat array in others, so `yolo_layer[0] - 1` fails on the flat variant.
yolo_output_layer = list(yolo_model.getUnconnectedOutLayersNames())

In [8]:
# load the label names
# The first line of signnames.csv is skipped (treated as a header); from every
# remaining row the second comma-separated field is kept as the label text.
# The file is read inside a `with` block so the handle is closed promptly.
with open("signnames.csv") as label_file:
    label_rows = label_file.read().strip().split("\n")[1:]
labelNames = [row.split(",")[1] for row in label_rows]

In [None]:
# load video
video = cv2.VideoCapture('test.mp4')

try:
    # isOpened must be *called*; the bare attribute is a bound method and is
    # therefore always truthy, even when the file could not be opened
    while video.isOpened():
        # get the current frame from video stream
        ret, frame = video.read()
        if not ret:
            # no frame could be read (end of stream or read failure)
            break

        height, width = frame.shape[:2]

        # convert to blob to pass into model
        # scale factor 0.003922 = 1/255; the blob is 416x416 here
        # commonly used YOLO input sizes are 320x320, 416x416 and 608x608;
        # larger inputs trade speed for accuracy
        img_blob = cv2.dnn.blobFromImage(frame, 0.003922, (416, 416), swapRB=True, crop=False)

        # pass the blob to Yolo model
        yolo_model.setInput(img_blob)

        # get the detection from YOLO model using forward()
        obj_detection_layers = yolo_model.forward(yolo_output_layer)

        # initialization for non-max suppression (NMS)
        # parallel lists: detector class id, [x, y, width, height] box, confidence
        class_ids_list = []
        boxes_list = []
        confidences_list = []

        # loop over each of the layer outputs
        for object_detection_layer in obj_detection_layers:
            # loop over the detections
            for object_detection in object_detection_layer:
                # object_detection[0:4] => box centre x, centre y, width, height
                #                          (multiplied by the frame size below)
                # object_detection[5:]  => per-class scores
                all_scores = object_detection[5:]
                predicted_class_id = np.argmax(all_scores)
                prediction_confidence = all_scores[predicted_class_id]

                # take only predictions with confidence more than 70%
                if prediction_confidence > 0.70:
                    # scale the box to the dimensions of the actual frame
                    bounding_box = object_detection[0:4] * np.array([width, height, width, height])
                    (box_center_x_pt, box_center_y_pt, box_width, box_height) = bounding_box.astype("int")
                    start_x_pt = int(box_center_x_pt - (box_width / 2))
                    start_y_pt = int(box_center_y_pt - (box_height / 2))

                    # the detector's class id is recorded for inspection only;
                    # the label drawn on the frame comes from recognition_model
                    class_ids_list.append(predicted_class_id)
                    confidences_list.append(float(prediction_confidence))
                    boxes_list.append([start_x_pt, start_y_pt, int(box_width), int(box_height)])

        # NMS keeps the strongest box among overlapping candidates
        # score threshold 0.5, NMS overlap threshold 0.4 (tune for better performance)
        max_value_ids = cv2.dnn.NMSBoxes(boxes_list, confidences_list, 0.5, 0.4)

        # NMSBoxes may return an Nx1 array, a flat array, or an empty tuple
        # depending on the OpenCV build; flattening handles all three
        for kept_index in np.array(max_value_ids, dtype=int).flatten():
            max_class_id = int(kept_index)
            start_x_pt, start_y_pt, box_width, box_height = boxes_list[max_class_id]

            end_x_pt = start_x_pt + box_width
            end_y_pt = start_y_pt + box_height

            # crop image to the bounding box; clamp at 0 because a negative
            # slice bound would wrap around to the far edge of the frame
            croped_img = frame[max(start_y_pt, 0) : max(end_y_pt, 0),
                               max(start_x_pt, 0) : max(end_x_pt, 0)]
            if croped_img.shape[0] == 0 or croped_img.shape[1] == 0:
                # nothing to classify; skip so no undefined or stale label is drawn
                continue

            # NOTE(review): frames read through OpenCV are presumably in BGR
            # channel order; confirm which order recognition_model was trained
            # on and convert here if they differ.
            croped_img = process_images(croped_img)
            preds = recognition_model.predict(croped_img)
            j = preds.argmax(axis=1)[0]
            label = labelNames[j]

            # draw rectangle and text in the image
            cv2.rectangle(frame, (start_x_pt, start_y_pt), (end_x_pt, end_y_pt), [0,255,0], 1)
            cv2.putText(frame, label, (start_x_pt, start_y_pt-5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, [0, 255,0], 1)

        cv2.imshow("Detection Output", frame)

        # terminate while loop if 'q' key is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # always free the capture handle and close windows, even after an error
    video.release()
    cv2.destroyAllWindows()