In [1]:
# Importing Libraries

import numpy as np
import time
import threading 
import cv2
from pygame import mixer 
from math import pow, sqrt

pygame 2.0.0 (SDL 2.0.12, python 3.8.5)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
# --- Model and label files -------------------------------------------------
path_model = "./SSD_MobileNet.caffemodel"
path_prototxt = "./SSD_MobileNet_prototxt.txt"
path_label = "./class_labels.txt"

# --- Video source: capture device index 0 ----------------------------------
cap = cv2.VideoCapture(0)

# --- Tunable parameters ----------------------------------------------------
# Minimum confidence for human detection
confidence_req = 0.8
# Actual length of reference
h_source = 11
# Actual length of target
h_target = 28
# Focal length of camera
F = 800

In [3]:
# Reading labels for the network: one class name per line of the label file.
# The context manager closes the file handle as soon as the labels are read
# instead of leaving it open for the lifetime of the kernel.
with open(path_label) as label_file:
    labels = [line.strip() for line in label_file]
print(labels)
# One random 3-component colour per class, used when drawing bounding boxes.
bounding_box_color = np.random.uniform(0, 255, size=(len(labels), 3))

['background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor']


In [4]:
# Loading the network from the prototxt file and the Caffe model file configured above
network = cv2.dnn.readNetFromCaffe(path_prototxt, path_model)

In [5]:
def alert(sound_path='./Alarm.mp3', volume=0.7, duration=5):
    """Play the alarm sound for a fixed time, then stop it.

    Blocks the calling thread for ``duration`` seconds while the sound plays.

    Args:
        sound_path: Audio file handed to ``mixer.music.load``.
        volume: Value passed to ``mixer.music.set_volume``.
        duration: Number of seconds to let the sound play before stopping it.
    """
    # Starting the mixer
    mixer.init()

    # Loading the song
    mixer.music.load(sound_path)

    # Setting the volume
    mixer.music.set_volume(volume)

    # Start playing the song
    mixer.music.play()

    try:
        # Let the alarm sound for the requested time
        time.sleep(duration)
    finally:
        # Always silence the alarm, even when the sleep is interrupted
        # (e.g. by a kernel interrupt); stop() alone is enough, so no
        # pause() is issued first.
        mixer.music.stop()

In [6]:
def _estimate_position(box, real_height):
    """Return (x, y, distance) for a pixel box using triangle similarity, or None.

    ``box`` is (startX, startY, endX, endY) in pixels and ``real_height`` is the
    real-world height assumed for the detected object. ``None`` is returned for
    a box without a positive pixel height, which would otherwise cause a
    division by zero.
    """
    (startX, startY, endX, endY) = box
    height = endY - startY
    if height <= 0:
        return None

    # Mid point of bounding box
    x_mid = round((startX + endX) / 2, 4)
    y_mid = round((startY + endY) / 2, 4)

    # Distance from camera based on triangle similarity
    distance = (real_height * F) / height

    # Mid-point of the bounding box rescaled with the same similarity ratio
    return ((x_mid * distance) / F, (y_mid * distance) / F, distance)


def detect(pri=True, dis=False):
    """Detect persons in the camera stream and alert when one is near the source.

    The first person detection ever seen becomes the fixed "source" (sized with
    ``h_source``); every later person detection is a "target" (sized with
    ``h_target``). For each frame the distance between every target and the
    source is computed, and the alarm is started when at least one target is
    within 50 units of the source.

    Args:
        pri: When true, print detection labels and distances.
        dis: When true, draw the annotations, show the frame in a window and
            allow quitting with the ``q`` key; the capture and the windows are
            released on exit.
    """
    person_class_id = 15      # Class Id of 'person' in the label list
    min_distance = 50         # Targets at or below this distance count as too close

    first = True
    source = None
    alarm_thread = None

    try:
        while cap.isOpened():

            # Capture one frame after another
            ret, frame = cap.read()

            # Check the read result before touching the frame: a failed read
            # yields no image to flip.
            if not ret:
                break

            frame = cv2.flip(frame, 1)

            (h, w) = frame.shape[:2]

            # Resize the frame to suit the model requirements (300x300 pixels)
            blob = cv2.dnn.blobFromImage(cv2.resize(frame, (300, 300)), 0.007843, (300, 300), 127.5)

            network.setInput(blob)
            detections = network.forward()

            pos_dict = dict()
            coordinates = dict()

            for i in range(detections.shape[2]):

                confidence = detections[0, 0, i, 2]
                if confidence <= confidence_req:
                    continue

                # Filtering only persons detected in the frame
                class_id = int(detections[0, 0, i, 1])
                if class_id != person_class_id:
                    continue

                box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
                (startX, startY, endX, endY) = box.astype('int').tolist()

                # The first detection is measured as the source, later ones as targets
                position = _estimate_position(
                    (startX, startY, endX, endY), h_source if first else h_target
                )
                if position is None:
                    # Degenerate box: no usable height to estimate a distance from
                    continue

                if dis:
                    # Draw bounding box for the object
                    cv2.rectangle(frame, (startX, startY), (endX, endY), bounding_box_color[class_id], 2)

                if pri:
                    print("{}: {:.2f}%".format(labels[class_id], confidence * 100))
                    print("Distance from camera(cm):{dist}\n".format(dist=position[2]))

                coordinates[i] = (startX, startY, endX, endY)

                if first:
                    source = position
                    first = False
                else:
                    pos_dict[i] = position

            # Distance between Source and every Target detected in a frame
            close_objects = set()
            for i, target in pos_dict.items():

                dx = target[0] - source[0]
                dy = target[1] - source[1]
                dz = target[2] - source[2]
                dist = sqrt(dx * dx + dy * dy + dz * dz)

                if pri:
                    print(dist)

                too_close = dist <= min_distance
                if too_close:
                    close_objects.add(i)

                if dis:
                    COLOR = (0,0,255) if too_close else (0,255,0)

                    # Use this target's own box; the variables left over from
                    # the detection loop belong to whichever box came last.
                    (startX, startY, endX, endY) = coordinates[i]

                    cv2.putText(frame, 'Distance from source: {i} cm'.format(i=round(dist,4)), (startX, startY+200),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLOR, 2)

                    cv2.rectangle(frame, (startX, startY), (endX, endY), COLOR, 2)
                    y = startY - 15 if startY - 15 > 15 else startY + 15
                    cv2.putText(frame, 'Distance from camera: {i} cm'.format(i=round(target[2],4)), (startX, y),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLOR, 2)

            # Check if any Target is too close to the Source. The alarm runs in
            # a background thread so the capture loop keeps processing frames,
            # and a new alarm is only started once the previous one finished.
            if close_objects and (alarm_thread is None or not alarm_thread.is_alive()):
                alarm_thread = threading.Thread(target=alert, daemon=True)
                alarm_thread.start()

            if dis:
                cv2.namedWindow('Frame',cv2.WINDOW_NORMAL)

                # Show frame
                cv2.imshow('Frame', frame)
                cv2.resizeWindow('Frame',800,600)

                key = cv2.waitKey(1) & 0xFF

                # Press `q` to exit
                if key == ord("q"):
                    break
    finally:
        if dis:
            # Clean up even when the loop is left through an exception
            cap.release()
            cv2.destroyAllWindows()

In [7]:
# Run the detector with both switches on:
#   pri -> Print Information
#   dis -> Display Information
detect(pri=True, dis=True)

person: 98.95%
Distance from camera(cm):16.761904761904763

person: 99.04%
Distance from camera(cm):42.50474383301708

35.17042877769235
person: 99.15%
Distance from camera(cm):42.58555133079848

35.34261131458417
person: 98.99%
Distance from camera(cm):42.74809160305343

35.58669851122787
person: 98.39%
Distance from camera(cm):42.994241842610364

35.970892474952855
person: 98.83%
Distance from camera(cm):42.911877394636015

36.00521841477991
person: 98.95%
Distance from camera(cm):43.07692307692308

36.19500645046831
person: 98.68%
Distance from camera(cm):42.666666666666664

35.26263652499451
person: 98.74%
Distance from camera(cm):42.50474383301708

35.17042877769235
person: 98.62%
Distance from camera(cm):42.829827915869984

35.678717153884676
person: 98.48%
Distance from camera(cm):42.58555133079848

35.07058483477543
person: 98.35%
Distance from camera(cm):42.829827915869984

35.47008293952061
person: 98.04%
Distance from camera(cm):42.74809160305343

35.38452258202023
person: 9

person: 99.92%
Distance from camera(cm):31.11111111111111

18.864275002718255
person: 99.92%
Distance from camera(cm):31.067961165048544

18.842208953193197
person: 99.92%
Distance from camera(cm):31.372549019607842

19.262750254009703
person: 99.93%
Distance from camera(cm):31.024930747922436

18.742169571125125
person: 99.92%
Distance from camera(cm):31.32867132867133

18.938829054709103
person: 99.92%
Distance from camera(cm):31.32867132867133

18.846357207980496
person: 99.94%
Distance from camera(cm):31.32867132867133

18.777562023438406
person: 99.92%
Distance from camera(cm):31.32867132867133

18.91563186259842
person: 99.92%
Distance from camera(cm):31.32867132867133

18.938829054709103
person: 99.91%
Distance from camera(cm):31.372549019607842

19.2270333166445
person: 99.89%
Distance from camera(cm):31.32867132867133

19.25705130021426
person: 99.92%
Distance from camera(cm):31.372549019607842

19.144137556379537
person: 99.90%
Distance from camera(cm):31.32867132867133

19.0

person: 99.98%
Distance from camera(cm):34.5679012345679

23.982484509943095
person: 99.98%
Distance from camera(cm):34.514637904468415

23.917297130705602
person: 99.99%
Distance from camera(cm):34.514637904468415

23.904387499873856
person: 99.98%
Distance from camera(cm):34.46153846153846

23.800804120072616
person: 99.98%
Distance from camera(cm):34.46153846153846

23.826557370524633
person: 99.99%
Distance from camera(cm):34.514637904468415

23.839891109741036
person: 99.98%
Distance from camera(cm):34.40860215053763

23.697593445958415
person: 99.99%
Distance from camera(cm):34.40860215053763

23.659098794330617
person: 99.98%
Distance from camera(cm):34.40860215053763

23.633627066217105
person: 99.98%
Distance from camera(cm):34.46153846153846

23.7108983305515
person: 99.98%
Distance from camera(cm):34.46153846153846

23.77510102262097
person: 99.98%
Distance from camera(cm):34.46153846153846

23.787946292053537
person: 99.97%
Distance from camera(cm):34.46153846153846

23.813

In [8]:
#start()