# Part 1: Importing the Required Libraries

In [1]:
from scipy.spatial import distance as dist
import numpy as np #pip install numpy
import imutils #pip install imutils
import cv2 #pip install opencv-python
import os

# Part 2: Importing YOLOv3 configs

In [2]:
# Directory that holds the YOLO files (coco.names, yolov3.cfg, yolov3.weights)
MODEL_PATH = "yolo-coco"

# Detection thresholds:
#   MIN_CONF   - a detection is kept only when its class score is above this value
#   NMS_THRESH - threshold handed to non-maxima suppression
MIN_CONF, NMS_THRESH = 0.3, 0.3

# Minimum safe distance (in pixels) that two people can be from each other
MIN_DISTANCE = 50

# Set to True to ask OpenCV for the NVIDIA CUDA backend/target
USE_GPU = False

In [3]:
# Load the COCO class labels our YOLO model was trained on.
labelsPath = os.path.sep.join([MODEL_PATH,"coco.names"])
# A context manager closes the file handle as soon as the labels are read,
# instead of leaving it open until garbage collection.
with open(labelsPath) as labelsFile:
    LABELS = labelsFile.read().strip().split("\n")

print(LABELS)

print(len(LABELS))

# Derive the paths to the YOLO weights and model configuration.
weightsPath = os.path.sep.join([MODEL_PATH,"yolov3.weights"])
configPath = os.path.sep.join([MODEL_PATH,"yolov3.cfg"])

# Load our YOLO object detector trained on the COCO dataset (80 classes).
print("loading YOLO from disk")
net = cv2.dnn.readNetFromDarknet(configPath,weightsPath)


['person', 'bicycle', 'car', 'motorbike', 'aeroplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'sofa', 'pottedplant', 'bed', 'diningtable', 'toilet', 'tvmonitor', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
80
loading YOLO from disk


# Part 3: Using GPU for preprocessing

In [4]:
# Only switch the network to CUDA when the configuration asks for it;
# otherwise the backend and target of `net` are left untouched.
if USE_GPU:
    print("Setting prefferable backend and target to CUDA...")
    cudaBackend, cudaTarget = cv2.dnn.DNN_BACKEND_CUDA, cv2.dnn.DNN_TARGET_CUDA
    net.setPreferableBackend(cudaBackend)
    net.setPreferableTarget(cudaTarget)

In [5]:
# Determine only the "output" layer names that we need from YOLO.
layerNames = net.getLayerNames()
# getUnconnectedOutLayers() yields 1-based layer indices. Depending on the
# OpenCV version they arrive either as an Nx1 array or as a flat array, so
# flatten first: indexing each entry with [0] fails on the flat form.
outLayerIdxs = np.array(net.getUnconnectedOutLayers()).flatten()
ln = [layerNames[int(i) - 1] for i in outLayerIdxs]

# Part 4: Input Access (Video File and Live Footage)

In [6]:
print("Accesing Video stream")
# Video file to check for social distancing.
videoPath = "2.mp4"
vs = cv2.VideoCapture(videoPath)

# Fail loudly if the source could not be opened; otherwise the properties
# below read as 0 and the processing loop would end without doing anything.
if not vs.isOpened():
    raise IOError("Could not open video source: {}".format(videoPath))

# Frames per second reported by the source.
fps = vs.get(cv2.CAP_PROP_FPS)
print("FPS of current video frame - ",fps)

# Total frame count reported by the source.
num_frames = vs.get(cv2.CAP_PROP_FRAME_COUNT)
print("Number of frames in the video:",num_frames)

# The video writer is created lazily once the first processed frame is available.
writer= None

Accesing Video stream
FPS of current video frame -  25.0
Number of frames in the video: 531.0


# Part 5: Algorithm for Detecting People

In [7]:
def detect_people(frame,net,ln,personIdx=0):
    """Run the YOLO network on one frame and return the people it detects.

    Parameters:
        frame: image array; its first two shape entries are read as (height, width).
        net: network object; the blob is fed via setInput() and evaluated via forward().
        ln: output layer names passed to net.forward().
        personIdx: class index a detection must have to be kept (default 0).

    Returns:
        A list of tuples (confidence, (startX, startY, endX, endY), (centerX, centerY)),
        one per detection that survives non-maxima suppression. Empty when nothing survives.

    Uses the module-level MIN_CONF and NMS_THRESH thresholds.
    """
    frameH, frameW = frame.shape[:2]

    # Build a 416x416 blob (scaled by 1/255, R and B swapped) and run a forward pass.
    blob = cv2.dnn.blobFromImage(frame,1/255.0,(416,416),swapRB=True,crop=False)
    net.setInput(blob)
    layerOutputs = net.forward(ln)

    # Multiplier that maps the network's relative box values back to frame pixels.
    scale = np.array([frameW,frameH,frameW,frameH])

    boxes, centroids, confidences = [], [], []

    for layerOutput in layerOutputs:
        for detection in layerOutput:
            # Entries from index 5 onward are the per-class scores.
            scores = detection[5:]
            classID = np.argmax(scores)
            confidence = scores[classID]

            # Skip anything that is not a sufficiently confident person detection.
            if not (classID == personIdx and confidence >MIN_CONF):
                continue

            # The first four entries are the box centre (x, y) followed by width and height.
            (centerX,centerY,width,height) = (detection[0:4]*scale).astype("int")

            # Convert the centre-based box into a top-left-corner-based box.
            left = int(centerX-(width/2))
            top = int(centerY-(height/2))

            boxes.append([left,top,int(width),int(height)])
            centroids.append((centerX,centerY))
            confidences.append(float(confidence))

    # Non-maxima suppression removes weak, overlapping boxes.
    keep = cv2.dnn.NMSBoxes(boxes,confidences,MIN_CONF,NMS_THRESH)

    results = []
    if len(keep) == 0:
        return results

    for k in keep.flatten():
        left, top = boxes[k][0], boxes[k][1]
        boxW, boxH = boxes[k][2], boxes[k][3]
        results.append((confidences[k],(left,top,left+boxW,top+boxH),centroids[k]))
    return results

In [8]:
# display: a value greater than 0 shows every processed frame in a window.
# output:  path of the annotated video file; an empty string disables writing.
display, output = 1, "Output.avi"

In [9]:
# Process the stream frame by frame: detect people, flag pairs whose centroids
# are closer than MIN_DISTANCE pixels, annotate the frame, then display and/or
# write it. The try/finally guarantees the capture, the writer and any display
# window are released on every exit path, so the output file is finalised.

# The class index of "person" does not change between frames; look it up once.
personIdx = LABELS.index("person")

try:
    while True:
        # Read the next frame from the stream.
        (grabbed,frame) = vs.read()

        # A failed grab means we have reached the end of the stream.
        if not grabbed:
            break

        # Resize the frame and detect people in it.
        frame = imutils.resize(frame,width=700)
        results = detect_people(frame,net,ln,personIdx=personIdx)

        # Indexes (into results) of the people violating the distance rule.
        violate = set()
        if len(results) >=2:
            # Pairwise Euclidean distances between all detected centroids.
            centroids = np.array([r[2] for r in results])
            D = dist.cdist(centroids,centroids,metric="euclidean")

            # Only the upper triangle is visited, so each pair is checked once.
            for i in range(0,D.shape[0]):
                for j in range(i+1,D.shape[1]):
                    if D[i,j] <MIN_DISTANCE:
                        violate.add(i)
                        violate.add(j)

        # Annotate every detection: red for violators, green otherwise.
        for (i,(prob,bbox,centroid)) in enumerate(results):
            (startX,startY,endX,endY) = bbox
            (cX,cY) = centroid
            color = (0,0,255) if i in violate else (0,255,0)

            cv2.rectangle(frame,(startX,startY),(endX,endY),color,2)
            # Centroids are NumPy integers; hand plain Python ints to the drawing call.
            cv2.circle(frame,(int(cX),int(cY)),5,color,1)

        # Display the total number of violations on the output frame.
        text = "Social distance violations:{}".format(len(violate))
        cv2.putText(frame,text,(10,frame.shape[0]-25),cv2.FONT_HERSHEY_SIMPLEX,0.85,(0,0,255),3)

        # Optionally show the frame; pressing 'q' stops processing.
        if display > 0:
            cv2.imshow("Frame",frame)
            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                break

        # Create the writer lazily, once the size of the processed frame is known.
        if output !="" and writer is None:
            fourcc = cv2.VideoWriter_fourcc(*"MJPG")
            # Use the source frame rate so playback speed matches the input;
            # fall back to 25 when the source does not report a usable value.
            outFps = fps if fps > 0 else 25
            writer = cv2.VideoWriter(output,fourcc,outFps,(frame.shape[1],frame.shape[0]),True)

        # Write the annotated frame to the output video file.
        if writer is not None:
            writer.write(frame)
finally:
    # Release resources on every exit path.
    vs.release()
    if writer is not None:
        writer.release()
        # Reset so a re-run of this cell creates a fresh writer.
        writer = None
    cv2.destroyAllWindows()
        