In [None]:
# Download the pretrained YOLOv3 weights file (yolov3.weights) with wget
!wget https://pjreddie.com/media/files/yolov3.weights

In [None]:
# Download the YOLOv3 network configuration file (yolov3.cfg) from the darknet repository
!wget https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov3.cfg

In [None]:
# Download the COCO class-name list (coco.names, one label per line) from the darknet repository
!wget https://raw.githubusercontent.com/pjreddie/darknet/master/data/coco.names

### The sample test video file is saved in my Google Drive. Mount Google Drive to access that file.

In [5]:
# Mount Google Drive at /content/mydrive so the MP4 video file stored
# there can be read from this notebook
from google.colab import drive
drive.mount("/content/mydrive")

Mounted at /content/mydrive


In [6]:
# Path of the sample pedestrian video inside the mounted Drive folder
testmp4="/content/mydrive/MyDrive/Yolo_ObjectDetection/pedestrians.mp4"

In [7]:
# MIN_CONF: minimum class score a detection must exceed to be kept as a
# bounding box (also passed to NMSBoxes as its score threshold)
# NMS_THRESH: threshold passed to non-max suppression, which removes
# overlapping bounding boxes
MIN_CONF = 0.3
NMS_THRESH = 0.3

# define the minimum safe distance (in pixels) that two people can be
# from each other; centroid pairs closer than this count as a violation
MIN_DISTANCE = 50

### Defining the people-detection function using YOLOv3

In [8]:
import numpy as np
import cv2

In [9]:
"""People Detection function to get coordinates of bounding boxes resizing  to cover the persons. 
It returns the list of list of confidence scores for the class,left-top and bottom-right x y coordinates of 
the bounding box and centeroid points for a bounding box."""

def detect_people(frame,net,ln,personIdx,min_conf=None,nms_thresh=None):
  """Detect people in one frame with a YOLO network run through cv2.dnn.

  Args:
    frame: image array; its first two shape entries are read as
      (height, width) and the array is passed to cv2.dnn.blobFromImage.
    net: cv2.dnn network; receives the blob via setInput and is evaluated
      with net.forward(ln).
    ln: output layer names passed to net.forward.
    personIdx: class index that must win the arg-max over the class scores
      for a detection to count as a person.
    min_conf: score a detection must exceed to be kept, also passed to
      NMSBoxes as its score threshold. None (default) uses the
      notebook-level MIN_CONF.
    nms_thresh: threshold passed to NMSBoxes. None (default) uses the
      notebook-level NMS_THRESH.

  Returns:
    list of (confidence, (startX, startY, endX, endY), (centerX, centerY))
    tuples, one per detection that survives non-maxima suppression. The
    list is empty when nobody is detected.
  """
  # Fall back to the notebook-level thresholds only when the caller did not
  # pass explicit values, so existing four-argument calls keep working.
  if min_conf is None:
    min_conf=MIN_CONF
  if nms_thresh is None:
    nms_thresh=NMS_THRESH

  H,W=frame.shape[:2]
  results=[]

  # scale pixel values by 1/255, resize to 416x416 and swap the R and B channels
  blob=cv2.dnn.blobFromImage(frame,1/255.0,(416,416),swapRB=True,crop=False)
  net.setInput(blob)
  layerOutputs=net.forward(ln)

  boxes=[]
  centroids=[]
  confidences=[]

  for output in layerOutputs:
    for detection in output:
      # entries from index 5 onward are the per-class scores
      scores=detection[5:]
      classId=np.argmax(scores)
      confidence=scores[classId]

      if classId!=personIdx or not confidence>min_conf:
        continue

      # the first four entries are (center x, center y, width, height);
      # multiplying by the frame size expresses the box in frame pixels
      box=detection[0:4]*np.array([W,H,W,H])
      center_x,center_y,width,height=box.astype("int")

      # convert the centre-based box to its top-left corner
      leftx=int(center_x-(width/2))
      lefty=int(center_y-(height/2))

      boxes.append([leftx,lefty,int(width),int(height)])
      # store native ints so callers receive plain Python values
      centroids.append((int(center_x),int(center_y)))
      confidences.append(float(confidence))

  # nothing passed the confidence filter: no need to run suppression
  if not boxes:
    return results

  # apply non-maxima suppression to suppress weak, overlapping
  # bounding boxes
  idxs=cv2.dnn.NMSBoxes(boxes,confidences,min_conf,nms_thresh)

  # NMSBoxes may hand back an empty tuple, an (N, 1) array or a flat array
  # depending on the OpenCV build; np.array(...).flatten() accepts all three
  for i in np.array(idxs).flatten():
    x,y,w,h=boxes[i]
    results.append((confidences[i],(x,y,x+w,y+h),centroids[i]))

  return results





In [10]:
from google.colab.patches import cv2_imshow
from scipy.spatial import distance as dist
import numpy as np
import argparse
import imutils
import cv2
import os

In [11]:
# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--input", type=str, default="",
	help="path to (optional) input video file")

ap.add_argument("-o", "--output", type=str, default="",
	help="path to (optional) output video file")

ap.add_argument("-d", "--display", type=int, default=1,
	help="whether or not output frame should be displayed")



_StoreAction(option_strings=['-d', '--display'], dest='display', nargs=None, const=None, default=1, type=<class 'int'>, choices=None, help='whether or not output frame should be displayed', metavar=None)

In [12]:
# Parse an explicit argument list, since a notebook has no command line.
# The input path reuses `testmp4` (defined earlier in this notebook) instead
# of repeating the same literal, so the video location lives in one place.
args = vars(ap.parse_args(["--input", testmp4, "--output", "my_output.avi", "--display", "1"]))

In [18]:
# load the COCO class labels our YOLO model was trained on
labelsPath = "/content/coco.names"
# read through a context manager so the file handle is closed as soon as
# the labels are loaded; one label per line, surrounding whitespace removed
with open(labelsPath) as labels_file:
    LABELS = labels_file.read().strip().split("\n")

In [20]:
# show the class names in file order; a name's position in this list is
# what LABELS.index(...) later supplies as the class index
print(LABELS)

['person', 'bicycle', 'car', 'motorbike', 'aeroplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'sofa', 'pottedplant', 'bed', 'diningtable', 'toilet', 'tvmonitor', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']


In [22]:
## loading pretrained yolov3 model
# builds a cv2.dnn network from the Darknet config (.cfg) and weights files
# under /content (presumably the files fetched by the wget cells)
net=cv2.dnn.readNetFromDarknet("/content/yolov3.cfg","/content/yolov3.weights")

In [23]:
# fetch the names of every layer in the network (not only the output
# layers); `ln` is overwritten further down with just the *output* layer
# names that we need from YOLO
ln = net.getLayerNames()

In [25]:
# indices of the network's unconnected output layers; they are treated as
# 1-based when mapped to layer names (1 is subtracted on lookup)
print(net.getUnconnectedOutLayers())

[[200]
 [227]
 [254]]


In [26]:
## detection layers name
# Look the names up in a fresh getLayerNames() list rather than in `ln`
# itself, so re-running this cell does not index into the already-filtered
# three-name list.
# flatten() accepts both an (N, 1) index array and a flat one, so the
# lookup does not depend on which shape getUnconnectedOutLayers() returns.
# The indices are 1-based, hence the -1.
layer_names = net.getLayerNames()
ln = [layer_names[i - 1] for i in np.array(net.getUnconnectedOutLayers()).flatten()]

In [27]:
# display the selected output-layer names
ln

['yolo_82', 'yolo_94', 'yolo_106']

In [28]:
# initialize the video stream and pointer to output video file
print("[INFO] accessing video stream...")
# use the given input path, or capture device 0 when no path was supplied
vs = cv2.VideoCapture(args["input"] if args["input"] else 0)
# Fail loudly here: with an unopened capture the frame loop would stop on
# its first read and produce no output and no error message.
if not vs.isOpened():
    raise RuntimeError("could not open video source: {!r}".format(args["input"] or 0))
# the writer is created lazily once the first processed frame is available
writer = None

[INFO] accessing video stream...


In [None]:
# loop over the frames from the video stream

# the class index of "person" never changes, so look it up once instead of
# once per frame
person_idx = LABELS.index("person")

# try/finally guarantees the capture is released and the output file is
# finalized even when the loop stops early or the cell is interrupted
try:
    while True:
        # read the next frame from the file
        (grabbed, frame) = vs.read()

        # if the frame was not grabbed, then we have reached the end
        # of the stream
        if not grabbed:
            break

        # resize the frame and then detect people (and only people) in it
        frame = imutils.resize(frame, width=700)
        results = detect_people(frame, net, ln, personIdx=person_idx)

        # initialize the set of indexes that violate the minimum social
        # distance
        violate = set()

        # ensure there are *at least* two people detections (required in
        # order to compute our pairwise distance maps)
        if len(results) >= 2:
            # extract all centroids from the results and compute the
            # Euclidean distances between all pairs of the centroids
            centroids = np.array([r[2] for r in results])
            D = dist.cdist(centroids, centroids, metric="euclidean")

            # keep only the strict upper triangle (i < j) so each pair is
            # examined once and the zero self-distances are ignored, then
            # record both members of every pair closer than MIN_DISTANCE
            too_close = np.triu(D < MIN_DISTANCE, k=1)
            rows, cols = np.nonzero(too_close)
            violate.update(rows.tolist())
            violate.update(cols.tolist())

        # loop over the results
        for (i, (prob, bbox, centroid)) in enumerate(results):
            # extract the bounding box and centroid coordinates
            (startX, startY, endX, endY) = bbox
            (cX, cY) = centroid

            # (0, 0, 255) for people in the violation set, (0, 255, 0)
            # for everyone else
            color = (0, 0, 255) if i in violate else (0, 255, 0)

            # draw (1) a bounding box around the person and (2) the
            # centroid coordinates of the person
            cv2.rectangle(frame, (startX, startY), (endX, endY), color, 2)
            cv2.circle(frame, (cX, cY), 2, color, 1)

        # draw the total number of social distancing violations on the
        # output frame
        text = "Social Distancing Violations: {}".format(len(violate))
        cv2.putText(frame, text, (10, frame.shape[0] - 25),
            cv2.FONT_HERSHEY_SIMPLEX, 0.85, (0, 0, 255), 3)

        # check to see if the output frame should be displayed to our
        # screen
        if args["display"] > 0:
            # show the output frame
            cv2_imshow(frame)
            key = cv2.waitKey(1) & 0xFF

            # if the `q` key was pressed, break from the loop
            if key == ord("q"):
                break

        # if an output video file path has been supplied and the video
        # writer has not been initialized, do so now (the frame size is
        # only known once a frame has been resized)
        if args["output"] != "" and writer is None:
            # initialize our video writer: MJPG codec, 25 frames per second
            fourcc = cv2.VideoWriter_fourcc(*"MJPG")
            writer = cv2.VideoWriter(args["output"], fourcc, 25,
                (frame.shape[1], frame.shape[0]), True)

        # if the video writer is not None, write the frame to the output
        # video file
        if writer is not None:
            writer.write(frame)
finally:
    # release the input stream and flush/close the output video
    vs.release()
    if writer is not None:
        writer.release()

Sources:

https://blog.paperspace.com/how-to-implement-a-yolo-v3-object-detector-from-scratch-in-pytorch-part-2/

https://github.com/abd-shoumik/Social-distance-detection