## INFO 634- DATA MINING TERM PROJECT
#### GROUP -5

#### Madhu Bandru (mb4236)
#### Vuthej (vv334)
#### Likhil Rachuri (lkr46)

In [None]:
# Install the PyPI packages this notebook needs: TensorFlow 2.x (pre-releases allowed) and pycocotools.
# NOTE(review): this cell does NOT compile the protobufs or install the `object_detection`
# package that the import cell relies on - presumably that is done outside the notebook; confirm.
# Reference link kept from the original author (cocoapi issue thread):
#https://github.com/cocodataset/cocoapi/issues/169#issuecomment-462528628
!pip install -U --pre tensorflow=="2.*"
!pip install pycocotools


In [1]:
#importing required packages in the below code.

import cv2 
import imutils 
import os
import pathlib
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image
from IPython.display import display
import numpy
from numpy import linalg as la

from object_detection.utils import ops as utils_ops
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util

# Make the object_detection `utils.ops` module use the TF1 compatibility API.
utils_ops.tf = tf.compat.v1
# Expose `tf.gfile` as an alias of `tf.io.gfile` (the location it has in TF2).
tf.gfile = tf.io.gfile



# Make sure a `models` checkout is reachable from the working directory:
#  - if the current directory is somewhere inside a path containing "models", walk up
#    until it no longer is (so the relative path 'models/...' used later resolves);
#  - otherwise, if there is no `models` directory here yet, shallow-clone the
#    tensorflow/models repository from GitHub.
if "models" in pathlib.Path.cwd().parts:
    while "models" in pathlib.Path.cwd().parts:
        os.chdir('..')
elif not pathlib.Path('models').exists():
    !git clone --depth 1 https://github.com/tensorflow/models



In [2]:
def load_model(model_selection):
    """Download a pretrained object-detection model and return its serving function.

    Parameters
    ----------
    model_selection : str
        Archive name (without the ``.tar.gz`` suffix) under
        ``http://download.tensorflow.org/models/object_detection/``.

    Returns
    -------
    The ``serving_default`` signature of the SavedModel found in the
    ``saved_model`` sub-directory of the unpacked archive.
    """
    base_url = 'http://download.tensorflow.org/models/object_detection/'
    archive_url = base_url + model_selection + '.tar.gz'
    # get_file fetches the archive and unpacks it (untar=True); it returns the local path.
    extracted_dir = tf.keras.utils.get_file(fname=model_selection, origin=archive_url, untar=True)
    saved_model_path = str(pathlib.Path(extracted_dir) / "saved_model")
    return tf.saved_model.load(saved_model_path).signatures['serving_default']

In [3]:
# Build the class-id -> category lookup from the COCO label map that ships with the
# cloned `models` repository. No training happens in this notebook; the lookup is only
# inspected here.
## The last expression displays entry 1 to confirm that class id 1 is "person".

LABELS_PATH = 'models/research/object_detection/data/mscoco_label_map.pbtxt'
category_index = label_map_util.create_category_index_from_labelmap(LABELS_PATH, use_display_name=True)
category_index[1]

{'id': 1, 'name': 'person'}

In [4]:
# Initialise the detector: load the pretrained model named 'ssd_mobilenet_v1_coco_2017_11_17'.
# load_model downloads the model archive from download.tensorflow.org (it is a model, not a dataset).

model_name = 'ssd_mobilenet_v1_coco_2017_11_17'
detection_model = load_model(model_name)

INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


In [5]:
# Inspect the inputs of the model loaded into detection_model
# (the displayed result is a single uint8 `image_tensor` with a leading batch dimension).
detection_model.inputs

[<tf.Tensor 'image_tensor:0' shape=(None, None, None, 3) dtype=uint8>]

In [6]:
# Inspect the outputs produced by detection_model: a dictionary with four float32 entries.
# detection_scores: confidence score of each detection (compared against a 0.35 threshold
#   later in this notebook, so presumably a fraction rather than a percentage - confirm).
# detection_classes: class id of each detection (stored as float; cast to int before use).
# num_detections: the number of objects detected.
# detection_boxes: location of each detection as a 4-element box.
detection_model.output_dtypes

{'detection_scores': tf.float32,
 'detection_classes': tf.float32,
 'num_detections': tf.float32,
 'detection_boxes': tf.float32}

In [7]:
## Inspect the output shapes of the selected model.
# detection_scores, detection_classes and detection_boxes always hold 100 slots per image;
# each box has 4 values. num_detections holds one value per image.
# NOTE(review): the detection function defined later does not read num_detections; it keeps
# detections by class id and score threshold instead.
detection_model.output_shapes

{'detection_scores': TensorShape([None, 100]),
 'detection_classes': TensorShape([None, 100]),
 'num_detections': TensorShape([None]),
 'detection_boxes': TensorShape([None, 100, 4])}

In [8]:
def obj_detection(model, image, width, height, threshold=0.35):
    """Detect people in a single image and return their bounding boxes scaled to pixels.

    Parameters
    ----------
    model : callable
        Detector called with a batched image tensor; must return a mapping with
        'detection_boxes', 'detection_scores' and 'detection_classes'.
    image : array-like
        One image; it is converted with ``np.asarray`` and given a leading batch axis.
    width, height : number
        Factors used to scale the normalised box values to pixel values.
    threshold : float, default 0.35
        Only detections whose score is strictly greater than this value are kept.

    Returns
    -------
    list of [top, left, bottom, right]
        One entry per kept detection of class 1 ("person"). Values at index 0 and 2 are
        scaled by ``height`` and values at index 1 and 3 by ``width``.
    """
    # The model expects a batch, so add a leading axis to the single image.
    batched_input = tf.convert_to_tensor(np.asarray(image))[tf.newaxis, ...]
    detections = model(batched_input)

    # Drop the batch axis from each output; class ids arrive as floats.
    all_boxes = np.squeeze(detections['detection_boxes'])
    all_scores = np.squeeze(detections['detection_scores'])
    all_classes = np.squeeze(detections['detection_classes']).astype(np.int32)

    # Keep only the "person" detections (class id 1) ...
    is_person = all_classes == 1
    person_boxes = all_boxes[is_person]
    person_scores = all_scores[is_person]
    # ... and of those only the ones scoring above the threshold.
    confident_boxes = person_boxes[person_scores > threshold]

    # Box values are normalised; scale them by the image size to get pixel values.
    return [
        [top * height, left * width, bottom * height, right * width]
        for top, left, bottom, right in confident_boxes
    ]

In [9]:
# Drawing colours are (B, G, R) tuples, the channel order OpenCV drawing calls use.
_VIOLATOR_COLOR = (0, 0, 255)
_COMPLIANT_COLOR = (0, 255, 0)
_STATS_COLOR = (0, 255, 255)
# Frame rate written into the output video file.
_OUTPUT_FPS = 15.


def _box_center(box):
    """Return the integer (x, y) centre of an (x1, y1, x2, y2) pixel box."""
    x1, y1, x2, y2 = box
    return (x1 + int((x2 - x1) / 2), y1 + int((y2 - y1) / 2))


def _detect_people_tf(image):
    """Run the TensorFlow detector on a BGR frame; return (x1, y1, x2, y2) integer boxes."""
    frame_height, frame_width = image.shape[:2]
    # OpenCV delivers frames in BGR order while the TensorFlow detection models expect RGB,
    # so only the copy handed to the detector is converted (the drawn/saved frame stays BGR).
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    scaled_boxes = obj_detection(detection_model, rgb_frame, frame_width, frame_height, 0.35)
    # obj_detection returns [y_min, x_min, y_max, x_max] floats: reorder and truncate to ints.
    return [(int(x_min), int(y_min), int(x_max), int(y_max))
            for y_min, x_min, y_max, x_max in scaled_boxes]


def _detect_people_hog(image, feature_descriptor):
    """Run the HOG people detector on a frame; return (x1, y1, x2, y2) integer boxes."""
    regions, _ = feature_descriptor.detectMultiScale(image, winStride=(4, 4), padding=(4, 4), scale=1.05)
    # The HOG detector reports (x, y, width, height); convert to corner form.
    return [(int(x), int(y), int(x + w), int(y + h)) for x, y, w, h in regions]


def _flag_violators(boxes):
    """Return one bool per box: True when another box's centre is closer than this box's height."""
    centers = [numpy.array(_box_center(box)) for box in boxes]
    flags = []
    for box, center in zip(boxes, centers):
        # The box height (assumed to represent roughly 6 feet) is the minimum allowed distance.
        min_distance = box[3] - box[1]
        flags.append(any(
            # Boxes with identical coordinates are treated as the same person and skipped.
            other_box != box and la.norm(center - other_center) < min_distance
            for other_box, other_center in zip(boxes, centers)
        ))
    return flags


def _draw_people(image, boxes, violator_flags):
    """Draw each box and its centre on the frame: red for violators, green otherwise."""
    for box, violated in zip(boxes, violator_flags):
        box_color = _VIOLATOR_COLOR if violated else _COMPLIANT_COLOR
        cv2.rectangle(image, (box[0], box[1]), (box[2], box[3]), box_color, 2)
        cv2.circle(image, _box_center(box), 1, box_color, 10)


def _draw_stats(image, frame_violator_count, violation_percentage):
    """Write the per-frame violator count and the running violation percentage on the frame."""
    font = cv2.FONT_HERSHEY_SIMPLEX
    # Keep the text inside the frame even when the frame is narrower than 550 pixels.
    text_x = max(0, image.shape[1] - 550)
    text1 = "Violaters in the Frame: %d" % frame_violator_count
    text2 = "Overall violations percentage: %.2f" % violation_percentage
    cv2.putText(image, text1, (text_x, 20), font, 1, _STATS_COLOR, 2, cv2.LINE_AA, False)
    cv2.putText(image, text2, (text_x, 50), font, 1, _STATS_COLOR, 2, cv2.LINE_AA, False)


def social_distance_monitoring(cam_input_port=0, path="", model=2):
    """Monitor social distancing on a video stream, annotate each frame, show and save it.

    Each person whose centre lies closer to another person's centre than the height of
    their own bounding box is marked as a violator (red box); everyone else gets a green
    box. The number of violators in the frame and the running percentage of violators
    among all detections so far are written on every frame.

    Parameters
    ----------
    cam_input_port : int, default 0
        Camera index used for live capture when ``path`` is empty.
    path : str, default ""
        Path of a recorded video. When empty, the camera is used and the output is
        written to 'Live_video_output.avi'; otherwise to 'Recorded_video_output.avi'.
    model : int, default 2
        1 selects the OpenCV HOG people detector, 2 the TensorFlow detector
        (``detection_model`` via ``obj_detection``).

    Raises
    ------
    ValueError
        If ``model`` is neither 1 nor 2.

    Notes
    -----
    Press 'q' in the preview window to stop early. The output file is created when the
    first frame has been processed, using that frame's actual size.
    """
    if model not in (1, 2):
        raise ValueError("model must be 1 (HOG) or 2 (TensorFlow), got %r" % (model,))

    feature_descriptor = None
    if model == 1:
        # HOG features classified by OpenCV's default people SVM.
        feature_descriptor = cv2.HOGDescriptor()
        feature_descriptor.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())

    live_capture = path == ""
    output_name = 'Live_video_output.avi' if live_capture else 'Recorded_video_output.avi'

    cv2.startWindowThread()
    cap = cv2.VideoCapture(cam_input_port if live_capture else path)
    save_video = None
    # Running totals over the whole video.
    human_count_in_video = 0
    total_violator_count = 0
    try:
        while cap.isOpened():
            ret, image = cap.read()
            if not ret:
                # End of the video, or the camera stopped delivering frames.
                break

            # Cap the frame width at 4000 pixels (aspect ratio is preserved).
            image = imutils.resize(image, width=min(4000, image.shape[1]))

            if model == 1:
                boxes = _detect_people_hog(image, feature_descriptor)
            else:
                boxes = _detect_people_tf(image)

            violator_flags = _flag_violators(boxes)
            _draw_people(image, boxes, violator_flags)

            frame_violator_count = sum(violator_flags)
            human_count_in_video += len(boxes)
            total_violator_count += frame_violator_count
            # Guard the division without touching the running total, and scale to a real
            # percentage to match the on-screen label.
            if human_count_in_video:
                violation_percentage = 100.0 * total_violator_count / human_count_in_video
            else:
                violation_percentage = 0.0
            _draw_stats(image, frame_violator_count, violation_percentage)

            if save_video is None:
                # The writer only accepts frames of the size it was opened with, so open
                # it with the size of the frames that are actually written.
                frame_height, frame_width = image.shape[:2]
                save_video = cv2.VideoWriter(output_name, cv2.VideoWriter_fourcc(*'MJPG'),
                                             _OUTPUT_FPS, (frame_width, frame_height))
            save_video.write(image)

            cv2.imshow("Image", image)
            # Give the window time to refresh; 'q' stops the monitoring.
            if cv2.waitKey(25) & 0xFF == ord('q'):
                break
    finally:
        # Always free the capture, finish the output file and close the preview window,
        # even if detection or drawing raised.
        cap.release()
        if save_video is not None:
            save_video.release()
        cv2.destroyAllWindows()

In [10]:
import cv2 
import imutils 
import numpy
## cam_input_port is set to zero, for webcam
cam_input_port=0
## path to be provided for the input video
video_input_path="vid_short.mp4"
## model names the user can type, mapped to the codes social_distance_monitoring expects
model={"hog_model": 1, "tf": 2}
## user selection for the model; surrounding whitespace and capitalisation are ignored
user_input=input("Please select a model (hog_model or tf), ").strip().lower()
## fail with a readable message instead of a bare KeyError when the choice is not recognised
if user_input not in model:
    raise ValueError("Unknown model %r; expected one of: %s" % (user_input, ", ".join(model)))
## calling the function to calculate the social distancing
social_distance_monitoring(cam_input_port, path=video_input_path, model= model[user_input])

Please select a model (hog_model or tf),  hog_model
