## Safety vest detection via person detection + color detection

In [1]:
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
import cv2
import time

from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image

# Session configuration shared by every tf.Session created in this file.
# allow_growth is switched on so the GPU memory option is set before any
# session is opened (see the TensorFlow GPUOptions documentation).
config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
 
class ObjectDetector:
    """Person detector backed by a frozen TensorFlow object-detection graph.

    On construction the frozen graph is downloaded (if it is not already on
    disk), imported into a private ``tf.Graph`` and bound to a ``tf.Session``
    that is reused for every call to :meth:`findperson`.
    """

    # Models can be found here: https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md
    # Alternatives that were tried with this class:
    #   'faster_rcnn_resnet101_lowproposals_coco_2018_01_28'
    #   'faster_rcnn_nas_lowproposals_coco_2018_01_28'
    DEFAULT_MODEL_NAME = 'ssd_inception_v2_coco_2017_11_17'
    DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'
    GRAPH_FILE_NAME = 'frozen_inference_graph.pb'
    # Value of detection_classes that findperson() keeps.
    PERSON_CLASS_ID = 1

    def __init__(self, model_name=DEFAULT_MODEL_NAME, threshold=0.7):
        """Load the detection graph and open a session on it.

        Args:
            model_name: Name of the model archive (without ``.tar.gz``) below
                ``DOWNLOAD_BASE``; also the directory holding the frozen graph.
            threshold: Minimum detection score for a box to be reported.
        """
        path_to_ckpt = model_name + '/' + self.GRAPH_FILE_NAME

        # Download Model
        if not os.path.isfile(path_to_ckpt):
            model_file = model_name + '.tar.gz'
            urllib.request.urlretrieve(self.DOWNLOAD_BASE + model_file, model_file)
            self._extract_frozen_graph(model_file, os.getcwd())

        self.detection_graph = tf.Graph()
        with self.detection_graph.as_default():
            od_graph_def = tf.GraphDef()
            with tf.gfile.GFile(path_to_ckpt, 'rb') as fid:
                od_graph_def.ParseFromString(fid.read())
                tf.import_graph_def(od_graph_def, name='')

        self.threshold = threshold

        # `config` is the module-level tf.ConfigProto defined above the class.
        self.sess = tf.Session(config=config, graph=self.detection_graph)

    @classmethod
    def _extract_frozen_graph(cls, archive_path, dest_dir):
        """Extract only the frozen-graph member(s) of a model archive.

        Members whose path is absolute or climbs out of ``dest_dir`` via
        ``..`` are skipped, because the archive comes from the network.
        The archive is closed when extraction finishes or fails.
        """
        with tarfile.open(archive_path) as tar_file:
            for member in tar_file.getmembers():
                if cls.GRAPH_FILE_NAME not in os.path.basename(member.name):
                    continue
                member_path = os.path.normpath(member.name)
                if os.path.isabs(member_path) or member_path.split(os.sep)[0] == os.pardir:
                    continue
                tar_file.extract(member, dest_dir)

    def close(self):
        """Release the TensorFlow session held by this detector."""
        self.sess.close()

    def findperson(self, image_np):
        """Run the detector on one image and return the person detections.

        Args:
            image_np: Image array; a leading batch axis is added before it is
                fed to the ``image_tensor:0`` input.

        Returns:
            A list with one entry per kept detection, each of the form
            ``[class_id, score, b1, b0, b3, b2]`` where ``b0..b3`` are the
            four values of the matching ``detection_boxes`` row. The caller in
            this file treats them as normalized xmin, ymin, xmax, ymax.
        """
        with self.sess.graph.as_default():
            image_np_expanded = np.expand_dims(image_np, axis=0)

            image_tensor = self.detection_graph.get_tensor_by_name('image_tensor:0')
            fetches = [
                self.detection_graph.get_tensor_by_name(tensor_name)
                for tensor_name in ('detection_boxes:0',
                                    'detection_scores:0',
                                    'detection_classes:0')
            ]

            start_time = time.time()
            boxes, scores, classes = self.sess.run(
                fetches, feed_dict={image_tensor: image_np_expanded})
            print("tf obj_det predict time: ", time.time() - start_time)

            personboxes = []
            for score, class_id, box in zip(scores[0], classes[0], boxes[0]):
                # Stops at the first score that is not above the threshold;
                # assumes the model returns detections sorted by descending
                # score -- TODO confirm for non-zoo graphs.
                if not score > self.threshold:
                    break
                if class_id == self.PERSON_CLASS_ID:
                    personboxes.append(
                        [class_id, score, box[1], box[0], box[3], box[2]])

            return personboxes



  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
import cv2
import numpy as np
import sys, os
import time
 

# Takes the image of a detected person and uses color detection to find a safety vest.
def detect_vest(cv2Image, lower_hsv=(0, 89, 63), upper_hsv=(80, 255, 255)):
    """Find the largest vest-coloured region inside a person crop.

    The crop is converted from BGR to HSV, thresholded to the given colour
    range, cleaned up with an erosion followed by a dilation, and the
    bounding rectangle of the largest remaining contour is returned.

    Args:
        cv2Image: BGR image of the detected person.
        lower_hsv: Lower (Hue, Saturation, Value) bound of the vest colour.
            The default is hue 0 with 35% saturation and 25% value on the
            0-255 scale (int(0.35 * 255) = 89, int(0.25 * 255) = 63).
        upper_hsv: Upper (Hue, Saturation, Value) bound of the vest colour.

    Returns:
        ``(xmin, ymin, xmax, ymax, detected)`` in crop coordinates.
        ``detected`` is 1 when a region was found; otherwise it is 0 and all
        coordinates are 0. An empty or missing crop yields the "not detected"
        result instead of raising.
    """
    not_found = (0, 0, 0, 0, 0)
    if cv2Image is None or cv2Image.size == 0:
        return not_found

    # Conversion to the HSV palette, format - (Hue, Saturation, Value).
    hsv = cv2.cvtColor(cv2Image, cv2.COLOR_BGR2HSV)
    # Keep only the pixels whose colour lies inside the vest range.
    mask = cv2.inRange(hsv, np.array(lower_hsv), np.array(upper_hsv))

    # Kernel sizes scale with the crop height; clamp to 1 so a crop shorter
    # than 20 px never produces a zero-sized kernel.
    image_height = hsv.shape[0]
    erode_size = max(1, image_height // 20)
    dilate_size = max(1, image_height // 10)
    kernel_e = np.ones((erode_size, erode_size), np.uint8)
    kernel_d = np.ones((dilate_size, dilate_size), np.uint8)

    # Erode to remove small areas, then dilate to restore the size of the
    # areas that survived the erosion.
    erosion = cv2.erode(mask, kernel_e, iterations=1)
    dilation = cv2.dilate(erosion, kernel_d, iterations=1)
    if cv2.countNonZero(dilation) == 0:
        return not_found

    # findContours returns (image, contours, hierarchy) on OpenCV 3.x and
    # (contours, hierarchy) on 4.x; the contours are second-to-last in both.
    contours = cv2.findContours(dilation, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
    if len(contours) == 0:
        return not_found

    largest = max(contours, key=cv2.contourArea)
    x, y, w, h = cv2.boundingRect(largest)
    return x, y, x + w, y + h, 1
    
    

obj_detector = ObjectDetector()

# Clip used when the command line does not name a readable video file.
DEFAULT_VIDEO_PATH = "./1.mp4"
# Frame rate written into the annotated output file.
OUTPUT_FPS = 20.0
# BGR colours of the rectangles drawn around persons and vests.
PERSON_COLOR = (0, 255, 0)
VEST_COLOR = (255, 255, 0)

# capture video
# The first argument is used only when it names an existing file, so stray
# launcher options (e.g. when run from a notebook kernel -- presumably why the
# path used to be hard-coded) do not replace the default clip.
if len(sys.argv) > 1 and os.path.isfile(sys.argv[1]):
    video_path = sys.argv[1]
else:
    video_path = DEFAULT_VIDEO_PATH

# VideoCapture reports failure through isOpened(), so that is what is
# checked before falling back to the default camera.
video_capture = cv2.VideoCapture(video_path)
if not video_capture.isOpened():
    video_capture.release()
    video_capture = cv2.VideoCapture(0)

out_cap = None
person_boxes = []               # for storing predictions from object detector
detected = 0

try:
    # The first frame fixes the output size and is processed like any other.
    ret, frame = video_capture.read()
    if ret:
        frame_height, frame_width = frame.shape[:2]
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out_cap = cv2.VideoWriter(os.path.splitext(video_path)[0] + "_res_nobb.avi",
                                  fourcc, OUTPUT_FPS, (frame_width, frame_height))
    else:
        print("could not read any frame from the video source")

    while ret:
        shot_time_start = time.time()
        frame_height, frame_width = frame.shape[:2]

        # Vest detection runs on an untouched copy so rectangles already
        # drawn on `frame` (their green falls inside the vest hue range)
        # cannot leak into the colour mask.
        clean_frame = frame.copy()

        # NOTE(review): cv2 delivers BGR frames; the detection model is
        # presumably trained on RGB input -- confirm and convert if so.
        person_boxes = obj_detector.findperson(frame)
        for box in person_boxes:
            # Scale the normalized box to pixels and clamp it to the frame.
            xmin = max(0, int(box[2] * frame_width))
            ymin = max(0, int(box[3] * frame_height))
            xmax = min(frame_width, int(box[4] * frame_width))
            ymax = min(frame_height, int(box[5] * frame_height))

            person_crop = clean_frame[ymin:ymax, xmin:xmax]
            if person_crop.size == 0:
                continue

            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), PERSON_COLOR)
            #detect vest
            xmin_v, ymin_v, xmax_v, ymax_v, detected = detect_vest(person_crop)
            #if vest detected - then draw it
            if detected:
                # Shift the vest box from crop coordinates to frame coordinates.
                xmin_v, ymin_v, xmax_v, ymax_v = xmin_v + xmin, ymin_v + ymin, xmax_v + xmin, ymax_v + ymin
                cv2.rectangle(frame, (int(xmin_v), int(ymin_v)), (int(xmax_v), int(ymax_v)), VEST_COLOR)

        shot_time = time.time() - shot_time_start
        # Guard against a zero elapsed time on coarse clocks.
        fps = int(1 / shot_time) if shot_time > 0 else 0
        cv2.putText(frame, "FPS: "+str(fps), (30, 50), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 255))
        out_cap.write(frame)
        cv2.imshow('Video', frame)
        print("frame processing time: ", time.time() - shot_time_start)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        ret, frame = video_capture.read()
finally:
    # Always release the writer, the capture and the window, even on error.
    if out_cap is not None:
        out_cap.release()
    video_capture.release()
    cv2.destroyAllWindows()


tf obj_det predict time:  3.617732286453247
frame processing time:  3.634870767593384
tf obj_det predict time:  0.06602311134338379
frame processing time:  0.07580089569091797
tf obj_det predict time:  0.051958322525024414
frame processing time:  0.06017875671386719
tf obj_det predict time:  0.052850961685180664
frame processing time:  0.05946850776672363
tf obj_det predict time:  0.04598689079284668
frame processing time:  0.051323890686035156
tf obj_det predict time:  0.05102682113647461
frame processing time:  0.05657339096069336
tf obj_det predict time:  0.04952287673950195
frame processing time:  0.05483651161193848
tf obj_det predict time:  0.053545475006103516
frame processing time:  0.05910944938659668
tf obj_det predict time:  0.05388951301574707
frame processing time:  0.05917096138000488
tf obj_det predict time:  0.0531158447265625
frame processing time:  0.058679819107055664
tf obj_det predict time:  0.049602508544921875
frame processing time:  0.05502486228942871
tf obj_de