In [1]:
import numpy as np
import cv2
import time
import os
import matplotlib.pyplot as plt
import pickle

from openvino.inference_engine import IENetwork, IECore, IEPlugin

In [2]:
from src.detector import Detector
from src.pose_estimator import HumanPoseEstimator
from src.track import Detection, Track
from src.utils import preprocess,  iou
from src.utils import draw_tracks

In [3]:
def run_demo(args):
    """Run detection, pose classification and IoU tracking over a video.

    Every ``args.every_nth_frame``-th frame is optionally rotated, passed
    through the person detector and the pose estimator, each pose is
    classified, and the resulting detections are associated with the active
    tracks by greedy IoU matching. Annotated frames are written to
    ``runs/exp_<id>/<video name>.avi`` and a per-stage FPS report is printed.

    The function relies on the module-level names ``detector_person``,
    ``single_human_pose_estimator`` and ``classifier``; they must be defined
    before it is called.

    Note: the first frame of the video is only used to probe the frame size;
    frame numbering (``frame_i``) starts at 1 with the frame after it.

    Args:
        args: object exposing ``every_nth_frame`` (int >= 1), ``rotate_frame``
            (bool), ``out_fps``, ``sigma_iou`` (minimum IoU for a detection to
            extend a track), ``log`` (bool) and ``in_video_path`` (str).

    Raises:
        ValueError: if ``every_nth_frame`` is smaller than 1.
        IOError: if no frame can be read from the input video or the output
            video cannot be opened.
    """
    every_nth_frame = args.every_nth_frame
    rotate_frame = args.rotate_frame
    out_fps = args.out_fps
    sigma_iou = args.sigma_iou
    log = args.log
    in_video_path = args.in_video_path

    if every_nth_frame < 1:
        # guards the modulo below against division by zero
        raise ValueError("every_nth_frame must be >= 1, got " + str(every_nth_frame))

    # tracks whose miss_count reaches this value are dropped
    max_miss_count = 4

    video_name = os.path.basename(in_video_path).split('.')[0]

    total_time = 0
    detector_time = 0
    pose_time = 0
    classification_time = 0
    tracking_time = 0
    operation_count = 0

    tracks_active = []
    t_id = 1
    frame_i = 0

    cap = cv2.VideoCapture(in_video_path)
    out = None
    try:
        # probe frame: only used to find out the size of the written frames
        ret, frame = cap.read()
        if not ret:
            raise IOError("Could not read a frame from " + str(in_video_path))

        if rotate_frame:
            # the writer must be sized for the frames that are actually
            # written, i.e. the rotated ones (width and height swapped)
            frame = cv2.rotate(frame, cv2.ROTATE_90_CLOCKWISE)
        frame_size = (frame.shape[1], frame.shape[0])

        # create the run directories only once the input is known to be usable
        exp_dir, violation_dir = _make_experiment_dirs('runs')

        # output video
        out = cv2.VideoWriter(os.path.join(exp_dir, video_name + '.avi'),
                              cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
                              out_fps, frame_size)
        if not out.isOpened():
            raise IOError("Could not open the output video in " + exp_dir)

        while cap.isOpened():
            # read a frame from video
            ret, frame = cap.read()
            if not ret:
                break

            frame_i += 1

            # sample frames
            if frame_i % every_nth_frame != 0:
                continue

            operation_count += 1
            start_time = time.perf_counter()

            if log:
                print("====== Frame id : ", str(frame_i))

            if rotate_frame:
                frame = cv2.rotate(frame, cv2.ROTATE_90_CLOCKWISE)

            # get boxes and key points
            s = time.perf_counter()
            boxes = detector_person.detect(frame)
            detector_time += time.perf_counter() - s

            s = time.perf_counter()
            key_points = [single_human_pose_estimator.estimate(frame, bbox) for bbox in boxes]
            pose_time += time.perf_counter() - s

            if log:
                print("Detections : ", str(len(key_points)))

            # predict state and get detections
            s = time.perf_counter()
            dets = []
            for box, k_p in zip(boxes, key_points):
                features = preprocess(k_p)
                state = classifier.predict(features)
                dets.append(Detection(box=box, state=state, frame=frame_i))
            classification_time += time.perf_counter() - s

            s = time.perf_counter()
            # matched detections are removed from `dets` by the helper
            updated_tracks = _update_tracks(tracks_active, dets, sigma_iou,
                                            frame_i, frame, max_miss_count)

            # every detection left unmatched starts a new track
            new_tracks = []
            for det in dets:
                new_tracks.append(Track(det.box, det.state, det.frame, frame_i, t_id, violation_dir))
                t_id += 1

            tracks_active = updated_tracks + new_tracks
            tracking_time += time.perf_counter() - s

            if log:
                print("Active Tracks : ", str(len(tracks_active)))

            frame = draw_tracks(tracks_active, frame)
            out.write(frame)

            total_time += time.perf_counter() - start_time
    finally:
        # release both handles even on errors so the output file is finalised
        cap.release()
        if out is not None:
            out.release()

    print("======= FPS Report =======")
    print("Total fps: " + str(_fps(operation_count, total_time)))
    print("Detector fps: " + str(_fps(operation_count, detector_time)))
    print("Pose estimation fps: " + str(_fps(operation_count, pose_time)))
    print("Pose classification fps: " + str(_fps(operation_count, classification_time)))
    print("Person Tracker fps:" + str(_fps(operation_count, tracking_time)))


def _make_experiment_dirs(root):
    """Create ``<root>/exp_<id>/violations``; return ``(exp_dir, violation_dir)``."""
    os.makedirs(root, exist_ok=True)
    exp_id = len(os.listdir(root))
    exp_dir = os.path.join(root, 'exp_' + str(exp_id))
    # the entry count is only a first guess: skip ids that are already taken
    while os.path.exists(exp_dir):
        exp_id += 1
        exp_dir = os.path.join(root, 'exp_' + str(exp_id))
    violation_dir = os.path.join(exp_dir, 'violations')
    os.makedirs(violation_dir)
    return exp_dir, violation_dir


def _update_tracks(tracks_active, dets, sigma_iou, frame_i, frame, max_miss_count):
    """Greedily extend tracks with their best-IoU detection; return kept tracks.

    Matched detections are removed from ``dets`` in place, so the detections
    remaining in ``dets`` afterwards are the unmatched ones. Unmatched tracks
    are kept only while their ``miss_count`` stays below ``max_miss_count``.
    """
    kept_tracks = []
    for track in tracks_active:
        matched = False
        if dets:
            overlaps = [iou(track.position, det.box) for det in dets]
            best_idx = max(range(len(dets)), key=overlaps.__getitem__)
            if overlaps[best_idx] >= sigma_iou:
                # a detection can extend one track only: take it out of the pool
                best_match = dets.pop(best_idx)
                track.update(best_match.box, best_match.state, frame_i, frame)
                kept_tracks.append(track)
                matched = True

        if not matched:
            track.miss_track(frame_i)
            if track.miss_count < max_miss_count:
                kept_tracks.append(track)
    return kept_tracks


def _fps(frame_count, elapsed):
    """Return ``frame_count / elapsed``, or 0.0 when no time was measured."""
    if elapsed > 0:
        return float(frame_count) / elapsed
    return 0.0

In [4]:
# model IR files
model_od = 'models/mobilenet_ssd/FP16/mobilenet-ssd.xml'
mode_pose = 'models/pose_estimation/FP16/single-human-pose-estimation-0001.xml'
cls_file = 'models/pose_classifier/classifier.sav'

# a single Inference Engine core is shared by both networks
ie = IECore()

# label_class=15 is presumably the "person" class id of the detector's
# label set -- TODO confirm against the model's label map
detector_person = Detector(ie, path_to_model_xml=model_od,
                           device='CPU',
                           label_class=15)

single_human_pose_estimator = HumanPoseEstimator(ie, path_to_model_xml=mode_pose,
                                                 device='CPU')

# NOTE: unpickling executes code stored in the file -- only load classifier
# files from a trusted source. The `with` block closes the file handle.
with open(cls_file, 'rb') as cls_handle:
    classifier = pickle.load(cls_handle)

In [5]:
class arguments:
    """Settings consumed by ``run_demo`` (read as plain attributes)."""

    # Input video. Other clips that were used with this demo:
    #   'custom_data/cusp_walk.mp4'
    #   'custom_data/Tanya.mp4'
    #   'custom_data/walk.mp4'
    #   'custom_data/jump.mp4'
    #   'custom_data/MTA.mp4'
    in_video_path = 'data/videos/cusp_jump.mp4'

    # Only every n-th frame of the input is processed.
    every_nth_frame = 2

    # Rotate each frame so that people appear upright.
    rotate_frame = True

    # IoU threshold used when matching detections to tracks.
    sigma_iou = 0.5

    # Frame rate of the annotated result video.
    out_fps = 3

    # Print per-frame progress information.
    log = True


In [6]:
# Instantiate the settings holder and run the demo with it; the log lines
# and the FPS report are printed by run_demo.
args = arguments()
run_demo(args)

Detections :  1
Active Tracks :  1
Detections :  1
Active Tracks :  1
Detections :  1
Active Tracks :  1
Detections :  1
Active Tracks :  1
Detections :  1
Active Tracks :  1
Detections :  1
Active Tracks :  1
Detections :  1
Active Tracks :  1
Detections :  1
Active Tracks :  1
Detections :  1
Active Tracks :  1
Detections :  1
Active Tracks :  1
Detections :  1
Active Tracks :  1
Detections :  1
Active Tracks :  1
Detections :  1
Active Tracks :  1
Detections :  1
Active Tracks :  1
Detections :  1
Active Tracks :  1
Detections :  1
Active Tracks :  1
Detections :  1
Active Tracks :  1
Detections :  1
Active Tracks :  1
Detections :  1
Active Tracks :  1
Detections :  1
Active Tracks :  1
Detections :  1
Active Tracks :  1
Detections :  1
Active Tracks :  1
Detections :  1
Active Tracks :  1
Detections :  1
Active Tracks :  1
Detections :  1
Active Tracks :  1
Detections :  1
Active Tracks :  1
Detections :  1
Active Tracks :  1
Detections :  1
Active Tracks :  1
Detections :  1
Acti