In [1]:
# Import modules for the project.
import os
import cv2
import glob
import numpy as np
import math

In [2]:
# Define a tracker using the Kalman filter.
class Track:
    """A single tracked object: bounding box, Kalman filter and bookkeeping counters."""

    def __init__(self, tid, box, deleteCounter, visibleFrames):
        """Store the track state and build a fresh Kalman filter for it.

        tid           -- identifier of the track
        box           -- bounding box as (x, y, w, h)
        deleteCounter -- initial value of the counter used to decide on deletion
        visibleFrames -- initial value of the visible-frames counter
        """
        self._id = tid
        self._box = box
        self._prevPos = []  # most recent positions, oldest first
        self._deleteCounter = deleteCounter
        self._visibleFrames = visibleFrames
        self._kalmanFilter = self.initialiseKalman()
        # Only flipped to True by setVisibleFrames(); the constructor value is not checked.
        self._show = False

    # --- position history -------------------------------------------------

    def getPrevPos(self):
        """Return the list of stored previous positions."""
        return self._prevPos

    def appendPrevPos(self, coord):
        """Append a position, discarding the oldest once more than 15 are stored."""
        history = self._prevPos
        history.append(coord)
        if len(history) > 15:
            del history[0]

    # --- plain accessors --------------------------------------------------

    def getID(self):
        """Return the track identifier."""
        return self._id

    def setID(self, num):
        """Replace the track identifier."""
        self._id = num

    def getShow(self):
        """Return True once the track has reached the visibility threshold."""
        return self._show

    def getBox(self):
        """Return the current bounding box."""
        return self._box

    def setBox(self, box):
        """Replace the current bounding box."""
        self._box = box

    def getDeleteCounter(self):
        """Return the deletion counter."""
        return self._deleteCounter

    def setDeleteCounter(self, num):
        """Replace the deletion counter."""
        self._deleteCounter = num

    def getVisibleFrames(self):
        """Return the visible-frames counter."""
        return self._visibleFrames

    def setVisibleFrames(self, num):
        """Replace the visible-frames counter; a value of 10 or more enables showing.

        The show flag is never switched back off by this method.
        """
        self._visibleFrames = num
        if num >= 10:
            self._show = True

    def getKalmanFilter(self):
        """Return the Kalman filter owned by this track."""
        return self._kalmanFilter

    # --- Kalman filter ----------------------------------------------------

    def initialiseKalman(self):
        """Build a cv2.KalmanFilter with 6 state values and 2 measured values.

        The matrices use a discrete time step of 1 between frames.
        """
        kf = cv2.KalmanFilter(6, 2)

        # Only the first two state components are measured.
        kf.measurementMatrix = np.eye(2, 6, dtype=np.float32)

        # Identity, plus 1 on the second super-diagonal and 0.5 on the fourth.
        kf.transitionMatrix = (
            np.eye(6, dtype=np.float32)
            + np.eye(6, k=2, dtype=np.float32)
            + 0.5 * np.eye(6, k=4, dtype=np.float32)
        )

        # Q matrix: diagonal process noise.
        kf.processNoiseCov = np.diag(
            [0.05, 0.05, 0.075, 0.075, 0.0015, 0.0015]
        ).astype(np.float32)

        return kf

In [3]:
# Predicts the next track.
def predictTracks(tracks):
    """Advance every track by one Kalman prediction step.

    For each track the centre of its current box is appended to the track's
    position history, the Kalman filter is asked for a prediction, and the box
    is re-centred on the predicted position (width and height are unchanged).

    tracks -- iterable of track objects exposing getBox, setBox, appendPrevPos
              and getKalmanFilter
    Returns the same `tracks` object that was passed in.
    """
    for track in tracks:

        # Get the current box for the tracker.
        x, y, w, h = track.getBox()

        # Record the current centre in the track's history.
        track.appendPrevPos((int(x + w / 2), int(y + h / 2)))

        # The prediction comes back as a column vector; flatten it so the
        # components are plain scalars instead of 1-element arrays.
        predictedState = np.ravel(track.getKalmanFilter().predict())
        Xc = float(predictedState[0])
        Yc = float(predictedState[1])

        # Store scalar coordinates so later int()/math calls never have to
        # convert a 1-element array.
        track.setBox([float(Xc - w / 2), float(Yc - h / 2), w, h])

    return tracks

In [4]:
# Match the detections.
def matchDetections(boxes, tracks, threshold=50):
    """Associate detections with tracks by nearest centre distance.

    A track is matched to its closest detection when that distance is below
    `threshold`, no other track is closer to the same detection, and the
    detection has not already been claimed by an earlier track. The Kalman
    filter of each matched track is corrected with the detection's centre.

    boxes     -- detections, each as (x, y, w, h); may be empty
    tracks    -- track objects exposing getBox and getKalmanFilter
    threshold -- maximum centre-to-centre distance (in pixels) for a match

    Returns (unmatchedTracks, matchedTracks, newDetections) where
    matchedTracks is a list of (track, detection) pairs with `detection` a
    1x4 array (so `detection[0]` is the box), and newDetections is the list
    of detection rows that were not matched to any track.
    """
    newDetections = []
    matchedTracks = []
    unmatchedTracks = []

    nDetections = len(boxes)
    nTracks = len(tracks)

    # No detections in this frame: every track is unmatched. Handling this
    # up front avoids taking the minimum of an empty cost row.
    if nDetections == 0:
        return list(tracks), matchedTracks, newDetections

    boxes = np.asarray(boxes)

    # Centres of all detections.
    det_Xc = boxes[:, 0] + boxes[:, 2] / 2.0
    det_Yc = boxes[:, 1] + boxes[:, 3] / 2.0

    # Cost matrix: Euclidean distance between each track centre and each detection centre.
    cost = np.zeros((nTracks, nDetections))
    for i, track in enumerate(tracks):
        # Box components may be scalars or 1-element arrays; normalise to floats.
        x, y, w, h = (float(np.ravel(v)[0]) for v in track.getBox())
        cost[i, :] = np.hypot(det_Xc - (x + w / 2), det_Yc - (y + h / 2))

    # Match lowest cost detection to track.
    matchedIndices = set()
    for i, track in enumerate(tracks):
        d_index = int(np.argmin(cost[i, :]))
        minCost_t = cost[i, d_index]
        minCost_d = cost[:, d_index].min()

        if (minCost_t < threshold
                and minCost_t == minCost_d
                and d_index not in matchedIndices):
            matchedIndices.add(d_index)

            # Keep the 1x4 shape callers index with [0].
            detection = boxes[[d_index]]
            matchedTracks.append((track, detection))

            x, y, w, h = detection[0]
            measurement = np.array([[np.float32(x + w / 2)], [np.float32(y + h / 2)]])
            track.getKalmanFilter().correct(measurement)
        else:
            unmatchedTracks.append(track)

    # Everything that was not claimed by a track is a new detection. Compare
    # by index so boxes that merely share a coordinate value are not dropped.
    newDetections = [boxes[k] for k in range(nDetections) if k not in matchedIndices]

    return unmatchedTracks, matchedTracks, newDetections

In [5]:
# Assign new centres for the tracks.
def updateMatchedTracks(matchedTracks):
    """Refresh every matched track from its detection.

    matchedTracks -- sequence of entries whose item 0 is the track and whose
                     item 1 is an indexable holding the matched box at index 0
    """
    for entry in matchedTracks:
        matched, detections = entry[0], entry[1]

        # Adopt the detected box as the track's box.
        matched.setBox(detections[0])

        # A fresh match clears the deletion counter.
        matched.setDeleteCounter(0)

        # One more consecutive frame in which the track was seen.
        seenSoFar = matched.getVisibleFrames()
        matched.setVisibleFrames(seenSoFar + 1)
                

In [6]:
# Update the unmatched tracks.
def updateUnmatchedTracks(unmatchedTracks):
    """Age every track that found no detection in the current frame.

    unmatchedTracks -- iterable of track objects exposing getDeleteCounter,
                       setDeleteCounter and setVisibleFrames
    """
    for lost in unmatchedTracks:
        # One more frame without a matching detection.
        missed = lost.getDeleteCounter()
        lost.setDeleteCounter(missed + 1)

        # The run of consecutive visible frames is broken, so reset it.
        lost.setVisibleFrames(0)

In [7]:
# Delete old tracks if they have not appeared.
def deleteOldTracks(unmatchedTracks, tracks, maxMissedFrames=15):
    """Remove tracks that have gone unmatched for too many frames.

    unmatchedTracks -- tracks that had no detection in the current frame
    tracks          -- list of all live tracks; modified in place
    maxMissedFrames -- a track is removed once its delete counter reaches
                       (or exceeds) this value

    Returns the same `tracks` list that was passed in.
    """
    for track in unmatchedTracks:
        # Use >= so a counter that overshoots the limit still triggers removal,
        # and skip tracks that are no longer in the list.
        if track.getDeleteCounter() >= maxMissedFrames and track in tracks:
            tracks.remove(track)
    return tracks

In [8]:
# Create new tracks for new detections.
def createNewTracks(newDetections, tracks):
    """Start a track for every detection that was not matched to an existing one.

    Each new track is created with ID -1, a delete counter of 0 and a
    visible-frames counter of 5, and its Kalman filter state is seeded with
    the centre of the detection (all remaining state components are zero).

    newDetections -- iterable of boxes, each as (x, y, w, h)
    tracks        -- list of live tracks; new tracks are appended in place

    Returns the same `tracks` list that was passed in.
    """
    for detection in newDetections:
        x, y, w, h = detection
        t = Track(-1, detection, 0, 5)

        # Seed the filter with an explicit 6x1 column vector (the layout of a
        # 6-state filter) rather than relying on a 1-D array being reshaped.
        initialState = np.array(
            [[x + w / 2], [y + h / 2], [0], [0], [0], [0]], np.float32)

        kalman = t.getKalmanFilter()
        kalman.statePost = initialState
        # Separate copy so the two states never alias the same buffer.
        kalman.statePre = initialState.copy()

        tracks.append(t)
    return tracks

In [9]:
# Function to detect mouse click down and release for user input when drawing the rectangle.
def onClick(event, x, y, flags, param):
    """Mouse callback: record the cursor position on left-button press and release.

    Points are appended to the global `refPt` list; `flags` and `param` are
    accepted for the callback signature but not used.
    """
    global refPt
    recordedEvents = (cv2.EVENT_LBUTTONDOWN, cv2.EVENT_LBUTTONUP)
    if event in recordedEvents:
        refPt.append((x, y))

In [10]:
# Checks if the previous point and current point of the pedestrian is in the boundary.
def inBoundary(x, y):
    """Return True when (x, y) lies inside or on the rectangle spanned by the first two `refPt` points.

    The two corners may be given in any order.
    """
    global refPt
    cornerA = refPt[0]
    cornerB = refPt[1]

    # A coordinate lies between two bounds exactly when its offsets from them
    # have opposite signs (or one of them is zero).
    betweenX = (x - cornerA[0]) * (x - cornerB[0]) <= 0
    if not betweenX:
        return False

    betweenY = (y - cornerA[1]) * (y - cornerB[1]) <= 0
    return bool(betweenY)

In [11]:
# # Load all the frames and resize for a larger frame.
# frames = glob.glob("sequence/*.jpg")
# frames.sort()
# frame_list = [cv2.resize(cv2.imread(img),(1280, 960)) for img in frames]

In [12]:
# # Get the size of the frame to make a video writer.
# fps = 24
# h, w = 960, 1280
# video = cv2.VideoWriter('output.mp4', cv2.VideoWriter_fourcc(*'mp4v'), fps, (w,h), True)

In [13]:
# # Write every frame into the video and release the video.
# for frame in frame_list:
#     video.write(frame)
# video.release()

In [14]:
# Create a HOG descriptor with a different nlevels parameter (less computational power).
# The names below are passed positionally, in this order, to cv2.HOGDescriptor.
# 64x128 window: the size expected by the people detector installed further down.
win_size = (64, 128)
block_size =(16, 16)
block_stride = (8, 8)
cell_size = (8, 8)
nbins = 9
deriv_aperture = 1
win_sigma = 4.
# NOTE(review): 0 is presumably the L2-Hys normalisation type - confirm against the OpenCV docs.
histogram_norm_type = 0
l2_hys_threshold = 2.0000000000000001e-01
gamma_correction = 0
# NOTE(review): presumably limits how many scale levels detectMultiScale scans - confirm.
nlevels = 5

# Instantiate an instance of the descriptor.
hog = cv2.HOGDescriptor(win_size, block_size, block_stride, cell_size, nbins, deriv_aperture, win_sigma, histogram_norm_type,
    l2_hys_threshold, gamma_correction, nlevels)

# Set the SVM detector to OpenCV's people detector (window size of 64x128).
hog.setSVMDetector(hog.getDefaultPeopleDetector())

In [15]:
# Stores the trackers.
tracks = []

# Initialise tracker ID to 0.
track_id = 0

# Counters for people crossing into / out of the user-drawn box.
enterCount = 0
exitCount = 0

# Corner points of the user-drawn rectangle, filled in by the onClick mouse callback.
refPt = []
firstFrame = True

# Get the recently made video and step through the frames.
cap = cv2.VideoCapture('output.mp4')
cv2.namedWindow('frame', cv2.WINDOW_NORMAL)
cv2.setMouseCallback('frame', onClick)

# try/finally so the capture and the window are released even if a frame fails.
try:
    # Process the video frame by frame.
    while True:
        ret, frame = cap.read()

        if not ret or frame is None:
            break

        if firstFrame:
            # Show the first frame and wait until the user has dragged out the
            # rectangle (press + release gives two points). Any key continues
            # once the rectangle exists; q quits.
            cv2.imshow('frame', frame)
            cv2.resizeWindow('frame', (960, 720))
            quitRequested = False
            while len(refPt) < 2:
                if cv2.waitKey(-1) == ord('q'):
                    quitRequested = True
                    break
            if quitRequested:
                break
            firstFrame = False

        # Frames decoded by OpenCV are in BGR channel order.
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Get the bounding box dimensions.
        boxes, weights = hog.detectMultiScale(gray, winStride=(4, 4), padding=(8, 8), scale=1.15)

        # Predict the tracks for the new frame.
        tracks = predictTracks(tracks)

        # Update all existing tracks inside the list of trackers.
        unmatchedTracks, matchedTracks, newDetections = matchDetections(boxes, tracks)
        updateMatchedTracks(matchedTracks)
        updateUnmatchedTracks(unmatchedTracks)
        tracks = deleteOldTracks(unmatchedTracks, tracks)
        tracks = createNewTracks(newDetections, tracks)

        # Display the detected boxes in the colour picture.
        for track in tracks:
            # Box components may be scalars or 1-element arrays; normalise to floats.
            x, y, w, h = (float(np.ravel(v)[0]) for v in track.getBox())

            if track.getShow():

                # If classifier decides to show a new pedestrian, allocate an ID for them.
                if track.getID() == -1:
                    track.setID(track_id)
                    track_id = track_id + 1

                # Box colours (BGR):
                # Red: Detected but not counted as pedestrian.
                # Blue: Purely prediction.
                # Green: Pedestrian.
                if track in unmatchedTracks:
                    color = (255, 0, 0)
                else:
                    color = (0, 255, 0)

                # Draw the trail through the stored previous positions.
                trail = track.getPrevPos()
                for prevPoint, nextPoint in zip(trail, trail[1:]):
                    cv2.line(frame, prevPoint, nextPoint, (0, 255, 0), 4, 8, 0)

                Xc = int(x + w / 2)
                Yc = int(y + h / 2)
                cv2.rectangle(frame, (int(x), int(y)), (int(x + w), int(y + h)), color, 2)
                cv2.putText(frame, str(track.getID()), (int(x), int(y - 10)), cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2)

                # Take the last stored position from this track's own history,
                # so it is never undefined or left over from another track.
                if trail:
                    lastPoint = trail[-1]
                    cv2.line(frame, lastPoint, (Xc, Yc), (0, 255, 0), 4, 8, 0)

                    # Keep track of people entering/leaving.
                    nowInside = inBoundary(Xc, Yc)
                    wasInside = inBoundary(lastPoint[0], lastPoint[1])
                    if nowInside and not wasInside:
                        cv2.putText(frame, ("ENTERING"), (Xc, Yc), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 255, 255), 2)
                        enterCount += 1
                    elif wasInside and not nowInside:
                        cv2.putText(frame, ("LEAVING"), (Xc, Yc), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 255, 255), 2)
                        exitCount += 1

            elif track.getDeleteCounter() <= 1:
                cv2.rectangle(frame, (int(x), int(y)), (int(x + w), int(y + h)), (0, 0, 255), 2)
                cv2.putText(frame, 'Detected Point', (int(x), int(y - 10)), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 0, 255), 2)

        # Print relevant information once per frame, whether or not any track exists.
        infoRows = (
            ("Pedestrians:", track_id, 25),
            ("People entering box:", enterCount, 55),
            ("People leaving box:", exitCount, 85),
        )
        for label, value, row in infoRows:
            cv2.putText(frame, label, (10, row), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 255, 255), 2)
            cv2.putText(frame, f"{value}", (350, row), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 255, 255), 2)

        # Draw the user defined rectangle on the screen.
        cv2.rectangle(frame, refPt[0], refPt[1], (255, 0, 255), 4)

        # Display the resulting frame.
        cv2.imshow('frame', frame)
        cv2.resizeWindow('frame', (960, 720))

        key = cv2.waitKey(1)
        if key == ord('q'): # q to quit.
            break
        if key == ord('p'):
            cv2.waitKey(-1) # p to pause, press any key to play.
finally:
    # Free the video and destroy the windows.
    cap.release()
    cv2.destroyAllWindows()