In [None]:
# Mount the user's Google Drive inside the Colab VM so that the video used by
# the tracking cells can be read from a regular filesystem path.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

In [None]:
from sklearn.cluster import MeanShift
!pip install opencv-python
import cv2
import numpy as np
from sklearn.cluster import estimate_bandwidth # to automatically estimate a good bandwidth for mean shift
from google.colab.patches import cv2_imshow
import matplotlib.pyplot as plt

5.1

In [None]:
def box_iou(box_a, box_b):
    """Return the intersection-over-union of two axis-aligned boxes.

    Both boxes are given as (x, y, w, h). The overlap extent along each axis
    is clamped at zero so that disjoint boxes yield an IoU of 0 instead of a
    negative (or spuriously positive) value. Returns 0.0 if the union is empty.
    """
    ax, ay, aw, ah = (int(v) for v in box_a)
    bx, by, bw, bh = (int(v) for v in box_b)
    inter_w = max(0, min(ax + aw, bx + bw) - max(ax, bx))
    inter_h = max(0, min(ay + ah, by + bh) - max(ay, by))
    inter_area = inter_w * inter_h
    union_area = aw * ah + bw * bh - inter_area
    return inter_area / union_area if union_area > 0 else 0.0


cap = cv2.VideoCapture('/content/drive/My Drive/KylianMbappe.mp4')
try:
    # capture one frame (frame number 1); it is only used to initialise the tracker
    ret, frame = cap.read()
    if not ret:
        raise RuntimeError('could not read the first frame of the video')

    # detect a face on the first frame
    face_detector = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
    face_boxes = face_detector.detectMultiScale(frame)

    # detectMultiScale returns an empty tuple when nothing is found and an
    # (N, 4) array otherwise, so test the length rather than comparing to ()
    if len(face_boxes) == 0:
        raise RuntimeError('no face detected')

    # initialize the tracking window around the (first) detected face;
    # plain Python ints keep slicing and the OpenCV calls below unambiguous
    (x, y, w, h) = tuple(int(v) for v in face_boxes[0])
    track_window = (x, y, w, h)

    # region of interest for tracking
    roi = frame[y:y+h, x:x+w]

    # convert the roi to HSV so we can construct a histogram of Hue
    hsv_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)

    # keep only pixels with saturation in [60, 255] and value in [32, 255]:
    # hue is not meaningful for nearly grey or very dark pixels
    # (see the description for Figure 3 in the original Cam Shift paper:
    # http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.14.7673)
    mask = cv2.inRange(hsv_roi, np.array((0., 60., 32.)), np.array((180., 255., 255.)))

    # form histogram of hue in the roi
    roi_hist = cv2.calcHist([hsv_roi], [0], mask, [180], [0, 180])

    # normalize the histogram array values so they are in the min=0 to max=255 range
    cv2.normalize(roi_hist, roi_hist, 0, 255, cv2.NORM_MINMAX)

    # termination criteria for mean shift: 10 iterations or shift less than 1 pixel
    term_crit = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 1)

    IoUs = []
    frame_numbers = []
    frame_number = 2  # the first frame read inside the loop is the video's 2nd frame
    while True:
        # grab a frame
        ret, frame = cap.read()
        if not ret:
            break

        # advance the counter for every frame read, so that frames without a
        # detection do not shift the frame numbers recorded for later frames
        current_frame = frame_number
        frame_number += 1

        # convert to HSV
        hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)

        # histogram back projection using roi_hist
        dst = cv2.calcBackProject([hsv], [0], roi_hist, [0, 180], 1)

        # use meanshift to shift the tracking window
        _, track_window = cv2.meanShift(dst, track_window, term_crit)

        # run the detector on the clean frame, before any rectangle is drawn on it
        face_boxes = face_detector.detectMultiScale(frame)
        if len(face_boxes) == 0:
            # no reference box for this frame: nothing to compare against
            continue
        face_box = tuple(int(v) for v in face_boxes[0])

        IoU = box_iou(face_box, track_window)
        frame_numbers.append(current_frame)
        IoUs.append(IoU)
        print(IoU)

        # display tracked window (red) and detected face (green)
        tx, ty, tw, th = track_window
        fx, fy, fw, fh = face_box
        img = cv2.rectangle(frame, (tx, ty), (tx + tw, ty + th), (0, 0, 255), 5)
        img = cv2.rectangle(frame, (fx, fy), (fx + fw, fy + fh), (0, 255, 0), 5)
        cv2_imshow(img)

        if cv2.waitKey(33) & 0xFF == 27:  # wait a bit and exit if ESC is pressed
            break
finally:
    # release the capture even if an error interrupts the loop
    cv2.destroyAllWindows()
    cap.release()


In [None]:
# Plot the per-frame IoU between the mean shift window and the Haar detection
# collected by the hue-histogram tracking cell above.
frame_number_array = np.array(frame_numbers)
IoU_array = np.array(IoUs)

fig, ax = plt.subplots()
ax.plot(frame_number_array, IoU_array)
ax.set_xlabel('frame number')
ax.set_ylabel('IoU')
# a title lets this figure be told apart from the other IoU plot in the notebook
ax.set_title('Mean shift (hue histogram) vs. Haar face detector')
plt.show()

5.2

In [None]:
# 8-bit grayscale intensities span 0..255, so the histogram needs 256 bins over
# [0, 256) (the hue range [0, 180) would silently drop all brighter pixels).
GRAY_BINS = 256
GRAY_RANGE = [0, 256]
# a pixel contributes to the histogram only if its gradient magnitude exceeds
# this fraction of the largest magnitude found in the region of interest
GRADIENT_FRACTION = 0.1


def box_iou(box_a, box_b):
    """Return the intersection-over-union of two axis-aligned boxes.

    Both boxes are given as (x, y, w, h). The overlap extent along each axis
    is clamped at zero so that disjoint boxes yield an IoU of 0 instead of a
    negative (or spuriously positive) value. Returns 0.0 if the union is empty.
    """
    ax, ay, aw, ah = (int(v) for v in box_a)
    bx, by, bw, bh = (int(v) for v in box_b)
    inter_w = max(0, min(ax + aw, bx + bw) - max(ax, bx))
    inter_h = max(0, min(ay + ah, by + bh) - max(ay, by))
    inter_area = inter_w * inter_h
    union_area = aw * ah + bw * bh - inter_area
    return inter_area / union_area if union_area > 0 else 0.0


def gradient_masked_gray_hist(roi_bgr):
    """Build a normalised grayscale histogram of the strong-gradient pixels of a BGR patch.

    The patch is converted to grayscale and blurred, Sobel derivatives give the
    gradient magnitude, and only pixels whose magnitude exceeds
    GRADIENT_FRACTION of the patch maximum are counted. The histogram is
    min-max normalised to 0..255 so that back projection does not saturate.
    """
    gray_roi = cv2.cvtColor(roi_bgr, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray_roi, (5, 5), 0)
    sobelx = cv2.Sobel(blurred, cv2.CV_64F, 1, 0, ksize=5)  # x
    sobely = cv2.Sobel(blurred, cv2.CV_64F, 0, 1, ksize=5)  # y
    mag, _ = cv2.cartToPolar(sobelx, sobely, angleInDegrees=True)

    # vectorised threshold; the mask is 255 (not the gray value) so that
    # strong-gradient pixels whose intensity is 0 are still counted
    strong = mag > GRADIENT_FRACTION * np.amax(mag)
    mask = np.where(strong, 255, 0).astype(np.uint8)

    hist = cv2.calcHist([gray_roi], [0], mask, [GRAY_BINS], GRAY_RANGE)
    cv2.normalize(hist, hist, 0, 255, cv2.NORM_MINMAX)
    return hist


cap = cv2.VideoCapture('/content/drive/My Drive/KylianMbappe.mp4')
try:
    # capture one frame (frame number 1); it is only used to initialise the tracker
    ret, frame = cap.read()
    if not ret:
        raise RuntimeError('could not read the first frame of the video')

    # detect a face on the first frame
    face_detector = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
    face_boxes = face_detector.detectMultiScale(frame)

    # detectMultiScale returns an empty tuple when nothing is found and an
    # (N, 4) array otherwise, so test the length rather than comparing to ()
    if len(face_boxes) == 0:
        raise RuntimeError('no face detected')

    # initialize the tracking window around the (first) detected face
    (x, y, w, h) = tuple(int(v) for v in face_boxes[0])
    track_window = (x, y, w, h)

    # initial appearance model from the first detection; it is refreshed from
    # the detector's box on every frame inside the loop below
    roi = frame[y:y+h, x:x+w]
    roi_hist = gradient_masked_gray_hist(roi)

    # termination criteria for mean shift: 10 iterations or shift less than 1 pixel
    term_crit = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 1)

    frame_number = 2  # the first frame read inside the loop is the video's 2nd frame
    frame_numbers = []
    IoUs = []
    while True:
        # grab a frame
        ret, frame = cap.read()
        if not ret:
            break

        # advance the counter for every frame read, so that frames without a
        # detection do not shift the frame numbers recorded for later frames
        current_frame = frame_number
        frame_number += 1

        face_boxes = face_detector.detectMultiScale(frame)
        if len(face_boxes) == 0:
            # no reference box for this frame: nothing to model or compare against
            continue
        face_box = tuple(int(v) for v in face_boxes[0])
        (x_3, y_3, w_3, h_3) = face_box

        # rebuild the grayscale histogram from the currently detected face
        roi = frame[y_3:y_3+h_3, x_3:x_3+w_3]
        roi_hist = gradient_masked_gray_hist(roi)

        # back-project the histogram onto the grayscale frame
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        dst = cv2.calcBackProject([gray], [0], roi_hist, GRAY_RANGE, 1)

        # use meanshift to shift the tracking window
        _, track_window = cv2.meanShift(dst, track_window, term_crit)

        # the detection computed above is reused; the frame has not been
        # modified, so running the detector a second time is unnecessary
        IoU = box_iou(face_box, track_window)
        frame_numbers.append(current_frame)
        IoUs.append(IoU)
        print(IoU)

        # display tracked window (red) and detected face (green)
        tx, ty, tw, th = track_window
        img = cv2.rectangle(frame, (tx, ty), (tx + tw, ty + th), (0, 0, 255), 5)
        img = cv2.rectangle(frame, (x_3, y_3), (x_3 + w_3, y_3 + h_3), (0, 255, 0), 5)
        cv2_imshow(img)

        if cv2.waitKey(33) & 0xFF == 27:  # wait a bit and exit if ESC is pressed
            break
finally:
    # release the capture even if an error interrupts the loop
    cv2.destroyAllWindows()
    cap.release()

In [None]:
# Plot the per-frame IoU between the mean shift window and the Haar detection
# collected by the gradient-masked grayscale tracking cell above.
frame_number_array = np.array(frame_numbers)
IoU_array = np.array(IoUs)

fig, ax = plt.subplots()
ax.plot(frame_number_array, IoU_array)
ax.set_xlabel('frame number')
ax.set_ylabel('IoU')
# a title lets this figure be told apart from the other IoU plot in the notebook
ax.set_title('Mean shift (gradient-masked grayscale histogram) vs. Haar face detector')
plt.show()