## Mean Shift

## Question 1

In [None]:
import numpy as np
import cv2

roi_defined = False
 
def define_ROI(event, x, y, flags, param):
	global r,c,w,h,roi_defined
	# if the left mouse button was clicked, 
	# record the starting ROI coordinates 
	if event == cv2.EVENT_LBUTTONDOWN:
		r, c = x, y
		roi_defined = False
	# if the left mouse button was released,
	# record the ROI coordinates and dimensions
	elif event == cv2.EVENT_LBUTTONUP:
		r2, c2 = x, y
		h = abs(r2-r)
		w = abs(c2-c)
		r = min(r,r2)
		c = min(c,c2)  
		roi_defined = True

cap = cv2.VideoCapture('./Test-Videos/Antoine_Mug.mp4')
#cap = cv2.VideoCapture('./Test-Videos/VOT-Ball.mp4')
#cap = cv2.VideoCapture('./Test-Videos/VOT-Basket.mp4')
#cap = cv2.VideoCapture('./Test-Videos/VOT-Car.mp4')
#cap = cv2.VideoCapture('./Test-Videos/VOT-Sunshade.mp4')
#cap = cv2.VideoCapture('./Test-Videos/VOT-Woman.mp4')

ret, frame = cap.read()
clone = frame.copy()
cv2.namedWindow("First image")
cv2.setMouseCallback("First image", define_ROI)
 
while True:
	# display the image and wait for a keypress
	cv2.imshow("First image", frame)
	key = cv2.waitKey(1) & 0xFF
	# if the ROI is defined, draw it!
	if (roi_defined):
		# draw a green rectangle around the region of interest
		cv2.rectangle(frame, (r,c), (r+h,c+w), (0, 255, 0), 2)
	# else reset the image...
	else:
		frame = clone.copy()
	# if the 'q' key is pressed, break from the loop
	if key == ord("q"):
		break
 
track_window = (r,c,h,w)
# set up the ROI for tracking
roi = frame[c:c+w, r:r+h]
# conversion to Hue-Saturation-Value space
# 0 < H < 180 ; 0 < S < 255 ; 0 < V < 255
hsv_roi =  cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
# computation mask of the histogram:
# Pixels with S<30, V<20 or V>235 are ignored 
mask = cv2.inRange(hsv_roi, np.array((0.,30.,20.)), np.array((180.,255.,235.)))


# Marginal histogram of the Hue component
roi_hist = cv2.calcHist([hsv_roi],[0],mask,[180],[0,180])
# Marginal histogram of the S component
#roi_hist = cv2.calcHist([hsv_roi],[1],mask,[180],[0,180])


# Histogram values are normalised to [0,255]
cv2.normalize(roi_hist,roi_hist,0,255,cv2.NORM_MINMAX)

# Setup the termination criteria: either 10 iterations,
# or move by less than 1 pixel
term_crit = ( cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 1 )

cpt = 1

cv2.namedWindow('Back Projection')
cv2.namedWindow('Sequence')
cv2.namedWindow('Hue')

while True:
    ret, frame = cap.read()
    if ret == True:
        hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
        hsv_disp = hsv
        hsv_disp[:,:,1] = 255
        hsv_disp[:,:,2] = 255
        huedisp = cv2.cvtColor(hsv_disp, cv2.COLOR_HSV2BGR)

        dst = cv2.calcBackProject([hsv],[0],roi_hist,[0,180],1)
        ret, track_window = cv2.meanShift(dst, track_window, term_crit)
        r,c,h,w = track_window
        frame_tracked = cv2.rectangle(frame, (r,c), (r+h,c+w), (255,0,0) ,2)
        cv2.imshow('Sequence',frame_tracked)
        cv2.imshow('Hue', huedisp)

        # Show the back projection result in a separate window
        cv2.imshow('Back Projection', dst)

        k = cv2.waitKey(60) & 0xff
        if k == 27:
            break
        elif k == ord('s'):
            cv2.imwrite('Frame_%04d.png'%cpt,frame_tracked)
        cpt += 1
    else:
        break

cv2.destroyAllWindows()
cap.release()


## Mean shift with threshold

## Question 2

In [23]:
import numpy as np
import cv2

roi_defined = False

def define_ROI(event, x, y, flags, param):
    global r, c, w, h, roi_defined
    if event == cv2.EVENT_LBUTTONDOWN:
        r, c = x, y
        roi_defined = False
    elif event == cv2.EVENT_LBUTTONUP:
        r2, c2 = x, y
        h = abs(r2 - r)
        w = abs(c2 - c)
        r = min(r, r2)
        c = min(c, c2)
        roi_defined = True

cap = cv2.VideoCapture('./Test-Videos/Antoine_Mug.mp4')
#cap = cv2.VideoCapture('./Test-Videos/VOT-Ball.mp4')
#cap = cv2.VideoCapture('./Test-Videos/VOT-Basket.mp4')
#cap = cv2.VideoCapture('./Test-Videos/VOT-Car.mp4')
#cap = cv2.VideoCapture('./Test-Videos/VOT-Sunshade.mp4')
#cap = cv2.VideoCapture('./Test-Videos/VOT-Woman.mp4')

ret, frame = cap.read()
clone = frame.copy()
cv2.namedWindow("First image")
cv2.setMouseCallback("First image", define_ROI)

while True:
    cv2.imshow("First image", frame)
    key = cv2.waitKey(1) & 0xFF
    if roi_defined:
        cv2.rectangle(frame, (r, c), (r + h, c + w), (0, 255, 0), 2)
    else:
        frame = clone.copy()
    if key == ord("q"):
        break

track_window = (r, c, h, w)
roi = frame[c:c + w, r:r + h]
hsv_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
mask = cv2.inRange(hsv_roi, np.array((0., 30., 20.)), np.array((180., 255., 235.)))
roi_hist = cv2.calcHist([hsv_roi], [0], mask, [180], [0, 180])
cv2.normalize(roi_hist, roi_hist, 0, 255, cv2.NORM_MINMAX)

term_crit = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 1)

while True:
    ret, frame = cap.read()
    if ret:
        hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
        dst = cv2.calcBackProject([hsv], [0], roi_hist, [0, 180], 1)
        dst_thresholded = dst.copy()
        dst_thresholded[dst < 240] = 0
        ret, track_window = cv2.meanShift(dst_thresholded, track_window, term_crit)
        r, c, h, w = track_window
        frame_tracked = cv2.rectangle(frame, (r, c), (r + h, c + w), (255, 0, 0), 2)
        cv2.imshow('Sequence', frame_tracked)
        cv2.imshow('Thresholded Back Projection', dst_thresholded)
        k = cv2.waitKey(60) & 0xff
        if k == 27:
            break
    else:
        break

cv2.destroyAllWindows()
cap.release()


## Hough Transform

## Question 3

In [2]:
import numpy as np
import cv2

roi_defined = False
 
def define_ROI(event, x, y, flags, param):
    global r, c, w, h, roi_defined
    if event == cv2.EVENT_LBUTTONDOWN:
        r, c = x, y
        roi_defined = False
    elif event == cv2.EVENT_LBUTTONUP:
        r2, c2 = x, y
        h = abs(r2 - r)
        w = abs(c2 - c)
        r = min(r, r2)
        c = min(c, c2)  
        roi_defined = True

#cap = cv2.VideoCapture('./Test-Videos/Antoine_Mug.mp4')
#cap = cv2.VideoCapture('./Test-Videos/VOT-Ball.mp4')
#cap = cv2.VideoCapture('./Test-Videos/VOT-Basket.mp4')
#cap = cv2.VideoCapture('./Test-Videos/VOT-Car.mp4')
cap = cv2.VideoCapture('./Test-Videos/VOT-Sunshade.mp4')
#cap = cv2.VideoCapture('./Test-Videos/VOT-Woman.mp4')


ret, frame = cap.read()
clone = frame.copy()
cv2.namedWindow("First image")
cv2.setMouseCallback("First image", define_ROI)

while True:
    cv2.imshow("First image", frame)
    key = cv2.waitKey(1) & 0xFF
    if roi_defined:
        cv2.rectangle(frame, (r, c), (r + h, c + w), (0, 255, 0), 2)
    else:
        frame = clone.copy()
    if key == ord("q"):
        break

track_window = (r, c, h, w)
roi = frame[c:c+w, r:r+h]
hsv_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
mask = cv2.inRange(hsv_roi, np.array((0., 30., 20.)), np.array((180., 255., 235.)))
roi_hist = cv2.calcHist([hsv_roi], [0], mask, [180], [0, 180])
cv2.normalize(roi_hist, roi_hist, 0, 255, cv2.NORM_MINMAX)
term_crit = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 1)

cpt = 1

cv2.namedWindow('Original')
cv2.namedWindow('Gradient Orientation')
cv2.namedWindow('Gradient Magnitude')
cv2.namedWindow('Selected Orientations')

while True:
    ret, frame = cap.read()
    if ret:
        hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
        hsv_disp = hsv.copy()
        hsv_disp[:,:,1] = 255
        hsv_disp[:,:,2] = 255
        huedisp = cv2.cvtColor(hsv_disp, cv2.COLOR_HSV2BGR)

        # Calculate gradient orientation and magnitude
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        grad_x = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
        grad_y = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
        grad_mag, grad_ori = cv2.cartToPolar(grad_x, grad_y, angleInDegrees=True)

        # Apply threshold on gradient magnitude
        mask = grad_mag > 50  # Adjust threshold as needed

        # Create a copy of frame for visualization
        frame_with_orientation = frame.copy()

        # Mask pixels with insignificant gradient magnitudes
        frame_with_orientation[mask == 0] = [0, 0, 255]  # Set insignificant gradient pixels to red

        # Display images
        cv2.imshow('Original', frame)
        cv2.imshow('Gradient Orientation', grad_ori.astype(np.uint8))
        cv2.imshow('Gradient Magnitude', grad_mag.astype(np.uint8))
        cv2.imshow('Selected Orientations', frame_with_orientation)

        k = cv2.waitKey(60) & 0xff
        if k == 27:
            break
        elif k == ord('s'):
            cv2.imwrite('Frame_%04d.png' % cpt, frame)
        cpt += 1
    else:
        break

cv2.destroyAllWindows()
cap.release()


## Question 4

## Tracking using Hough Transform

In [19]:
import numpy as np
import cv2
from collections import defaultdict
import math

roi_defined = False

#cap = cv2.VideoCapture('./Test-Videos/Antoine_Mug.mp4')
#cap = cv2.VideoCapture('./Test-Videos/VOT-Ball.mp4')
#cap = cv2.VideoCapture('./Test-Videos/VOT-Basket.mp4')
#cap = cv2.VideoCapture('./Test-Videos/VOT-Car.mp4')
#cap = cv2.VideoCapture('./Test-Videos/VOT-Sunshade.mp4')
cap = cv2.VideoCapture('./Test-Videos/VOT-Woman.mp4')

def define_ROI(event, x, y, flags, param):
    global r, c, w, h, roi_defined
    if event == cv2.EVENT_LBUTTONDOWN:
        r, c = x, y
        roi_defined = False
    elif event == cv2.EVENT_LBUTTONUP:
        r2, c2 = x, y
        h = abs(r2 - r)
        w = abs(c2 - c)
        r = min(r, r2)
        c = min(c, c2)
        roi_defined = True

def calculate_gradient_orientation(frame, threshold):
    blue_channel = frame[..., 2]
    gradient_x, gradient_y = np.gradient(blue_channel)
    gradient_magnitude = np.sqrt(gradient_x ** 2 + gradient_y ** 2)
    orientation = np.arctan2(gradient_y, gradient_x)
    not_valid_indices = gradient_magnitude < threshold
    valid_indices = gradient_magnitude > threshold
    return gradient_magnitude, orientation, np.where(not_valid_indices), np.where(valid_indices)

def accumulate_hough_space(t_hough, orientation_map, r_table, valid_indices):
    for px, py in zip(valid_indices[0], valid_indices[1]):
        angle_index = int(orientation_map[px, py] * 90 / math.pi)
        if angle_index in r_table:
            for value in r_table[angle_index]:
                new_py, new_px = py + value[0], px + value[1]
                if 0 <= new_py < t_hough.shape[1] and 0 <= new_px < t_hough.shape[0]:
                    t_hough[new_px, new_py] += 1
    return t_hough

ret, frame = cap.read()
if ret is False:
    print("Unable to read the first frame from the video.")
    exit(1)

clone = frame.copy()
cv2.namedWindow("First image")
cv2.setMouseCallback("First image", define_ROI)

while True:
    cv2.imshow("First image", frame)
    key = cv2.waitKey(1) & 0xFF
    if roi_defined:
        cv2.rectangle(frame, (r, c), (r + h, c + w), (0, 255, 0), 2)
    else:
        frame = clone.copy()
    if key == ord("q"):
        break

track_window = (r, c, h, w)
roi = frame[c:c + w, r:r + h]
hsv_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
mask = cv2.inRange(hsv_roi, np.array((0., 30., 20.)), np.array((180., 255., 235.)))
roi_hist = cv2.calcHist([hsv_roi], [0], mask, [180], [0, 180])
cv2.normalize(roi_hist, roi_hist, 0, 255, cv2.NORM_MINMAX)

term_crit = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 1)

R = defaultdict(list)
thresh = 25
grad, orient, _, ind = calculate_gradient_orientation(hsv_roi, thresh)
centroid = np.array([int(r + (h // 2)), int(c + (w // 2))])
orient = (orient * 90 / np.pi).astype(int)

for x, y in zip(ind[0], ind[1]):
    distance = centroid - np.array([y + r, x + c])
    R[orient[x, y]].append(distance)

cpt = 1
while True:
    ret, frame = cap.read()
    if ret is False:
        print("End of video reached.")
        break

    frame_hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    grad, orient, _, ind = calculate_gradient_orientation(frame_hsv, thresh)
    hough_transform = np.zeros_like(orient)
    hough_transform = accumulate_hough_space(hough_transform, orient, R, ind)

    # Find the peak in the Hough space
    min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(hough_transform)
    r, c = max_loc

    frame_tracker = cv2.rectangle(frame, (c, r), (c + w, r + h), (255, 0, 0), 2)
    hough_transform = np.uint8(hough_transform)
    hough_transform = cv2.normalize(hough_transform, hough_transform, 0, 255, cv2.NORM_MINMAX)

    cv2.imshow('Sequences', frame_tracker)
    cv2.imshow('Hough Transform', hough_transform)

    k = cv2.waitKey(60) & 0xFF
    if k == 27:
        break
    elif k == ord('s'):
        cv2.imwrite('./images/Frame_%04d.png' % cpt, frame_tracker)
        cv2.imwrite('./images/Hough_%04d.png' % cpt, hough_transform)
    cpt += 1

cv2.destroyAllWindows()
cap.release()


## Question 5
## Tracking using both Hough Transform and Mean Shift

In [30]:
import numpy as np
import cv2
from matplotlib import pyplot as plt
from collections import defaultdict
import math

roi_defined = False

cap = cv2.VideoCapture('./Test-Videos/Antoine_Mug.mp4')
#cap = cv2.VideoCapture('./Test-Videos/VOT-Ball.mp4')
#cap = cv2.VideoCapture('./Test-Videos/VOT-Basket.mp4')
#cap = cv2.VideoCapture('./Test-Videos/VOT-Car.mp4')
#cap = cv2.VideoCapture('./Test-Videos/VOT-Sunshade.mp4')
#cap = cv2.VideoCapture('./Test-Videos/VOT-Woman.mp4')


def define_ROI(event, x, y, flags, param):
	global r,c,w,h,roi_defined
	# if the left mouse button was clicked,
	# record the starting ROI coordinates
	if event == cv2.EVENT_LBUTTONDOWN:
		r, c = x, y
		roi_defined = False
	# if the left mouse button was released,
	# record the ROI coordinates and dimensions
	elif event == cv2.EVENT_LBUTTONUP:
		r2, c2 = x, y
		h = abs(r2-r)
		w = abs(c2-c)
		r = min(r,r2)
		c = min(c,c2)
		roi_defined = True
        
#implementing gradient orientation without using opencv
def calculate_gradient_orientation(frame, threshold):
    """
    Calculates gradient orientation based on the blue channel of the input frame.

    Args:
    - frame (numpy.ndarray): Input frame, assumed to be in BGR format.
    - threshold (float): Magnitude threshold for valid gradients.

    Returns:
    - gradient_magnitude (numpy.ndarray): Magnitude of gradients.
    - orientation (numpy.ndarray): Orientation of gradients in radians.
    - orientation_bgr (numpy.ndarray): Visualization of valid orientations in BGR format.
    - not_valid_indices (tuple of numpy.ndarray): Indices where gradient magnitude is below threshold.
    - valid_indices (tuple of numpy.ndarray): Indices where gradient magnitude is above threshold.
    """
    # Extract the blue channel directly for gradient calculation
    blue_channel = frame[..., 2]

    # Calculating the gradient
    gradient_x, gradient_y = np.gradient(blue_channel)
    gradient_magnitude = np.sqrt(gradient_x**2 + gradient_y**2)

    # Calculating the orientation
    orientation = np.arctan2(gradient_y, gradient_x)

    # Finding indices for valid and not valid gradients
    not_valid_indices = gradient_magnitude < threshold
    valid_indices = gradient_magnitude > threshold

    # Creating a visualization of valid orientations
    orientation_normalized = cv2.normalize(orientation, None, 0, 255, cv2.NORM_MINMAX)
    orientation_bgr = cv2.cvtColor(np.uint8(orientation_normalized), cv2.COLOR_GRAY2BGR)

    # Marking points not used as red
    orientation_bgr[not_valid_indices] = [0, 0, 255]

    return gradient_magnitude, orientation, orientation_bgr, np.where(not_valid_indices), np.where(valid_indices)



def accumulate_hough_space(t_hough, orientation_map, r_table, valid_indices):
    """
    Accumulates Hough space based on orientation and r-table.

    Args:
    - t_hough (numpy.ndarray): The Hough space accumulator array.
    - orientation_map (numpy.ndarray): Map of orientations.
    - r_table (dict): Dictionary containing R-table values.
    - valid_indices (tuple): Tuple containing arrays of valid indices.

    Returns:
    - numpy.ndarray: Accumulated Hough space.
    """
    for px, py in zip(valid_indices[0], valid_indices[1]):
        angle_index = int(orientation_map[px, py] * 90 / math.pi)
        if angle_index in r_table:
            for value in r_table[angle_index]:
                new_py, new_px = py + value[0], px + value[1]
                if 0 <= new_py < t_hough.shape[1] and 0 <= new_px < t_hough.shape[0]:
                    t_hough[new_px, new_py] += 1

    return t_hough

# take first frame of the video
ret,frame = cap.read()
# load the image, clone it, and setup the mouse callback function
clone = frame.copy()
cv2.namedWindow("First image")
cv2.setMouseCallback("First image", define_ROI)

# keep looping until the 'q' key is pressed
while True:
	# display the image and wait for a keypress
	cv2.imshow("First image", frame)
	key = cv2.waitKey(1) & 0xFF
	# if the ROI is defined, draw it!
	if (roi_defined):
		# draw a green rectangle around the region of interest
		cv2.rectangle(frame, (r,c), (r+h,c+w), (0, 255, 0), 2)
	# else reset the image...
	else:
		frame = clone.copy()
	# if the 'q' key is pressed, break from the loop
	if key == ord("q"):
		break

track_window = (r,c,h,w)
# set up the ROI for tracking
roi = frame[c:c+w, r:r+h]
# conversion to Hue-Saturation-Value space
# 0 < H < 180 ; 0 < S < 255 ; 0 < V < 255
hsv_roi =  cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
# computation mask of the histogram:
# Pixels with S<30, V<20 or V>235 are ignored
mask = cv2.inRange(hsv_roi, np.array((0.,30.,20.)), np.array((180.,255.,235.)))
# Marginal histogram of the Hue component
roi_hist = cv2.calcHist([hsv_roi],[0],mask,[180],[0,180])
# Histogram values are normalised to [0,255]
cv2.normalize(roi_hist,roi_hist,0,255,cv2.NORM_MINMAX)

# Setup the termination criteria: either 10 iterations,
# or move by less than 1 pixel
term_crit = ( cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 1 )



R = defaultdict(list)
thresh = 25
# Populating the table
grad, orient, _, _, ind = calculate_gradient_orientation(hsv_roi, thresh)
centroid = np.array([int(r + (h//2)), int(c + (w//2))])
orient = (orient * 90 / np.pi).astype(int)


for x, y in zip(ind[0], ind[1]):
    distance = centroid - np.array([y + r, x + c])
    R[orient[x, y]].append(distance)

cpt = 1
while(1):
	ret ,frame = cap.read()
	if ret == True:

		fram_hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
		grad, orient , ori, _, ind = calculate_gradient_orientation(fram_hsv, thresh)
		hough_transform = np.zeros_like(orient)

		hough_transform = accumulate_hough_space(hough_transform, orient, R, ind)

		#mean shift
		ret, track_window = cv2.meanShift(hough_transform, track_window, term_crit)
		r,c,h,w = track_window

		# Draw a blue rectangle on the current image and normalize Hough
		frame_tracker = cv2.rectangle(frame, (r, c), (r + h, c + w), (255, 0, 0), 2)
		hough_transform = np.uint8(hough_transform)
		hough_transform = cv2.normalize(hough_transform, hough_transform, 0, 255, cv2.NORM_MINMAX)

		#Plotting all images
		cv2.imshow('Sequences', frame_tracker)
		cv2.imshow('Orientations', ori)
		cv2.imshow("Hough Transorm", hough_transform)

		k = cv2.waitKey(60) & 0xff
		if k == 27:
				break
		elif k == ord('s'):
				cv2.imwrite('./images/Question5_Frame_%04d.png'%cpt,frame_tracked)
				cv2.imwrite('./images/Question5_tHough_%04d.png'%cpt,tHough)
				cv2.imwrite('./images/Question5_Orientation_%04d.png'%cpt,ori)
		cpt += 1
	else:
		break

cv2.destroyAllWindows()
cap.release()

## Deep Features
## Question 6

In [1]:
import numpy as np
import cv2
from tensorflow.keras.applications import VGG16

roi_defined = False

model = VGG16(weights='imagenet', include_top=False)

def extract_features(frame):
    frame = cv2.resize(frame, (224, 224))
    frame = np.expand_dims(frame, axis=0)
    frame = frame.astype('float32')
    frame = frame / 255.0

    features = model.predict(frame)
    return features

def define_ROI(event, x, y, flags, param):
    global r, c, w, h, roi_defined
    if event == cv2.EVENT_LBUTTONDOWN:
        r, c = x, y
        roi_defined = False
    elif event == cv2.EVENT_LBUTTONUP:
        r2, c2 = x, y
        h = abs(r2-r)
        w = abs(c2-c)
        r = min(r,r2)
        c = min(c,c2)
        roi_defined = True
        print("ROI defined:", r, c, h, w)

cap = cv2.VideoCapture('./Test-Videos/Antoine_Mug.mp4')
ret, frame = cap.read()
clone = frame.copy()
cv2.namedWindow("First image")
cv2.setMouseCallback("First image", define_ROI)

while True:
    cv2.imshow("First image", frame)
    key = cv2.waitKey(1) & 0xFF
    if roi_defined:
        cv2.rectangle(frame, (r, c), (r + h, c + w), (0, 255, 0), 2)
    else:
        frame = clone.copy()
    if key == ord("q"):
        break

track_window = (r, c, h, w)

roi = frame[c:c+w, r:r+h]
target_features = extract_features(roi)

term_crit = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 1)

cv2.namedWindow('Sequence')

while True:
    ret, frame = cap.read()
    if ret == True:
        frame_features = extract_features(frame)

        # Ensure the correct data type and continuity
        frame_features = frame_features.astype(np.float32)
        target_features = target_features.astype(np.float32)

        # Calculate similarity
        similarity = cv2.matchTemplate(frame_features[0], target_features[0], cv2.TM_CCOEFF_NORMED)

        # Find the maximum value of similarity
        min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(similarity)

        # Update track window based on maximum similarity location
        r, c = max_loc
        track_window = (r, c, h, w)

        # Draw bounding box
        cv2.rectangle(frame, (r, c), (r+h, c+w), (255, 0, 0), 2)

        cv2.imshow('Sequence', frame)

        k = cv2.waitKey(60) & 0xff
        if k == 27:
            break
    else:
        break

cv2.destroyAllWindows()
cap.release()





ROI defined: 107 88 68 97


error: OpenCV(4.9.0) D:\a\opencv-python\opencv-python\opencv\modules\core\src\mean.dispatch.cpp:615: error: (-215:Assertion failed) dst.type() == CV_64F && dst.isContinuous() && (dst.cols == 1 || dst.rows == 1) && dcn >= cn in function 'cv::meanStdDev'
