In [9]:
import numpy as np
import cv2 as cv
import os
from sklearn.cluster import KMeans

# Mean-shift tracking experiment: sample frames from a video at a fixed rate,
# back-project a hue histogram onto each sampled frame, run cv.meanShift from
# a fixed start window, and dump the back projection and the annotated frame
# to disk for inspection.

video_path = "data/videos/test3_87s.mp4"
output_dir = "output/image/meanshift2"
sampling_frequency = 2  # sampled frames per second of video (see the seek below)
k = 8  # cluster count for the optional k-means colour quantisation (disabled)

# cv.imwrite does not raise when the target directory is missing; it just
# returns False, so make sure the directory exists before writing anything.
os.makedirs(output_dir, exist_ok=True)

cap = cv.VideoCapture(video_path)
try:
    # take first frame of the video
    ret, frame = cap.read()
    if not ret:
        raise RuntimeError(f"could not read first frame from {video_path!r}")

    # get video information
    fps = cap.get(cv.CAP_PROP_FPS)
    if fps <= 0:
        # With fps == 0 the seek at the bottom of the loop would always jump
        # back to frame 0 and the loop would never terminate.
        raise RuntimeError(f"invalid FPS ({fps}) reported for {video_path!r}")
    height, width, channels = frame.shape

    # window position, for now starting with the middle ninth of the frame
    # (central third horizontally and vertically)
    window_x = width // 3
    window_y = height // 3
    window_w = width // 3
    window_h = height // 3

    # setup initial location of window
    x, y, w, h = window_x, window_y, window_w, window_h
    track_window = (x, y, w, h)

    # set up the ROI for tracking
    roi = frame[y:y + h, x:x + w]
    hsv_roi = cv.cvtColor(roi, cv.COLOR_BGR2HSV)
    # Keep only pixels with saturation >= 60 and value >= 32; the OpenCV
    # mean-shift tutorial uses this mask to exclude dim / washed-out pixels
    # from the hue histogram.
    hsv_lower = np.array((0., 60., 32.))
    hsv_upper = np.array((180., 255., 255.))
    mask = cv.inRange(hsv_roi, hsv_lower, hsv_upper)
    # 180-bin histogram over the hue channel (channel 0) of the masked ROI
    roi_hist = cv.calcHist([hsv_roi], [0], mask, [180], [0, 180])
    # Min-max rescale the histogram in place to the range 0..255.
    cv.normalize(roi_hist, roi_hist, 0, 255, cv.NORM_MINMAX)

    # Termination criteria: stop after 10 iterations, or as soon as the
    # window moves by less than 1 pt between iterations.
    term_crit = (cv.TERM_CRITERIA_EPS | cv.TERM_CRITERIA_COUNT, 10, 1)

    index = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Optional experiment (disabled): quantise the frame's colours with
        # KMeans(n_clusters=k) and track on the segmented image instead.
        # For now the raw frame is used unchanged.
        segmented_image = frame

        hsv = cv.cvtColor(segmented_image, cv.COLOR_BGR2HSV)
        # Back-project the current histogram onto this frame's hue channel.
        # For the first sampled frame the histogram comes from the initial
        # ROI; afterwards it comes from the previous sampled frame (below).
        dst = cv.calcBackProject([hsv], [0], roi_hist, [0, 180], 1)

        # Refresh the histogram for the next iteration.
        # NOTE(review): this is computed over the WHOLE frame, not over the
        # tracked window, so later back projections no longer single out the
        # original ROI's colours. Kept as-is because it looks like a
        # deliberate experiment -- confirm whether the tracked window was
        # meant here.
        mask = cv.inRange(hsv, hsv_lower, hsv_upper)
        roi_hist = cv.calcHist([hsv], [0], mask, [180], [0, 180])
        cv.normalize(roi_hist, roi_hist, 0, 255, cv.NORM_MINMAX)

        dst_file = os.path.join(output_dir, f"dst_{index}.jpg")
        cv.imwrite(dst_file, dst)

        # apply meanshift to get the new location
        # NOTE(review): the search always starts from the initial
        # track_window; the result is not fed back into the next frame.
        iterations, res_track_window = cv.meanShift(dst, track_window, term_crit)
        print(index, ": ", iterations)

        # Draw it on the image (in place; segmented_image is the frame itself):
        # blue = mean-shift result, red = fixed starting window.
        dx, dy, dw, dh = res_track_window
        img = cv.rectangle(segmented_image, (dx, dy), (dx + dw, dy + dh), (255, 0, 0), 2)
        img = cv.rectangle(segmented_image, (x, y), (x + w, y + h), (0, 0, 255), 2)
        img_file = os.path.join(output_dir, f"frame_{index}.jpg")
        cv.imwrite(img_file, img)

        index += 1

        # Seek to the next sample: sample i sits at frame i * fps / sampling_frequency.
        cap.set(cv.CAP_PROP_POS_FRAMES, int((index * fps) / sampling_frequency))
finally:
    # Always free the video handle, even if reading or processing fails.
    cap.release()

0 :  10
1 :  8
2 :  0
3 :  10
4 :  10
5 :  0
6 :  10
7 :  10
8 :  8
9 :  8
10 :  7
11 :  7
12 :  10
13 :  5
14 :  10
15 :  9
16 :  10
17 :  10
18 :  10
19 :  10
20 :  10
21 :  10
22 :  10
23 :  10
24 :  10
25 :  10
26 :  10
27 :  10
28 :  10
29 :  10
30 :  10
31 :  10
32 :  10
33 :  10
34 :  10
35 :  10
36 :  10
37 :  10
38 :  10
39 :  10
40 :  4
41 :  10
42 :  10
43 :  10
44 :  10
45 :  10
46 :  10
47 :  10
48 :  10
49 :  10
50 :  10
51 :  10
52 :  6
53 :  10
54 :  10
55 :  10
56 :  10
57 :  5
58 :  5
59 :  5
60 :  6
61 :  6
62 :  6
63 :  6
64 :  5
65 :  5
66 :  10
67 :  10
68 :  10
69 :  10
70 :  10
71 :  10
72 :  10
73 :  10
74 :  10
75 :  10
76 :  10
77 :  10
78 :  10
79 :  10
80 :  10
81 :  9
82 :  2
83 :  10
84 :  10
85 :  10
86 :  10
87 :  10
88 :  10
89 :  10
90 :  10
91 :  10
92 :  10
93 :  10
94 :  10
95 :  10
96 :  10
97 :  10
98 :  10
99 :  10
100 :  10
101 :  10
102 :  10
103 :  10
104 :  10
105 :  10
106 :  8
107 :  10
108 :  10
109 :  10
110 :  10
111 :  10
112 :  7
113 