In [None]:
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt

In [None]:
# Load the training table; later cells read its video_id, video_frame and annotations columns.
bbox = pd.read_csv("../input/tensorflow-great-barrier-reef/train.csv")
bbox.head()

In [None]:
# Inspect a single row (positional index 50) to see what one record looks like.
bbox.iloc[50]

In [None]:
def load_rgb_frame(row):
    """Read the image referenced by one row of `bbox` and return it in RGB order.

    `row` must expose `video_id` and `video_frame`. Raises FileNotFoundError when
    the image cannot be read, because cv2.imread signals failure by returning
    None and the colour conversion would otherwise fail with an unrelated error.
    """
    path = f'../input/tensorflow-great-barrier-reef/train_images/video_{row.video_id}/{row.video_frame}.jpg'
    image_bgr = cv2.imread(path)
    if image_bgr is None:
        raise FileNotFoundError(f"Could not read frame image: {path}")
    # OpenCV decodes to BGR; matplotlib expects RGB.
    return cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)


# Three consecutive rows of the table, starting at positional index i.
i = 78
frame1, frame2, frame3 = (load_rgb_frame(bbox.iloc[i + offset]) for offset in range(3))

In [None]:
# Show frame 1 and turn its annotation string into a list of [x, y, width, height] boxes.
plt.imshow(frame1)
# NOTE(review): eval() executes whatever the CSV cell contains. If the annotations are
# plain Python literals, ast.literal_eval would be a safer replacement — confirm first.
box1 = eval(bbox.iloc[i].annotations)
box1 = [[x["x"],x["y"],x["width"],x["height"]] for x in box1]
print(box1)

In [None]:
# Show frame 2 and turn its annotation string into a list of [x, y, width, height] boxes.
plt.imshow(frame2)
# NOTE(review): eval() executes whatever the CSV cell contains. If the annotations are
# plain Python literals, ast.literal_eval would be a safer replacement — confirm first.
box2 = eval(bbox.iloc[i+1].annotations)
box2 = [[x["x"],x["y"],x["width"],x["height"]] for x in box2]
print(box2)

In [None]:
# Show frame 3 and turn its annotation string into a list of [x, y, width, height] boxes.
plt.imshow(frame3)
# NOTE(review): eval() executes whatever the CSV cell contains. If the annotations are
# plain Python literals, ast.literal_eval would be a safer replacement — confirm first.
box3 = eval(bbox.iloc[i+2].annotations)
box3 = [[x["x"],x["y"],x["width"],x["height"]] for x in box3]
print(box3)

In [None]:
def draw_matches(img1, keypoints1, img2, keypoints2, matches):
    """Place two images side by side and draw the given keypoint matches on them.

    Args:
        img1, img2: images as arrays, either single-channel (H x W or H x W x 1)
            or colour (H x W x 3 or more; only the first three channels are used).
        keypoints1, keypoints2: sequences indexed by ``match.queryIdx`` /
            ``match.trainIdx``; each item exposes a ``.pt`` (x, y) pair.
        matches: iterable of objects with ``queryIdx`` and ``trainIdx``.

    Returns:
        A uint8 array of shape (max(h1, h2), w1 + w2, 3) with img1 on the left,
        img2 on the right, and a circle on each matched keypoint joined by a line.
    """
    def _to_three_channels(img):
        # Single-channel input is replicated so it fits the 3-channel canvas;
        # colour input is used as-is (extra channels such as alpha are dropped).
        if img.ndim == 2 or img.shape[2] == 1:
            return np.dstack([img, img, img])
        return img[:, :, :3]

    r, c = img1.shape[:2]
    r1, c1 = img2.shape[:2]

    # Blank canvas wide enough for both images and as tall as the taller one.
    output_img = np.zeros((max(r, r1), c + c1, 3), dtype='uint8')
    output_img[:r, :c, :] = _to_three_channels(img1)
    output_img[:r1, c:c + c1, :] = _to_three_channels(img2)

    colour = (0, 255, 255)
    for match in matches:
        x1, y1 = keypoints1[match.queryIdx].pt
        x2, y2 = keypoints2[match.trainIdx].pt
        left_pt = (int(x1), int(y1))
        # Points of the second image are shifted right by the width of the first.
        right_pt = (int(x2) + c, int(y2))

        cv2.circle(output_img, left_pt, 4, colour, 1)
        cv2.circle(output_img, right_pt, 4, colour, 1)
        cv2.line(output_img, left_pt, right_pt, colour, 1)

    return output_img

In [None]:
# Detect ORB keypoints and descriptors on the first two frames.
orb = cv2.ORB_create(nfeatures=2000)

keypoints1, descriptors1 = orb.detectAndCompute(frame1, None)
keypoints2, descriptors2 = orb.detectAndCompute(frame2, None)

# The third positional argument of drawKeypoints is the output image, not the colour.
# Pass None so OpenCV allocates the output, and give the colour by keyword
# (frames are RGB here, so (255, 0, 0) is red).
k_frame1 = cv2.drawKeypoints(frame1, keypoints1, None, color=(255, 0, 0))
k_frame2 = cv2.drawKeypoints(frame2, keypoints2, None, color=(255, 0, 0))

plt.figure(figsize=(15,20))
plt.title("Key points on Frame1")
plt.imshow(k_frame1)
plt.show()

In [None]:
# Same visualisation as for frame 1, using an explicit figure/axes pair.
fig, ax = plt.subplots(figsize=(15,20))
ax.set_title("Key points on Frame2")
ax.imshow(k_frame2)
plt.show()

In [None]:
# Brute-force matcher with Hamming distance, applied to the ORB descriptors of frame1 and frame2.
bf = cv2.BFMatcher_create(cv2.NORM_HAMMING)
# k=2 asks for the two nearest candidates per descriptor; the next cell compares their distances.
matches = bf.knnMatch(descriptors1, descriptors2,k=2)

In [None]:
# Grayscale copies of the frames; draw_matches is called with the first two below.
frame1g = cv2.cvtColor(frame1, cv2.COLOR_RGB2GRAY)
frame2g = cv2.cvtColor(frame2, cv2.COLOR_RGB2GRAY)
frame3g = cv2.cvtColor(frame3, cv2.COLOR_RGB2GRAY)
# Ratio test: keep a match only when its best candidate is clearly closer than the
# second best (factor 0.6 here; Tracker_custom.get_transfor uses 0.5).
good = []
for m, n in matches:
    if m.distance < 0.6 * n.distance:
        good.append(m)

# Only the first 10 kept matches are drawn so the picture stays readable.
img3 = draw_matches(frame1g, keypoints1, frame2g, keypoints2, good[:10])
plt.figure(figsize=(20,25))
plt.title("Good matching points btw F1 & F2")
plt.imshow(img3)
plt.show()

In [None]:
# Number of matches that passed the ratio test; compared against MIN_MATCH_COUNT in the next cell.
print(len(good))

In [None]:
MIN_MATCH_COUNT = 30

# Define the results up front so the following cells never hit a NameError when
# there are too few matches; `H is None` then means "no homography available".
H, mask = None, None
matchesMask = None

if len(good)>MIN_MATCH_COUNT:
    # Matched keypoint coordinates, shaped (N, 1, 2) as expected by findHomography.
    src_pts = np.float32([ keypoints1[m.queryIdx].pt for m in good ]).reshape(-1,1,2)
    dst_pts = np.float32([ keypoints2[m.trainIdx].pt for m in good ]).reshape(-1,1,2)
    # RANSAC with OpenCV's default reprojection threshold
    # (Tracker_custom.get_transfor passes 5.0 explicitly).
    H, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC)
else:
    print( "Not enough matches are found - {}/{}".format(len(good), MIN_MATCH_COUNT) )

In [None]:
# Display the homography estimated above (maps frame1 keypoint coordinates onto frame2).
H

In [None]:
def traform_point(point,H):
    """Map a 2-D point through the 3x3 homography ``H``.

    Args:
        point: ``(x, y)`` pair.
        H: 3x3 matrix (array-like) applied to the homogeneous point ``(x, y, 1)``.

    Returns:
        ``(x', y')`` as Python floats, after dividing by the homogeneous coordinate.
        If that coordinate is 0 the result is inf/nan, following NumPy division rules.
    """
    x, y = point
    # Work with a flat vector so each component is a scalar: calling float() on a
    # 1-element array (as a column-vector formulation would) is deprecated in NumPy.
    hx, hy, hw = np.asarray(H, dtype=float) @ np.array([x, y, 1.0])
    return float(hx / hw), float(hy / hw)

# Sanity check: where the top-left pixel (0, 0) of frame1 lands under H.
traform_point([0,0],H)

In [None]:
!pip install ensemble-boxes

In [None]:
from ensemble_boxes import nms


class Tracker_custom:
    """Carry bounding boxes from frame to frame with an ORB + homography motion model.

    For every frame after the first, the boxes stored for the previous frame are
    moved with the homography estimated between the two frames, merged with the
    detections supplied for the new frame, and de-duplicated with ``nms``.

    Boxes are ``[x, y, width, height]`` in pixels. ``nms`` must already be in
    scope (the notebook imports it from ``ensemble_boxes``).

    Attributes:
        crurrent_frame: index of the last frame given to ``new_frame`` (-1 before
            the first one). The spelling is kept because it is a public attribute.
        transf_H: homographies estimated so far (one per successful estimation).
        last_frame: most recent frame; reference image for the next call.
        n_frame_bbox: one list of boxes per processed frame.
        n_frame_conf: one list of confidences per processed frame, aligned with
            ``n_frame_bbox`` for frames produced by the fusion step.
        debug: when true, progress messages are printed.
    """

    def __init__(self,debug=False):
        self.crurrent_frame = -1
        self.transf_H = []
        self.last_frame = None
        self.n_frame_bbox = []
        self.n_frame_conf = []
        self.debug = debug

    def _store(self, frame, boxes, confs):
        # Record the result for this frame and make it the reference for the next one.
        self.n_frame_bbox.append(boxes)
        self.n_frame_conf.append(confs)
        self.last_frame = frame

    def new_frame(self,new_frame,bbox=None,confs=None):
        """Process the next frame and append its boxes/confidences to the history.

        Args:
            new_frame: image array for the frame.
            bbox: detections for this frame as ``[x, y, w, h]`` lists (default: none).
            confs: confidences matching ``bbox`` (default: none).

        When no homography to the previous frame is available (first frame, or
        ``get_transfor`` returned None) the detections are stored unchanged.
        """
        # None defaults instead of shared mutable lists: the stored history must not
        # alias a list that is reused by every call and every instance.
        bbox = [] if bbox is None else list(bbox)
        confs = [] if confs is None else list(confs)
        self.crurrent_frame += 1

        if self.last_frame is None:
            self._store(new_frame, bbox, confs)
            return

        H = self.get_transfor(new_frame)
        if H is None:
            # Possibly a new video, or too little texture: no tracking prediction.
            if self.debug :
                print("no H")
            self._store(new_frame, bbox, confs)
            return

        self.transf_H.append(H)
        pred_boxes,pred_confs = self.pred_box_H(self.n_frame_bbox[-1],self.n_frame_conf[-1],H)

        # Fuse predicted boxes with detected boxes. Boxes and confidences are
        # filtered together so that dropping a box never shifts the confidences.
        f_box,f_score = self._valid_box_conf(pred_boxes,pred_confs)
        for box,conf in zip(bbox,confs):
            f_box.append(box)
            f_score.append(conf)

        if len(f_box)==0:
            if self.debug :
                print("no bbox")
            self._store(new_frame, [], [])
            return
        labels = [1] * len(f_score)

        # nms is called with corner boxes normalised and clipped to [0, 1].
        image_size_x=self.last_frame.shape[1]
        image_size_y=self.last_frame.shape[0]
        f_box =  [[b[0]/image_size_x,b[1]/image_size_y,(b[0]+b[2])/image_size_x,(b[1]+b[3])/image_size_y] for b in f_box]
        f_box =  [[min(max(x,0),1) for x in b] for b in f_box]
        boxes, scores, labels = nms([f_box], [f_score], [labels], weights=None, iou_thr=.2)
        # Back to pixel [x, y, w, h].
        fboxes = [[b[0]*image_size_x,b[1]*image_size_y,(b[2]-b[0])*image_size_x,(b[3]-b[1])*image_size_y] for b in boxes]

        kept_boxes,kept_scores = self._valid_box_conf(fboxes,[float(s) for s in scores])
        self._store(new_frame, kept_boxes, kept_scores)

    def get_transfor(self,new_frame,MIN_MATCH_COUNT = 30):
        """Estimate the homography from ``self.last_frame`` to ``new_frame``.

        Returns the 3x3 matrix from ``cv2.findHomography`` (which may itself be
        None), or None when there is no previous frame, no usable descriptors,
        or fewer than ``MIN_MATCH_COUNT`` matches pass the 0.5 ratio test.
        """
        if self.last_frame is None:
            return None

        orb = cv2.ORB_create(nfeatures=2000)

        keypoints1, descriptors1 = orb.detectAndCompute(self.last_frame, None)
        keypoints2, descriptors2 = orb.detectAndCompute(new_frame, None)

        # A frame without keypoints yields no descriptors, and the 2-nearest-neighbour
        # ratio test needs at least two candidates in the second frame.
        if descriptors1 is None or descriptors2 is None or len(descriptors2) < 2:
            return None

        bf = cv2.BFMatcher_create(cv2.NORM_HAMMING)
        matches = bf.knnMatch(descriptors1, descriptors2,k=2)

        good = []
        for pair in matches:
            if len(pair) < 2:
                # Fewer than two neighbours returned: the ratio test cannot be applied.
                continue
            m, n = pair
            if m.distance < 0.5 * n.distance:
                good.append(m)

        # A homography needs at least 4 correspondences, whatever the caller's minimum.
        if len(good) < max(MIN_MATCH_COUNT, 4):
            # Not enough matches - maybe a new video.
            return None

        src_pts = np.float32([ keypoints1[m.queryIdx].pt for m in good ]).reshape(-1,1,2)
        dst_pts = np.float32([ keypoints2[m.trainIdx].pt for m in good ]).reshape(-1,1,2)
        H, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC,5.0)
        return H

    def pred_box_H(self,boxes,confs,H):
        """Move each box centre through ``H`` and decay its confidence.

        Args:
            boxes: ``[x, y, w, h]`` boxes of the previous frame.
            confs: their confidences (paired with ``boxes`` in order).
            H: 3x3 homography, or None.

        Returns:
            ``(new_boxes, new_confs)``. The size of a box is kept, its confidence is
            multiplied by 0.75, and it is kept only if the new confidence is above 0.1
            and the centre moved by less than 50 pixels. With ``H`` None both lists
            are empty.
        """
        if H is None:
            # New video or H not found: no prediction from tracking.
            return [],[]
        new_boxes, new_confs = [],[]
        for (x,y,w,h),conf in zip(boxes,confs):
            mx,my = x+w/2, y+h/2
            # Flat homogeneous vector: components are scalars, so no float() on
            # 1-element arrays (deprecated in NumPy).
            px,py,pw = H @ np.array([mx,my,1.0])
            if pw == 0:
                # Point mapped to infinity: nothing sensible to predict.
                continue
            new_mx,new_my = float(px/pw),float(py/pw)
            new_conf = .75 * conf # momentum in conf
            dist = np.linalg.norm(np.array([new_mx,new_my])-np.array([mx,my]))
            if new_conf > .1 and dist<50:
                new_boxes.append([int(new_mx-w/2),int(new_my-h/2),int(w),int(h)])
                new_confs.append(new_conf)
                if self.debug :
                    print(f' from {mx,my} to {new_mx,new_my} dist {dist}')
        return new_boxes,new_confs

    def _valid_box_conf(self,boxes,confs):
        # Clamp boxes to the frame and drop small ones, keeping each surviving box
        # paired with its own confidence.
        frame_h,frame_w = self.last_frame.shape[:2]
        kept_boxes,kept_confs = [],[]
        for (x,y,w,h),conf in zip(boxes,confs):
            x = int(max(1,min(x,frame_w-1)))
            y = int(max(1,min(y,frame_h-1)))
            w = int(max(5,min(w,frame_w-x-1)))
            h = int(max(5,min(h,frame_h-y-1)))
            if w*h > 500:
                kept_boxes.append([x,y,w,h])
                kept_confs.append(conf)
        return kept_boxes,kept_confs

    def valid_box(self,boxes):
        """Clamp boxes to the size of ``self.last_frame`` and drop those with area <= 500.

        Returns a new list of ``[x, y, w, h]`` integer boxes.
        """
        boxes = list(boxes)
        return self._valid_box_conf(boxes,[None]*len(boxes))[0]


In [None]:
# Start a fresh tracker and give it the annotated boxes of frame 1 as "detections".
tracker = Tracker_custom(debug=True)
# One confidence of 1 per box, instead of a hard-coded pair that only fits two boxes.
tracker.new_frame(frame1,box1,[1] * len(box1))
tracker.n_frame_bbox

In [None]:
# white = bbox @ frame n-1
# red = bbox @ frame n (prediction)
# blue = ground-truth bbox @ frame n

In [None]:
img = frame1.copy()

# (boxes, RGB colour) pairs, drawn in this order on a copy of frame 1.
overlays = (
    (tracker.n_frame_bbox[-1], (255,0,0)),   # red  : boxes held by the tracker
    (box1, (255,0,255)),                     # pink : annotated boxes of frame 1
)
for boxes, colour in overlays:
    for box in boxes:
        x,y,w,h = (int(v) for v in box)
        cv2.rectangle(img, (x,y), (x+w,y+h), colour, 2)

fig, ax = plt.subplots(figsize=(20,25))
ax.set_title("frame 1 : detector gives bbox to the tracker")
ax.imshow(img)
plt.show()
# red = bbox held by the tracker @ frame n
# pink = ground-truth bbox @ frame n

In [None]:
# white = bbox @ frame n-1
# red = bbox @ frame n (prediction)
# blue = ground-truth bbox @ frame n

In [None]:
# Frame 2 is passed without detections, so the boxes come from the tracker alone.
# This call advances the tracker state: re-running the cell feeds frame 2 again.
tracker.new_frame(frame2)

img = frame2.copy()

# (boxes, RGB colour) pairs, drawn in this order.
overlays = (
    (tracker.n_frame_bbox[-1], (255,0,0)),   # red   : tracker output for frame 2
    (box1, (255,255,255)),                   # white : annotated boxes of frame 1
    (box2, (0,0,255)),                       # blue  : annotated boxes of frame 2
)
for boxes, colour in overlays:
    for box in boxes:
        x,y,w,h = (int(v) for v in box)
        cv2.rectangle(img, (x,y), (x+w,y+h), colour, 2)

fig, ax = plt.subplots(figsize=(20,25))
ax.set_title("frame 2 : no detection but tracker say there are still a bbox")
ax.imshow(img)
plt.show()


In [None]:
# white = bbox @ frame n-1
# red = bbox @ frame n (prediction)
# blue = ground-truth bbox @ frame n

In [None]:
# Frame 3 is also passed without detections; re-running the cell feeds it again.
tracker.new_frame(frame3)
img = frame3.copy()

# (boxes, RGB colour) pairs, drawn in this order.
overlays = (
    (tracker.n_frame_bbox[-1], (255,0,0)),   # red   : tracker output for frame 3
    (box2, (255,255,255)),                   # white : annotated boxes of frame 2
    (box3, (0,0,255)),                       # blue  : annotated boxes of frame 3
)
for boxes, colour in overlays:
    for box in boxes:
        x,y,w,h = (int(v) for v in box)
        cv2.rectangle(img, (x,y), (x+w,y+h), colour, 2)

fig, ax = plt.subplots(figsize=(20,25))
ax.set_title("frame 3 : no detection but tracker say there are still a bbox")
ax.imshow(img)
plt.show()

In [None]:
# Confidence history: one entry per frame given to the tracker so far.
tracker.n_frame_conf

If I don't detect anything, I predict with the tracker, using conf = x * last_conf (x = .75 momentum, see `pred_box_H`).

In [None]:
# Tracking-only demo: seed a fresh tracker with the annotated boxes of row i, then
# feed the next three frames without detections and compare with their annotations.
i = 48
frame = cv2.imread(f'../input/tensorflow-great-barrier-reef/train_images/video_{bbox.iloc[i].video_id}/{bbox.iloc[i].video_frame}.jpg')
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
# NOTE(review): eval() executes whatever the CSV cell contains; ast.literal_eval would
# be safer if the annotations are plain literals — confirm first.
box = eval(bbox.iloc[i].annotations)
box = [[x["x"],x["y"],x["width"],x["height"]] for x in box]
tracker = Tracker_custom(debug=True)
# One confidence of 1 per annotated box (was a hard-coded pair).
tracker.new_frame(frame,box,[1] * len(box))
box_prev = box
# `j` is deliberately left at its last value (3): the next cell uses it to advance `i`.
for j in range(1,4):
    frame = cv2.imread(f'../input/tensorflow-great-barrier-reef/train_images/video_{bbox.iloc[i+j].video_id}/{bbox.iloc[i+j].video_frame}.jpg')
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    box = eval(bbox.iloc[i+j].annotations)
    box = [[x["x"],x["y"],x["width"],x["height"]] for x in box]

    # No detections for this frame: the tracker has to predict on its own.
    tracker.new_frame(frame)
    img = frame.copy()

    # red = tracker output, white = annotations of the previous frame,
    # blue = annotations of this frame.
    for boxes, colour in ((tracker.n_frame_bbox[-1], (255,0,0)),
                          (box_prev, (255,255,255)),
                          (box, (0,0,255))):
        for b in boxes:
            x,y,w,h = (int(v) for v in b)
            cv2.rectangle(img, (x,y), (x+w,y+h), colour, 2)

    # A shallow copy is all the original box[:][:] produced.
    box_prev = list(box)

    plt.figure(figsize=(20,25))
    plt.title(f"row {i+j} : tracker prediction without detection")
    plt.imshow(img)
    plt.show()
    

In [None]:
# Re-detection simulation: jump ahead, give the tracker fresh "detections" (the
# annotations of that row), then again track three frames without detections.
# NOTE: this cell depends on `i`, `j` and `tracker` left by the previous cell and
# advances `i` in place, so running it twice moves further down the table.
i += j +5
frame = cv2.imread(f'../input/tensorflow-great-barrier-reef/train_images/video_{bbox.iloc[i].video_id}/{bbox.iloc[i].video_frame}.jpg')
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
# NOTE(review): eval() executes whatever the CSV cell contains; ast.literal_eval would
# be safer if the annotations are plain literals — confirm first.
box = eval(bbox.iloc[i].annotations)
box = [[x["x"],x["y"],x["width"],x["height"]] for x in box]
# One confidence of 1 per annotated box (was a hard-coded list of four).
tracker.new_frame(frame,box,[1] * len(box))
box_prev = box
for j in range(1,4):
    frame = cv2.imread(f'../input/tensorflow-great-barrier-reef/train_images/video_{bbox.iloc[i+j].video_id}/{bbox.iloc[i+j].video_frame}.jpg')
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    box = eval(bbox.iloc[i+j].annotations)
    box = [[x["x"],x["y"],x["width"],x["height"]] for x in box]

    # No detections for this frame: the tracker has to predict on its own.
    tracker.new_frame(frame)
    img = frame.copy()

    # red = tracker output, white = annotations of the previous frame,
    # blue = annotations of this frame.
    for boxes, colour in ((tracker.n_frame_bbox[-1], (255,0,0)),
                          (box_prev, (255,255,255)),
                          (box, (0,0,255))):
        for b in boxes:
            x,y,w,h = (int(v) for v in b)
            cv2.rectangle(img, (x,y), (x+w,y+h), colour, 2)

    # A shallow copy is all the original box[:][:] produced.
    box_prev = list(box)

    plt.figure(figsize=(20,25))
    plt.title(f"row {i+j} : tracker prediction after re-detection")
    plt.imshow(img)
    plt.show()
    

In [None]:
# Confidence history of the second tracker, including the re-detection frame.
tracker.n_frame_conf