In [25]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import clear_output

Things to add and consider:
1. Apply template matching only to a smaller patch of the image (a region of interest, ROI) around the previous location, to reduce computational cost.
2. Add a template bank.
3. Make the template background 'transparent', so that the template is the actual foreground shape rather than a rectangular box. This prevents objects in the background from pulling the template's focus away from the target.

In [22]:
def read_video_first_frame(video_path):
    """Open a video and read its first frame.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.

    Returns:
        ``(capture, frame, ok)`` when the first frame was read; the capture is
        left open so the caller can keep reading and must release it.
        ``(None, None, None)`` when the read failed; in that case the capture
        has already been released here.
    """
    capture = cv2.VideoCapture(video_path)
    ok, first_frame = capture.read()
    if ok:
        return capture, first_frame, ok

    # Nothing could be read: report it and free the handle before bailing out.
    print("Cannot read video.")
    capture.release()
    return None, None, None

def load_template(initial_template_path, frame_gray):
    """Build the initial grayscale template from a mask image.

    The image at ``initial_template_path`` is read as grayscale and the
    bounding rectangle of its non-zero pixels is computed. That rectangle is
    then cropped out of ``frame_gray`` (not out of the mask itself), so the
    template holds real frame pixels.

    Args:
        initial_template_path: Path of the mask image.
            NOTE(review): presumably the mask has the same resolution as the
            video frame, since its pixel coordinates index ``frame_gray``
            directly; confirm against how the mask is produced.
        frame_gray: Grayscale frame the template is cropped from.

    Returns:
        ``(template, (x, y, w, h))`` on success. ``(None, None)`` when the mask
        cannot be read, has no non-zero pixels, or its bounding box selects no
        pixels of ``frame_gray``. A 2-tuple is returned on every path so that
        callers can always unpack two values.
    """
    template = cv2.imread(initial_template_path, cv2.IMREAD_GRAYSCALE)
    if template is None:
        print("Template image not found or failed to load.")
        return None, None

    coords = cv2.findNonZero(template)
    if coords is None:
        print("No non-zero pixels in template.")
        return None, None

    x, y, w, h = cv2.boundingRect(coords)
    cropped_template = frame_gray[y:y+h, x:x+w]
    if cropped_template.size == 0:
        # Slicing past the frame edge silently yields an empty array, which
        # cannot be used as a template.
        print("Template bounding box lies outside the frame.")
        return None, None

    return cropped_template, (x, y, w, h)

def setup_video_writer(output_path, frame_shape, fps):
    """Create an ``mp4v`` video writer sized to the given frame shape.

    Args:
        output_path: Destination file path for the written video.
        frame_shape: Shape whose first two entries are (height, width).
        fps: Frame rate passed to the writer.

    Returns:
        The ``cv2.VideoWriter`` instance.
    """
    height, width = frame_shape[:2]
    codec = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
    # The writer takes its frame size as (width, height), the reverse of the shape order.
    frame_size = (width, height)
    writer = cv2.VideoWriter(output_path, codec, fps, frame_size)
    print(f"💾 Output video will be saved to: {output_path}")
    return writer

def find_best_match_locations(frame_gray, template, threshold):
    """Run normalized cross-correlation template matching and collect hits.

    Args:
        frame_gray: Image (or patch) to search in.
        template: 2-D template; its shape is unpacked as (height, width).
        threshold: Minimum score for a location to count as a candidate.

    Returns:
        ``(boxes, scores, best_score)`` where ``boxes`` is a list of
        ``[x, y, w, h]`` candidates with score >= ``threshold``, ``scores``
        holds the matching score of each box, and ``best_score`` is the maximum
        of the whole response map (reported even when no box passes).
    """
    response = cv2.matchTemplate(frame_gray, template, cv2.TM_CCOEFF_NORMED)
    hit_rows, hit_cols = np.where(response >= threshold)
    tpl_h, tpl_w = template.shape

    # The response map is indexed [row, col]; boxes are expressed as x=col, y=row.
    boxes = [[col, row, tpl_w, tpl_h] for row, col in zip(hit_rows, hit_cols)]
    scores = [response[row, col] for row, col in zip(hit_rows, hit_cols)]

    return boxes, scores, response.max()

def apply_nms(boxes, scores, threshold, nms_thresh):
    """Filter candidate boxes with OpenCV's non-maximum suppression.

    Args:
        boxes: List of ``[x, y, w, h]`` candidates.
        scores: Score of each candidate, parallel to ``boxes``.
        threshold: Score threshold forwarded to ``cv2.dnn.NMSBoxes``.
        nms_thresh: NMS threshold forwarded to ``cv2.dnn.NMSBoxes``.

    Returns:
        A flat list of indices into ``boxes`` that were kept; an empty list
        when there are no candidates or none were kept.
    """
    kept = []
    if boxes:
        raw_indices = cv2.dnn.NMSBoxes(boxes, scores, threshold, nms_thresh)
        if len(raw_indices) != 0:
            for entry in raw_indices:
                # Entries may arrive wrapped in a one-element list/array or as
                # bare scalars; unwrap the former so the result is always flat.
                is_wrapped = isinstance(entry, (list, np.ndarray))
                kept.append(entry[0] if is_wrapped else entry)
    return kept

def update_template_from_box(frame_gray, box):
    """Cut a fresh template out of the frame at ``box``.

    The crop is additionally tightened to the bounding rectangle of its
    non-zero pixels; a crop with no non-zero pixels is returned unchanged.

    Args:
        frame_gray: Grayscale frame to crop from.
        box: ``(x, y, w, h)`` region in frame coordinates.

    Returns:
        The cropped (and possibly tightened) template array.
    """
    left, top, width, height = box
    crop = frame_gray[top:top + height, left:left + width]

    lit_pixels = cv2.findNonZero(crop)
    if lit_pixels is None:
        return crop

    off_x, off_y, tight_w, tight_h = cv2.boundingRect(lit_pixels)
    return crop[off_y:off_y + tight_h, off_x:off_x + tight_w]

In [29]:
def draw_tracking_box(frame, box):
    """Draw the tracking rectangle for ``box`` onto ``frame`` in place.

    Args:
        frame: Image passed to ``cv2.rectangle``.
        box: ``(x, y, w, h)`` rectangle to outline.
    """
    left, top, width, height = box
    top_left = (left, top)
    bottom_right = (left + width, top + height)
    # Colour (0, 255, 255) with a 2-pixel line thickness.
    cv2.rectangle(frame, top_left, bottom_right, (0, 255, 255), 2)

def display_frame(frame, frame_idx):
    """Show one frame inline, replacing the previous cell output.

    Args:
        frame: Image in BGR channel order; it is converted to RGB for display.
        frame_idx: Frame number shown in the figure title.
    """
    rgb_view = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # wait=True defers clearing until new output arrives.
    clear_output(wait=True)

    plt.figure(figsize=(10, 5))
    plt.imshow(rgb_view)
    plt.axis('off')
    plt.title(f"Frame {frame_idx}")
    plt.show()

def get_patch(frame_gray, template_coords, padding=10):
    """Crop the search window around a box, clipped to the frame bounds.

    Args:
        frame_gray: Frame to crop from; axis 0 is rows, axis 1 is columns.
        template_coords: ``(x, y, w, h)`` box in frame coordinates.
        padding: Margin in pixels added on every side of the box.

    Returns:
        ``(patch, (x_start, y_start))``: the cropped window and the frame
        coordinates of its top-left corner, for mapping patch-local positions
        back into the frame.
    """
    box_x, box_y, box_w, box_h = template_coords

    # Grow the box by the margin, then clamp each edge to the frame.
    left = max(0, box_x - padding)
    top = max(0, box_y - padding)
    right = min(frame_gray.shape[1], box_x + box_w + padding)
    bottom = min(frame_gray.shape[0], box_y + box_h + padding)

    return frame_gray[top:bottom, left:right], (left, top)

def display_patch(patch, frame_idx, padding):
    """Show a search patch as a grayscale figure (debugging aid).

    Args:
        patch: Image patch to display.
        frame_idx: Frame number shown in the title.
        padding: Padding value shown in the title.
    """
    caption = f"Patch (Frame {frame_idx}, Padding {padding})"

    plt.figure(figsize=(4, 4))
    plt.imshow(patch, cmap='gray')
    plt.axis('off')
    plt.title(caption)
    plt.show()

def dynamic_template_matching(video_path, initial_template_path,
                              output_path="../../outputs/dttm_output.mp4",
                              threshold=0.7, nms_thresh=0.3):
    """Track an object through a video with a self-updating template.

    For every frame, the template is matched only inside a padded window
    around the last known box. When a match is accepted, the box is drawn on
    the frame, the template is re-cropped from the current frame, and the
    search window shrinks; when nothing matches, the window grows so the
    object can be re-acquired. Every frame (annotated or not) is written to
    ``output_path`` and displayed inline.

    Args:
        video_path: Path of the input video.
        initial_template_path: Path of the mask image used to build the first
            template from the first frame.
        output_path: Path of the annotated output video.
        threshold: Minimum matching score for a candidate, also used as the
            score threshold for NMS.
        nms_thresh: NMS overlap threshold.

    Returns:
        None. Returns early (without writing a video) when the video or the
        template cannot be loaded.
    """
    padding = 10  # initial search margin around the last known box, in pixels
    max_padding = 50
    min_padding = 5
    padding_step = 5

    cap, frame, ret = read_video_first_frame(video_path)
    if not ret:
        # The helper has already released the capture on failure.
        return

    out = None
    try:
        frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Accept both a bare None and a (None, None) failure result so that a
        # missing template exits cleanly instead of failing on unpacking.
        loaded = load_template(initial_template_path, frame_gray)
        if loaded is None or loaded[0] is None:
            return
        template, template_coords = loaded

        fps = cap.get(cv2.CAP_PROP_FPS) or 30  # fall back to 30 when the FPS is reported as 0
        out = setup_video_writer(output_path, frame.shape, fps)

        frame_idx = 0
        while True:
            # The first frame was already consumed while opening the video.
            if frame_idx != 0:
                ret, frame = cap.read()
                if not ret:
                    print("Video ended.")
                    break
                frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            # Search only around the previous location, using the current
            # adaptive padding rather than get_patch's fixed default.
            img_patch, patch_loc = get_patch(frame_gray, template_coords, padding)
            # display_patch(img_patch, frame_idx, padding)  # visualize patch
            boxes, scores, max_val = find_best_match_locations(img_patch, template, threshold)

            print(f"[Frame {frame_idx}] Max match score: {max_val:.3f}")

            indices = apply_nms(boxes, scores, threshold, nms_thresh)
            if indices:
                # Target found: tighten the search window for the next frame.
                padding = max(min_padding, padding - padding_step)

                best_box = boxes[indices[0]]

                # Map the patch-local box back into frame coordinates; plain
                # ints keep the box clean for printing, slicing and drawing.
                px, py = patch_loc
                global_box = (int(best_box[0] + px), int(best_box[1] + py),
                              int(best_box[2]), int(best_box[3]))

                print(f"  ↪ Object tracked at {global_box}")
                draw_tracking_box(frame, global_box)

                new_template = update_template_from_box(frame_gray, global_box)
                if new_template is not None:
                    template = new_template
                    template_coords = global_box  # centre of the next search window
                    print("  ↪ Template updated.\n")
            else:
                # Target lost: widen the search window and keep the old template.
                padding = min(max_padding, padding + padding_step)
                print("  ⚠️ No match above threshold or NMS removed all boxes.\n")

            out.write(frame)
            display_frame(frame, frame_idx)

            frame_idx += 1
    finally:
        # Always release the handles, including on an exception or a kernel
        # interrupt, so the frames written so far are flushed to disk.
        cap.release()
        if out is not None:
            out.release()

    print("🎥 Video saved successfully.")


In [28]:
# Inputs: the clip to track in, and the segmentation image that seeds the template.
video_path = "../../videos/ficen_trimmed.mp4"
template_path = "../../outputs/segmented/ficen_trimmed_segmented.png"

# threshold  -> matching confidence
# nms_thresh -> NMS IoU threshold
dynamic_template_matching(video_path, template_path, threshold=0.3, nms_thresh=0.3)

KeyboardInterrupt: 