# Gathering data

In [11]:
from mpl_toolkits.mplot3d import Axes3D
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import numpy as np 
import os 
import pandas as pd 
import cv2

In [12]:
# Load every readable frame found under `input_path` as a BGR image,
# visiting directories and files in sorted (deterministic) order.
dataset = []
input_path = 'dataset/baseline/pedestrians/input'

for dirname, dirnames, filenames in os.walk(input_path):
    # os.walk descends into `dirnames` in list order; sorting in place makes
    # the traversal (and therefore the frame order) reproducible.
    dirnames.sort()
    for filename in sorted(filenames):
        img_path = os.path.join(dirname, filename)
        img = cv2.imread(img_path)
        # cv2.imread returns None instead of raising when a file cannot be
        # decoded (e.g. a stray non-image file); skip it so the grayscale
        # conversion downstream never receives None.
        if img is None:
            continue
        dataset.append(img)

# Conversion to grayscale

In [13]:
# One single-channel (grayscale) frame per loaded BGR frame, in the same order.
gray_dataset = list(
    cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2GRAY)
    for bgr_frame in dataset
)

# Granule Formation with quadtree decomposition

In [14]:
# Module-level accumulator: quadtree_decomposition appends granules to whatever
# list is passed as `regions`, and a later cell passes this one.
regions_of_interest = []

def quadtree_decomposition(I, threshold, minSize, regions):
    """Recursively split a 2-D image into homogeneous granules.

    A block is split into four quadrants (via ``split_image``) while its
    intensity range exceeds ``threshold`` and its width exceeds ``minSize``;
    otherwise the block is appended to ``regions`` as a leaf granule.

    Parameters
    ----------
    I : 2-D array
        Image (or sub-image) to decompose.
    threshold : number
        Maximum ``max - min`` intensity spread tolerated inside one granule.
    minSize : number
        Blocks whose width is not greater than this are never split.
    regions : list
        Output list; leaf granules are appended to it in place.

    Returns
    -------
    None
    """
    h, w = I.shape

    # An empty block has no min/max and contributes no pixels.
    if h == 0 or w == 0:
        return

    intensityDiff = I.max() - I.min()

    # Only the width is compared with minSize. Both dimensions must still be
    # at least 2, otherwise halving yields empty quadrants and the recursive
    # I.max() call raises on a zero-size array.
    splittable = w > minSize and h >= 2 and w >= 2

    if intensityDiff > threshold and splittable:
        for quadrant in split_image(I):
            quadtree_decomposition(quadrant, threshold, minSize, regions)
    else:
        regions.append(I)

def split_image(I):
    """Cut a 2-D array into four quadrants at its integer mid-points.

    Returns a 4-tuple ordered (top-left, top-right, bottom-left,
    bottom-right). With odd dimensions the bottom/right parts receive the
    extra row/column.
    """
    n_rows, n_cols = I.shape
    mid_row = n_rows // 2
    mid_col = n_cols // 2

    upper_half = I[:mid_row]
    lower_half = I[mid_row:]

    return (
        upper_half[:, :mid_col],
        upper_half[:, mid_col:],
        lower_half[:, :mid_col],
        lower_half[:, mid_col:],
    )

# Rough Entropy Based Threshold Generation

In [15]:
def generate_rough_entropy_based_threshold(granules, prev_threshold=None):
    """Pick the gray level that maximises rough entropy over a set of granules.

    For every candidate level T, four pixel counts are accumulated:

    * object lower      - granules whose maximum is <= T
    * object upper      - granules whose minimum is <= T
    * background lower  - granules whose minimum is >  T
    * background upper  - granules whose maximum is >  T

    Each granule contributes its own pixel count. The roughness of each set is
    ``1 - lower / upper`` (0 when ``upper`` is 0), and the two roughness values
    are combined into an entropy score per level.

    Parameters
    ----------
    granules : sequence of numpy arrays
        Image blocks, e.g. the leaves produced by quadtree decomposition.
        Zero-size arrays are ignored.
    prev_threshold : int or None
        When given (0 included), only levels within 10 gray levels of it,
        clipped to the observed gray range, are searched. If that window does
        not intersect the observed range, the full range is searched.

    Returns
    -------
    int
        The first level inside the searched window with maximal entropy.

    Raises
    ------
    ValueError
        If ``granules`` contains no non-empty array.
    """
    BASE = 10
    SEARCH_RADIUS = 10  # half-width of the window searched around prev_threshold

    # (min, max, pixel count) per granule. Only these three numbers are needed,
    # so there is no reason to materialise every pixel in a Python list.
    spans = [(int(g.min()), int(g.max()), g.size) for g in granules if g.size > 0]
    if not spans:
        raise ValueError("granules must contain at least one non-empty array")

    min_gray = min(lo for lo, _, _ in spans)
    max_gray = max(hi for _, hi, _ in spans)
    gray_range = (max_gray - min_gray) + 1
    offset = min_gray

    start_threshold, end_threshold = min_gray, max_gray
    # `is not None` so that a previous threshold of 0 still narrows the search.
    if prev_threshold is not None:
        window_lo = max(int(prev_threshold) - SEARCH_RADIUS, min_gray)
        window_hi = min(int(prev_threshold) + SEARCH_RADIUS, max_gray)
        # An empty window means prev_threshold lies far outside this frame's
        # gray range; keep the full-range search in that case.
        if window_lo <= window_hi:
            start_threshold, end_threshold = window_lo, window_hi

    object_lower = np.zeros(gray_range)
    object_upper = np.zeros(gray_range)
    background_lower = np.zeros(gray_range)
    background_upper = np.zeros(gray_range)

    first = start_threshold - offset      # inclusive slice start
    last = end_threshold - offset + 1     # exclusive slice stop

    for g_min, g_max, size in spans:
        # Slices replace the per-level loops; a start past the stop is simply
        # an empty slice, exactly like an empty range().
        object_lower[g_max - offset:last] += size
        object_upper[g_min - offset:last] += size
        background_lower[first:g_min - offset] += size
        background_upper[first:g_max - offset] += size

    def _entropy_term(roughness):
        # NOTE(review): roughness values at or below 1/BASE (including 0) are
        # mapped to 1, which makes their contribution strongly negative after
        # the -(BASE / 2) factor. Kept as-is; confirm against the reference
        # formulation.
        if roughness <= 1 / BASE:
            return 1
        return roughness * np.log(roughness) / np.log(BASE)

    # Score only the levels inside the window, so argmax can never select a
    # level that was not evaluated.
    entropy = np.empty(end_threshold - start_threshold + 1)
    for level in range(start_threshold, end_threshold + 1):
        idx = level - offset
        o_rough = 1 - (object_lower[idx] / object_upper[idx]) if object_upper[idx] != 0 else 0
        b_rough = 1 - (background_lower[idx] / background_upper[idx]) if background_upper[idx] != 0 else 0
        entropy[level - start_threshold] = -(BASE / 2) * (
            _entropy_term(o_rough) + _entropy_term(b_rough)
        )

    return int(np.argmax(entropy)) + start_threshold

# Temporal Segmentation Through Background Estimation

In [16]:
def temporal_segmentation(frame_t, f1, f2, f3, E=3):
    """Flag pixels of ``frame_t`` that deviate from a three-frame background estimate.

    The three reference frames are reduced per pixel to their maximum,
    median and minimum. The expected background value is
    ``(max + 4 * median + min) / 6`` and the spread is ``(max - min) / 6``.

    Returns a boolean array that is True where ``|frame_t - expected|`` is
    strictly greater than ``E * spread``.
    """
    history = np.stack(
        [reference.astype(np.float32) for reference in (f1, f2, f3)],
        axis=0,
    )

    upper = history.max(axis=0)
    lower = history.min(axis=0)
    middle = np.median(history, axis=0)

    expected = (upper + 4 * middle + lower) / 6
    spread = (upper - lower) / 6

    deviation = np.abs(frame_t.astype(np.float32) - expected)
    tolerance = E * spread
    return deviation > tolerance

# Object tracking initialization


In [17]:
import math

def calculate_tracker_speed(center1, center2):
    """Return ``(distance, dx, dy)`` for the move from ``center1`` to ``center2``.

    Both centers are indexable as ``[0]`` (first coordinate) and ``[1]``
    (second coordinate); ``distance`` is the Euclidean length of ``(dx, dy)``.
    """
    dx, dy = (center2[axis] - center1[axis] for axis in (0, 1))
    distance = math.sqrt(dx**2 + dy**2)
    return distance, dx, dy

# Rebuild the granule pool from scratch on every run of this cell. The list
# used to be created in an earlier cell and only appended to here, so
# re-running this cell alone duplicated every granule.
regions_of_interest = []
for frame in gray_dataset:
    quadtree_decomposition(frame, threshold=10, minSize=10, regions=regions_of_interest)

# Load the first two readable ground-truth masks (sorted order) as
# single-channel images; init_tracking only reads entries 0 and 1.
ground_truth_array = []
gt_path = 'dataset/baseline/pedestrians/groundtruth'
count = 0
for dirname, dirnames, filenames in os.walk(gt_path):
    # Sort sub-directories in place so os.walk visits them deterministically.
    dirnames.sort()
    for filename in sorted(filenames):
        if count >= 2:
            break
        if not filename.endswith(('.png', '.jpg')):
            continue
        gt_img = cv2.imread(os.path.join(dirname, filename), cv2.IMREAD_GRAYSCALE)
        # cv2.imread returns None for files it cannot decode; storing that
        # would make get_center fail later, so skip the file instead.
        if gt_img is None:
            continue
        ground_truth_array.append(gt_img)
        count += 1
    if count >= 2:
        break

def get_center(mask):
    """Return the integer ``(x, y)`` centroid of the positive pixels of a 2-D mask.

    ``x`` is the truncated mean column index and ``y`` the truncated mean row
    index of all entries greater than zero. Returns ``None`` when the mask
    has no positive entry.
    """
    row_idx, col_idx = np.nonzero(mask > 0)
    if col_idx.size == 0 or row_idx.size == 0:
        return None
    center_x = int(col_idx.mean())
    center_y = int(row_idx.mean())
    return (center_x, center_y)

def init_tracking(gt_array):
    """Derive the initial object centers and displacement from two masks.

    ``gt_array[0]`` is treated as the older mask and ``gt_array[1]`` as the
    newer one. On success a status line is printed and
    ``(older_center, newer_center, (dx, dy))`` is returned. With fewer than
    two masks, or when a center cannot be found in either mask,
    ``(None, None, None)`` is returned (a failure line is printed in the
    latter case only).
    """
    no_result = (None, None, None)

    # Two masks are required to measure a displacement.
    if len(gt_array) < 2:
        return no_result

    center_t_minus_1 = get_center(gt_array[1])
    center_t_minus_2 = get_center(gt_array[0])

    if not center_t_minus_1 or not center_t_minus_2:
        print("[Init] Failed to find object centers.")
        return no_result

    s, dx, dy = calculate_tracker_speed(center_t_minus_2, center_t_minus_1)
    print(f"[Init] speed={s:.2f}, dx={dx}, dy={dy}")
    return center_t_minus_2, center_t_minus_1, (dx, dy)
    
def compute_iou(mask_pred, mask_gt):
    """Intersection-over-union of two masks, evaluated element-wise as booleans.

    Returns 0 when neither mask has a truthy element (empty union).
    """
    overlap = np.logical_and(mask_pred, mask_gt).sum()
    combined = np.logical_or(mask_pred, mask_gt).sum()
    if combined > 0:
        return overlap / combined
    return 0

# Object Tracking Iteration

In [None]:
def semi_supervised_tracking(threshold=10, minSize=10, E=3):
    """Detect the moving object in every frame from index 3 onwards.

    For each frame the pipeline:

    1. decomposes the grayscale frame into quadtree granules,
    2. derives a rough-entropy threshold (searched near the previous one) and
       thresholds the frame with it (spatial segment),
    3. compares the frame with the three preceding frames (temporal segment),
    4. ANDs both segments and keeps only pixels whose colour lies within one
       standard deviation of the segment's mean colour in every channel,
    5. draws the bounding box of the surviving pixels on a copy of the frame.

    Reads the module-level ``dataset``, ``gray_dataset`` and
    ``ground_truth_array``.

    Parameters
    ----------
    threshold, minSize
        Forwarded to ``quadtree_decomposition``.
    E
        Forwarded to ``temporal_segmentation``.

    Returns
    -------
    tracked_frames : list
        One image per processed frame (with the box drawn when detected).
    bounding_boxes : list
        ``((x_min, y_min), (x_max, y_max))`` as plain ints, or ``None`` when
        nothing was detected.
    iou_scores : list
        One entry per processed frame, aligned with the two lists above:
        ``0`` when nothing was detected, the IoU against
        ``ground_truth_array[idx]`` when that mask exists, otherwise ``None``.
    """
    tracked_frames = []
    bounding_boxes = []
    iou_scores = []
    prev_threshold = None

    # Called for its "[Init] ..." report. The centers and motion vector it
    # returns are not consumed by the per-frame loop, and it copes with fewer
    # than two ground-truth masks on its own.
    init_tracking(ground_truth_array)

    for idx in range(3, len(gray_dataset)):
        frame = gray_dataset[idx]
        f1, f2, f3 = gray_dataset[idx - 1], gray_dataset[idx - 2], gray_dataset[idx - 3]
        current_color = dataset[idx]

        granules = []
        quadtree_decomposition(frame, threshold=threshold, minSize=minSize, regions=granules)

        T_star = generate_rough_entropy_based_threshold(granules, prev_threshold)
        _, spatial_segment = cv2.threshold(frame, T_star, 255, cv2.THRESH_BINARY)

        temporal_segment = temporal_segmentation(frame, f1, f2, f3, E=E).astype(np.uint8) * 255

        combined_segment = cv2.bitwise_and(spatial_segment, temporal_segment)

        rows, cols = np.where(combined_segment > 0)
        if rows.size == 0:
            print(f"[Frame {idx}] No object detected.")
            tracked_frames.append(current_color)
            bounding_boxes.append(None)
            iou_scores.append(0)
            continue

        object_pixels = current_color[rows, cols]
        mean_color = np.mean(object_pixels, axis=0)
        max_dev = np.std(object_pixels, axis=0)

        # Vectorised form of the per-pixel test: keep a pixel only when every
        # channel is within `max_dev` of the mean colour.
        within = np.abs(object_pixels - mean_color) <= max_dev
        keep = within.reshape(rows.size, -1).all(axis=1)

        result_mask = np.zeros_like(frame)
        result_mask[rows[keep], cols[keep]] = 255

        ys, xs = rows[keep], cols[keep]
        if xs.size > 0:
            # Cast to built-in int so the stored box holds plain Python ints
            # and the drawing call does not rely on NumPy-scalar coercion.
            x_min, x_max = int(xs.min()), int(xs.max())
            y_min, y_max = int(ys.min()), int(ys.max())
            bbox_img = cv2.rectangle(current_color.copy(), (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)
            tracked_frames.append(bbox_img)
            bounding_boxes.append(((x_min, y_min), (x_max, y_max)))

            if idx < len(ground_truth_array):
                iou = compute_iou(result_mask > 0, ground_truth_array[idx] > 0)
                iou_scores.append(iou)
            else:
                # No ground truth for this frame: record a placeholder so the
                # three result lists stay index-aligned.
                iou_scores.append(None)
        else:
            tracked_frames.append(current_color)
            bounding_boxes.append(None)
            iou_scores.append(0)

        prev_threshold = T_star

    return tracked_frames, bounding_boxes, iou_scores


# Usage


In [19]:
# Run the tracker with the same granulation settings (threshold=10, minSize=10)
# used above; the call also triggers the "[Init] ..." line printed by init_tracking.
results, bboxes, ious = semi_supervised_tracking(threshold=10, minSize=10, E=3)


[Init] speed=0.00, dx=0, dy=0
