1. wycięcie sylwetek

In [None]:
import cv2
import numpy as np
import os

# Cut pedestrian silhouettes out of the IR video and save each one as a
# 64x192 grayscale PNG in the 'samples' directory.
cap = cv2.VideoCapture('vid1_IR.avi')

os.makedirs('samples', exist_ok=True)

iPedestrian = 0

# Filtering parameters and the morphology kernel do not change between
# frames, so they are created once instead of on every iteration.
min_area = 1500          # minimum connected-component area
min_aspect_ratio = 1.5   # minimum height / width ratio of a component
max_width = 150          # maximum component width
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        G = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Binarize, then remove speckle noise and close small holes.
        _, binary = cv2.threshold(G, 35, 255, cv2.THRESH_BINARY)
        binary = cv2.medianBlur(binary, 5)
        binary = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)

        num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(binary, connectivity=8)

        output = frame.copy()
        boxes = []

        # Label 0 is skipped; only components passing the size/shape filters
        # are kept as candidate boxes [x1, y1, x2, y2].
        for i in range(1, num_labels):
            # Cast NumPy scalars to plain ints for drawing and slicing.
            x, y, w, h, area = (int(v) for v in stats[i])

            if area < min_area:
                continue

            aspect_ratio = h / float(w)
            if aspect_ratio < min_aspect_ratio or w > max_width:
                continue

            boxes.append([x, y, x + w, y + h])

        # Merge boxes stacked vertically (gap of at most 20 px) that overlap
        # horizontally by at least 60% of the narrower box.
        boxes.sort(key=lambda b: b[1])
        merged = []
        # A box absorbed into another one must not be emitted again on its
        # own, otherwise the same region is saved as a duplicate sample.
        consumed = [False] * len(boxes)

        for i in range(len(boxes)):
            if consumed[i]:
                continue

            merged_box = list(boxes[i])

            for j in range(i + 1, len(boxes)):
                if consumed[j]:
                    continue

                x1b, y1b, x2b, y2b = boxes[j]
                vertical_gap = y1b - merged_box[3]

                if 0 <= vertical_gap <= 20:
                    overlap_w = min(merged_box[2], x2b) - max(merged_box[0], x1b)
                    if overlap_w > 0:
                        min_width = min(merged_box[2] - merged_box[0], x2b - x1b)
                        overlap_ratio = overlap_w / float(min_width)

                        if overlap_ratio >= 0.6:
                            merged_box[0] = min(merged_box[0], x1b)
                            merged_box[1] = min(merged_box[1], y1b)
                            merged_box[2] = max(merged_box[2], x2b)
                            merged_box[3] = max(merged_box[3], y2b)
                            consumed[j] = True

            merged.append(merged_box)

        for x1, y1, x2, y2 in merged:
            cv2.rectangle(output, (x1, y1), (x2, y2), (255, 0, 0), 2)

            # Samples are cut from the grayscale frame and normalized to a
            # common 64x192 (width x height) size.
            ROI = G[y1:y2, x1:x2]
            resized_ROI = cv2.resize(ROI, (64, 192))

            filename = 'samples/sample_%06d.png' % iPedestrian
            cv2.imwrite(filename, resized_ROI)
            iPedestrian += 1

        cv2.imshow('IR', output)
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Release the capture and close the window even if processing fails.
    cap.release()
    cv2.destroyAllWindows()
    # NOTE(review): presumably gives the GUI event loop a chance to actually
    # close the window on some platforms - confirm.
    cv2.waitKey(1)

-1

2. stworzenie wzorca

In [19]:
import cv2
import numpy as np
import os

# Build the PDM: the per-pixel mean of the binarized silhouette samples
# stored in the 'samples' directory.
folder = 'samples'
height, width = 192, 64

files = [name for name in os.listdir(folder) if name.endswith('.png')]
files.sort()

PDM = np.zeros((height, width), dtype=np.float32)
num_images = 0

for file in files:
    path = os.path.join(folder, file)
    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)

    if img is not None:
        # Bring the sample to the model size and binarize it to {0, 1}.
        img = cv2.resize(img, (width, height))
        _, binary = cv2.threshold(img, 40, 1, cv2.THRESH_BINARY)

        PDM += binary.astype(np.float32)
        num_images += 1
    else:
        print(f"Nie udało się wczytać {path}")

# Turn the accumulated sum into a mean; with no samples the model stays zero.
if num_images == 0:
    print("Brak poprawnie wczytanych obrazów!")
else:
    PDM /= num_images

# Preview the model scaled to the 0..255 range.
PDM_display = (255 * PDM).astype(np.uint8)
cv2.imshow('PDM', PDM_display)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Persist both the preview image and the float model.
cv2.imwrite('pdm_model.png', PDM_display)
np.save('pdm_model.npy', PDM)


3.1. działanie na przykładowej ramce

In [23]:
import cv2
import numpy as np

# Run the PDM sliding-window detector on a single frame of the IR video.
win_h, win_w = 192, 64
step = 8

# PDM1 weights foreground pixels, PDM0 (its complement) weights background.
PDM = np.load('pdm_model.npy')
PDM1 = PDM.astype(np.float32)
PDM0 = 1.0 - PDM1

# Video
cap = cv2.VideoCapture('vid1_IR.avi')

frame_number = 3100  # index of the frame to analyse
try:
    cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
    ret, frame = cap.read()
finally:
    # Only one frame is needed, so the capture is released right away.
    cap.release()

if not ret:
    # Raise instead of calling exit(), which would shut down the notebook kernel.
    raise RuntimeError("Nie udało się wczytać klatki.")

G = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
_, binary = cv2.threshold(G, 35, 1, cv2.THRESH_BINARY)

h, w = binary.shape
result = np.zeros((h, w), dtype=np.float32)

# The "+ 1" keeps the last window that still fits entirely in the frame.
for y in range(0, h - win_h + 1, step):
    for x in range(0, w - win_w + 1, step):
        window = binary[y:y+win_h, x:x+win_w]
        # Score: PDM1 summed over foreground pixels plus PDM0 over background.
        score = np.sum(window * PDM1 + (1 - window) * PDM0)
        result[y, x] = score

# Normalization (guarded against an all-zero score map)
max_val = float(np.max(result))
if max_val > 0:
    result_norm = (result / max_val * 255).astype(np.uint8)
else:
    result_norm = np.zeros((h, w), dtype=np.uint8)

cv2.imshow('Score Map', result_norm)

# Maxima detection
threshold = 0.85 * max_val
result_copy = result.copy()
detections = []

# With max_val == 0 the threshold is 0 and the loop below would never end,
# so detection is skipped entirely in that case.
while max_val > 0:
    min_val, max_score, min_loc, max_loc = cv2.minMaxLoc(result_copy)
    if max_score < threshold:
        break

    x_max, y_max = max_loc
    detections.append((x_max, y_max, max_score))
    cv2.rectangle(frame, (x_max, y_max), (x_max + win_w, y_max + win_h), (0, 255, 0), 2)

    # Suppress the neighbourhood of the maximum (one window size in each
    # direction) so the next iteration finds a different peak.
    x1 = max(0, x_max - win_w)
    y1 = max(0, y_max - win_h)
    x2 = min(w, x_max + win_w)
    y2 = min(h, y_max + win_h)
    result_copy[y1:y2, x1:x2] = 0

cv2.imshow('Detections', frame)

cv2.waitKey(0)
cv2.destroyAllWindows()


3.2. działanie na sekwencji

In [24]:
import cv2
import numpy as np

# Run the PDM sliding-window detector on every frame of the IR video.
win_h, win_w = 192, 64
step = 10

# Set to True to also display the normalized score map for each frame.
show_score_map = False

# PDM1 weights foreground pixels, PDM0 (its complement) weights background.
PDM = np.load('pdm_model.npy')
PDM1 = PDM.astype(np.float32)
PDM0 = 1.0 - PDM1

cap = cv2.VideoCapture('vid1_IR.avi')

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        G = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        _, binary = cv2.threshold(G, 35, 1, cv2.THRESH_BINARY)
        h, w = binary.shape

        # Score map
        result = np.zeros((h, w), dtype=np.float32)

        # The "+ 1" keeps the last window that still fits entirely in the frame.
        for y in range(0, h - win_h + 1, step):
            for x in range(0, w - win_w + 1, step):
                window = binary[y:y+win_h, x:x+win_w]

                # Score: PDM1 summed over foreground pixels plus PDM0 over background.
                score = np.sum(window * PDM1 + (1 - window) * PDM0)
                result[y, x] = score

        max_val = float(np.max(result))

        # Normalization is only needed for the optional preview.
        if show_score_map:
            if max_val > 0:
                result_norm = (result / max_val * 255).astype(np.uint8)
            else:
                result_norm = result.astype(np.uint8)
            cv2.imshow('Score Map', result_norm)

        # Minimum detection threshold
        threshold = 0.85 * max_val
        result_copy = result.copy()
        detections = []

        # With max_val == 0 the threshold is 0 and the loop below would never
        # end, so detection is skipped for such frames.
        while max_val > 0:
            min_val, max_score, min_loc, max_loc = cv2.minMaxLoc(result_copy)

            if max_score < threshold:
                break

            x_max, y_max = max_loc
            detections.append((x_max, y_max, max_score))

            cv2.rectangle(frame, (x_max, y_max), (x_max + win_w, y_max + win_h), (0, 255, 0), 2)

            # Suppress the neighbourhood of the maximum (one window size in
            # each direction) so the next iteration finds a different peak.
            x1 = max(0, x_max - win_w)
            y1 = max(0, y_max - win_h)
            x2 = min(w, x_max + win_w)
            y2 = min(h, y_max + win_h)
            result_copy[y1:y2, x1:x2] = 0

        cv2.imshow('Detections', frame)

        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            break
finally:
    # Release the capture and close windows even if processing fails.
    cap.release()
    cv2.destroyAllWindows()
