In [None]:
import numpy as np
import scipy.ndimage
import cv2
from sklearn import svm
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split

def hog(image, cell_size=8, num_bins=9, angle_range=180):
    """Compute a Histogram of Oriented Gradients descriptor for an image.

    Pipeline:
      1. Per-channel gradients with the 1-D kernels [-1, 0, 1]; for every
         pixel the channel with the largest gradient magnitude is kept.
      2. Orientation histograms over non-overlapping ``cell_size`` x
         ``cell_size`` cells, each pixel voting with its magnitude, split
         linearly between the two neighbouring bins.
      3. L2 normalisation over 2x2-cell blocks sliding with a one-cell step.

    Args:
        image: 2-D (grayscale) or 3-D (H, W, C) array with at least three
            channels; only the first three channels are differentiated.
            Any numeric dtype is accepted.
        cell_size: Side length of a square cell in pixels.
        num_bins: Number of orientation bins per cell.
        angle_range: Orientation range in degrees; angles are folded into
            ``[0, angle_range)``.

    Returns:
        1-D ``float32`` array of length
        ``(H // cell_size - 1) * (W // cell_size - 1) * 4 * num_bins``.

    Raises:
        ValueError: If the image holds fewer than 2x2 whole cells, so no
            block can be formed.
    """
    # Convert to floating point *before* differentiating:
    # scipy.ndimage.convolve returns the input dtype by default, so unsigned
    # integer images (e.g. uint8 from cv2.imread) would wrap negative
    # gradients around instead of keeping their sign.
    image = np.asarray(image, dtype=np.float64)
    if image.ndim == 2:
        image = np.stack([image] * 3, axis=-1)

    gx_kernel = np.array([[-1, 0, 1]])
    gy_kernel = np.array([[-1], [0], [1]])

    # NOTE: ndimage.convolve mirrors the kernel, so the sign is opposite to the
    # usual central difference. Both components flip together, which leaves
    # the orientation unchanged for the default unsigned 180-degree range.
    dx = np.zeros(image.shape, dtype=np.float32)
    dy = np.zeros(image.shape, dtype=np.float32)
    for c in range(3):
        dx[:, :, c] = scipy.ndimage.convolve(image[:, :, c], gx_kernel, mode='reflect')
        dy[:, :, c] = scipy.ndimage.convolve(image[:, :, c], gy_kernel, mode='reflect')

    # Keep, per pixel, the gradient of the channel with the strongest response.
    magnitude = np.sqrt(dx**2 + dy**2)
    max_channel = np.argmax(magnitude, axis=2)[..., None]
    Gx = np.take_along_axis(dx, max_channel, axis=2).squeeze(axis=2)
    Gy = np.take_along_axis(dy, max_channel, axis=2).squeeze(axis=2)

    M = np.sqrt(Gx**2 + Gy**2)
    theta = (np.arctan2(Gy, Gx) * (180 / np.pi)) % angle_range

    height, width = M.shape
    n_cells_y = height // cell_size
    n_cells_x = width // cell_size
    if n_cells_y < 2 or n_cells_x < 2:
        raise ValueError(
            f"image of size {width}x{height} is too small for 2x2 blocks "
            f"of {cell_size}-pixel cells"
        )
    bin_width = angle_range / num_bins

    # Only pixels that fall inside a whole cell contribute.
    used_h = n_cells_y * cell_size
    used_w = n_cells_x * cell_size
    mag = M[:used_h, :used_w]

    # Fractional bin position. The interpolation weight is taken *before* the
    # bin index is wrapped, so an angle that rounds up to exactly angle_range
    # votes fully into bin 0 instead of producing a negative weight.
    position = theta[:used_h, :used_w] / bin_width
    lower = np.floor(position).astype(np.intp)
    frac = position - lower
    lower %= num_bins
    upper = (lower + 1) % num_bins

    # Flat histogram slot of each pixel: (cell index) * num_bins + bin.
    cell_row = np.arange(used_h) // cell_size
    cell_col = np.arange(used_w) // cell_size
    base = (cell_row[:, None] * n_cells_x + cell_col[None, :]) * num_bins
    n_slots = n_cells_y * n_cells_x * num_bins

    votes = np.bincount((base + lower).ravel(),
                        weights=(mag * (1 - frac)).ravel(), minlength=n_slots)
    votes += np.bincount((base + upper).ravel(),
                         weights=(mag * frac).ravel(), minlength=n_slots)
    histograms = votes.reshape(n_cells_y, n_cells_x, num_bins).astype(np.float32)

    # 2x2-cell blocks, cells ordered top-left, top-right, bottom-left,
    # bottom-right; blocks are emitted row by row.
    eps = 1e-10
    blocks = np.concatenate((
        histograms[:-1, :-1],
        histograms[:-1, 1:],
        histograms[1:, :-1],
        histograms[1:, 1:],
    ), axis=-1)
    norms = np.sqrt(np.sum(blocks**2, axis=-1, keepdims=True) + eps)
    return (blocks / norms).reshape(-1)

# --- Dataset: one HOG descriptor per readable sample image -----------------
HOG_data = []
labels = []

for i in range(100):
    pos_img = cv2.imread(f"pedestrians/pos/per{str(i+1).zfill(5)}.ppm")
    neg_img = cv2.imread(f"pedestrians/neg/neg{str(i+1).zfill(5)}.png")

    # Samples that fail the `is not None` check are skipped without notice;
    # positives are labelled 1, negatives 0.
    for sample, label in ((pos_img, 1), (neg_img, 0)):
        if sample is None:
            continue
        HOG_data.append(hog(sample))
        labels.append(label)

HOG_data = np.array(HOG_data)
labels = np.array(labels)


# --- Stratified split: 20% test, then 25% of the rest for validation -------
# (0.25 * 0.8 = 0.2, i.e. 60/20/20 train/validation/test overall)
X_temp, X_test, y_temp, y_test = train_test_split(
    HOG_data, labels,
    test_size=0.2, stratify=labels, random_state=42,
)
X_train, X_val, y_train, y_val = train_test_split(
    X_temp, y_temp,
    test_size=0.25, stratify=y_temp, random_state=42,
)

# --- Choose the regularisation strength C on the validation set ------------
C_values = [0.01, 0.1, 1.0, 10.0, 100.0]
best_C, best_acc = None, 0

print("Hyperparameter tuning on validation set:")
for C in C_values:
    model = svm.SVC(kernel='linear', C=C).fit(X_train, y_train)
    val_acc = model.score(X_val, y_val)
    print(f"C={C}, Validation Accuracy={val_acc:.4f}")
    # `>=` means that on a tie the later (larger) C wins.
    if val_acc >= best_acc:
        best_acc, best_C = val_acc, C

print(f"\nBest C = {best_C} with validation accuracy = {best_acc:.4f}")

# --- Final model (fitted on the training part only) and test evaluation ----
clf_final = svm.SVC(kernel='linear', C=best_C).fit(X_train, y_train)

print("\nTest set evaluation:")
y_pred = clf_final.predict(X_test)
print(f"Accuracy: {np.mean(y_pred == y_test):.4f}")
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("Classification Report:")
print(classification_report(y_test, y_pred))


In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt

def pyramid(image, scale=1.5, min_size=(64, 128)):
    """Yield successively smaller versions of ``image`` (an image pyramid).

    The original image is always yielded first, whatever its size. Each
    further level divides both dimensions by ``scale`` (truncated to int);
    iteration stops before producing a level narrower than ``min_size[0]``
    or shorter than ``min_size[1]``.

    Args:
        image: Array whose first two dimensions are (height, width).
        scale: Down-scaling factor between consecutive levels; must be > 1.
        min_size: Smallest allowed level as ``(width, height)``.

    Yields:
        The original image, then each resized level.

    Raises:
        ValueError: If ``scale`` is not greater than 1 (the pyramid would
            never shrink and the generator would not terminate).
    """
    if scale <= 1:
        raise ValueError(f"scale must be greater than 1, got {scale}")

    yield image
    while True:
        w = int(image.shape[1] / scale)
        h = int(image.shape[0] / scale)
        # Check the target size *before* resizing: this skips a wasted resize
        # on the last step and never hands cv2.resize a zero-sized target.
        if w < min_size[0] or h < min_size[1]:
            break
        image = cv2.resize(image, (w, h))
        yield image


def non_max_suppression(boxes, overlapThresh=0.3):
    """Greedily discard boxes that overlap an already kept box too much.

    Boxes are visited from the largest bottom coordinate (``y2``) to the
    smallest. Each visited box is kept, and every remaining box whose
    intersection-over-union with it exceeds ``overlapThresh`` is dropped.
    Coordinates are treated as inclusive pixel indices (hence the ``+ 1``).

    Args:
        boxes: Sequence or array of ``(x1, y1, x2, y2)`` rows.
        overlapThresh: IoU above which a box counts as a duplicate.

    Returns:
        ``[]`` for empty input, otherwise an integer array holding the kept
        boxes in the order they were selected.
    """
    if len(boxes) == 0:
        return []

    boxes = np.array(boxes).astype("float")
    left, top, right, bottom = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    areas = (right - left + 1) * (bottom - top + 1)

    kept = []
    order = np.argsort(bottom)
    while len(order) > 0:
        # The candidate with the largest y2 is always accepted.
        current, rest = order[-1], order[:-1]
        kept.append(current)

        # Intersection rectangle between the accepted box and each remaining one.
        inter_w = np.maximum(
            0, np.minimum(right[current], right[rest]) - np.maximum(left[current], left[rest]) + 1
        )
        inter_h = np.maximum(
            0, np.minimum(bottom[current], bottom[rest]) - np.maximum(top[current], top[rest]) + 1
        )
        inter = inter_w * inter_h
        iou = inter / (areas[current] + areas[rest] - inter)

        # Continue only with boxes that do not overlap the accepted one too much.
        order = rest[~(iou > overlapThresh)]

    return boxes[kept].astype("int")

def detect_and_display(img, score_thresh=0.75):
    """Run the sliding-window detector on ``img`` and plot the result.

    Every level of an image pyramid is scanned with a 64x128 window. Each
    window is described with ``hog`` and scored with the module-level
    classifier ``clf_final``; windows scoring above ``score_thresh`` are
    mapped back to the original image coordinates, merged with
    ``non_max_suppression`` and drawn as green rectangles.

    Relies on ``hog``, ``pyramid``, ``non_max_suppression`` and ``clf_final``
    being defined by earlier cells.

    Args:
        img: Image as loaded by ``cv2.imread`` (converted here with
            ``COLOR_BGR2RGB``).
        score_thresh: Minimum ``decision_function`` value for a window to
            count as a detection.

    Returns:
        None. Prints detection counts and shows a matplotlib figure.
    """
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img_copy = img_rgb.copy()

    window_size = (64, 128)  # (width, height)
    win_w, win_h = window_size
    stride = 16
    scale_factor = 1.5

    detections = []
    failed_windows = 0
    first_error = None

    for resized in pyramid(img_rgb, scale=scale_factor, min_size=window_size):
        # The pyramid truncates width and height independently, so the two
        # axes can have slightly different factors back to the original image.
        scale_x = img_rgb.shape[1] / resized.shape[1]
        scale_y = img_rgb.shape[0] / resized.shape[0]

        # "+ 1" keeps the last position where the window still fits fully;
        # without it a level exactly as large as the window is never scanned.
        for y in range(0, resized.shape[0] - win_h + 1, stride):
            for x in range(0, resized.shape[1] - win_w + 1, stride):
                window = resized[y:y + win_h, x:x + win_w]
                try:
                    descriptor = hog(window).reshape(1, -1)
                    score = clf_final.decision_function(descriptor)[0]
                except Exception as e:
                    # Stay best-effort, but remember the failure so that it
                    # is reported instead of silently yielding no detections.
                    failed_windows += 1
                    if first_error is None:
                        first_error = e
                    continue
                if score > score_thresh:
                    detections.append((
                        int(x * scale_x),
                        int(y * scale_y),
                        int((x + win_w) * scale_x),
                        int((y + win_h) * scale_y),
                    ))

    if failed_windows:
        print(f"Ostrzeżenie: pominięto {failed_windows} okien z powodu błędów "
              f"(pierwszy błąd: {first_error!r})")

    print(f"Liczba detekcji przed NMS: {len(detections)}")

    if len(detections) > 0:
        final_boxes = non_max_suppression(np.array(detections), overlapThresh=0.3)
        print(f"Liczba detekcji po NMS: {len(final_boxes)}")
    else:
        final_boxes = []
        print("Brak detekcji do przetworzenia przez NMS.")

    for (x1, y1, x2, y2) in final_boxes:
        cv2.rectangle(img_copy, (x1, y1), (x2, y2), (0, 255, 0), 2)

    plt.figure(figsize=(10, 10))
    plt.imshow(img_copy)
    plt.title(f"Wykryte sylwetki (threshold={score_thresh})")
    plt.axis('off')
    plt.show()

In [None]:
image_path = 'test/testImage4.png'
image = cv2.imread(image_path)
# cv2.imread reports a missing or unreadable file by returning None rather
# than raising; fail here with a clear message instead of inside cvtColor.
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")
detect_and_display(image, score_thresh=0.85)

In [None]:
image_path = 'test/testImage1.png'
image = cv2.imread(image_path)
# cv2.imread reports a missing or unreadable file by returning None rather
# than raising; fail here with a clear message instead of inside cvtColor.
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")
detect_and_display(image, score_thresh=1.25)
