# Week 3 – HOG Feature Extraction

This notebook demonstrates classical HOG features and PCA visualization.

In [None]:

import os
import cv2
import numpy as np
import matplotlib.pyplot as plt

from skimage.feature import hog
from skimage import exposure
from sklearn.decomposition import PCA


In [None]:

# Load one sample image from IMG_DIR and display it in RGB.
IMG_DIR = "image/image"

# os.listdir returns entries in arbitrary order, so sort the listing to make
# the chosen sample the same on every run. Take the first file OpenCV can
# actually decode: cv2.imread returns None (it does not raise) for files it
# cannot read, which would otherwise crash cvtColor with an opaque error.
img = None
for img_name in sorted(os.listdir(IMG_DIR)):
    img_path = os.path.join(IMG_DIR, img_name)
    img = cv2.imread(img_path)
    if img is not None:
        break

if img is None:
    raise FileNotFoundError(f"No readable image found in {IMG_DIR!r}")

# OpenCV loads pixels as BGR; matplotlib expects RGB.
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

plt.imshow(img)
plt.axis("off")
plt.title("Original Image")
plt.show()


In [None]:

# Collapse the RGB sample to a single-channel grayscale image; the HOG call
# further down takes `gray` as its input.
gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

# Draw through an explicit Axes handle instead of the pyplot state machine.
fig, ax = plt.subplots()
ax.imshow(gray, cmap="gray")
ax.axis("off")
ax.set_title("Grayscale Image")
plt.show()


In [None]:

# HOG descriptor settings, gathered in one place so they are easy to inspect:
# 9 orientation bins, 8x8-pixel cells, 2x2-cell blocks, L2-Hys block
# normalisation, flattened into a single 1-D feature vector.
hog_params = dict(
    orientations=9,
    pixels_per_cell=(8, 8),
    cells_per_block=(2, 2),
    block_norm="L2-Hys",
    feature_vector=True,
)

# visualize=True makes hog() also return an image rendering of the descriptor.
hog_features, hog_image = hog(gray, visualize=True, **hog_params)

print("HOG feature vector length:", hog_features.shape)


In [None]:

# Rescale intensities taking (0, 10) as the input range before plotting
# (presumably to make faint gradient strokes visible; the cut-off of 10 is
# the notebook's own choice).
hog_image_rescaled = exposure.rescale_intensity(hog_image, in_range=(0, 10))

# Draw through an explicit Figure/Axes pair instead of the pyplot state machine.
fig, ax = plt.subplots(figsize=(6, 6))
ax.imshow(hog_image_rescaled, cmap="gray")
ax.axis("off")
ax.set_title("HOG Visualization")
plt.show()


In [None]:

# Build a HOG feature matrix from (at most) the first 300 files in IMG_DIR.
#
# Loop variables use their own names (sample_*) so this cell does not rebind
# the notebook-level `img` / `img_name` set by the single-image cell above;
# rebinding them would break any re-run of the grayscale cell.
hog_rows = []

# os.listdir order is arbitrary: sort so the 300-file subset is reproducible.
for sample_name in sorted(os.listdir(IMG_DIR))[:300]:
    sample_path = os.path.join(IMG_DIR, sample_name)
    sample_bgr = cv2.imread(sample_path)
    if sample_bgr is None:
        # cv2.imread returns None for files it cannot decode; skip those.
        continue

    sample_gray = cv2.cvtColor(sample_bgr, cv2.COLOR_BGR2GRAY)
    # A fixed input size gives every image a HOG vector of the same length,
    # which is required to stack them into one 2-D array.
    sample_gray = cv2.resize(sample_gray, (128, 128))

    features = hog(
        sample_gray,
        orientations=9,
        pixels_per_cell=(8, 8),
        cells_per_block=(2, 2),
        block_norm="L2-Hys",
        feature_vector=True
    )
    hog_rows.append(features)

# Fail here with a clear message rather than later inside PCA.
if not hog_rows:
    raise ValueError(f"No readable images found in {IMG_DIR!r}")

hog_vectors = np.array(hog_rows)
print("HOG dataset shape:", hog_vectors.shape)


In [None]:

# Project the HOG feature matrix onto its first two principal components and
# plot the result as a 2-D scatter.
#
# random_state is fixed because, for a matrix of this size, PCA's default
# 'auto' solver may select the randomized SVD, which would otherwise make the
# projection vary from run to run.
pca = PCA(n_components=2, random_state=0)
hog_pca = pca.fit_transform(hog_vectors)

plt.figure(figsize=(6,6))
plt.scatter(hog_pca[:,0], hog_pca[:,1], alpha=0.5)
plt.title("PCA of HOG Features")
plt.xlabel("PC1")
plt.ylabel("PC2")
plt.show()



## Observations

- HOG captures gradient and edge-based features.
- Features are hand-crafted and fixed.
- PCA shows limited structure compared to CNN embeddings.
- CNN embeddings are more expressive and data-driven.
