In [5]:
import os
import pickle
import warnings

import cv2
import numpy as np
from skimage.feature import hog

In [11]:
def extract_rgb_histogram(image, bins=(32, 32, 32)):
    """
    Build a joint 3-D colour histogram of an image and return it as a flat vector.

    Args:
        image: Three-channel image as a NumPy array. Channels 0, 1 and 2 are
            histogrammed in the order they are stored; arrays produced by
            cv2.imread are stored blue-green-red, so despite the function
            name the axes are B, G, R for such inputs.
        bins: Number of bins along each of the three channel axes.
    Returns:
        1-D NumPy array: the histogram after cv2.normalize (called with its
        default arguments), flattened.
    """
    channel_indices = [0, 1, 2]
    # Every channel covers the half-open intensity range [0, 256).
    value_ranges = [0, 256] * 3

    joint_hist = cv2.calcHist([image], channel_indices, None, bins, value_ranges)
    # Source and destination are the same buffer, as in a typical in-place normalize call.
    normalized = cv2.normalize(joint_hist, joint_hist)
    return normalized.flatten()


# Smoke-test the histogram extractor on a single training image.
sample_image_path = "data_normalized/train/1-front/IMG_2031_jpg.rf.14a279d79af06b2a455146268550e03e.jpg"
image = cv2.imread(sample_image_path)
# cv2.imread reports a missing/unreadable file by returning None instead of
# raising, so check here rather than failing later inside the extractor.
if image is None:
    raise FileNotFoundError(f"Could not read image: {sample_image_path}")
features = extract_rgb_histogram(image)
print(features)

[0.         0.         0.00199491 ... 0.00299237 0.00099746 0.00199491]


In [7]:
def extract_hog_features(image, pixels_per_cell=(8, 8), cells_per_block=(2, 2), orientations=9):
    """
    Describe an image by its Histogram of Oriented Gradients (HOG).

    The image is converted to a single grayscale channel with
    cv2.COLOR_BGR2GRAY and then passed to skimage.feature.hog.

    Args:
        image: Input image as a NumPy array; must be accepted by
            cv2.cvtColor with the BGR-to-gray conversion code.
        pixels_per_cell: (height, width) of one HOG cell, in pixels.
        cells_per_block: Number of cells per normalization block.
        orientations: Number of gradient-orientation bins.
    Returns:
        HOG descriptor as a NumPy array (hog is called with
        feature_vector=True).
    """
    # Fixed HOG options alongside the caller-tunable ones.
    hog_settings = {
        "orientations": orientations,
        "pixels_per_cell": pixels_per_cell,
        "cells_per_block": cells_per_block,
        "block_norm": 'L2-Hys',
        "transform_sqrt": True,
        "feature_vector": True,
    }
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return hog(grayscale, **hog_settings)

In [8]:
def extract_combined_features(image):
    """
    Produce one feature vector holding both colour and gradient information.

    Args:
        image: Input image as a NumPy array; it is handed unchanged to
            extract_rgb_histogram and extract_hog_features.
    Returns:
        1-D NumPy array: the colour-histogram vector followed by the HOG
        vector, joined with np.concatenate.
    """
    feature_parts = (
        extract_rgb_histogram(image),  # colour distribution comes first
        extract_hog_features(image),   # gradient descriptor is appended after it
    )
    return np.concatenate(feature_parts)


In [9]:
def process_dataset(input_dir, feature_extractor):
    """
    Process all images in a dataset directory and extract features.

    The directory tree under ``input_dir`` is walked recursively. Every file
    whose extension is .png, .jpg or .jpeg (matched case-insensitively) is
    loaded with cv2.imread and passed to ``feature_extractor``; the name of
    the folder containing the file is recorded as its label.

    Directories and files are visited in sorted order so that the row order
    of the result is reproducible across filesystems.

    Args:
        input_dir: Path to the dataset directory.
        feature_extractor: Feature extraction function (e.g., extract_rgb_histogram
            or extract_combined_features). Called with the loaded image array.
    Returns:
        A tuple (features, labels) where:
            features: np.array built from the per-image feature vectors. This is
                a 2-D matrix only if every vector has the same length, which
                the extractor and the image sizes must guarantee.
            labels: NumPy array of folder-name labels, aligned with ``features``.
    Raises:
        FileNotFoundError: If ``input_dir`` is not an existing directory
            (os.walk would otherwise silently yield nothing).
    Warns:
        UserWarning: For each matching file that cv2.imread cannot decode; such
            files are skipped.
    """
    if not os.path.isdir(input_dir):
        raise FileNotFoundError(f"Dataset directory not found: {input_dir}")

    image_suffixes = ('.png', '.jpg', '.jpeg')
    features = []
    labels = []

    for root, dirs, files in os.walk(input_dir):
        # Sorting in place steers os.walk's top-down traversal order.
        dirs.sort()
        for file in sorted(files):
            if not file.lower().endswith(image_suffixes):
                continue
            img_path = os.path.join(root, file)
            image = cv2.imread(img_path)
            # cv2.imread returns None (no exception) for corrupt/unreadable files.
            if image is None:
                warnings.warn(f"Skipping unreadable image: {img_path}", stacklevel=2)
                continue
            features.append(feature_extractor(image))
            # Use the folder name as the label
            labels.append(os.path.basename(root))

    return np.array(features), np.array(labels)

# Example usage: extract combined (colour histogram + HOG) features for each split.
train_features, train_labels = process_dataset(
    input_dir="data_normalized/train",
    feature_extractor=extract_combined_features,
)
val_features, val_labels = process_dataset(
    input_dir="data_normalized/val",
    feature_extractor=extract_combined_features,
)
test_features, test_labels = process_dataset(
    input_dir="data_normalized/test",
    feature_extractor=extract_combined_features,
)

In [12]:
# Define directories for train, val, and test
train_dir = "data_normalized/train"
val_dir = "data_normalized/val"
test_dir = "data_normalized/test"

# Extract features for all splits
train_features, train_labels = process_dataset(train_dir, extract_combined_features)
val_features, val_labels = process_dataset(val_dir, extract_combined_features)
test_features, test_labels = process_dataset(test_dir, extract_combined_features)

# Save each split as a pickled (features, labels) tuple in
# features_train.pkl / features_val.pkl / features_test.pkl.
for split_name, split_data in (
    ("train", (train_features, train_labels)),
    ("val", (val_features, val_labels)),
    ("test", (test_features, test_labels)),
):
    with open(f"features_{split_name}.pkl", "wb") as f:
        pickle.dump(split_data, f)

print("Feature extraction complete and saved!")


Feature extraction complete and saved!
