In [1]:
# HOG + KNN dataset handled.

In [2]:
pip install scikit-image




In [3]:
pip install scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [7]:
import os
import numpy as np
from skimage import io, color, transform
from skimage.feature import hog
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Path to the AT&T dataset
dataset_path = './dataset/'

# Load and preprocess the dataset
def load_and_preprocess_atnt_dataset(dataset_path):
    images = []
    labels = []
    
    for subject_dir in os.listdir(dataset_path):
        subject_path = os.path.join(dataset_path, subject_dir)
        if os.path.isdir(subject_path):
            label = int(subject_dir.replace("s", ""))  # Convert folder name to label
            for img_name in os.listdir(subject_path):
                img_path = os.path.join(subject_path, img_name)
                img = io.imread(img_path)
                # if len(img.shape) == 3:  # Convert to grayscale if needed
                #     img = color.rgb2gray(img)
                
                # Resize and normalize
                img = transform.resize(img, (112, 92), mode='reflect')  # Resize to desired shape
                # img = img / 255.0  # Normalize to [0, 1]
                
                images.append(img)
                labels.append(label)
    
    return np.array(images), np.array(labels)

# Split the dataset into train, validation, and test sets
from sklearn.model_selection import train_test_split

def split_dataset(images, labels):
    # Split dataset into train and test
    X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.4, random_state=42, stratify=labels)
    # Verify the proportions
    print(f"Training set size: {X_train.shape[0]} samples")
    print(f"Test set size: {X_test.shape[0]} samples")
    # Return the split datasets
    return X_train, X_test, y_train, y_test


# Extract HOG features from the dataset
def extract_hog_features(images):
    hog_features = []
    
    for img in images:
        feature, _ = hog(img, orientations=9, pixels_per_cell=(8, 8), cells_per_block=(2, 2), block_norm='L2-Hys', visualize=True)
        hog_features.append(feature)
    
    return np.array(hog_features)

# Evaluate the model on validation and test sets
def evaluate_model(model, X, y_true):
    y_pred = model.predict(X)
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='macro')
    recall = recall_score(y_true, y_pred, average='macro')
    f1 = f1_score(y_true, y_pred, average='macro')
    
    return accuracy, precision, recall, f1

# Main function to run HOG + KNN on the AT&T dataset
def main():
    # Load and preprocess the dataset
    images, labels = load_and_preprocess_atnt_dataset(dataset_path)
    
    # Split the dataset
    X_train, X_test, y_train, y_test = split_dataset(images, labels)
    
    # Extract HOG features
    X_train_hog = extract_hog_features(X_train)
    # X_val_hog = extract_hog_features(X_val)
    X_test_hog = extract_hog_features(X_test)
    
    # Train KNN classifier
    knn = KNeighborsClassifier(n_neighbors=5)
    knn.fit(X_train_hog, y_train)
    
    # Evaluate on validation set
    # val_acc, val_prec, val_recall, val_f1 = evaluate_model(knn, X_val_hog, y_val)
    # print(f"Validation Set - Accuracy: {val_acc:.4f}, Precision: {val_prec:.4f}, Recall: {val_recall:.4f}, F1 Score: {val_f1:.4f}")
    
    # Evaluate on test set
    test_acc, test_prec, test_recall, test_f1 = evaluate_model(knn, X_test_hog, y_test)
    print(f"Test Set - Accuracy: {test_acc:.4f}, Precision: {test_prec:.4f}, Recall: {test_recall:.4f}, F1 Score: {test_f1:.4f}")

if __name__ == "__main__":
    main()


Training set size: 240 samples
Test set size: 160 samples
Test Set - Accuracy: 0.8187, Precision: 0.8612, Recall: 0.8187, F1 Score: 0.8084
