In [1]:
pip install scikit-image

Collecting scikit-image
  Downloading scikit_image-0.24.0-cp312-cp312-win_amd64.whl.metadata (14 kB)
Collecting networkx>=2.8 (from scikit-image)
  Downloading networkx-3.3-py3-none-any.whl.metadata (5.1 kB)
Collecting imageio>=2.33 (from scikit-image)
  Downloading imageio-2.35.1-py3-none-any.whl.metadata (4.9 kB)
Collecting tifffile>=2022.8.12 (from scikit-image)
  Downloading tifffile-2024.8.30-py3-none-any.whl.metadata (31 kB)
Collecting lazy-loader>=0.4 (from scikit-image)
  Downloading lazy_loader-0.4-py3-none-any.whl.metadata (7.6 kB)
Downloading scikit_image-0.24.0-cp312-cp312-win_amd64.whl (12.9 MB)
   ---------------------------------------- 0.0/12.9 MB ? eta -:--:--
    --------------------------------------- 0.3/12.9 MB ? eta -:--:--
   -- ------------------------------------- 0.8/12.9 MB 2.6 MB/s eta 0:00:05
   ---- ----------------------------------- 1.3/12.9 MB 2.7 MB/s eta 0:00:05
   ----- ---------------------------------- 1.8/12.9 MB 2.7 MB/s eta 0:00:05
   ------- --

In [2]:
pip install scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [1]:
import os
import numpy as np
from skimage import io, color
from skimage.feature import hog
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Path to the AT&T dataset
dataset_path = './dataset/'

# Load the dataset
def load_atnt_dataset(dataset_path):
    images = []
    labels = []
    
    for subject_dir in os.listdir(dataset_path):
        subject_path = os.path.join(dataset_path, subject_dir)
        if os.path.isdir(subject_path):
            label = int(subject_dir.replace("s", ""))  # Convert folder name to label
            for img_name in os.listdir(subject_path):
                img_path = os.path.join(subject_path, img_name)
                img = io.imread(img_path)
                if len(img.shape) == 3:  # Convert to grayscale if needed
                    img = color.rgb2gray(img)
                images.append(img)
                labels.append(label)
    
    return np.array(images), np.array(labels)

# Split the dataset into train, validation, and test sets
def split_dataset(images, labels):
    # 60% for training, 20% for validation, 20% for testing
    X_train, X_temp, y_train, y_temp = train_test_split(images, labels, test_size=0.6, stratify=labels, random_state=42)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=42)
    
    return X_train, X_val, X_test, y_train, y_val, y_test

# Extract HOG features from the dataset
def extract_hog_features(images):
    hog_features = []
    
    for img in images:
        feature, _ = hog(img, orientations=9, pixels_per_cell=(8, 8), cells_per_block=(2, 2), block_norm='L2-Hys', visualize=True)
        hog_features.append(feature)
    
    return np.array(hog_features)

# Evaluate the model on validation and test sets
def evaluate_model(model, X, y_true):
    y_pred = model.predict(X)
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='macro')
    recall = recall_score(y_true, y_pred, average='macro')
    f1 = f1_score(y_true, y_pred, average='macro')
    
    return accuracy, precision, recall, f1

# Main function to run HOG + KNN on the AT&T dataset
def main():
    # Load the dataset
    images, labels = load_atnt_dataset(dataset_path)
    
    # Split the dataset
    X_train, X_val, X_test, y_train, y_val, y_test = split_dataset(images, labels)
    
    # Extract HOG features
    X_train_hog = extract_hog_features(X_train)
    X_val_hog = extract_hog_features(X_val)
    X_test_hog = extract_hog_features(X_test)
    
    # Train KNN classifier
    knn = KNeighborsClassifier(n_neighbors=5)
    knn.fit(X_train_hog, y_train)
    
    # Evaluate on validation set
    val_acc, val_prec, val_recall, val_f1 = evaluate_model(knn, X_val_hog, y_val)
    print(f"Validation Set - Accuracy: {val_acc:.4f}, Precision: {val_prec:.4f}, Recall: {val_recall:.4f}, F1 Score: {val_f1:.4f}")
    
    # Evaluate on test set
    test_acc, test_prec, test_recall, test_f1 = evaluate_model(knn, X_test_hog, y_test)
    print(f"Test Set - Accuracy: {test_acc:.4f}, Precision: {test_prec:.4f}, Recall: {test_recall:.4f}, F1 Score: {test_f1:.4f}")

if __name__ == "__main__":
    main()


Validation Set - Accuracy: 0.6667, Precision: 0.6855, Recall: 0.6667, F1 Score: 0.6239
Test Set - Accuracy: 0.7667, Precision: 0.8175, Recall: 0.7667, F1 Score: 0.7580


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
