In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix,ConfusionMatrixDisplay,classification_report
# If running in Colab keep the next import, otherwise comment it out when running locally
try:
    from google.colab import files
except Exception:
    files = None
import zipfile
import os
import cv2
from skimage.feature import hog
from skimage import exposure

In [None]:
# Upload step: inside Colab, files.upload() is used to bring in Faces.zip.
# Outside Colab (`files` is None) nothing is uploaded; place Faces.zip in the repo or update paths.
uploaded = {} if files is None else files.upload()


In [None]:
# Archive location and extraction target — adjust both when running locally.
zip_path = "/content/Faces.zip"  # Colab default
extract_path = "/content/images"

# Local run with Faces.zip in the repo root: uncomment the two lines below and update paths.
# zip_path = "Faces.zip"
# extract_path = "./images"

# Report a missing archive instead of raising; otherwise unpack everything into extract_path.
if not os.path.exists(zip_path):
    print('Zip file not found at', zip_path)
else:
    with zipfile.ZipFile(zip_path, 'r') as archive:
        archive.extractall(extract_path)


In [None]:
# Load images and compute HOG visualizations
image_folder = "/content/images/Faces"
images = []
labels = []

# If running locally change image_folder accordingly, e.g. './images/Faces'
if os.path.exists(image_folder):
    # os.listdir() returns entries in arbitrary, filesystem-dependent order. Sorting keeps the
    # sample order (and therefore the seeded train/test split later on) reproducible.
    for filename in sorted(os.listdir(image_folder)):
        img_path = os.path.join(image_folder, filename)
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # Entry could not be read as an image — skip it.
            continue
        # visualize=True makes hog() return the rendered HOG image alongside the feature vector;
        # only the rendered image is stored below.
        hog_features, hog_image = hog(img,orientations=10, pixels_per_cell=(8, 8), cells_per_block=(2, 2), visualize=True)
        hog_image_rescaled = exposure.rescale_intensity(hog_image, in_range=(0, 10))
        images.append(hog_image_rescaled)
        # The label is the part of the filename before the first underscore.
        labels.append(filename.split("_")[0])
else:
    print('Image folder not found at', image_folder)


In [None]:
# Quick visual check (only if images were loaded)
if len(images) > 0:
    X = np.array(images)
    y = np.array(labels)
    # Two rows of up to five panels; `or 1` keeps at least one column when only one image exists.
    n_cols = min(5, X.shape[0] // 2 or 1)
    fig, axes = plt.subplots(2, n_cols, figsize=(15, 5))
    # zip() stops at the shorter sequence, so it never indexes past the loaded images.
    for ax, hog_img, label in zip(axes.flat, X, y):
        ax.imshow(hog_img, cmap="gray")
        ax.set_title(f"{label}")
    # Hide frame and ticks on every panel, including any left without an image
    # (the single-image case produces a 2x1 grid with one empty panel).
    for ax in axes.flat:
        ax.axis("off")
    plt.show()
else:
    print('No images to display')


In [None]:
# PCA exploration: fit a full PCA and plot cumulative explained variance (needs loaded images).
if not images:
    print('Skipping PCA — no images loaded')
else:
    # Flatten each sample into a single feature row before fitting.
    X = X.reshape(X.shape[0], -1)
    pca = PCA()
    X_pca = pca.fit_transform(X)
    explained_variance = pca.explained_variance_ratio_.cumsum()

    plt.figure(figsize=(8, 5))
    plt.plot(
        range(1, len(explained_variance) + 1),
        explained_variance,
        marker='o',
        linestyle='--',
    )
    plt.axhline(y=0.95, color='r', linestyle='--', label="95%")
    plt.xlabel("# of components")
    plt.ylabel("Cumulative explained variance")
    plt.title("PCA explained variance")
    plt.legend()
    plt.grid()
    plt.show()

    # Index of the first cumulative value at or above 0.95, converted to a 1-based count.
    optimal_components = np.argmax(explained_variance >= 0.95) + 1
    print(f"# of components for 95% variance: {optimal_components}")


In [None]:
# Train / test split and model training (run only after data prep)
if len(images) > 0:
    # Rebuild the flattened feature matrix and the label array from the loaded lists, so this
    # cell depends only on the loading cell (the same thing the guard above checks). Without
    # this, X must already have been flattened by the PCA-exploration cell and y created by the
    # visual-check cell; skipping or re-running those cells breaks the PCA fit below.
    X = np.array(images).reshape(len(images), -1)
    y = np.array(labels)
    # Stratified 80/20 split with a fixed seed.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
    # PCA is fitted on the training portion only; the test portion is projected with that fit.
    pca = PCA(n_components=0.95, whiten=True, random_state=42)
    X_train_pca = pca.fit_transform(X_train)
    X_test_pca = pca.transform(X_test)
else:
    print('No data prepared for training — load images first')


## SVC example (GridSearch)

In [None]:
# Example: only run after X_train_pca is available
from sklearn.model_selection import GridSearchCV

svc = SVC()
# The grid is a list of per-kernel dicts: `gamma` has no effect on the linear kernel, so it is
# searched only for 'rbf'. A single crossed dict would fit every linear candidate twice
# (16 candidates instead of 12) with identical results.
param_grid = [
    {'kernel': ['linear'], 'C': [0.1, 0.5, 1, 10]},
    {'kernel': ['rbf'], 'C': [0.1, 0.5, 1, 10], 'gamma': ['scale', 'auto']},
]
# 5-fold cross-validated search scored on accuracy; train scores are kept for inspection.
GScv = GridSearchCV(svc, param_grid, cv=5, scoring="accuracy", return_train_score=True)
# GScv.fit(X_train_pca, y_train)  # uncomment to run grid search


In [None]:
# Predict / evaluate — run only after GScv.fit(X_train_pca, y_train) has been executed
# y_predict = GScv.best_estimator_.predict(X_test_pca)
# print(accuracy_score(y_test, y_predict))


## KNN / Logistic Regression examples

In [None]:
from sklearn.neighbors import KNeighborsClassifier
# k-nearest-neighbours example on the PCA-reduced features. Uncomment to run; it needs
# X_train_pca / X_test_pca and y_train from the train/test split cell.
# Example: knn = KNeighborsClassifier(n_neighbors=3)
# knn.fit(X_train_pca, y_train)
# pred = knn.predict(X_test_pca)


In [None]:
# Logistic regression example: the classifier is created with default settings and left unfitted.
logistic = LogisticRegression()
# Uncomment to train on the PCA-reduced training data and print the score on the test data;
# needs X_train_pca / X_test_pca and y_train / y_test from the train/test split cell.
# logistic.fit(X_train_pca, y_train)
# print(logistic.score(X_test_pca, y_test))
