# Eigenfaces - PCA for Face Recognition

Applying PCA to face images:
1. Load face dataset
2. Apply PCA
3. Visualize eigenfaces
4. Reconstruct faces
5. Face recognition

---

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import fetch_olivetti_faces
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split

# Notebook-wide setup: white seaborn style, a wide default figure size,
# and a fixed seed for NumPy's global random generator.
sns.set_style('white')
plt.rcParams.update({'figure.figsize': (12, 6)})
np.random.seed(42)

---
## Load Olivetti Faces Dataset

400 face images (64x64 pixels) of 40 people.

In [None]:
# Fetch the Olivetti faces in a fixed, shuffled order.
faces_data = fetch_olivetti_faces(shuffle=True, random_state=42)
face_images = faces_data.images  # image stack, unpacked below as (n_samples, h, w)
faces = faces_data.data          # data matrix that is later handed to PCA
n_samples, h, w = face_images.shape

# Short summary of what was loaded.
for summary_line in (
    f'Dataset: {n_samples} images',
    f'Image size: {h}x{w} = {h*w} pixels',
    f'Data matrix shape: {faces.shape}',
):
    print(summary_line)

### Visualize Sample Faces

In [None]:
# Preview grid: the first 12 images of the dataset in two rows of six.
fig, axes = plt.subplots(2, 6, figsize=(12, 4))
for ax, image in zip(axes.flat, face_images):
    ax.imshow(image, cmap='gray')
    ax.axis('off')
fig.suptitle('Sample Faces from Olivetti Dataset', fontsize=14, fontweight='bold')
fig.tight_layout()
plt.show()

---
## Apply PCA

Reduce each face from 4096 pixel dimensions to far fewer principal components.

In [None]:
# Project every face onto the leading principal components.
# whiten=True asks scikit-learn to rescale the projected coordinates.
n_components = 150
pca = PCA(n_components=n_components, whiten=True)
faces_pca = pca.fit_transform(faces)

# Share of the total variance retained by the kept components, in percent.
variance_pct = pca.explained_variance_ratio_.sum() * 100

print(f'Original dimensions: {faces.shape}')
print(f'Reduced dimensions: {faces_pca.shape}')
print(f'Variance explained: {variance_pct:.2f}%')

### Variance Explained

In [None]:
# Cumulative share of variance captured by the first k fitted components.
cumsum = np.cumsum(pca.explained_variance_ratio_)

plt.figure(figsize=(10, 6))
# Plot against 1..k so the x-axis really is a component count (not a 0-based index).
plt.plot(np.arange(1, len(cumsum) + 1), cumsum, linewidth=2)
plt.axhline(y=0.95, color='r', linestyle='--', label='95% variance')
plt.axhline(y=0.99, color='orange', linestyle='--', label='99% variance')
plt.xlabel('Number of Components', fontsize=12)
plt.ylabel('Cumulative Variance Explained', fontsize=12)
plt.title('How Many Eigenfaces Do We Need?', fontsize=14, fontweight='bold')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()


def _components_needed(cumulative, threshold):
    """Return the smallest component count whose cumulative ratio reaches
    ``threshold``, or ``None`` when the fitted components never reach it.
    """
    reached = cumulative >= threshold
    # np.argmax over an all-False mask returns 0, which would be misreported
    # as "1 component"; handle the unreached case explicitly instead.
    if not reached.any():
        return None
    return int(np.argmax(reached)) + 1


# Find how many for 95% and 99%
n_95 = _components_needed(cumsum, 0.95)
n_99 = _components_needed(cumsum, 0.99)
for label, count in (('95%', n_95), ('99%', n_99)):
    if count is None:
        print(f'Components for {label} variance: more than {len(cumsum)} '
              f'(fitted components capture only {cumsum[-1]*100:.2f}%)')
    else:
        print(f'Components for {label} variance: {count}')

---
## Visualize Eigenfaces

Each eigenface is a principal component (an eigenvector of the pixel covariance matrix) reshaped back into an image.

In [None]:
# Each row of pca.components_ is one principal component; reshape every row
# back to image dimensions so it can be displayed as an "eigenface".
eigenfaces = pca.components_.reshape((n_components, h, w))

# Show the first 15 eigenfaces in a 3x5 grid, numbered from 1.
fig, axes = plt.subplots(3, 5, figsize=(12, 7))
for rank, (ax, eigenface) in enumerate(zip(axes.flat, eigenfaces), start=1):
    ax.imshow(eigenface, cmap='gray')
    ax.set_title(f'Eigenface {rank}', fontsize=10)
    ax.axis('off')
fig.suptitle('Top 15 Eigenfaces (Principal Components)', fontsize=14, fontweight='bold')
fig.tight_layout()
plt.show()

**Observation**: Eigenfaces capture key facial features - lighting, expressions, orientations.

---
## Reconstruct Faces with Different Components

In [None]:
# Face to reconstruct and the component budgets to compare.
face_idx = 0
original_face = face_images[face_idx]
n_components_list = [10, 25, 50, 100, 150]

# Six panels: the original followed by one reconstruction per budget.
fig, axes = plt.subplots(2, 3, figsize=(12, 8))
axes = axes.ravel()

original_panel = axes[0]
original_panel.imshow(original_face, cmap='gray')
original_panel.set_title('Original', fontsize=12, fontweight='bold')
original_panel.axis('off')

for panel, n_comp in zip(axes[1:], n_components_list):
    # A separate PCA is fitted on the full data matrix for every budget.
    pca_temp = PCA(n_components=n_comp).fit(faces)

    # Encode the chosen face (kept 2-D via slicing) and map it back to pixels.
    sample = faces[face_idx:face_idx+1]
    face_reconstructed = pca_temp.inverse_transform(
        pca_temp.transform(sample)
    ).reshape(h, w)

    var_exp = pca_temp.explained_variance_ratio_.sum() * 100

    panel.imshow(face_reconstructed, cmap='gray')
    panel.set_title(f'{n_comp} components ({var_exp:.1f}% var)', fontsize=11)
    panel.axis('off')

fig.suptitle('Face Reconstruction with Different Numbers of Eigenfaces', fontsize=14, fontweight='bold')
fig.tight_layout()
plt.show()

### Observation:
- **10 components**: Blurry, captures basic structure
- **25 components**: Recognizable
- **50+ components**: Very close to original

**Compression**: 64×64 = 4096 pixels → 50 numbers per image (98.8% reduction), given that the shared eigenfaces and mean face are stored once.

---
## Face Recognition with Eigenfaces

In [None]:
# Split data. The dataset has 40 people with only a handful of images each,
# so the split is stratified on identity: every person then appears in both
# the training and the test set in the same 75/25 proportion instead of
# leaving per-person representation to chance.
X_train, X_test, y_train, y_test = train_test_split(
    faces,
    faces_data.target,
    test_size=0.25,
    random_state=42,
    stratify=faces_data.target,
)

# Apply PCA. The eigenfaces are fitted on the training faces only; the test
# faces are merely projected onto them, so no test data enters the fit.
n_comp_recog = 100
pca_recog = PCA(n_components=n_comp_recog, whiten=True)
X_train_pca = pca_recog.fit_transform(X_train)
X_test_pca = pca_recog.transform(X_test)

print(f'Training set: {X_train.shape} → {X_train_pca.shape}')
print(f'Test set: {X_test.shape} → {X_test_pca.shape}')

# Simple nearest neighbor classifier
def predict_face(X_train_pca, y_train, X_test_pca):
    """Label each test face with the label of its closest training face.

    Closeness is the Euclidean distance between a row of ``X_test_pca`` and
    every row of ``X_train_pca``; on ties the earliest training row wins.

    Returns a NumPy array holding one predicted label per test row.
    """
    def nearest_label(query):
        # Distance from this query to every training row, then pick the closest.
        gaps = np.linalg.norm(X_train_pca - query, axis=1)
        return y_train[np.argmin(gaps)]

    return np.array([nearest_label(query) for query in X_test_pca])

# Classify every test face, then score the fraction that was labelled correctly.
y_pred = predict_face(X_train_pca, y_train, X_test_pca)
accuracy = (y_pred == y_test).mean()

print(f'\nRecognition accuracy: {accuracy*100:.2f}%')

### Visualize Predictions

In [None]:
# Show the first 10 test faces with their true and predicted labels;
# the title is green when the two agree and red when they do not.
fig, axes = plt.subplots(2, 5, figsize=(14, 6))
for ax, pixels, actual, guess in zip(axes.flat, X_test, y_test, y_pred):
    ax.imshow(pixels.reshape(h, w), cmap='gray')

    title_color = 'green' if actual == guess else 'red'
    ax.set_title(f'True: {actual}, Pred: {guess}', color=title_color, fontsize=10)
    ax.axis('off')

fig.suptitle('Face Recognition Results (Green=Correct, Red=Wrong)', fontsize=14, fontweight='bold')
fig.tight_layout()
plt.show()

---
## Summary

### Eigenfaces Approach:
1. **Collect** face images (flatten to vectors)
2. **Apply PCA** to find eigenfaces
3. **Project** faces into eigenface space (dimensionality reduction)
4. **Store** low-dimensional representations
5. **Recognize** by finding nearest neighbor in PCA space

### Benefits:
- **Compression**: 4096 → ~100 dimensions per face (≈97.6% reduction)
- **Speed**: Faster comparisons in low-D space
- **Noise reduction**: Captures essential features
- **Works well**: High recognition accuracy

### Limitations:
- Sensitive to lighting and pose
- Linear method (modern systems use deep learning instead)
- Requires aligned faces

### Applications:
- Face recognition systems
- Face verification
- Avatar generation
- Video compression

**Interview Tip**: "Eigenfaces applies PCA to face images, treating each pixel as a feature. The eigenfaces capture key facial variations. By projecting into this low-dimensional space, we achieve massive compression (4096→100 dims) while maintaining recognition accuracy. It's a classic example of PCA for real-world dimensionality reduction."