In [1]:
import numpy as np
from sklearn.datasets import fetch_lfw_people
from sklearn.model_selection import train_test_split

# Activation functions and utility functions

# ReLU activation function
def relu(Z):
    """Apply the rectified linear unit element-wise.

    Args:
        Z: Array-like of pre-activation values.

    Returns:
        The element-wise maximum of ``0`` and ``Z``.
    """
    rectified = np.maximum(0, Z)
    return rectified

# Derivative of ReLU for backpropagation
def relu_derivative(Z):
    """Return the ReLU gradient mask for the pre-activations ``Z``.

    Args:
        Z: Pre-activation values that were fed to ``relu``.

    Returns:
        The result of ``Z > 0``: true where the input is strictly
        positive and false elsewhere (including at exactly 0).
    """
    positive_mask = Z > 0
    return positive_mask

# Softmax activation function for the output layer
def softmax(Z):
    """Compute the softmax of each row of a 2-D array of logits.

    Args:
        Z: Array of shape (n_samples, n_classes) holding raw scores.

    Returns:
        Array of the same shape whose rows are non-negative and sum to 1.
    """
    # Shift every row by its OWN maximum. Subtracting a single global
    # maximum lets rows that sit far below it underflow to all zeros,
    # which turns the normalisation into 0 / 0 = NaN.
    shifted = Z - np.max(Z, axis=1, keepdims=True)
    exp_Z = np.exp(shifted)
    return exp_Z / np.sum(exp_Z, axis=1, keepdims=True)

# Cross-entropy loss function
def cross_entropy_loss(Y_true, Y_pred):
    """Mean categorical cross-entropy for integer class labels.

    Args:
        Y_true: 1-D integer array of true class indices, one per sample.
        Y_pred: 2-D array of predicted class probabilities, indexed as
            ``Y_pred[sample, class]``.

    Returns:
        The average negative log-probability assigned to the true classes.
    """
    n_samples = Y_true.shape[0]
    true_class_probs = Y_pred[range(n_samples), Y_true]
    # A predicted probability of exactly 0 would make log() return -inf and
    # the loss infinite; floor it at a tiny positive value instead.
    true_class_probs = np.clip(true_class_probs, 1e-12, None)
    logp = -np.log(true_class_probs)
    loss = np.sum(logp) / n_samples
    return loss

# Accuracy function
def accuracy(Y_true, Y_pred):
    """Fraction of samples whose highest-scoring class equals the label.

    Args:
        Y_true: Array of true class indices, one per sample.
        Y_pred: 2-D array of per-class scores, one row per sample.

    Returns:
        Mean of the element-wise comparison between the arg-max class of
        each row and ``Y_true``.
    """
    predicted_labels = np.argmax(Y_pred, axis=1)
    hits = predicted_labels == Y_true
    return np.mean(hits)


# Initialize weights and biases for a 2-layer neural network
def initialize_parameters(input_size, hidden_size, output_size):
    """Create reproducible initial weights and biases for a 2-layer network.

    Args:
        input_size: Number of input features.
        hidden_size: Number of hidden units.
        output_size: Number of output classes.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` where ``W1`` has shape
        (input_size, hidden_size), ``b1`` has shape (1, hidden_size),
        ``W2`` has shape (hidden_size, output_size) and ``b2`` has shape
        (1, output_size). Weights are small Gaussian values (scaled by
        0.01); biases are zeros.
    """
    # Use a private generator seeded with 42 rather than np.random.seed(42):
    # it yields the same weight values but does not reset the process-wide
    # NumPy RNG that other code may be drawing from.
    rng = np.random.RandomState(42)

    W1 = rng.randn(input_size, hidden_size) * 0.01  # Input to hidden layer weights
    b1 = np.zeros((1, hidden_size))  # Hidden layer biases
    W2 = rng.randn(hidden_size, output_size) * 0.01  # Hidden to output layer weights
    b2 = np.zeros((1, output_size))  # Output layer biases

    return W1, b1, W2, b2


# Forward pass function
def forward_propagation(X, W1, b1, W2, b2):
    """Run one forward pass through the 2-layer network.

    Args:
        X: Input batch, one sample per row.
        W1, b1: Weights and biases of the hidden layer.
        W2, b2: Weights and biases of the output layer.

    Returns:
        Tuple ``(Z1, A1, Z2, A2)``: hidden pre-activations, hidden ReLU
        activations, output logits and output softmax probabilities.
    """
    # Hidden layer: affine transform followed by ReLU
    hidden_pre = np.dot(X, W1) + b1
    hidden_act = relu(hidden_pre)

    # Output layer: affine transform followed by softmax
    logits = np.dot(hidden_act, W2) + b2
    probs = softmax(logits)

    return hidden_pre, hidden_act, logits, probs


# Backward pass (backpropagation)
def backpropagation(X, Y_true, Z1, A1, Z2, A2, W1, W2):
    """Compute parameter gradients of the mean cross-entropy loss.

    Args:
        X: Input batch, one sample per row.
        Y_true: 1-D integer array of true class indices.
        Z1: Hidden-layer pre-activations from the forward pass.
        A1: Hidden-layer activations from the forward pass.
        Z2: Output logits (unused; kept for interface compatibility).
        A2: Output softmax probabilities from the forward pass.
        W1: Hidden-layer weights (unused; kept for interface compatibility).
        W2: Output-layer weights.

    Returns:
        Tuple ``(dW1, db1, dW2, db2)`` of gradients with the same shapes as
        the corresponding parameters.
    """
    n_samples = X.shape[0]

    # Gradient of output layer. Work on a copy: the in-place updates below
    # would otherwise overwrite the caller's probability array A2.
    dZ2 = A2.copy()
    dZ2[range(n_samples), Y_true] -= 1  # Subtract 1 from the true class's predicted probability
    dZ2 /= n_samples  # Average over the batch

    # Gradients for W2 and b2
    dW2 = np.dot(A1.T, dZ2)
    db2 = np.sum(dZ2, axis=0, keepdims=True)

    # Gradient of hidden layer: push the error back through W2, then gate
    # it by the ReLU derivative.
    dA1 = np.dot(dZ2, W2.T)
    dZ1 = dA1 * relu_derivative(Z1)

    # Gradients for W1 and b1
    dW1 = np.dot(X.T, dZ1)
    db1 = np.sum(dZ1, axis=0, keepdims=True)

    return dW1, db1, dW2, db2


# Gradient descent update
def update_parameters(W1, b1, W2, b2, dW1, db1, dW2, db2, learning_rate):
    """Take one gradient-descent step on every parameter.

    Each parameter is decremented with ``-=`` by ``learning_rate`` times its
    gradient, so NumPy arrays passed in are modified in place.

    Args:
        W1, b1, W2, b2: Current parameters.
        dW1, db1, dW2, db2: Gradients matching the parameters above.
        learning_rate: Step size.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` of the updated parameters.
    """
    stepped = []
    for param, grad in ((W1, dW1), (b1, db1), (W2, dW2), (b2, db2)):
        param -= learning_rate * grad
        stepped.append(param)
    return tuple(stepped)


# Training loop for neural network
def train(X, Y, input_size, hidden_size, output_size, epochs, learning_rate):
    """Train the 2-layer network with full-batch gradient descent.

    Args:
        X: Training inputs, one sample per row.
        Y: Integer class labels for ``X``.
        input_size: Number of input features.
        hidden_size: Number of hidden units.
        output_size: Number of output classes.
        epochs: Number of passes over the whole training set.
        learning_rate: Gradient-descent step size.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` of the trained parameters.
    """
    W1, b1, W2, b2 = initialize_parameters(input_size, hidden_size, output_size)

    for epoch in range(epochs):
        Z1, A1, Z2, A2 = forward_propagation(X, W1, b1, W2, b2)

        # Metrics are computed from this epoch's predictions, before the
        # backward pass and the parameter update run.
        loss = cross_entropy_loss(Y, A2)
        acc = accuracy(Y, A2)

        grads = backpropagation(X, Y, Z1, A1, Z2, A2, W1, W2)
        W1, b1, W2, b2 = update_parameters(W1, b1, W2, b2, *grads, learning_rate)

        # Report progress on every 100th epoch (starting with epoch 0)
        if not epoch % 100:
            print(f"Epoch {epoch}, Loss: {loss:.4f}, Accuracy: {acc:.4f}")

    return W1, b1, W2, b2


# Fetch the LFW faces, keeping only people with at least 70 images and
# downscaling each image by a factor of 0.4
lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4)

# Features (flattened face images) divided by 255, and person-ID labels
X = lfw_people.data / 255.0
Y = lfw_people.target

# Hold out 20% of the samples for testing
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Network dimensions
input_size = X_train.shape[1]  # Number of values per flattened image
hidden_size = 128  # Number of neurons in the hidden layer
output_size = len(lfw_people.target_names)  # One output per person (class)

# Optimisation hyperparameters
epochs = 1000  # Number of training epochs
learning_rate = 0.01  # Learning rate for gradient descent

# Fit the network on the training split
W1, b1, W2, b2 = train(
    X_train,
    Y_train,
    input_size,
    hidden_size,
    output_size,
    epochs,
    learning_rate,
)

# Score the trained network on the held-out split
Z1, A1, Z2, A2 = forward_propagation(X_test, W1, b1, W2, b2)
test_acc = accuracy(Y_test, A2)
print(f"Test Accuracy: {test_acc:.2f}")


Epoch 0, Loss: 1.9459, Accuracy: 0.1136
Epoch 100, Loss: 1.8705, Accuracy: 0.3990
Epoch 200, Loss: 1.8164, Accuracy: 0.3990
Epoch 300, Loss: 1.7791, Accuracy: 0.3990
Epoch 400, Loss: 1.7540, Accuracy: 0.3990
Epoch 500, Loss: 1.7375, Accuracy: 0.3990
Epoch 600, Loss: 1.7266, Accuracy: 0.3990
Epoch 700, Loss: 1.7193, Accuracy: 0.3990
Epoch 800, Loss: 1.7145, Accuracy: 0.3990
Epoch 900, Loss: 1.7111, Accuracy: 0.3990


KeyboardInterrupt: 