**Project-7 Title: Decision Tree with CNN Features for Face Recognition**
            *Submitted By: Lakshmi Sai Praneet Sadhuneni(AP21110010206)*

In [3]:
#Load and Preprocess the data
from sklearn.datasets import fetch_lfw_people  # Import the function to load the LFW dataset
import numpy as np  # Import NumPy for numerical operations

# Load the LFW dataset with specified parameters
lfw_dataset = fetch_lfw_people(min_faces_per_person=150, resize=0.4)  # Load images of people with at least 150 samples, resized to 40%

X = lfw_dataset.images  # Extract the grayscale face images from the dataset as a NumPy array
y = lfw_dataset.target  # Extract the labels corresponding to each face image
target_names = lfw_dataset.target_names  # Extract the names of individuals as a list of strings

# Normalize pixel values to the range [0, 1] to standardize the input data
X = X / 255.0  # Divide all pixel intensity values by 255 (the max pixel value for images)

# Reshape the images to add a channel dimension for compatibility with ResNet
X_reshaped = np.expand_dims(X, axis=-1)  # Change shape from (n_samples, height, width) to (n_samples, height, width, 1)

# Print the total number of samples loaded in the dataset
print(f"Dataset loaded with {len(X)} samples.")  # Verify and display the dataset size


Dataset loaded with 766 samples.


In [None]:
#Now we start coding the resnet(CNN) utility functions

#First, the convolution layer

import numpy as np

def conv2d(X, W, stride=1, padding=1): #Applies a 2D convolution over an input image


    # Get the shapes of the input and filter
    N, C, H, Width = X.shape  # N: batch size, C: channels, H: height, Width: width
    F, _, HH, WW = W.shape  # F: number of filters, HH: filter height, WW: filter width

    # Compute output dimensions
    out_height = (H - HH) // stride + 1  # Height of the output feature map
    out_width = (W - WW) // stride + 1  # Width of the output feature map

    # Initialize the output tensor with zeros
    out = np.zeros((N, F, out_height, out_width))  # Shape: (batch size, filters, height, width)

    # Perform convolution operation
    for n in range(N):  # Loop over each image in the batch
        for f in range(F):  # Loop over each filter
            for i in range(out_height):  # Loop over each row of the output feature map
                for j in range(out_width):  # Loop over each column of the output feature map
                    # Extract a region of the input image of size (HH, WW)
                    region = X[n, :, i * stride:i * stride + HH, j * stride:j * stride + WW]

                    # Perform element-wise multiplication and sum the results
                    out[n, f, i, j] = np.sum(region * W[f, :, :, :])

    # Return the output tensor after convolution
    return out

In [None]:
#After the convolution layer, lets define the Relu activation function

def relu(X):
    return np.maximum(0, X)

In [None]:
#Define the batch normalisation function
def batch_norm(X, gamma, beta, eps=1e-5):

    # Compute the mean across the batch, height, and width dimensions for each channel
    mean = np.mean(X, axis=(0, 2, 3), keepdims=True)  # Shape: (1, C, 1, 1)

    # Compute the variance across the batch, height, and width dimensions for each channel
    var = np.var(X, axis=(0, 2, 3), keepdims=True)  # Shape: (1, C, 1, 1)

    # Normalize the input by subtracting the mean and dividing by the standard deviation
    X_hat = (X - mean) / np.sqrt(var + eps)  # Shape: (N, C, H, W)

    # Scale the normalized input using gamma and shift using beta
    return gamma * X_hat + beta  # Shape: (N, C, H, W)

In [None]:
#Function for max pooling
def max_pool(X, size=2, stride=2):

    # Extract the shape of the input tensor
    N, C, H, W = X.shape  # N: batch size, C: channels, H: height, W: width

    # Compute the dimensions of the output tensor
    out_height = (H - size) // stride + 1  # Height of the pooled output
    out_width = (W - size) // stride + 1  # Width of the pooled output

    # Initialize the output tensor with zeros
    out = np.zeros((N, C, out_height, out_width))  # Shape: (N, C, out_height, out_width)

    # Perform max pooling operation
    for n in range(N):  # Loop over each sample in the batch
        for c in range(C):  # Loop over each channel
            for i in range(out_height):  # Loop over each row of the output
                for j in range(out_width):  # Loop over each column of the output
                    # Extract the region corresponding to the pooling window
                    region = X[n, c, i * stride:i * stride + size, j * stride:j * stride + size]

                    # Compute the maximum value in the region and assign it to the output
                    out[n, c, i, j] = np.max(region)

    # Return the pooled output tensor
    return out

In [None]:
def residual_block(X, W1, W2, gamma, beta, W_skip):

    # Apply the first convolution, batch normalization, and ReLU activation
    Y = conv2d(X, W1, stride=1, padding=1)  # Convolution with weights W1
    Y = batch_norm(Y, gamma, beta)  # Batch normalization
    Y = relu(Y)  # ReLU activation

    # Apply the second convolution and batch normalization
    Y = conv2d(Y, W2, stride=1, padding=1)  # Convolution with weights W2
    Y = batch_norm(Y, gamma, beta)  # Batch normalization

    # If dimensions of X and Y differ, adjust X using a 1x1 convolution
    if W_skip is not None:
        X = conv2d(X, W_skip, stride=1, padding=0)  # 1x1 convolution for dimension matching

    # Add the shortcut connection and apply ReLU activation
    return relu(Y + X)  # Element-wise addition and activation

In [None]:
def resnet_block(X, num_blocks, num_filters):#Adds a series of residual blocks into the network

    # Get the number of input channels from the input tensor
    input_channels = X.shape[1]

    # Add the specified number of residual blocks
    for _ in range(num_blocks):
        # Initialize weights for the two convolution layers
        W1 = np.random.randn(num_filters, input_channels, 3, 3).astype(np.float32) * 0.01
        W2 = W1  # Sharing the same weights for simplicity
        # Initialize batch normalization parameters
        gamma = np.ones((1, num_filters, 1, 1), dtype=np.float32)  # Scaling
        beta = np.zeros((1, num_filters, 1, 1), dtype=np.float32)  # Shifting



        # Apply a residual block, including a 1x1 convolution for dimension matching
        X = residual_block(X, W1, W2, gamma, beta, W_skip=np.random.randn(num_filters, input_channels, 1, 1) * 0.01)

        # Update the input channels to match the number of filters for the next block
        input_channels = num_filters

    return X


def resnet(X): #Function that Aligns the Resnet like neural network

    # Initial convolution layer with 16 filters, followed by ReLU activation
    W = np.random.randn(16, 1, 3, 3) * 0.01  # 16 filters, 1 input channel, 3x3 kernel
    X = conv2d(X, W, stride=1, padding=1)  # Apply convolution
    X = relu(X)  # Apply ReLU activation

    # Apply max pooling with a 2x2 window and stride 2
    X = max_pool(X, size=2, stride=2)

    # Add the first stage of residual blocks (2 blocks, 16 filters each)
    X = resnet_block(X, num_blocks=2, num_filters=16)

    # Add the second stage of residual blocks (2 blocks, 32 filters each)
    X = resnet_block(X, num_blocks=2, num_filters=32)

    # Apply global average pooling to reduce spatial dimensions to a single value per channel
    X = X.mean(axis=(2, 3))  # Take the mean across height and width

    # Return the final feature representation
    return X

In [None]:
# Extract features from training and testing data using ResNet
features_train = resnet(X_train)  # Apply the ResNet model on training data
features_test = resnet(X_test)  # Apply the ResNet model on testing data



In [4]:
#Defining a custom decision-tree based classifier.

class DecisionTree:
    def __init__(self, max_depth=None): #Initialize the decision tree.
        self.max_depth = max_depth  # Store the maximum depth
        self.tree = None  # Initialize the tree structure

    def fit(self, X, y):

        self.tree = self._build_tree(X, y, depth=0)  # Build the tree starting at depth 0

    def predict(self, X): #Function to predict the labels for the input data
        # Use the built tree to predict each sample
        return np.array([self._predict_single(self.tree, sample) for sample in X])

    def _build_tree(self, X, y, depth): # Function to recursively build the decision tree

        # Check stopping conditions
        if len(np.unique(y)) == 1 or (self.max_depth and depth >= self.max_depth):
            return np.mean(y)  # Return the average label for the leaf node

        # Find the best feature and threshold for splitting
        feature, threshold = self._find_best_split(X, y)

        # Split the data into left and right branches
        left_indices = X[:, feature] < threshold
        right_indices = ~left_indices

        # Recursively build left and right subtrees
        return {
            'feature': feature,  # Index of the splitting feature
            'threshold': threshold,  # Threshold value for the split
            'left': self._build_tree(X[left_indices], y[left_indices], depth + 1),
            'right': self._build_tree(X[right_indices], y[right_indices], depth + 1),
        }

    def _find_best_split(self, X, y): #Function to find the best feature and threshold for splitting the data
        best_feature, best_threshold, best_gain = None, None, float('-inf')

        # Iterate over all features
        for feature in range(X.shape[1]):
            # Get all unique thresholds for the current feature
            thresholds = np.unique(X[:, feature])

            # Iterate over thresholds to compute information gain
            for threshold in thresholds:
                gain = self._information_gain(X[:, feature], y, threshold)
                if gain > best_gain:  # Update the best split
                    best_gain, best_feature, best_threshold = gain, feature, threshold

            print(f"Best feature: {best_feature}, Threshold: {best_threshold}")

        return best_feature, best_threshold

    def _entropy(self, y): #Compute the entropy of a label array.


        probabilities = np.bincount(y) / len(y)  # Class probabilities
        return -np.sum([p * np.log2(p) for p in probabilities if p > 0])  # Compute entropy

    def _information_gain(self, feature_column, y, threshold): #Compute the information gain of a split.

        parent_entropy = self._entropy(y)  # Compute parent entropy

        # Split the data based on the threshold
        left_indices = feature_column < threshold
        right_indices = ~left_indices

        if sum(left_indices) == 0 or sum(right_indices) == 0:  # Avoid empty splits
            return 0

        # Compute weighted entropy of children
        n = len(y)
        left_entropy = self._entropy(y[left_indices])
        right_entropy = self._entropy(y[right_indices])
        weighted_entropy = (sum(left_indices) / n) * left_entropy + (sum(right_indices) / n) * right_entropy

        # Information gain is the reduction in entropy
        return parent_entropy - weighted_entropy

    def _predict_single(self, node, sample): #Predict the label for a single sample.



        if isinstance(node, (int, float)):  # If the node is a leaf, return the value
            return node

        # Traverse left or right based on the threshold
        if sample[node['feature']] < node['threshold']:
            return self._predict_single(node['left'], sample)
        return self._predict_single(node['right'], sample)




In [5]:
from sklearn.model_selection import train_test_split  # For splitting data
from sklearn.metrics import accuracy_score  # For evaluating model performance

# Step 1: Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    features_flattened,  # Input features (flattened ResNet outputs)
    y,  # Target labels
    test_size=0.2,  # 20% of the data is reserved for testing
    random_state=42  # Ensure reproducibility of splits
)

# Step 2: Create and train the decision tree classifier
tree = DecisionTree(max_depth=5)  # Initialize the decision tree with max depth of 5
tree.fit(X_train, y_train)  # Train the tree on the training data

# Step 3: Predict on the test set
y_pred = tree.predict(X_test)  # Generate predictions for the test data






[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275
Best_Feature: 275

ValueError: Classification metrics can't handle a mix of binary and continuous targets

In [7]:
from sklearn.metrics import classification_report, confusion_matrix

# Map continuous predictions to the nearest integer (class labels)
y_pred_discrete = np.rint(y_pred).astype(int)

# Use y_test directly if it's already integer labels
y_test_labels = y_test  # Ensure y_test is 1D with class labels

# Evaluate metrics
accuracy = accuracy_score(y_test_labels, y_pred_discrete)
print(f"Decision Tree Accuracy: {accuracy * 100:.2f}%")

# Detailed classification report
print("\nClassification Report:")
print(classification_report(y_test_labels, y_pred_discrete, target_names=lfw_dataset.target_names))

# Confusion matrix
print("\nConfusion Matrix:")
conf_matrix = confusion_matrix(y_test_labels, y_pred_discrete)
print(conf_matrix)


Decision Tree Accuracy: 85.71%

Classification Report:
               precision    recall  f1-score   support

 Colin Powell       0.80      0.70      0.74        46
George W Bush       0.88      0.93      0.90       108

     accuracy                           0.86       154
    macro avg       0.84      0.81      0.82       154
 weighted avg       0.85      0.86      0.85       154


Confusion Matrix:
[[ 32  14]
 [  8 100]]


In [12]:
from sklearn.datasets import fetch_lfw_people
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
import numpy as np

# Load the dataset
lfw_dataset = fetch_lfw_people(min_faces_per_person=150, resize=0.4)
X = lfw_dataset.images  # Grayscale images
y = lfw_dataset.target  # Labels
num_classes = len(lfw_dataset.target_names)

# Normalize images to [0, 1]
X = X / 255.0

# Reshape images to add channel dimension
X = np.expand_dims(X, axis=-1)  # Shape: (n_samples, height, width, 1)

# One-hot encode the labels
encoder = OneHotEncoder(sparse_output=False)
y_one_hot = encoder.fit_transform(y.reshape(-1, 1))

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y_one_hot, test_size=0.2, random_state=42)

print(f"Training data shape: {X_train.shape}, Training labels shape: {y_train.shape}")
print(f"Test data shape: {X_test.shape}, Test labels shape: {y_test.shape}")



Training data shape: (612, 50, 37, 1), Training labels shape: (612, 2)
Test data shape: (154, 50, 37, 1), Test labels shape: (154, 2)


In [13]:
from tensorflow.keras.models import Sequential  # Import Sequential model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout  # Import necessary layers

def create_simple_cnn(input_shape, num_classes):

    model = Sequential([  # Initialize a sequential model
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),  # Add a 2D convolutional layer
        MaxPooling2D((2, 2)),  # Add a max-pooling layer
        Flatten(),  # Flatten the 2D feature maps into a 1D vector
        Dense(128, activation='relu'),  # Add a fully connected layer with 128 neurons
        Dropout(0.5),  # Add a dropout layer to reduce overfitting
        Dense(num_classes, activation='softmax')  # Add the output layer with softmax activation
    ])

    # Compile the model with an optimizer, loss function, and evaluation metric
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    return model  # Return the compiled model



In [14]:
from tensorflow.keras.models import Sequential  # Import Sequential model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout  # Import layers

def create_deeper_cnn(input_shape, num_classes):

    model = Sequential([  # Initialize a sequential model
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),  # First convolutional layer with 32 filters
        MaxPooling2D((2, 2)),  # First max-pooling layer with a 2x2 window
        Conv2D(64, (3, 3), activation='relu'),  # Second convolutional layer with 64 filters
        MaxPooling2D((2, 2)),  # Second max-pooling layer with a 2x2 window
        Flatten(),  # Flatten the 2D feature maps into a 1D vector
        Dense(256, activation='relu'),  # Fully connected layer with 256 neurons
        Dropout(0.5),  # Dropout layer to prevent overfitting
        Dense(num_classes, activation='softmax')  # Output layer with softmax activation
    ])

    # Compile the model with optimizer, loss function, and evaluation metric
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    return model  # Return the compiled model



In [15]:
simple_cnn = create_simple_cnn(X_train.shape[1:], num_classes)
history_simple = simple_cnn.fit(X_train, y_train, validation_split=0.2, epochs=10, batch_size=32) #trains the simple CNN for 10 epochs


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 189ms/step - accuracy: 0.5881 - loss: 0.6605 - val_accuracy: 0.6829 - val_loss: 0.6248
Epoch 2/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7036 - loss: 0.6148 - val_accuracy: 0.6829 - val_loss: 0.6270
Epoch 3/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6890 - loss: 0.6193 - val_accuracy: 0.6829 - val_loss: 0.6246
Epoch 4/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6826 - loss: 0.6417 - val_accuracy: 0.6829 - val_loss: 0.6241
Epoch 5/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6507 - loss: 0.6622 - val_accuracy: 0.6829 - val_loss: 0.6246
Epoch 6/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6902 - loss: 0.6205 - val_accuracy: 0.6829 - val_loss: 0.6236
Epoch 7/10
[1m16/16[0m [32m━━━━━━━━

In [16]:
deeper_cnn = create_deeper_cnn(X_train.shape[1:], num_classes)
history_deeper = deeper_cnn.fit(X_train, y_train, validation_split=0.2, epochs=10, batch_size=32) #trains the deeper CNN model for 10 epochs


Epoch 1/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 297ms/step - accuracy: 0.6870 - loss: 0.6576 - val_accuracy: 0.6829 - val_loss: 0.6381
Epoch 2/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - accuracy: 0.6788 - loss: 0.6689 - val_accuracy: 0.6829 - val_loss: 0.6533
Epoch 3/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.6756 - loss: 0.6408 - val_accuracy: 0.6829 - val_loss: 0.6353
Epoch 4/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6871 - loss: 0.6351 - val_accuracy: 0.6829 - val_loss: 0.6248
Epoch 5/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7207 - loss: 0.6024 - val_accuracy: 0.6829 - val_loss: 0.6255
Epoch 6/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6919 - loss: 0.6245 - val_accuracy: 0.6829 - val_loss: 0.6245
Epoch 7/10
[1m16/16[0m [32m━━━━━━━━

In [21]:
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

# Evaluate Simple CNN
simple_cnn_predictions = np.argmax(simple_cnn.predict(X_test), axis=1)  # Convert probabilities to class predictions
y_test_labels = np.argmax(y_test, axis=1)  # Convert one-hot encoded labels back to integers
simple_cnn_accuracy = accuracy_score(y_test_labels, simple_cnn_predictions)
print(f"Simple CNN Accuracy: {simple_cnn_accuracy * 100:.2f}%")

print("\nSimple CNN Classification Report:")
print(classification_report(y_test_labels, simple_cnn_predictions, target_names=lfw_dataset.target_names))

print("\nSimple CNN Confusion Matrix:")
print(confusion_matrix(y_test_labels, simple_cnn_predictions))

# Evaluate Deeper CNN
deeper_cnn_predictions = np.argmax(deeper_cnn.predict(X_test), axis=1)  # Convert probabilities to class predictions
deeper_cnn_accuracy = accuracy_score(y_test_labels, deeper_cnn_predictions)
print(f"\nDeeper CNN Accuracy: {deeper_cnn_accuracy * 100:.2f}%")

print("\nDeeper CNN Classification Report:")
print(classification_report(y_test_labels, deeper_cnn_predictions, target_names=lfw_dataset.target_names))

print("\nDeeper CNN Confusion Matrix:")
print(confusion_matrix(y_test_labels, deeper_cnn_predictions))


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 202ms/step
Simple CNN Accuracy: 70.13%

Simple CNN Classification Report:
               precision    recall  f1-score   support

 Colin Powell       0.00      0.00      0.00        46
George W Bush       0.70      1.00      0.82       108

     accuracy                           0.70       154
    macro avg       0.35      0.50      0.41       154
 weighted avg       0.49      0.70      0.58       154


Simple CNN Confusion Matrix:
[[  0  46]
 [  0 108]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[1m1/5[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m0s[0m 233ms/step



[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step

Deeper CNN Accuracy: 70.13%

Deeper CNN Classification Report:
               precision    recall  f1-score   support

 Colin Powell       0.00      0.00      0.00        46
George W Bush       0.70      1.00      0.82       108

     accuracy                           0.70       154
    macro avg       0.35      0.50      0.41       154
 weighted avg       0.49      0.70      0.58       154


Deeper CNN Confusion Matrix:
[[  0  46]
 [  0 108]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
