Create the multilayer perceptron and training


In [34]:
from keras.utils import to_categorical
import time
from keras.datasets import cifar10
from sklearn.metrics import accuracy_score


class MultiLayerPerceptron:
    def __init__(self, input_size, hidden_sizes, output_size, activations):
        # Initialization of parameters
        self.input_size = input_size
        self.hidden_sizes = hidden_sizes
        self.output_size = output_size
        self.activations = activations
        self.learning_rate = 0.01

        self.weights_hidden = []
        self.biases_hidden = []

        # Initialize weights and biases for the first hidden layer
        self.weights_hidden.append(np.random.randn(self.input_size, self.hidden_sizes[0]) * np.sqrt(2. / self.input_size))
        self.biases_hidden.append(np.zeros((1, self.hidden_sizes[0])))

        # Initialize weights and biases for the second hidden layer
        self.weights_hidden.append(np.random.randn(self.hidden_sizes[0], self.hidden_sizes[1]) * np.sqrt(2. / self.hidden_sizes[0]))
        self.biases_hidden.append(np.zeros((1, self.hidden_sizes[1])))

        # He initialization for weights in the output layer
        self.weights_output = np.random.randn(self.hidden_sizes[1], self.output_size) * np.sqrt(2. / self.hidden_sizes[1])
        self.bias_output = np.zeros((1, self.output_size))

    def forward_propagation(self, X):
        # Forward propagation through the network

        # Initialize the list to store the outputs of each layer
        self.hidden_layer_outputs = [X]
        # Loop through hidden layers
        for i in range(len(self.weights_hidden)):
             # Calculate the input to the hidden layer, apply activ function and store the output
            hidden_layer_input = np.dot(self.hidden_layer_outputs[i], self.weights_hidden[i]) + self.biases_hidden[i]
            hidden_layer_output = self.activation_function(hidden_layer_input, self.activations[i])
            self.hidden_layer_outputs.append(hidden_layer_output)

        # Calculate the input to the output layer, apply activ function
        output_layer_input = np.dot(self.hidden_layer_outputs[-1], self.weights_output) + self.bias_output
        predicted_output = self.activation_function(output_layer_input, 'sigmoid')

        return predicted_output

    def backward_propagation(self, X, y, output):
        # Backward propagation through the network

        errors = [y - output]
        # Loop through hidden layers in reverse order
        for i in range(len(self.weights_hidden)-1, 0, -1):
            # Calculate the error at the hidden layer and apply activ function
            error_hidden_layer = errors[0].dot(self.weights_output.T)
            derivative_hidden_layer = self.activation_derivative(self.hidden_layer_outputs[i], self.activations[i-1])
            errors.insert(0, error_hidden_layer * derivative_hidden_layer)

            # Update weights and biases for the hidden layer
            self.weights_hidden[i-1] += self.learning_rate * self.hidden_layer_outputs[i-1].T.dot(errors[0])
            self.biases_hidden[i-1] += self.learning_rate * np.sum(errors[0], axis=0, keepdims=True)

        # Update weights and biases for the output layer
        self.weights_output += self.learning_rate * self.hidden_layer_outputs[-1].T.dot(errors[-1])
        self.bias_output += self.learning_rate * np.sum(errors[-1], axis=0, keepdims=True)


    def activation_function(self, x, activation):
        # the activation functions
        if activation == 'sigmoid':
            return 1 / (1 + np.exp(-x))
        elif activation == 'relu':
            return np.maximum(0, x)
        else:
            raise ValueError("Activation function not recognized.")

    def activation_derivative(self, x, activation):
        # derivative of two functions
        if activation == 'sigmoid':
            return x * (1 - x)
        elif activation == 'relu':
            return 1. * (x > 0)
        else:
            raise ValueError("Activation function not recognized.")

    def evaluate(self, X, y, phase):

        # Forward propagate to get predictions
        output = self.forward_propagation(X)
        # Convert predictions to class labels
        predictions = np.argmax(output, axis=1)
        accuracy = accuracy_score(y, predictions)

        # Identify correctly and incorrectly classified examples
        correct_indices = np.where(predictions == y)[0]
        incorrect_indices = np.where(predictions != y)[0]

        print(f"{phase.capitalize()} Set - Accuracy: {accuracy:.4f}")

        if phase == 'training':
            print(f"Training Time: {self.training_time:.2f} seconds")

        if phase == 'testing':
            print("Examples of Correct Categorization:")
            for i in range(min(5, len(correct_indices))):
                print(f"Predicted: {predictions[correct_indices[i]]}, Actual: {y[correct_indices[i]]}")

            print("\nExamples of Incorrect Categorization:")
            for i in range(min(5, len(incorrect_indices))):
                print(f"Predicted: {predictions[incorrect_indices[i]]}, Actual: {y[incorrect_indices[i]]}")

    def train(self, X, y, epochs, learning_rate, batch_size=None, print_every=500):
        # training of the neural network

        self.learning_rate = learning_rate
        y_one_hot = to_categorical(y, num_classes=self.output_size)

        start_time = time.time()

        # The training algorithm starts
        for epoch in range(epochs):
            total_loss = 0

            # Iterate through the dataset in batches
            for i in range(0, len(X), batch_size):
                X_batch = X[i:i + batch_size] if batch_size else X
                y_batch = y_one_hot[i:i + batch_size] if batch_size else y_one_hot
                # Forward propagation to get predictions
                output = self.forward_propagation(X_batch)
                # Backward propagation to update weights and biases
                self.backward_propagation(X_batch, y_batch, output)

                # Calculate the cross-entropy loss
                loss = -np.sum(y_batch * np.log(np.clip(output, 1e-10, 1.0 - 1e-10))) / len(X_batch)
                total_loss += loss

            # Calculate the average loss for the epoch
            average_loss = total_loss / (len(X) // batch_size) if batch_size else total_loss / len(X)

            # Print progress every print_every epochs
            if (epoch + 1) % print_every == 0:
                self.training_time = time.time() - start_time
                print(f"Epoch {epoch + 1}/{epochs}")
                print(f"Loss: {average_loss:.3f}")  # Print loss before evaluation

                # Evaluate and print examples
                self.evaluate(X, y, 'training')
                self.evaluate(X, y, 'testing')

                print("\n")  # Add a newline for better readability
                start_time = time.time()  # Reset start_time for the next print interval

        self.training_time = time.time() - start_time
        print(f"Epoch {epochs}/{epochs}")
        self.evaluate(X, y, 'training')
        self.evaluate(X, y, 'testing')
        print(f"Loss: {average_loss:.3f}\n")

Import data and extract specific features

In [35]:
from sklearn.decomposition import PCA
from keras.utils import to_categorical

# Load CIFAR-10 dataset
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

# Preprocess and normalize pixel values
X_train = X_train.reshape((50000, 32 * 32 * 3)).astype('float32') / 255
X_test = X_test.reshape((10000, 32 * 32 * 3)).astype('float32') / 255
y_train = y_train.reshape((50000,))
y_test = y_test.reshape((10000,))

# Extract features using PCA
def extract_features(images, n_components=64):
    features = []
    for img in images:
        mean_brightness_rows = np.mean(img, axis=1).flatten()
        mean_brightness_cols = np.mean(img, axis=0).flatten()
        image_features = np.concatenate((mean_brightness_rows.reshape(-1), mean_brightness_cols.reshape(-1)))

        features.append(image_features)
    features = np.array(features)

    # Apply PCA for dimensionality reduction
    pca = PCA(n_components=n_components)
    features_pca = pca.fit_transform(features)

    return features_pca

# Extract features from training and test images using PCA
train_features = extract_features(X_train.reshape((50000, 32, 32, 3)))
test_features = extract_features(X_test.reshape((10000, 32, 32, 3)))

# Convert labels to one-hot encoding
y_train_one_hot = to_categorical(y_train, num_classes=10)
y_test_one_hot = to_categorical(y_test, num_classes=10)


Creation of an instance of MultiLayerPerceptron and its evaluation

In [36]:
import numpy as np
from keras.utils import to_categorical
import time
from sklearn.metrics import accuracy_score

# Create a new instance of  MultiLayerPerceptron
nn = MultiLayerPerceptron(input_size=train_features.shape[1], hidden_sizes=[128,128], output_size=10, activations=['sigmoid', 'sigmoid'])

# Train the network
nn.train(train_features, y_train, epochs=10000, learning_rate=0.001, batch_size=32, print_every=500)

# Evaluate on training set
start_time = time.time()
y_pred_train = nn.forward_propagation(train_features)
training_time = time.time() - start_time
accuracy_train = accuracy_score(y_train, np.argmax(y_pred_train, axis=1))

# Evaluate on testing set
y_pred_test = nn.forward_propagation(test_features)
accuracy_test = accuracy_score(y_test, np.argmax(y_pred_test, axis=1))

# Print results
print(f"Activation Hidden: {'sigmoid'}, Activation Output: {'sigmoid'}, Hidden Neurons: {[128,128]}, Learning Rate: {0.001}")
print(f"Training Accuracy: {accuracy_train:.3f}, Test Accuracy: {accuracy_test:.3f}, Training Time: {training_time:.2f} seconds")
print("------------------------------------------------------------")



Epoch 500/10000
Loss: 1.496
Training Set - Accuracy: 0.4863
Training Time: 333.72 seconds
Testing Set - Accuracy: 0.4863
Examples of Correct Categorization:
Predicted: 6, Actual: 6
Predicted: 1, Actual: 1
Predicted: 2, Actual: 2
Predicted: 7, Actual: 7
Predicted: 8, Actual: 8

Examples of Incorrect Categorization:
Predicted: 1, Actual: 9
Predicted: 8, Actual: 9
Predicted: 6, Actual: 4
Predicted: 7, Actual: 1
Predicted: 7, Actual: 3


Epoch 1000/10000
Loss: 1.434
Training Set - Accuracy: 0.5159
Training Time: 331.41 seconds
Testing Set - Accuracy: 0.5159
Examples of Correct Categorization:
Predicted: 6, Actual: 6
Predicted: 2, Actual: 2
Predicted: 7, Actual: 7
Predicted: 8, Actual: 8
Predicted: 7, Actual: 7

Examples of Incorrect Categorization:
Predicted: 1, Actual: 9
Predicted: 8, Actual: 9
Predicted: 6, Actual: 4
Predicted: 8, Actual: 1
Predicted: 7, Actual: 1


Epoch 1500/10000
Loss: 1.402
Training Set - Accuracy: 0.5248
Training Time: 331.00 seconds
Testing Set - Accuracy: 0.5248
E

K Nearest Neighbors Classifier (from scratch)

In [37]:
class KNNClassifier:
    def __init__(self, k):
        self.k = k

    def fit(self, X, y):
        self.X_train = X
        self.y_train = y

    def predict(self, X):
        predictions = []
        for sample in X:
            distances = np.sqrt(np.sum((self.X_train - sample) ** 2, axis=1))
            nearest_neighbors = np.argsort(distances)[:self.k]
            nearest_labels = self.y_train[nearest_neighbors]
            unique, counts = np.unique(nearest_labels, return_counts=True)
            predicted_label = unique[np.argmax(counts)]
            predictions.append(predicted_label)
        return np.array(predictions)


Nearest Centroid Classifier (from scratch)

In [38]:
class NearestCenterClassifier:
    def __init__(self):
        self.centers = None

    def fit(self, X, y):
        self.centers = []
        for i in range(10):
            center = np.mean(X[y == i], axis=0)
            self.centers.append(center)

    def predict(self, X):
        predictions = []
        for sample in X:
            distances = [np.linalg.norm(sample - center[:X.shape[1]]) for center in self.centers]
            predicted_label = np.argmin(distances)
            predictions.append(predicted_label)
        return np.array(predictions)

KNN for 1-neighbor and results

In [39]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
k_value = 1
knn1 = KNNClassifier(k=k_value)
knn1.fit(train_features, y_train)
knn1_predictions = knn1.predict(test_features)

accuracy_knn1 = accuracy_score(y_test, knn1_predictions)
print(f"Accuracy of KNN (k={k_value}): {accuracy_knn1}")
weighted_recall_knn1 = recall_score(y_test, knn1_predictions, average='weighted')
print(f"Weighted recall score of KNN (k={k_value}): {weighted_recall_knn1}")
macro_recall_knn1 = recall_score(y_test, knn1_predictions, average='macro')
print(f"Macro recall score of KNN (k={k_value}): {macro_recall_knn1}")
weighted_f1score_knn1 = f1_score(y_test, knn1_predictions, average='weighted')
print(f"Weighted f1 score of KNN (k={k_value}): {weighted_f1score_knn1}")
macro_f1score_knn1 = f1_score(y_test, knn1_predictions, average='macro')
print(f"Macro f1 score of KNN (k={k_value}): {macro_f1score_knn1}")

Accuracy of KNN (k=1): 0.303
Weighted recall score of KNN (k=1): 0.303
Macro recall score of KNN (k=1): 0.30300000000000005
Weighted f1 score of KNN (k=1): 0.30233528841172397
Macro f1 score of KNN (k=1): 0.3023352884117239


KNN for 3-neighbors and results

In [40]:
k_value = 3
knn3 = KNNClassifier(k=k_value)
knn3.fit(train_features, y_train)
knn3_predictions = knn3.predict(test_features)

accuracy_knn3 = accuracy_score(y_test, knn3_predictions)
print(f"Accuracy of KNN (k={k_value}): {accuracy_knn3}")
weighted_recall_knn3 = recall_score(y_test, knn3_predictions, average='weighted')
print(f"Weighted recall score of KNN (k={k_value}): {weighted_recall_knn3}")
macro_recall_knn3 = recall_score(y_test, knn3_predictions, average='macro')
print(f"Macro recall score of KNN (k={k_value}): {macro_recall_knn3}")
weighted_f1score_knn3 = f1_score(y_test, knn3_predictions, average='weighted')
print(f"Weighted f1 score of KNN (k={k_value}): {weighted_f1score_knn3}")
macro_f1score_knn3 = f1_score(y_test, knn3_predictions, average='macro')
print(f"Macro f1 score of KNN (k={k_value}): {macro_f1score_knn3}")

Accuracy of KNN (k=3): 0.3111
Weighted recall score of KNN (k=3): 0.3111
Macro recall score of KNN (k=3): 0.3111
Weighted f1 score of KNN (k=3): 0.3062192987148664
Macro f1 score of KNN (k=3): 0.3062192987148664


NC and results


In [41]:
from sklearn.neighbors import NearestCentroid

nc = NearestCenterClassifier()
nc.fit(train_features, y_train)
nc_predictions = nc.predict(test_features)

accuracy_nc = accuracy_score(y_test, nc_predictions)
print(f"Accuracy of Nearest Center Classifier: {accuracy_nc}")
weighted_recall_nc = recall_score(y_test, nc_predictions, average='weighted')
print(f"Weighted recall score of Nearest Center Classifier: {weighted_recall_nc}")
macro_recall_nc = recall_score(y_test, nc_predictions, average='macro')
print(f"Macro recall score of Nearest Center Classifier: {macro_recall_nc}")
weighted_f1score_nc = f1_score(y_test, nc_predictions, average='weighted')
print(f"Weighted f1 score of Nearest Center Classifier: {weighted_f1score_nc}")
macro_f1score_nc = f1_score(y_test, nc_predictions, average='macro')
print(f"Macro f1 score of Nearest Center Classifier: {macro_f1score_nc}")

Accuracy of Nearest Center Classifier: 0.2498
Weighted recall score of Nearest Center Classifier: 0.2498
Macro recall score of Nearest Center Classifier: 0.24980000000000002
Weighted f1 score of Nearest Center Classifier: 0.22298517407375856
Macro f1 score of Nearest Center Classifier: 0.2229851740737586
