In [None]:
import numpy as np
from keras.datasets import cifar10
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

# Load CIFAR-10 dataset
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Preprocess the data
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

# Reshape and one-hot encode the labels
x_train = x_train.reshape(x_train.shape[0], -1)
x_test = x_test.reshape(x_test.shape[0], -1)
encoder = OneHotEncoder(sparse=False)
y_train = encoder.fit_transform(y_train)
y_test = encoder.transform(y_test)

# Split the training data into training and validation sets
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.2, random_state=42)

# Function to initialize weights and biases
def initialize_parameters(input_size, hidden_size1, hidden_size2, output_size):
    np.random.seed(42)
    weights_input_hidden1 = np.random.randn(input_size, hidden_size1)
    biases_input_hidden1 = np.zeros((1, hidden_size1))
    weights_hidden1_hidden2 = np.random.randn(hidden_size1, hidden_size2)
    biases_hidden1_hidden2 = np.zeros((1, hidden_size2))
    weights_hidden2_output = np.random.randn(hidden_size2, output_size)
    biases_hidden2_output = np.zeros((1, output_size))
    return weights_input_hidden1, biases_input_hidden1, weights_hidden1_hidden2, biases_hidden1_hidden2, weights_hidden2_output, biases_hidden2_output

# Activation functions
def relu(x):
    return np.maximum(0, x)

def relu_derivative(x):
    return np.where(x > 0, 1, 0)

def softmax(x):
    exp_values = np.exp(x - np.max(x, axis=1, keepdims=True))
    return exp_values / np.sum(exp_values, axis=1, keepdims=True)

def softmax_derivative(x):
    return x * (1 - x)

# Initialize the neural network architecture with two hidden layers
input_size = x_train.shape[1]
hidden_size1 = 256
hidden_size2 = 128
output_size = 10
learning_rate = 0.01

weights_input_hidden1, biases_input_hidden1, weights_hidden1_hidden2, biases_hidden1_hidden2, weights_hidden2_output, biases_hidden2_output = initialize_parameters(
    input_size, hidden_size1, hidden_size2, output_size
)

# Training loop
num_epochs = 5

for epoch in range(num_epochs):
    # Forward propagation
    hidden1_input = np.dot(x_train, weights_input_hidden1) + biases_input_hidden1
    hidden1_output = relu(hidden1_input)
    hidden2_input = np.dot(hidden1_output, weights_hidden1_hidden2) + biases_hidden1_hidden2
    hidden2_output = relu(hidden2_input)
    output_layer_input = np.dot(hidden2_output, weights_hidden2_output) + biases_hidden2_output
    output_layer_output = softmax(output_layer_input)

    # Calculate the loss
    loss = -1 * np.sum(y_train * np.log(output_layer_output)) / len(x_train)

    # Backpropagation
    dloss_doutput = output_layer_output - y_train
    doutput_dout_layer_input = softmax_derivative(output_layer_output)

    # Gradients for the second hidden layer
    dout_layer_input_dweights_hidden2_output = hidden2_output.T
    dloss_dweights_hidden2_output = np.dot(
        dout_layer_input_dweights_hidden2_output, (dloss_doutput * doutput_dout_layer_input)
    )
    dloss_dbiases_hidden2_output = np.sum(
        dloss_doutput * doutput_dout_layer_input, axis=0, keepdims=True
    )
    dloss_dhidden2_output = np.dot(
        dloss_doutput * doutput_dout_layer_input, weights_hidden2_output.T
    )
    dhidden2_output_dhidden2_input = relu_derivative(hidden2_output)

    # Gradients for the first hidden layer
    dhidden2_input_dweights_hidden1_hidden2 = hidden1_output.T
    dloss_dweights_hidden1_hidden2 = np.dot(
        dhidden2_input_dweights_hidden1_hidden2, (dloss_dhidden2_output * dhidden2_output_dhidden2_input)
    )
    dloss_dbiases_hidden1_hidden2 = np.sum(
        dloss_dhidden2_output * dhidden2_output_dhidden2_input, axis=0, keepdims=True
    )
    dloss_dhidden1_output = np.dot(
        dloss_dhidden2_output * dhidden2_output_dhidden2_input, weights_hidden1_hidden2.T
    )
    dhidden1_output_dhidden1_input = relu_derivative(hidden1_output)

    # Gradients for the input layer
    dhidden1_input_dweights_input_hidden1 = x_train.T
    dloss_dweights_input_hidden1 = np.dot(
        dhidden1_input_dweights_input_hidden1, (dloss_dhidden1_output * dhidden1_output_dhidden1_input)
    )
    dloss_dbiases_input_hidden1 = np.sum(
        dloss_dhidden1_output * dhidden1_output_dhidden1_input, axis=0, keepdims=True
    )

    # Update weights and biases
    weights_input_hidden1 -= learning_rate * dloss_dweights_input_hidden1
    biases_input_hidden1 -= learning_rate * dloss_dbiases_input_hidden1
    weights_hidden1_hidden2 -= learning_rate * dloss_dweights_hidden1_hidden2
    biases_hidden1_hidden2 -= learning_rate * dloss_dbiases_hidden1_hidden2
    weights_hidden2_output -= learning_rate * dloss_dweights_hidden2_output
    biases_hidden2_output -= learning_rate * dloss_dbiases_hidden2_output

    # Validation accuracy
    #The validation accuracy is the accuracy of the model on the validation set( Validation dataset is a set which is made separately from the training dataset to avoid overfitting) 
    #It is a measure of how well the model generalizes to new, unseen data.
    # Training is often stopped when the validation accuracy stops
    #improving or starts decreasing, indicating that further training may lead to overfitting.
    correct = 0
    for i in range(len(x_val)):
        hidden1_input = np.dot(x_val[i], weights_input_hidden1) + biases_input_hidden1
        hidden1_output = relu(hidden1_input)
        hidden2_input = np.dot(hidden1_output, weights_hidden1_hidden2) + biases_hidden1_hidden2
        hidden2_output = relu(hidden2_input)
        output_layer_input = np.dot(hidden2_output, weights_hidden2_output) + biases_hidden2_output
        output_layer_output = softmax(output_layer_input)
        predicted_class = np.argmax(output_layer_output)
        true_class = np.argmax(y_val[i])
        if predicted_class == true_class:
            correct += 1
    val_accuracy = correct / len(x_val)
    print(f"Epoch {epoch + 1}/{num_epochs}, Validation Accuracy: {val_accuracy * 100:.2f}%")

# Test accuracy
correct = 0
for i in range(len(x_test)):
    hidden1_input = np.dot(x_test[i], weights_input_hidden1) + biases_input_hidden1
    hidden1_output = relu(hidden1_input)
    hidden2_input = np.dot(hidden1_output, weights_hidden1_hidden2) + biases_hidden1_hidden2
    hidden2_output = relu(hidden2_input)
    output_layer_input = np.dot(hidden2_output, weights_hidden2_output) + biases_hidden2_output
    output_layer_output = softmax(output_layer_input)
    predicted_class = np.argmax(output_layer_output)
    true_class = np.argmax(y_test[i])

    if predicted_class == true_class:
        correct += 1
test_accuracy = correct / len(x_test)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")


# Import the data

In [14]:
import numpy as np
from keras.datasets import cifar10
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

# Importing the data from keras 
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Preprocessing the data

In [15]:
# Normalizing the data
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

# Reshaping the data 
x_train = x_train.reshape(x_train.shape[0], -1)
x_test = x_test.reshape(x_test.shape[0], -1)

# In the CIFAR-10 dataset:
# The original class labels are integers representing the class index (e.g., 0 for "airplane," 1 for "automobile," etc).
# One-hot encoding is used to convert these integer class labels into binary vectors.
# For instance, if there are 10 classes in total, the one-hot encoding converts each class label into a 10-dimensional binary vector.
# It is used when categorical cross entropy loss is used.
encoder = OneHotEncoder(sparse=False)
y_train = encoder.fit_transform(y_train)
y_test = encoder.transform(y_test)

# Data preparation

In [16]:
# We split the data as 80% training data and 20% test data using a function under keras
# randomstate is used to specify whether we want the data to be randomly split or it should be recurring if seeded.
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.2, random_state=None)

In [17]:
# This function initializes the weights and biases for each layer of the neural network.
def initialize_parameters(input_size, hidden_size1, hidden_size2, output_size):
    np.random.seed(42)
    weights_input_hidden1 = np.random.randn(input_size, hidden_size1)
    biases_input_hidden1 = np.zeros((1, hidden_size1))
    weights_hidden1_hidden2 = np.random.randn(hidden_size1, hidden_size2)
    biases_hidden1_hidden2 = np.zeros((1, hidden_size2))
    weights_hidden2_output = np.random.randn(hidden_size2, output_size)
    biases_hidden2_output = np.zeros((1, output_size))
    return weights_input_hidden1, biases_input_hidden1, weights_hidden1_hidden2, biases_hidden1_hidden2, weights_hidden2_output, biases_hidden2_output


# Activation Function

In [18]:
def relu(x):
    return np.maximum(0, x)

def relu_derivative(x):
    return np.where(x > 0, 1, 0)

def softmax(x):
    exp_values = np.exp(x - np.max(x, axis=1, keepdims=True))
    return exp_values / np.sum(exp_values, axis=1, keepdims=True)

def softmax_derivative(x):
    return x * (1 - x)

# Neural Network Architecture

In [19]:
input_size = x_train.shape[1]
hidden_size1 = 256
hidden_size2 = 128
output_size = 10
learning_rate = 0.01

weights_input_hidden1, biases_input_hidden1, weights_hidden1_hidden2, biases_hidden1_hidden2, weights_hidden2_output, biases_hidden2_output = initialize_parameters(
    input_size, hidden_size1, hidden_size2, output_size
)
# The above model has two hidden layers and hence two different sets of parameters are initialized.


# Train the data

In [None]:
# Number of iterations
num_epochs = 5

# We are using batch gradient descent which uses entire dataset to compute the gradient of the loss function in each iteration.

for epoch in range(num_epochs):
    # Forward propagation
    hidden1_input = np.dot(x_train, weights_input_hidden1) + biases_input_hidden1
    hidden1_output = relu(hidden1_input)
    hidden2_input = np.dot(hidden1_output, weights_hidden1_hidden2) + biases_hidden1_hidden2
    hidden2_output = relu(hidden2_input)
    output_layer_input = np.dot(hidden2_output, weights_hidden2_output) + biases_hidden2_output
    output_layer_output = softmax(output_layer_input)

    # Calculate the loss( Categorical Cross entropy loss function)
    loss = -1 * np.sum(y_train * np.log(output_layer_output)) / len(x_train)

    # Backpropagation
    dloss_doutput = output_layer_output - y_train
    doutput_dout_layer_input = softmax_derivative(output_layer_output)

    # Gradients for the second hidden layer
    dout_layer_input_dweights_hidden2_output = hidden2_output.T
    dloss_dweights_hidden2_output = np.dot(
        dout_layer_input_dweights_hidden2_output, (dloss_doutput * doutput_dout_layer_input)
    )
    dloss_dbiases_hidden2_output = np.sum(
        dloss_doutput * doutput_dout_layer_input, axis=0, keepdims=True
    )
    dloss_dhidden2_output = np.dot(
        dloss_doutput * doutput_dout_layer_input, weights_hidden2_output.T
    )
    dhidden2_output_dhidden2_input = relu_derivative(hidden2_output)

    # Gradients for the first hidden layer
    dhidden2_input_dweights_hidden1_hidden2 = hidden1_output.T
    dloss_dweights_hidden1_hidden2 = np.dot(
        dhidden2_input_dweights_hidden1_hidden2, (dloss_dhidden2_output * dhidden2_output_dhidden2_input)
    )
    dloss_dbiases_hidden1_hidden2 = np.sum(
        dloss_dhidden2_output * dhidden2_output_dhidden2_input, axis=0, keepdims=True
    )
    dloss_dhidden1_output = np.dot(
        dloss_dhidden2_output * dhidden2_output_dhidden2_input, weights_hidden1_hidden2.T
    )
    dhidden1_output_dhidden1_input = relu_derivative(hidden1_output)

    # Gradients for the input layer
    dhidden1_input_dweights_input_hidden1 = x_train.T
    dloss_dweights_input_hidden1 = np.dot(
        dhidden1_input_dweights_input_hidden1, (dloss_dhidden1_output * dhidden1_output_dhidden1_input)
    )
    dloss_dbiases_input_hidden1 = np.sum(
        dloss_dhidden1_output * dhidden1_output_dhidden1_input, axis=0, keepdims=True
    )

    # Update weights and biases
    weights_input_hidden1 -= learning_rate * dloss_dweights_input_hidden1
    biases_input_hidden1 -= learning_rate * dloss_dbiases_input_hidden1
    weights_hidden1_hidden2 -= learning_rate * dloss_dweights_hidden1_hidden2
    biases_hidden1_hidden2 -= learning_rate * dloss_dbiases_hidden1_hidden2
    weights_hidden2_output -= learning_rate * dloss_dweights_hidden2_output
    biases_hidden2_output -= learning_rate * dloss_dbiases_hidden2_output

    # Validation accuracy
    #The validation accuracy is the accuracy of the model on the validation set( Validation dataset is a set which is made separately from the training dataset to avoid overfitting) 
    #It is a measure of how well the model generalizes to new, unseen data.
    # Training is often stopped when the validation accuracy stops
    #improving or starts decreasing, indicating that further training may lead to overfitting.
    correct = 0
    for i in range(len(x_val)):
        hidden1_input = np.dot(x_val[i], weights_input_hidden1) + biases_input_hidden1
        hidden1_output = relu(hidden1_input)
        hidden2_input = np.dot(hidden1_output, weights_hidden1_hidden2) + biases_hidden1_hidden2
        hidden2_output = relu(hidden2_input)
        output_layer_input = np.dot(hidden2_output, weights_hidden2_output) + biases_hidden2_output
        output_layer_output = softmax(output_layer_input)
        predicted_class = np.argmax(output_layer_output)
        true_class = np.argmax(y_val[i])
        if predicted_class == true_class:
            correct += 1
    val_accuracy = correct / len(x_val)
    print(f"Epoch {epoch + 1}/{num_epochs}, Validation Accuracy: {val_accuracy * 100:.2f}%")


  loss = -1 * np.sum(y_train * np.log(output_layer_output)) / len(x_train)
  loss = -1 * np.sum(y_train * np.log(output_layer_output)) / len(x_train)


Epoch 1/5, Validation Accuracy: 10.35%
Epoch 2/5, Validation Accuracy: 10.16%
Epoch 3/5, Validation Accuracy: 10.16%


# Evaluation of the model

In [None]:
# Accuracy
correct = 0
for i in range(len(x_test)):
    hidden1_input = np.dot(x_test[i], weights_input_hidden1) + biases_input_hidden1
    hidden1_output = relu(hidden1_input)
    hidden2_input = np.dot(hidden1_output, weights_hidden1_hidden2) + biases_hidden1_hidden2
    hidden2_output = relu(hidden2_input)
    output_layer_input = np.dot(hidden2_output, weights_hidden2_output) + biases_hidden2_output
    output_layer_output = softmax(output_layer_input)
    predicted_class = np.argmax(output_layer_output)
    true_class = np.argmax(y_test[i])

    if predicted_class == true_class:
        correct += 1
test_accuracy = correct / len(x_test)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")