In [None]:
import numpy as np

# Shuffle the training images and labels with ONE shared permutation, so every
# image stays paired with its label, then save the shuffled copies next to the
# originals.
DATA_DIR = "/content/drive/MyDrive/Untitled_folder"
SHUFFLE_SEED = 42  # fixed seed: Restart & Run All reproduces the same order

# Load the data from .npy files
x_train = np.load(f"{DATA_DIR}/x_train.npy")
y_train = np.load(f"{DATA_DIR}/y_train.npy")

# Ensure the data is consistent before indexing both arrays with the same indices
assert len(x_train) == len(y_train), "Mismatch in the number of images and labels"

# Draw a single, reproducible permutation of the row indices.
# A local Generator is used instead of the global np.random state so this cell
# gives the same result regardless of what other cells did to the global RNG.
rng = np.random.default_rng(SHUFFLE_SEED)
indices = rng.permutation(len(x_train))

# Apply the same permutation to both arrays (fancy indexing returns copies,
# so the loaded originals are left untouched)
x_train_shuffled = x_train[indices]
y_train_shuffled = y_train[indices]

# Shuffling must only reorder rows, never add or drop any
assert x_train_shuffled.shape == x_train.shape
assert y_train_shuffled.shape == y_train.shape

# Save the shuffled data to new .npy files
np.save(f"{DATA_DIR}/x_train_shuffled.npy", x_train_shuffled)
np.save(f"{DATA_DIR}/y_train_shuffled.npy", y_train_shuffled)

print("Data has been shuffled and saved successfully.")


Data has been shuffled and saved successfully.


**KERAS IMPLEMENTATION**

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Input
from tensorflow.keras.utils import to_categorical
import random
import os

# ---------------------------------------------------------------------------
# Train a small MLP (784 -> 512 -> 256 -> 10) on MNIST with plain SGD, saving
# the weights before and after training plus the final model.
# ---------------------------------------------------------------------------

# Tunable constants for this cell
SEED = 42
NUM_CLASSES = 10
EPOCHS = 35
BATCH_SIZE = 128

# Seed every random number generator used by this run
np.random.seed(SEED)
tf.random.set_seed(SEED)
random.seed(SEED)
# NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so setting
# it here presumably only affects child processes, not this kernel - confirm.
os.environ['PYTHONHASHSEED'] = str(SEED)

# Ask TensorFlow for deterministic op implementations
# (availability can vary with the TensorFlow version and hardware)
tf.config.experimental.enable_op_determinism()

# 1. Load and preprocess the data
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Scale pixel values into [0, 1] as float32
x_train, x_test = (
    images.astype('float32') / 255.0 for images in (x_train, x_test)
)

# One-hot encode the integer labels
y_train, y_test = (
    to_categorical(labels, NUM_CLASSES) for labels in (y_train, y_test)
)

# 2. Build the MLP: flatten each 28x28 image, two ReLU hidden layers,
#    softmax output with one unit per class
model = Sequential([
    Input(shape=(28, 28)),
    Flatten(),
    Dense(512, activation='relu'),
    Dense(256, activation='relu'),
    Dense(NUM_CLASSES, activation='softmax'),
])
model.summary()

# Snapshot the freshly initialised weights before any training step
initial_weights = model.get_weights()
np.savez('initial_weights.npz', *initial_weights)

# 3. Compile and 4. train
model.compile(
    optimizer='SGD',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
history = model.fit(
    x_train,
    y_train,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    verbose=2,
)

# Snapshot the weights after training
trained_weights = model.get_weights()
np.savez('trained_weights.npz', *trained_weights)

# 5. Evaluate on the held-out test split
test_loss, test_acc = model.evaluate(x_test, y_test, verbose=2)
print(f"Test accuracy: {test_acc * 100:.2f}%")

# Persist the trained model
model.save('mnist_MLP.keras')


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


Epoch 1/35
469/469 - 6s - 13ms/step - accuracy: 0.7408 - loss: 1.1558
Epoch 2/35
469/469 - 4s - 9ms/step - accuracy: 0.8831 - loss: 0.4578
Epoch 3/35
469/469 - 5s - 11ms/step - accuracy: 0.9006 - loss: 0.3629
Epoch 4/35
469/469 - 5s - 11ms/step - accuracy: 0.9101 - loss: 0.3212
Epoch 5/35
469/469 - 4s - 9ms/step - accuracy: 0.9169 - loss: 0.2946
Epoch 6/35
469/469 - 4s - 9ms/step - accuracy: 0.9226 - loss: 0.2747
Epoch 7/35
469/469 - 6s - 13ms/step - accuracy: 0.9272 - loss: 0.2584
Epoch 8/35
469/469 - 10s - 21ms/step - accuracy: 0.9308 - loss: 0.2445
Epoch 9/35
469/469 - 6s - 12ms/step - accuracy: 0.9343 - loss: 0.2323
Epoch 10/35
469/469 - 4s - 9ms/step - accuracy: 0.9377 - loss: 0.2214
Epoch 11/35
469/469 - 4s - 9ms/step - accuracy: 0.9404 - loss: 0.2115
Epoch 12/35
469/469 - 6s - 13ms/step - accuracy: 0.9432 - loss: 0.2025
Epoch 13/35
469/469 - 8s - 18ms/step - accuracy: 0.9455 - loss: 0.1942
Epoch 14/35
469/469 - 7s - 14ms/step - accuracy: 0.9475 - loss: 0.1866
Epoch 15/35
469/469

**RESULTS FOR FIRST TRAINING EXAMPLE (MANUAL FORWARD PASS WITH INITIAL WEIGHTS)**

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical

# Manual NumPy forward pass of the 3-layer MLP for the first MNIST training
# image, using weights loaded from an .npz archive, followed by the
# cross-entropy loss of that single prediction.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Normalize the images to a pixel value range of 0 to 1
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

# One-hot encode the labels
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

# Load initial weights.
# The archive is opened in a `with` block so the underlying file is closed
# again; every array is copied into a plain dict before that happens.
# allow_pickle is left at NumPy's safe default (False).
# NOTE(review): this assumes the archive holds only plain numeric arrays (as
# np.savez of a list of weight arrays would produce) - confirm.
with np.load('/content/drive/MyDrive/Untitled_folder/initial_weights.npz') as weight_archive:
    initial_weights = {key: weight_archive[key] for key in weight_archive.files}
weights = list(initial_weights.values())  # same order as stored in the archive

# Extract weights and biases (kernel, bias) for each of the three Dense layers
W1, b1, W2, b2, W3, b3 = weights[:6]

# Print weights of the first hidden layer
print(f"Weights of the first hidden layer (W1):\n{W1}")

# Flatten the first image in the training set
first_train_image = x_train[0:1]  # Shape (1, 28, 28)
first_train_image_flattened = first_train_image.flatten()
assert first_train_image_flattened.shape[0] == W1.shape[0], (
    "Flattened image length does not match the first layer's input size"
)
print(f"Flattened input image: {first_train_image_flattened}")

# Forward propagation

# First layer (Dense 512 -> ReLU)
z1 = np.dot(first_train_image_flattened, W1) + b1
a1 = np.maximum(0, z1)  # ReLU activation
print(f"z1 (pre-activation output of the first layer): {z1}")
print(f"a1 (activation output of the first layer): {a1}")

# Second layer (Dense 256 -> ReLU)
z2 = np.dot(a1, W2) + b2
a2 = np.maximum(0, z2)  # ReLU activation
print(f"z2 (pre-activation output of the second layer): {z2}")
print(f"a2 (activation output of the second layer): {a2}")

# Output layer (Dense 10 -> Softmax)
z3 = np.dot(a2, W3) + b3
# Subtracting max(z3) before exponentiating leaves the softmax unchanged
# mathematically but keeps np.exp from overflowing on large logits.
shifted_scores = z3 - np.max(z3)
exp_scores = np.exp(shifted_scores)
sum_exp_scores = np.sum(exp_scores)
train_predictions = exp_scores / sum_exp_scores  # Softmax activation
print(f"z3 (pre-activation output of the output layer): {z3}")
print(f"train_predictions (softmax probabilities): {train_predictions}")

# Get the predicted class
predicted_train_class = np.argmax(train_predictions)
print(f"The predicted class for the first training image (with initial weights) is: {predicted_train_class}")

# Print the actual class (as an integer index)
actual_class = np.argmax(y_train[0])
print(f"The actual class for the first training image is: {actual_class}")

# Compute the cross-entropy loss
# Cross-Entropy Loss = -log(p[true_class])
#                    = log(sum(exp(shifted))) - shifted[true_class]
# The log-sum-exp form avoids log(0) when the true-class probability underflows.
cross_entropy_loss = np.log(sum_exp_scores) - shifted_scores[actual_class]
print(f"Cross-Entropy Loss for the first training image: {cross_entropy_loss}")


Weights of the first hidden layer (W1):
[[ 0.04874337  0.02213094 -0.01922703 ...  0.06163257 -0.03144919
   0.0341397 ]
 [ 0.02751227  0.02611049 -0.00524846 ...  0.03215915 -0.03102985
  -0.00401989]
 [-0.01754742  0.05107565 -0.00734604 ...  0.03353318 -0.01523611
   0.02394588]
 ...
 [ 0.02318398  0.01186018  0.00903605 ...  0.03019822  0.02065135
  -0.05716718]
 [ 0.05829592  0.06271122  0.00899053 ...  0.01005762 -0.01094808
  -0.01063794]
 [-0.01252478 -0.05149334 -0.05023279 ...  0.0500096  -0.00848199
  -0.03685535]]
Flattened input image: [0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.       

**RESULTS FOR FORWARD PROPAGATION ON THE FIRST 5 TRAINING IMAGES**

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical

# Manual NumPy forward pass of the 3-layer MLP over the first few MNIST
# training images, computed as ONE batch (one row per image), followed by the
# per-image prediction and cross-entropy loss.
DATA_DIR = "/content/drive/MyDrive/Untitled_folder"
NUM_IMAGES = 5  # how many leading training images to run through the network

# Load MNIST dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Normalize the images to a pixel value range of 0 to 1
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

# One-hot encode the labels
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

# Load weights and biases directly from .npy files
W1 = np.load(f"{DATA_DIR}/hidden1_weights.npy")
b1 = np.load(f"{DATA_DIR}/hidden1_biases.npy")
W2 = np.load(f"{DATA_DIR}/hidden2_weights.npy")
b2 = np.load(f"{DATA_DIR}/hidden2_biases.npy")
W3 = np.load(f"{DATA_DIR}/output_weights.npy")
b3 = np.load(f"{DATA_DIR}/output_biases.npy")

# Flatten each image into one row: (NUM_IMAGES, 28, 28) -> (NUM_IMAGES, 784)
images = x_train[:NUM_IMAGES].reshape(NUM_IMAGES, -1)
assert images.shape[1] == W1.shape[0], (
    "Flattened image length does not match the first layer's input size"
)

# First layer (Dense -> ReLU); the bias vector broadcasts across the batch rows
z1 = np.dot(images, W1) + b1
a1 = np.maximum(0, z1)  # ReLU activation

# Second layer (Dense -> ReLU)
z2 = np.dot(a1, W2) + b2
a2 = np.maximum(0, z2)  # ReLU activation

# Output layer (Dense -> Softmax), normalised independently per row.
# Subtracting each row's max logit leaves the softmax unchanged mathematically
# but keeps np.exp from overflowing on large logits.
z3 = np.dot(a2, W3) + b3
shifted_scores = z3 - np.max(z3, axis=1, keepdims=True)
exp_scores = np.exp(shifted_scores)
sum_exp_scores = np.sum(exp_scores, axis=1, keepdims=True)
predictions = exp_scores / sum_exp_scores  # Softmax activation

# Predicted and actual class (integer index) for every image in the batch
predicted_classes = np.argmax(predictions, axis=1)
actual_classes = np.argmax(y_train[:NUM_IMAGES], axis=1)

# Cross-entropy loss per image:
#   -log(p[true_class]) = log(sum(exp(shifted))) - shifted[true_class]
# The log-sum-exp form avoids log(0) when the true-class probability underflows.
row_index = np.arange(NUM_IMAGES)
cross_entropy_losses = (
    np.log(sum_exp_scores[:, 0]) - shifted_scores[row_index, actual_classes]
)

# Print results image by image
for i, (actual_class, predicted_class, cross_entropy_loss) in enumerate(
    zip(actual_classes, predicted_classes, cross_entropy_losses)
):
    print(f"Image {i+1}:")
    print(f"  Actual class: {actual_class}")
    print(f"  Predicted class: {predicted_class}")
    print(f"  Cross-Entropy Loss: {cross_entropy_loss}\n")


Image 1:
  Actual class: 5
  Predicted class: 4
  Cross-Entropy Loss: 2.0678915977478027

Image 2:
  Actual class: 0
  Predicted class: 4
  Cross-Entropy Loss: 2.614464282989502

Image 3:
  Actual class: 4
  Predicted class: 4
  Cross-Entropy Loss: 2.0395843982696533

Image 4:
  Actual class: 1
  Predicted class: 5
  Cross-Entropy Loss: 2.2978529930114746

Image 5:
  Actual class: 9
  Predicted class: 4
  Cross-Entropy Loss: 2.650764226913452

