In [1]:
# importing required modules
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.datasets import fashion_mnist

2024-10-22 14:04:38.094681: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
import ssl

# WARNING: this replaces the factory used for default HTTPS contexts with the
# *unverified* one, so HTTPS requests relying on the default context skip
# certificate verification for the rest of the session. Presumably this is a
# workaround so the dataset download succeeds on machines with missing root
# certificates -- confirm before reusing it anywhere else.
if hasattr(ssl, "_create_unverified_context"):
    # Environment exposes the unverified-context factory: install it as default.
    _create_unverified_https_context = ssl._create_unverified_context
    ssl._create_default_https_context = _create_unverified_https_context
# Otherwise (legacy Python that doesn't verify HTTPS certificates by default)
# there is nothing to override.

In [3]:
# load Fashion-MNIST; load_data() yields one (images, labels) pair per split
train_split, test_split = fashion_mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

In [4]:
# sanity check: every split must have the expected shape before preprocessing
for loaded_array, expected_shape in (
    (x_train, (60000, 28, 28)),
    (x_test, (10000, 28, 28)),
    (y_train, (60000,)),
    (y_test, (10000,)),
):
    assert loaded_array.shape == expected_shape

In [5]:
# flattening and normalizing the input data
# Guarded on ndim so the cell is idempotent: the images start out 3-D
# (samples, 28, 28); once flattened they are 2-D, so re-running this cell
# no longer divides the already-scaled pixel values by 255 a second time.
if x_train.ndim == 3:
    x_train = x_train.reshape(x_train.shape[0], -1) / 255.0
if x_test.ndim == 3:
    x_test = x_test.reshape(x_test.shape[0], -1) / 255.0

In [6]:
# one-hot encoding via row lookup in an identity matrix
def one_hot_encode(y, num_classes):
    """Return the one-hot rows for the integer labels in ``y``.

    Row ``k`` of a ``num_classes`` x ``num_classes`` identity matrix is the
    one-hot vector for class ``k``; indexing that matrix with ``y`` picks one
    such row per label.
    """
    identity = np.identity(num_classes)
    return identity[y]

In [7]:
# one-hot encode the integer labels of both splits (10 Fashion-MNIST clothing categories)
y_train_encoded, y_test_encoded = (
    one_hot_encode(labels, 10) for labels in (y_train, y_test)
)

In [8]:
class NeuralNetwork:
    """Fully connected classifier with one tanh hidden layer and a softmax output.

    Training is full-batch gradient descent: every epoch runs one forward pass
    over all of ``X`` and one parameter update. Optional L1 and L2 penalties
    are applied to the two weight matrices (the biases are not penalized).
    """

    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.01,
                 l1_lambda=0.0, l2_lambda=0.0, seed=None):
        """Create the parameters.

        Args:
            input_size: Number of input features per sample.
            hidden_size: Number of hidden units.
            output_size: Number of output classes.
            learning_rate: Step size used by ``backward``.
            l1_lambda: Coefficient of the L1 penalty gradient ``sign(W)``.
            l2_lambda: Coefficient of the L2 penalty gradient ``W``.
            seed: Optional seed for the weight initialization. ``None`` (the
                default) draws from NumPy's global random state, exactly as
                before; an integer makes the initial weights reproducible.
        """
        # np.random (module) and RandomState both expose randn(), so the
        # unseeded path keeps consuming the global stream unchanged.
        rng = np.random if seed is None else np.random.RandomState(seed)
        # Small random weights break symmetry between hidden units; biases start at 0.
        self.W1 = rng.randn(input_size, hidden_size) * 0.01
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = rng.randn(hidden_size, output_size) * 0.01
        self.b2 = np.zeros((1, output_size))
        self.learning_rate = learning_rate
        self.l1_lambda = l1_lambda
        self.l2_lambda = l2_lambda

    def softmax(self, x):
        """Row-wise softmax of a 2-D array of logits."""
        # Subtracting the row maximum leaves the result unchanged but keeps
        # np.exp from overflowing on large logits.
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)

    def forward(self, X):
        """Compute class probabilities for ``X``.

        The intermediate values ``z1``, ``a1``, ``z2`` and ``a2`` are stored on
        the instance because ``backward`` reads them.
        """
        self.z1 = np.dot(X, self.W1) + self.b1
        self.a1 = np.tanh(self.z1)
        self.z2 = np.dot(self.a1, self.W2) + self.b2
        self.a2 = self.softmax(self.z2)
        return self.a2

    def backward(self, X, y, output):
        """Apply one gradient-descent update to all parameters.

        Must be called right after ``forward(X)`` on the same ``X``, since it
        relies on the activations cached by that call.

        Args:
            X: Input batch, one sample per row.
            y: One-hot targets with the same shape as ``output``.
            output: Probabilities returned by ``forward(X)``.

        Raises:
            ValueError: If the batch is empty (the averaged gradients would be
                0/0 and silently turn the weights into NaN).
        """
        m = X.shape[0]
        if m == 0:
            raise ValueError("backward() requires at least one sample")

        output_error = output - y
        # tanh'(z) = 1 - tanh(z)**2, and a1 already holds tanh(z1) from the
        # forward pass, so there is no need to recompute tanh here.
        hidden_error = np.dot(output_error, self.W2.T) * (1 - self.a1 ** 2)

        dW2 = np.dot(self.a1.T, output_error) / m
        db2 = np.sum(output_error, axis=0, keepdims=True) / m
        dW1 = np.dot(X.T, hidden_error) / m
        db1 = np.sum(hidden_error, axis=0, keepdims=True) / m

        # L1 regularization
        dW1 += self.l1_lambda * np.sign(self.W1)
        dW2 += self.l1_lambda * np.sign(self.W2)

        # L2 regularization
        dW1 += self.l2_lambda * self.W1
        dW2 += self.l2_lambda * self.W2

        self.W1 -= self.learning_rate * dW1
        self.b1 -= self.learning_rate * db1
        self.W2 -= self.learning_rate * dW2
        self.b2 -= self.learning_rate * db2

    def train(self, X, y, epochs):
        """Run ``epochs`` full-batch updates, printing the loss every 10th epoch.

        The printed value is the mean cross-entropy of the forward pass made
        *before* that epoch's update; it does not include the L1/L2 penalties.
        """
        for epoch in range(epochs):
            output = self.forward(X)
            self.backward(X, y, output)
            if epoch % 10 == 0:
                # 1e-10 keeps log() finite if a probability underflows to 0.
                loss = -np.mean(np.sum(y * np.log(output + 1e-10), axis=1))
                print(f"Epoch {epoch}, Loss: {loss:.4f}")

    def predict(self, X):
        """Return the index of the most probable class for each row of ``X``."""
        output = self.forward(X)
        return np.argmax(output, axis=1)

In [31]:
# Hyperparameters for the 1000-epoch runs
input_size = 28 * 28   # one input per pixel of a flattened 28x28 image
hidden_size = 64       # width of the single hidden layer
output_size = 10       # 10 output classes (Fashion-MNIST clothing categories)
learning_rate = 0.5
epochs = 1_000

In [32]:
# Baseline run: no L1/L2 penalty (both lambdas keep their 0.0 defaults)
nn_no_reg = NeuralNetwork(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
    learning_rate=learning_rate,
)
nn_no_reg.train(X=x_train, y=y_train_encoded, epochs=epochs)

# accuracy = fraction of test samples whose predicted class equals the label
predictions_no_reg = nn_no_reg.predict(x_test)
accuracy_no_reg = (predictions_no_reg == y_test).mean()
print(f"Test Accuracy (No Regularization): {accuracy_no_reg:.4f}")

Epoch 0, Loss: 2.3036
Epoch 10, Loss: 1.4640
Epoch 20, Loss: 1.1372
Epoch 30, Loss: 0.9007
Epoch 40, Loss: 0.8005
Epoch 50, Loss: 0.7401
Epoch 60, Loss: 0.7060
Epoch 70, Loss: 0.6485
Epoch 80, Loss: 0.6386
Epoch 90, Loss: 0.6293
Epoch 100, Loss: 0.6367
Epoch 110, Loss: 0.5739
Epoch 120, Loss: 0.5715
Epoch 130, Loss: 0.5689
Epoch 140, Loss: 0.5551
Epoch 150, Loss: 0.5363
Epoch 160, Loss: 0.5702
Epoch 170, Loss: 0.5130
Epoch 180, Loss: 0.5215
Epoch 190, Loss: 0.4941
Epoch 200, Loss: 0.5072
Epoch 210, Loss: 0.5207
Epoch 220, Loss: 0.5057
Epoch 230, Loss: 0.5341
Epoch 240, Loss: 0.4628
Epoch 250, Loss: 0.5164
Epoch 260, Loss: 0.5035
Epoch 270, Loss: 0.4952
Epoch 280, Loss: 0.4874
Epoch 290, Loss: 0.4507
Epoch 300, Loss: 0.4703
Epoch 310, Loss: 0.4519
Epoch 320, Loss: 0.4511
Epoch 330, Loss: 0.4561
Epoch 340, Loss: 0.4330
Epoch 350, Loss: 0.4435
Epoch 360, Loss: 0.4331
Epoch 370, Loss: 0.4300
Epoch 380, Loss: 0.4269
Epoch 390, Loss: 0.4354
Epoch 400, Loss: 0.4240
Epoch 410, Loss: 0.4367
Epo

In [35]:
# Same architecture and learning rate, now with an L1 penalty on the weights
nn_l1 = NeuralNetwork(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
    learning_rate=learning_rate,
    l1_lambda=0.0001,
)
nn_l1.train(X=x_train, y=y_train_encoded, epochs=epochs)

# accuracy = fraction of test samples whose predicted class equals the label
predictions_l1 = nn_l1.predict(x_test)
accuracy_l1 = (predictions_l1 == y_test).mean()
print(f"Test Accuracy (L1 Regularization): {accuracy_l1:.4f}")

Epoch 0, Loss: 2.3043
Epoch 10, Loss: 1.4979
Epoch 20, Loss: 1.3549
Epoch 30, Loss: 0.9169
Epoch 40, Loss: 0.8239
Epoch 50, Loss: 0.7550
Epoch 60, Loss: 0.7145
Epoch 70, Loss: 0.6763
Epoch 80, Loss: 0.6220
Epoch 90, Loss: 0.6186
Epoch 100, Loss: 0.6110
Epoch 110, Loss: 0.6124
Epoch 120, Loss: 0.5658
Epoch 130, Loss: 0.5759
Epoch 140, Loss: 0.5551
Epoch 150, Loss: 0.6335
Epoch 160, Loss: 0.5890
Epoch 170, Loss: 0.5341
Epoch 180, Loss: 0.5482
Epoch 190, Loss: 0.5729
Epoch 200, Loss: 0.5491
Epoch 210, Loss: 0.5370
Epoch 220, Loss: 0.5387
Epoch 230, Loss: 0.5760
Epoch 240, Loss: 0.5319
Epoch 250, Loss: 0.5087
Epoch 260, Loss: 0.5137
Epoch 270, Loss: 0.5334
Epoch 280, Loss: 0.4922
Epoch 290, Loss: 0.4943
Epoch 300, Loss: 0.4887
Epoch 310, Loss: 0.5201
Epoch 320, Loss: 0.5219
Epoch 330, Loss: 0.5269
Epoch 340, Loss: 0.4628
Epoch 350, Loss: 0.4821
Epoch 360, Loss: 0.4500
Epoch 370, Loss: 0.4525
Epoch 380, Loss: 0.4500
Epoch 390, Loss: 0.4563
Epoch 400, Loss: 0.4605
Epoch 410, Loss: 0.4960
Epo

In [9]:
# Hyperparameters for the longer 10000-epoch run
input_size = 28 * 28   # one input per pixel of a flattened 28x28 image
hidden_size = 64       # width of the single hidden layer
output_size = 10       # 10 output classes (Fashion-MNIST clothing categories)
learning_rate = 0.5
epochs = 10_000

In [None]:
# Baseline run: no L1/L2 penalty (both lambdas keep their 0.0 defaults)
nn_no_reg = NeuralNetwork(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
    learning_rate=learning_rate,
)
nn_no_reg.train(X=x_train, y=y_train_encoded, epochs=epochs)

# accuracy = fraction of test samples whose predicted class equals the label
predictions_no_reg = nn_no_reg.predict(x_test)
accuracy_no_reg = (predictions_no_reg == y_test).mean()
print(f"Test Accuracy (No Regularization): {accuracy_no_reg:.4f}")

Epoch 0, Loss: 2.3030
Epoch 10, Loss: 1.4539
Epoch 20, Loss: 1.0136
Epoch 30, Loss: 0.9170
Epoch 40, Loss: 1.0347
Epoch 50, Loss: 0.7169
Epoch 60, Loss: 0.6892
Epoch 70, Loss: 0.6600
Epoch 80, Loss: 0.6736
Epoch 90, Loss: 0.6189
Epoch 100, Loss: 0.5884
Epoch 110, Loss: 0.6606
Epoch 120, Loss: 0.5640
Epoch 130, Loss: 0.5651
Epoch 140, Loss: 0.5342
Epoch 150, Loss: 0.5563
Epoch 160, Loss: 0.5256
Epoch 170, Loss: 0.5370
Epoch 180, Loss: 0.5281
Epoch 190, Loss: 0.5351
Epoch 200, Loss: 0.5040
Epoch 210, Loss: 0.5469
Epoch 220, Loss: 0.5462
Epoch 230, Loss: 0.5054
Epoch 240, Loss: 0.4909
Epoch 250, Loss: 0.4669
Epoch 260, Loss: 0.5071
Epoch 270, Loss: 0.4848
Epoch 280, Loss: 0.4628
Epoch 290, Loss: 0.4530
Epoch 300, Loss: 0.4627
Epoch 310, Loss: 0.4746
Epoch 320, Loss: 0.4594
Epoch 330, Loss: 0.4359
Epoch 340, Loss: 0.4545
Epoch 350, Loss: 0.4546
Epoch 360, Loss: 0.4380
Epoch 370, Loss: 0.4422
Epoch 380, Loss: 0.4524
Epoch 390, Loss: 0.4175
Epoch 400, Loss: 0.4438
Epoch 410, Loss: 0.4384
Epo

In [None]:
# Evaluate the model
# Uses nn_no_reg, the model trained in the preceding cell: no object named
# `nn` is created anywhere in this notebook, so the previous `nn.predict(...)`
# raised NameError on a fresh kernel.
predictions = nn_no_reg.predict(x_test)
accuracy = np.mean(predictions == y_test)
print(f"Test Accuracy: {accuracy:.4f}")

In [None]:
# Visualize some predictions
def plot_predictions(X, y_true, y_pred, num_images=10):
    """Show the first ``num_images`` samples as 28x28 grayscale images.

    Each panel is titled with the matching entries of ``y_true`` and
    ``y_pred``. Panels are laid out five per row, and the number of rows grows
    with ``num_images`` instead of being fixed at two. The number of panels is
    clamped to the shortest of the three inputs; nothing is drawn when that
    count is zero or negative.

    Args:
        X: Indexable collection of samples; each ``X[i]`` must be reshapeable
            to (28, 28).
        y_true: True label per sample.
        y_pred: Predicted label per sample.
        num_images: Maximum number of samples to display.
    """
    count = min(num_images, len(X), len(y_true), len(y_pred))
    if count <= 0:
        return

    cols = 5
    rows = -(-count // cols)  # ceiling division
    # 2.5 inches per row reproduces the original 10x5 figure for ten images.
    # squeeze=False keeps `axes` 2-D even when there is a single row.
    fig, axes = plt.subplots(rows, cols, figsize=(10, 2.5 * rows), squeeze=False)
    for i, ax in enumerate(axes.ravel()):
        ax.axis('off')
        if i >= count:
            continue  # unused trailing panel in the last row stays blank
        ax.imshow(X[i].reshape(28, 28), cmap='gray')
        ax.set_title(f'True: {y_true[i]}\nPred: {y_pred[i]}')
    fig.tight_layout()
    plt.show()

# Show the first test images with their true and predicted labels
plot_predictions(X=x_test, y_true=y_test, y_pred=predictions)