In [43]:
import numpy as np

class DenseLayer:
    def __init__(self, input_dim, output_dim):
        # Initialize weights and biases
        self.weights = np.random.randn(input_dim, output_dim) * 0.01
        self.bias = np.zeros((1, output_dim))
        self.input = None  # To store the input for backward pass
        self.d_weights = None  # To store gradients for weights
        self.d_bias = None    # To store gradients for bias

    def forward(self, X):
        # Save the input for the backward pass
        self.input = X
        # Compute the output
        return np.dot(X, self.weights) + self.bias

    def backward(self, d_out, learning_rate):
        # Gradient of the loss with respect to the weights and bias
        self.d_weights = np.dot(self.input.T, d_out)
        self.d_bias = np.sum(d_out, axis=0, keepdims=True)

        # Gradient of the loss with respect to the input
        d_input = np.dot(d_out, self.weights.T)

        return d_input

In [44]:
class ReLU:
    def forward(self, X):
        # ReLU forward pass
        self.input = X
        return np.maximum(0, X)

    def backward(self, d_out):
        # Gradient for ReLU
        return d_out * (self.input > 0)

In [45]:
class BatchNormalization:
    def __init__(self, dim, epsilon=1e-5, momentum=0.9):
        self.gamma = np.ones(dim)
        self.beta = np.zeros(dim)
        self.epsilon = epsilon
        self.momentum = momentum
        self.running_mean = np.zeros(dim)
        self.running_var = np.zeros(dim)
        self.input = None  # To store the input for the backward pass

    def forward(self, X, training=True):
        if training:
            # Calculate batch statistics
            self.mean = np.mean(X, axis=0)
            self.var = np.var(X, axis=0)
            self.input = X

            # Normalize the input
            self.X_norm = (X - self.mean) / np.sqrt(self.var + self.epsilon)
            out = self.gamma * self.X_norm + self.beta

            # Update running statistics
            self.running_mean = self.momentum * self.running_mean + (1 - self.momentum) * self.mean
            self.running_var = self.momentum * self.running_var + (1 - self.momentum) * self.var
        else:
            # Use running statistics for inference
            X_norm = (X - self.running_mean) / np.sqrt(self.running_var + self.epsilon)
            out = self.gamma * X_norm + self.beta
        
        return out

    def backward(self, d_out, learning_rate):
        # Gradient computation for batch normalization
        N, D = d_out.shape

        # Intermediate values from forward pass
        X_mu = self.input - self.mean
        std_inv = 1. / np.sqrt(self.var + self.epsilon)

        dX_norm = d_out * self.gamma
        dvar = np.sum(dX_norm * X_mu, axis=0) * -0.5 * std_inv**3
        dmean = np.sum(dX_norm * -std_inv, axis=0) + dvar * np.mean(-2. * X_mu, axis=0)

        # Gradient w.r.t. input
        dX = (dX_norm * std_inv) + (dvar * 2 * X_mu / N) + (dmean / N)
        self.gamma -= learning_rate * np.sum(d_out * self.X_norm, axis=0)
        self.beta -= learning_rate * np.sum(d_out, axis=0)
        
        return dX


In [46]:
class Dropout:
    def __init__(self, dropout_rate):
        self.dropout_rate = dropout_rate

    def forward(self, X, training=True):
        if training:
            # Create dropout mask
            self.mask = np.random.rand(*X.shape) > self.dropout_rate
            return X * self.mask / (1 - self.dropout_rate)
        else:
            # During inference, do nothing
            return X

    def backward(self, d_out):
        # Apply dropout mask to gradient
        return d_out * self.mask / (1 - self.dropout_rate)


In [47]:
class AdamOptimizer:
    def __init__(self, learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-8):
        self.learning_rate = learning_rate
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.m = {}  # First moment vector
        self.v = {}  # Second moment vector
        self.t = 0   # Time step

    def update(self, params, grads):
        self.t += 1
        updated_params = {}

        for key in params:
            # Initialize moments if not already done
            if key not in self.m:
                self.m[key] = [np.zeros_like(grads[key][0]),np.zeros_like(grads[key][1])]
                self.v[key] = [np.zeros_like(grads[key][0]),np.zeros_like(grads[key][1])]

            # Update biased first moment estimate
            self.m[key][0] = self.beta1 * self.m[key][0] + (1 - self.beta1) * grads[key][0]
            self.m[key][1] = self.beta1 * self.m[key][1] + (1 - self.beta1) * grads[key][1]

            # Update biased second raw moment estimate
            self.v[key][0] = self.beta2 * self.v[key][0] + (1 - self.beta2) * (grads[key][0] ** 2)
            self.v[key][1] = self.beta2 * self.v[key][1] + (1 - self.beta2) * (grads[key][1] ** 2)

            # Compute bias-corrected first and second moment estimates
            m_hat_weights = self.m[key][0] / (1 - self.beta1 ** self.t)
            m_hat_bias = self.m[key][1] / (1 - self.beta1 ** self.t)
            v_hat_weights = self.v[key][0] / (1 - self.beta2 ** self.t)
            v_hat_bias = self.v[key][1] / (1 - self.beta2 ** self.t)

            # Update parameters
            updated_params[key] =[params[key][0] - self.learning_rate * m_hat_weights / (np.sqrt(v_hat_weights) + self.epsilon),
                                    params[key][1] - self.learning_rate * m_hat_bias / (np.sqrt(v_hat_bias) + self.epsilon)]

        return updated_params



In [48]:
class Softmax:
    def forward(self, X):
        exps = np.exp(X - np.max(X, axis=1, keepdims=True))
        self.output = exps / np.sum(exps, axis=1, keepdims=True)
        return self.output

    def backward(self, d_out):
        return self.output * (d_out - np.sum(d_out * self.output, axis=1, keepdims=True))


In [49]:
class FeedForwardNeuralNetwork:
    def __init__(self, input_dim, hidden_dims, output_dim, dropout_rate):
        # Create a list of layers
        self.layers = []
        
        # Add first Dense layer
        prev_dim = input_dim
        for hidden_dim in hidden_dims:
            self.layers.append(DenseLayer(prev_dim, hidden_dim))
            self.layers.append(BatchNormalization(hidden_dim))
            self.layers.append(ReLU())
            self.layers.append(Dropout(dropout_rate))
            prev_dim = hidden_dim
        
        # Add final Dense layer
        self.layers.append(DenseLayer(prev_dim, output_dim))
        self.layers.append(Softmax())

    def forward(self, X, training=True):
        # Forward pass through all layers
        for layer in self.layers:
            if isinstance(layer, Dropout):
                X = layer.forward(X, training)
            elif isinstance(layer, BatchNormalization):
                X = layer.forward(X, training)
            else:
                X = layer.forward(X)
        return X

    def backward(self, d_out, learning_rate):
        # Backward pass through all layers in reverse order
        for layer in reversed(self.layers):
            if isinstance(layer, (DenseLayer, BatchNormalization)):
                d_out = layer.backward(d_out, learning_rate)
            elif isinstance(layer, (ReLU, Dropout, Softmax)):
                d_out = layer.backward(d_out)

    def update_params(self, adam_optimizer):
        # Prepare parameter and gradient dictionaries
        params = {}
        grads = {}
        
        for layer in self.layers:
            if isinstance(layer, DenseLayer):
                # Store weights and biases in params dictionary
                params[id(layer)] = [layer.weights,layer.bias]
                
                # Store gradients for weights and biases in grads dictionary
                grads[id(layer)] = [layer.d_weights,layer.d_bias]
        
        # Update parameters using Adam optimizer
        updated_params = adam_optimizer.update(params, grads)

        # Update the layers with the new parameters
        for layer in self.layers:
            if isinstance(layer, DenseLayer):
                layer.weights = updated_params[id(layer)][0]
                layer.bias = updated_params[id(layer)][1]


In [50]:
from torchvision import datasets, transforms

# Define transformation
transform = transforms.ToTensor()

# Load the training dataset
train_dataset = datasets.FashionMNIST(root='./data', train=True, transform=transform, download=True)

# Load the test dataset separately
test_dataset = datasets.FashionMNIST(root='./data', train=False, transform=transform, download=True)

In [51]:
from torch.utils.data import DataLoader
import tqdm

# Hyperparameters
input_dim = 28 * 28  # Image size (28x28)
hidden_dims = [128, 64]  # Hidden layers
output_dim = 10  # Number of classes (0-9)
dropout_rate = 0.2
learning_rate = 0.001
batch_size = 64
num_epochs = 40

# Initialize the model
model = FeedForwardNeuralNetwork(input_dim, hidden_dims, output_dim, dropout_rate)
adam_optimizer = AdamOptimizer(learning_rate)

# DataLoader for training
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Training loop
for epoch in range(num_epochs):
    epoch_loss = 0
    correct = 0
    total = 0

    for images, labels in tqdm.tqdm(train_loader):
        # Flatten images
        images = images.view(-1, 28 * 28).numpy()

        # Forward pass
        outputs = model.forward(images, training=True)
        
        # One-hot encoding for labels
        one_hot_labels = np.eye(output_dim)[labels.numpy()]

        # Compute loss (cross-entropy loss)
        loss = -np.sum(one_hot_labels * np.log(outputs + 1e-8)) / len(labels)
        epoch_loss += loss

        # Backward pass
        d_out = outputs - np.eye(output_dim)[labels.numpy()]
        model.backward(d_out, learning_rate)

        # Update parameters
        model.update_params(adam_optimizer)

        # Calculate accuracy
        predictions = np.argmax(outputs, axis=1)
        correct += (predictions == labels.numpy()).sum()
        total += labels.size(0)

    # Display epoch results
    print("total: ",total,"correct: ",correct)
    epoch_accuracy = correct / total
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.4f}')


  0%|          | 4/938 [00:00<00:31, 29.92it/s]

100%|██████████| 938/938 [00:16<00:00, 57.09it/s]


total:  60000 correct:  47815
Epoch [1/40], Loss: 626.5752, Accuracy: 0.7969


100%|██████████| 938/938 [00:23<00:00, 39.44it/s]


total:  60000 correct:  51083
Epoch [2/40], Loss: 419.0253, Accuracy: 0.8514


100%|██████████| 938/938 [00:14<00:00, 65.43it/s]


total:  60000 correct:  51779
Epoch [3/40], Loss: 385.3202, Accuracy: 0.8630


100%|██████████| 938/938 [00:14<00:00, 65.82it/s]


total:  60000 correct:  52125
Epoch [4/40], Loss: 366.2263, Accuracy: 0.8688


100%|██████████| 938/938 [00:18<00:00, 51.65it/s]


total:  60000 correct:  52426
Epoch [5/40], Loss: 353.7467, Accuracy: 0.8738


100%|██████████| 938/938 [00:15<00:00, 61.07it/s]


total:  60000 correct:  52765
Epoch [6/40], Loss: 339.6976, Accuracy: 0.8794


100%|██████████| 938/938 [00:14<00:00, 62.97it/s]


total:  60000 correct:  52962
Epoch [7/40], Loss: 332.4082, Accuracy: 0.8827


100%|██████████| 938/938 [00:15<00:00, 60.71it/s]


total:  60000 correct:  53087
Epoch [8/40], Loss: 324.8517, Accuracy: 0.8848


100%|██████████| 938/938 [00:18<00:00, 50.17it/s]


total:  60000 correct:  53227
Epoch [9/40], Loss: 319.2861, Accuracy: 0.8871


100%|██████████| 938/938 [00:18<00:00, 51.04it/s]


total:  60000 correct:  53453
Epoch [10/40], Loss: 311.0922, Accuracy: 0.8909


100%|██████████| 938/938 [00:19<00:00, 48.88it/s]


total:  60000 correct:  53549
Epoch [11/40], Loss: 306.7547, Accuracy: 0.8925


100%|██████████| 938/938 [00:13<00:00, 67.30it/s]


total:  60000 correct:  53646
Epoch [12/40], Loss: 302.9173, Accuracy: 0.8941


100%|██████████| 938/938 [00:15<00:00, 61.61it/s]


total:  60000 correct:  53813
Epoch [13/40], Loss: 296.8247, Accuracy: 0.8969


100%|██████████| 938/938 [00:20<00:00, 44.85it/s]


total:  60000 correct:  53914
Epoch [14/40], Loss: 292.9289, Accuracy: 0.8986


100%|██████████| 938/938 [00:17<00:00, 52.64it/s]


total:  60000 correct:  54012
Epoch [15/40], Loss: 290.8124, Accuracy: 0.9002


100%|██████████| 938/938 [00:22<00:00, 41.42it/s]


total:  60000 correct:  54128
Epoch [16/40], Loss: 284.0946, Accuracy: 0.9021


100%|██████████| 938/938 [00:18<00:00, 50.67it/s]


total:  60000 correct:  54099
Epoch [17/40], Loss: 286.5020, Accuracy: 0.9016


100%|██████████| 938/938 [00:14<00:00, 63.22it/s]


total:  60000 correct:  54262
Epoch [18/40], Loss: 280.4582, Accuracy: 0.9044


100%|██████████| 938/938 [00:15<00:00, 60.15it/s]


total:  60000 correct:  54335
Epoch [19/40], Loss: 277.9931, Accuracy: 0.9056


100%|██████████| 938/938 [00:14<00:00, 63.07it/s]


total:  60000 correct:  54403
Epoch [20/40], Loss: 275.4207, Accuracy: 0.9067


100%|██████████| 938/938 [00:14<00:00, 64.55it/s]


total:  60000 correct:  54398
Epoch [21/40], Loss: 274.4675, Accuracy: 0.9066


100%|██████████| 938/938 [00:14<00:00, 63.25it/s]


total:  60000 correct:  54425
Epoch [22/40], Loss: 272.7300, Accuracy: 0.9071


100%|██████████| 938/938 [00:14<00:00, 63.72it/s]


total:  60000 correct:  54593
Epoch [23/40], Loss: 269.5281, Accuracy: 0.9099


100%|██████████| 938/938 [00:14<00:00, 62.84it/s]


total:  60000 correct:  54635
Epoch [24/40], Loss: 266.3342, Accuracy: 0.9106


100%|██████████| 938/938 [00:14<00:00, 64.73it/s]


total:  60000 correct:  54654
Epoch [25/40], Loss: 265.7343, Accuracy: 0.9109


100%|██████████| 938/938 [00:16<00:00, 58.30it/s]


total:  60000 correct:  54759
Epoch [26/40], Loss: 263.1360, Accuracy: 0.9126


100%|██████████| 938/938 [00:14<00:00, 64.48it/s]


total:  60000 correct:  54769
Epoch [27/40], Loss: 261.6918, Accuracy: 0.9128


100%|██████████| 938/938 [00:17<00:00, 53.00it/s]


total:  60000 correct:  54784
Epoch [28/40], Loss: 261.4109, Accuracy: 0.9131


100%|██████████| 938/938 [00:14<00:00, 66.21it/s]


total:  60000 correct:  54927
Epoch [29/40], Loss: 253.9948, Accuracy: 0.9154


100%|██████████| 938/938 [00:14<00:00, 66.20it/s]


total:  60000 correct:  54920
Epoch [30/40], Loss: 257.2245, Accuracy: 0.9153


100%|██████████| 938/938 [00:14<00:00, 66.15it/s]


total:  60000 correct:  55050
Epoch [31/40], Loss: 250.6792, Accuracy: 0.9175


100%|██████████| 938/938 [00:14<00:00, 66.01it/s]


total:  60000 correct:  54962
Epoch [32/40], Loss: 253.2972, Accuracy: 0.9160


100%|██████████| 938/938 [00:14<00:00, 66.73it/s]


total:  60000 correct:  55135
Epoch [33/40], Loss: 250.6781, Accuracy: 0.9189


100%|██████████| 938/938 [00:15<00:00, 61.92it/s]


total:  60000 correct:  55112
Epoch [34/40], Loss: 247.5781, Accuracy: 0.9185


100%|██████████| 938/938 [00:14<00:00, 66.16it/s]


total:  60000 correct:  55182
Epoch [35/40], Loss: 246.3319, Accuracy: 0.9197


100%|██████████| 938/938 [00:14<00:00, 66.68it/s]


total:  60000 correct:  55260
Epoch [36/40], Loss: 247.2262, Accuracy: 0.9210


100%|██████████| 938/938 [00:14<00:00, 66.86it/s]


total:  60000 correct:  55240
Epoch [37/40], Loss: 242.8576, Accuracy: 0.9207


100%|██████████| 938/938 [00:15<00:00, 62.00it/s]


total:  60000 correct:  55337
Epoch [38/40], Loss: 243.0136, Accuracy: 0.9223


100%|██████████| 938/938 [00:14<00:00, 66.67it/s]


total:  60000 correct:  55320
Epoch [39/40], Loss: 241.5300, Accuracy: 0.9220


100%|██████████| 938/938 [00:14<00:00, 64.77it/s]

total:  60000 correct:  55404
Epoch [40/40], Loss: 241.6243, Accuracy: 0.9234





In [52]:
from sklearn.metrics import accuracy_score

# DataLoader for testing
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Testing phase
model_accuracy = 0
model_total = 0
all_predictions = []
all_true_labels = []

for images, labels in test_loader:
    # Flatten images
    images = images.view(-1, 28 * 28).numpy()

    # Forward pass (inference mode)
    outputs = model.forward(images, training=False)
    predictions = np.argmax(outputs, axis=1)

    # Collect results
    all_predictions.extend(predictions)
    all_true_labels.extend(labels.numpy())

# Calculate accuracy using sklearn
accuracy = accuracy_score(all_true_labels, all_predictions)
print(f'Test Accuracy: {accuracy:.4f}')


Test Accuracy: 0.8855


In [53]:
import pickle

# Save the trained model to a file
with open('fashion_mnist_model.pkl', 'wb') as file:
    pickle.dump(model, file)
print("Model saved successfully!")


Model saved successfully!


In [54]:
# Load the model from the file
with open('fashion_mnist_model.pkl', 'rb') as file:
    loaded_model = pickle.load(file)
print("Model loaded successfully!")


Model loaded successfully!


In [55]:
# Step 3: Run the model on the test data
all_predictions = []
all_labels = []
import torch

# Disable gradient computation for testing
with torch.no_grad():
    for images, labels in test_loader:
        # Flatten the images
        images = images.view(images.size(0), -1).numpy()

        # Forward pass through the loaded model
        outputs = loaded_model.forward(images)

        # Get the predicted class (highest probability)
        predictions = np.argmax(outputs, axis=1)

        # Collect predictions and true labels for accuracy computation
        all_predictions.extend(predictions)
        all_labels.extend(labels.numpy())

# Step 4: Evaluate the model
accuracy = accuracy_score(all_labels, all_predictions)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

Test Accuracy: 87.32%
