In [1]:
import numpy as np

class DenseLayer:
    def __init__(self, input_dim, output_dim):
        # Initialize weights and biases
        self.weights = np.random.randn(input_dim, output_dim) * 0.01
        self.bias = np.zeros((1, output_dim))
        self.input = None  # To store the input for backward pass
        self.d_weights = None  # To store gradients for weights
        self.d_bias = None    # To store gradients for bias

    def forward(self, X):
        # Save the input for the backward pass
        self.input = X
        # Compute the output
        return np.dot(X, self.weights) + self.bias

    def backward(self, d_out, learning_rate):
        # Gradient of the loss with respect to the weights and bias
        self.d_weights = np.dot(self.input.T, d_out)
        self.d_bias = np.sum(d_out, axis=0, keepdims=True)

        # Gradient of the loss with respect to the input
        d_input = np.dot(d_out, self.weights.T)

        return d_input

    
    


In [2]:
class ReLU:
    def forward(self, X):
        # ReLU forward pass
        self.input = X
        return np.maximum(0, X)

    def backward(self, d_out):
        # Gradient for ReLU
        return d_out * (self.input > 0)

In [3]:
class BatchNormalization:
    def __init__(self, dim, epsilon=1e-5, momentum=0.9):
        self.gamma = np.ones(dim)
        self.beta = np.zeros(dim)
        self.epsilon = epsilon
        self.momentum = momentum
        self.running_mean = np.zeros(dim)
        self.running_var = np.zeros(dim)
        self.input = None  # To store the input for the backward pass

    def forward(self, X, training=True):
        if training:
            # Calculate batch statistics
            self.mean = np.mean(X, axis=0)
            self.var = np.var(X, axis=0)
            self.input = X

            # Normalize the input
            self.X_norm = (X - self.mean) / np.sqrt(self.var + self.epsilon)
            out = self.gamma * self.X_norm + self.beta

            # Update running statistics
            self.running_mean = self.momentum * self.running_mean + (1 - self.momentum) * self.mean
            self.running_var = self.momentum * self.running_var + (1 - self.momentum) * self.var
        else:
            # Use running statistics for inference
            X_norm = (X - self.running_mean) / np.sqrt(self.running_var + self.epsilon)
            out = self.gamma * X_norm + self.beta
        
        return out

    def backward(self, d_out, learning_rate):
        # Gradient computation for batch normalization
        N, D = d_out.shape

        # Intermediate values from forward pass
        X_mu = self.input - self.mean
        std_inv = 1. / np.sqrt(self.var + self.epsilon)

        dX_norm = d_out * self.gamma
        dvar = np.sum(dX_norm * X_mu, axis=0) * -0.5 * std_inv**3
        dmean = np.sum(dX_norm * -std_inv, axis=0) + dvar * np.mean(-2. * X_mu, axis=0)

        # Gradient w.r.t. input
        dX = (dX_norm * std_inv) + (dvar * 2 * X_mu / N) + (dmean / N)
        self.gamma -= learning_rate * np.sum(d_out * self.X_norm, axis=0)
        self.beta -= learning_rate * np.sum(d_out, axis=0)
        
        return dX


In [4]:
class Dropout:
    def __init__(self, dropout_rate):
        self.dropout_rate = dropout_rate

    def forward(self, X, training=True):
        if training:
            # Create dropout mask
            self.mask = np.random.rand(*X.shape) > self.dropout_rate
            return X * self.mask / (1 - self.dropout_rate)
        else:
            # During inference, do nothing
            return X

    def backward(self, d_out):
        # Apply dropout mask to gradient
        return d_out * self.mask / (1 - self.dropout_rate)


In [5]:
class AdamOptimizer:
    def __init__(self, learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-8):
        self.learning_rate = learning_rate
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.m = {}  # First moment vector
        self.v = {}  # Second moment vector
        self.t = 0   # Time step

    def update(self, params, grads):
        self.t += 1
        updated_params = {}

        for key in params:
            # Initialize moments if not already done
            if key not in self.m:
                self.m[key] = {'weights': np.zeros_like(grads[key]['weights']),
                               'bias': np.zeros_like(grads[key]['bias'])}
                self.v[key] = {'weights': np.zeros_like(grads[key]['weights']),
                               'bias': np.zeros_like(grads[key]['bias'])}

            # Update biased first moment estimate
            self.m[key]['weights'] = self.beta1 * self.m[key]['weights'] + (1 - self.beta1) * grads[key]['weights']
            self.m[key]['bias'] = self.beta1 * self.m[key]['bias'] + (1 - self.beta1) * grads[key]['bias']

            # Update biased second raw moment estimate
            self.v[key]['weights'] = self.beta2 * self.v[key]['weights'] + (1 - self.beta2) * (grads[key]['weights'] ** 2)
            self.v[key]['bias'] = self.beta2 * self.v[key]['bias'] + (1 - self.beta2) * (grads[key]['bias'] ** 2)

            # Compute bias-corrected first and second moment estimates
            m_hat_weights = self.m[key]['weights'] / (1 - self.beta1 ** self.t)
            m_hat_bias = self.m[key]['bias'] / (1 - self.beta1 ** self.t)
            v_hat_weights = self.v[key]['weights'] / (1 - self.beta2 ** self.t)
            v_hat_bias = self.v[key]['bias'] / (1 - self.beta2 ** self.t)

            # Update parameters
            updated_params[key] = {
                'weights': params[key]['weights'] - self.learning_rate * m_hat_weights / (np.sqrt(v_hat_weights) + self.epsilon),
                'bias': params[key]['bias'] - self.learning_rate * m_hat_bias / (np.sqrt(v_hat_bias) + self.epsilon)
            }

        return updated_params



In [6]:
class Softmax:
    def forward(self, X):
        exps = np.exp(X - np.max(X, axis=1, keepdims=True))
        self.output = exps / np.sum(exps, axis=1, keepdims=True)
        return self.output

    def backward(self, d_out):
        return self.output * (d_out - np.sum(d_out * self.output, axis=1, keepdims=True))


In [7]:
class FeedForwardNeuralNetwork:
    def __init__(self, input_dim, hidden_dims, output_dim, dropout_rate):
        # Create a list of layers
        self.layers = []
        
        # Add first Dense layer
        prev_dim = input_dim
        for hidden_dim in hidden_dims:
            self.layers.append(DenseLayer(prev_dim, hidden_dim))
            self.layers.append(BatchNormalization(hidden_dim))
            self.layers.append(ReLU())
            self.layers.append(Dropout(dropout_rate))
            prev_dim = hidden_dim
        
        # Add final Dense layer
        self.layers.append(DenseLayer(prev_dim, output_dim))
        self.layers.append(Softmax())

    def forward(self, X, training=True):
        # Forward pass through all layers
        for layer in self.layers:
            if isinstance(layer, Dropout):
                X = layer.forward(X, training)
            elif isinstance(layer, BatchNormalization):
                X = layer.forward(X, training)
            else:
                X = layer.forward(X)
        return X

    def backward(self, d_out, learning_rate):
        # Backward pass through all layers in reverse order
        for layer in reversed(self.layers):
            if isinstance(layer, (DenseLayer, BatchNormalization)):
                d_out = layer.backward(d_out, learning_rate)
            elif isinstance(layer, (ReLU, Dropout, Softmax)):
                d_out = layer.backward(d_out)

    def update_params(self, adam_optimizer):
        # Prepare parameter and gradient dictionaries
        params = {}
        grads = {}
        
        for layer in self.layers:
            if isinstance(layer, DenseLayer):
                # Store weights and biases in params dictionary
                params[id(layer)] = {'weights': layer.weights, 'bias': layer.bias}
                
                # Store gradients for weights and biases in grads dictionary
                grads[id(layer)] = {'weights': layer.d_weights, 'bias': layer.d_bias}
        
        # Update parameters using Adam optimizer
        updated_params = adam_optimizer.update(params, grads)

        # Update the layers with the new parameters
        for layer in self.layers:
            if isinstance(layer, DenseLayer):
                layer.weights = updated_params[id(layer)]['weights']
                layer.bias = updated_params[id(layer)]['bias']


In [8]:
from torchvision import datasets, transforms

# Define transformation
transform = transforms.ToTensor()

# Load the training dataset
train_dataset = datasets.FashionMNIST(root='./data', train=True, transform=transform, download=True)

# Load the test dataset separately
test_dataset = datasets.FashionMNIST(root='./data', train=False, transform=transform, download=True)

In [9]:
from torch.utils.data import DataLoader
import tqdm

# Hyperparameters
input_dim = 28 * 28  # Image size (28x28)
hidden_dims = [128, 64]  # Hidden layers
output_dim = 10  # Number of classes (0-9)
dropout_rate = 0.2
learning_rate = 0.001
batch_size = 64
num_epochs = 10

# Initialize the model
model = FeedForwardNeuralNetwork(input_dim, hidden_dims, output_dim, dropout_rate)
adam_optimizer = AdamOptimizer(learning_rate)

# DataLoader for training
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Training loop
for epoch in range(num_epochs):
    epoch_loss = 0
    correct = 0
    total = 0

    for images, labels in tqdm.tqdm(train_loader):
        # Flatten images
        images = images.view(-1, 28 * 28).numpy()

        # Forward pass
        outputs = model.forward(images, training=True)
        
        # One-hot encoding for labels
        one_hot_labels = np.eye(output_dim)[labels.numpy()]

        # Compute loss (cross-entropy loss)
        loss = -np.sum(one_hot_labels * np.log(outputs + 1e-8)) / len(labels)
        epoch_loss += loss

        # Backward pass
        d_out = outputs - np.eye(output_dim)[labels.numpy()]
        model.backward(d_out, learning_rate)

        # Update parameters
        model.update_params(adam_optimizer)

        # Calculate accuracy
        predictions = np.argmax(outputs, axis=1)
        correct += (predictions == labels.numpy()).sum()
        total += labels.size(0)

    # Display epoch results
    epoch_accuracy = correct / total
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.4f}')


100%|██████████| 938/938 [00:14<00:00, 65.51it/s]


Epoch [1/10], Loss: 632.4594, Accuracy: 0.8001


100%|██████████| 938/938 [00:13<00:00, 67.72it/s]


Epoch [2/10], Loss: 427.4006, Accuracy: 0.8507


100%|██████████| 938/938 [00:18<00:00, 51.75it/s]


Epoch [3/10], Loss: 391.8866, Accuracy: 0.8635


100%|██████████| 938/938 [00:19<00:00, 48.63it/s]


Epoch [4/10], Loss: 369.8415, Accuracy: 0.8692


100%|██████████| 938/938 [00:16<00:00, 58.44it/s]


Epoch [5/10], Loss: 354.7932, Accuracy: 0.8750


100%|██████████| 938/938 [00:13<00:00, 67.43it/s]


Epoch [6/10], Loss: 341.9681, Accuracy: 0.8791


100%|██████████| 938/938 [00:14<00:00, 63.39it/s]


Epoch [7/10], Loss: 335.0120, Accuracy: 0.8811


100%|██████████| 938/938 [00:14<00:00, 64.85it/s]


Epoch [8/10], Loss: 327.1664, Accuracy: 0.8849


100%|██████████| 938/938 [00:14<00:00, 65.12it/s]


Epoch [9/10], Loss: 319.0486, Accuracy: 0.8879


100%|██████████| 938/938 [00:14<00:00, 65.73it/s]

Epoch [10/10], Loss: 313.0244, Accuracy: 0.8882





In [10]:
from sklearn.metrics import accuracy_score

# DataLoader for testing
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Testing phase
model_accuracy = 0
model_total = 0
all_predictions = []
all_true_labels = []

for images, labels in test_loader:
    # Flatten images
    images = images.view(-1, 28 * 28).numpy()

    # Forward pass (inference mode)
    outputs = model.forward(images, training=False)
    predictions = np.argmax(outputs, axis=1)

    # Collect results
    all_predictions.extend(predictions)
    all_true_labels.extend(labels.numpy())

# Calculate accuracy using sklearn
accuracy = accuracy_score(all_true_labels, all_predictions)
print(f'Test Accuracy: {accuracy:.4f}')


Test Accuracy: 0.8741


In [11]:
import pickle

# Save the trained model to a file
with open('fashion_mnist_model.pkl', 'wb') as file:
    pickle.dump(model, file)
print("Model saved successfully!")


Model saved successfully!


In [12]:
# Load the model from the file
with open('fashion_mnist_model.pkl', 'rb') as file:
    loaded_model = pickle.load(file)
print("Model loaded successfully!")


Model loaded successfully!


In [13]:
# Step 3: Run the model on the test data
all_predictions = []
all_labels = []
import torch

# Disable gradient computation for testing
with torch.no_grad():
    for images, labels in test_loader:
        # Flatten the images
        images = images.view(images.size(0), -1).numpy()

        # Forward pass through the loaded model
        outputs = loaded_model.forward(images)

        # Get the predicted class (highest probability)
        predictions = np.argmax(outputs, axis=1)

        # Collect predictions and true labels for accuracy computation
        all_predictions.extend(predictions)
        all_labels.extend(labels.numpy())

# Step 4: Evaluate the model
accuracy = accuracy_score(all_labels, all_predictions)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

Test Accuracy: 86.63%
