In [75]:
import numpy as np

class DenseLayer:
    def __init__(self, input_dim, output_dim):
        self.weights = np.random.randn(input_dim, output_dim) * 0.01
        self.bias = np.zeros((1, output_dim))
        self.input = None
        self.d_weights = None
        self.d_bias = None  

    def forward(self, X):
        self.input = X
        return np.dot(X, self.weights) + self.bias

    def backward(self, d_out, learning_rate):
        self.d_weights = np.dot(self.input.T, d_out)
        self.d_bias = np.sum(d_out, axis=0, keepdims=True)
        d_input = np.dot(d_out, self.weights.T)
        return d_input

In [76]:
class ReLU:
    def forward(self, X):
        self.input = X
        return np.maximum(0, X)

    def backward(self, d_out):
        return d_out * (self.input > 0)

In [77]:
class BatchNormalization:
    def __init__(self, dim, epsilon=1e-5, momentum=0.9):
        self.gamma = np.ones(dim)
        self.beta = np.zeros(dim)
        self.epsilon = epsilon
        self.momentum = momentum
        self.running_mean = np.zeros(dim)
        self.running_var = np.zeros(dim)
        self.input = None

    def forward(self, X, training=True):
        if training:
            self.mean = np.mean(X, axis=0)
            self.var = np.var(X, axis=0)
            self.input = X

            self.X_norm = (X - self.mean) / np.sqrt(self.var + self.epsilon)
            out = self.gamma * self.X_norm + self.beta

            self.running_mean = self.momentum * self.running_mean + (1 - self.momentum) * self.mean
            self.running_var = self.momentum * self.running_var + (1 - self.momentum) * self.var
        else:
            X_norm = (X - self.running_mean) / np.sqrt(self.running_var + self.epsilon)
            out = self.gamma * X_norm + self.beta
        
        return out

    def backward(self, d_out, learning_rate):
        N, D = d_out.shape

        X_mu = self.input - self.mean
        std_inv = 1. / np.sqrt(self.var + self.epsilon)

        dX_norm = d_out * self.gamma
        dvar = np.sum(dX_norm * X_mu, axis=0) * -0.5 * std_inv**3
        dmean = np.sum(dX_norm * -std_inv, axis=0) + dvar * np.mean(-2. * X_mu, axis=0)

        dX = (dX_norm * std_inv) + (dvar * 2 * X_mu / N) + (dmean / N)
        self.gamma -= learning_rate * np.sum(d_out * self.X_norm, axis=0)
        self.beta -= learning_rate * np.sum(d_out, axis=0)
        
        return dX


In [78]:
class Dropout:
    def __init__(self, dropout_rate):
        self.dropout_rate = dropout_rate

    def forward(self, X, training=True):
        if training:
            self.mask = np.random.rand(*X.shape) > self.dropout_rate
            return X * self.mask / (1 - self.dropout_rate)
        else:
            return X

    def backward(self, d_out):
        return d_out * self.mask / (1 - self.dropout_rate)


In [79]:
class AdamOptimizer:
    def __init__(self, learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-8):
        self.learning_rate = learning_rate
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.m = {}
        self.v = {}
        self.t = 0

    def update(self, params, grads):
        self.t += 1
        updated_params = {}

        for key in params:
            if key not in self.m:
                self.m[key] = [np.zeros_like(grads[key][0]),np.zeros_like(grads[key][1])]
                self.v[key] = [np.zeros_like(grads[key][0]),np.zeros_like(grads[key][1])]

            self.m[key][0] = self.beta1 * self.m[key][0] + (1 - self.beta1) * grads[key][0]
            self.m[key][1] = self.beta1 * self.m[key][1] + (1 - self.beta1) * grads[key][1]

            self.v[key][0] = self.beta2 * self.v[key][0] + (1 - self.beta2) * (grads[key][0] ** 2)
            self.v[key][1] = self.beta2 * self.v[key][1] + (1 - self.beta2) * (grads[key][1] ** 2)

            m_hat_weights = self.m[key][0] / (1 - self.beta1 ** self.t)
            m_hat_bias = self.m[key][1] / (1 - self.beta1 ** self.t)
            v_hat_weights = self.v[key][0] / (1 - self.beta2 ** self.t)
            v_hat_bias = self.v[key][1] / (1 - self.beta2 ** self.t)

            updated_params[key] =[params[key][0] - self.learning_rate * m_hat_weights / (np.sqrt(v_hat_weights) + self.epsilon),
                                    params[key][1] - self.learning_rate * m_hat_bias / (np.sqrt(v_hat_bias) + self.epsilon)]

        return updated_params



In [80]:
class Softmax:
    def forward(self, X):
        exps = np.exp(X - np.max(X, axis=1, keepdims=True))
        self.output = exps / np.sum(exps, axis=1, keepdims=True)
        return self.output

    def backward(self, d_out):
        return self.output * (d_out - np.sum(d_out * self.output, axis=1, keepdims=True))


In [81]:
class FeedForwardNeuralNetwork:
    def __init__(self, input_dim, hidden_dims, output_dim, dropout_rate):
        self.layers = []
        
        # Adding first Dense layer
        prev_dim = input_dim
        for hidden_dim in hidden_dims:
            self.layers.append(DenseLayer(prev_dim, hidden_dim))
            self.layers.append(BatchNormalization(hidden_dim))
            self.layers.append(ReLU())
            self.layers.append(Dropout(dropout_rate))
            prev_dim = hidden_dim
        
        # Adding final Dense layer
        self.layers.append(DenseLayer(prev_dim, output_dim))
        self.layers.append(Softmax())

    def forward(self, X, training=True):
        # Forward pass through all layers
        for layer in self.layers:
            if isinstance(layer, (Dropout,BatchNormalization)):
                X = layer.forward(X, training)
            else:
                X = layer.forward(X)
        return X

    def backward(self, d_out, learning_rate):
        # Backward pass through all layers in reverse order
        for layer in reversed(self.layers):
            if isinstance(layer, (DenseLayer, BatchNormalization)):
                d_out = layer.backward(d_out, learning_rate)
            elif isinstance(layer, (ReLU, Dropout, Softmax)):
                d_out = layer.backward(d_out)

    def update_params(self, adam_optimizer):
        # Parameter and gradient dicts
        params = {}
        grads = {}
        
        for layer in self.layers:
            if isinstance(layer, DenseLayer):
                params[id(layer)] = [layer.weights,layer.bias]
                grads[id(layer)] = [layer.d_weights,layer.d_bias]
        
        # Updating parameters using Adam optimizer
        updated_params = adam_optimizer.update(params, grads)

        # Updating the layers with the new parameters
        for layer in self.layers:
            if isinstance(layer, DenseLayer):
                layer.weights = updated_params[id(layer)][0]
                layer.bias = updated_params[id(layer)][1]


In [82]:
from torchvision import datasets, transforms

# Define transformation
transform = transforms.ToTensor()

# Load the training dataset
train_dataset = datasets.FashionMNIST(root='./data', train=True, transform=transform, download=True)

# Load the test dataset separately
test_dataset = datasets.FashionMNIST(root='./data', train=False, transform=transform, download=True)

In [83]:
from torch.utils.data import DataLoader
import tqdm

input_dim = 28 * 28  # Image size (28x28)
hidden_dims = [128, 64]  # Hidden layers
output_dim = 10  # Number of classes (0-9)
dropout_rate = 0.2
learning_rate = 0.001
batch_size = 64
num_epochs = 10

# DataLoader for training
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

class Model:
    def __init__(self,input_dim,hidden_dims,output_dim,dropout_rate,learning_rate,batch_size,num_epochs):
        # Hyperparameters
        self.input_dim = input_dim 
        self.hidden_dims = hidden_dims
        self.output_dim = output_dim 
        self.dropout_rate = dropout_rate
        self.learning_rate = learning_rate
        self.batch_size = batch_size 
        self.num_epochs = num_epochs

        # Initialize the fnn
        self.fnn = FeedForwardNeuralNetwork(self.input_dim, self.hidden_dims, self.output_dim, self.dropout_rate)
        self.adam_optimizer = AdamOptimizer(self.learning_rate)

    def fit(self,train_loader):
        # Training loop
        print(self.num_epochs)
        for epoch in range(self.num_epochs):
            epoch_loss = 0
            correct = 0
            total = 0

            for images, labels in tqdm.tqdm(train_loader):
                # Flatten images
                images = images.view(-1, 28 * 28).numpy()
                
                # Forward pass
                outputs = self.fnn.forward(images, training=True)
                
                # One-hot encoding for labels
                one_hot_labels = np.eye(self.output_dim)[labels.numpy()]

                # Compute loss (cross-entropy loss)
                loss = -np.sum(one_hot_labels * np.log(outputs + 1e-8)) / len(labels)
                #print(len(labels))
                epoch_loss += loss/(self.output_dim*self.batch_size)
                #break
                # Backward pass
                d_out = outputs - np.eye(self.output_dim)[labels.numpy()]
                self.fnn.backward(d_out, self.learning_rate)

                # Update parameters
                self.fnn.update_params(self.adam_optimizer)

                # Calculate accuracy
                predictions = np.argmax(outputs, axis=1)
                correct += (predictions == labels.numpy()).sum()
                total += labels.size(0)

            # Display epoch results
            print("total: ",total,"correct: ",correct)
            epoch_accuracy = correct / total
            print(f'Epoch [{epoch+1}/{self.num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.4f}')

        return self.fnn



model1 = Model(input_dim,hidden_dims,output_dim,dropout_rate,learning_rate,batch_size,num_epochs)
model = model1.fit(train_loader)


10


  0%|          | 0/938 [00:00<?, ?it/s]

100%|██████████| 938/938 [00:13<00:00, 70.28it/s]


total:  60000 correct:  48517
Epoch [1/10], Loss: 0.9563, Accuracy: 0.8086


 76%|███████▌  | 712/938 [00:12<00:03, 58.95it/s]


KeyboardInterrupt: 

In [None]:
from sklearn.metrics import accuracy_score

# DataLoader for testing
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Testing phase
model_accuracy = 0
model_total = 0
all_predictions = []
all_true_labels = []

for images, labels in test_loader:
    # Flatten images
    images = images.view(-1, 28 * 28).numpy()

    # Forward pass (inference mode)
    outputs = model.forward(images, training=False)
    predictions = np.argmax(outputs, axis=1)

    # Collect results
    all_predictions.extend(predictions)
    all_true_labels.extend(labels.numpy())

# Calculate accuracy using sklearn
accuracy = accuracy_score(all_true_labels, all_predictions)
print(f'Test Accuracy: {accuracy:.4f}')


Test Accuracy: 0.8388


In [None]:
import pickle

# Save the trained model to a file
with open('fashion_mnist_model.pkl', 'wb') as file:
    pickle.dump(model, file)
print("Model saved successfully!")


Model saved successfully!


In [None]:
# Load the model from the file
with open('fashion_mnist_model.pkl', 'rb') as file:
    loaded_model = pickle.load(file)
print("Model loaded successfully!")


Model loaded successfully!


In [None]:
# Step 3: Run the model on the test data
all_predictions = []
all_labels = []
import torch

# Disable gradient computation for testing
with torch.no_grad():
    for images, labels in test_loader:
        # Flatten the images
        images = images.view(images.size(0), -1).numpy()

        # Forward pass through the loaded model
        outputs = loaded_model.forward(images)

        # Get the predicted class (highest probability)
        predictions = np.argmax(outputs, axis=1)

        # Collect predictions and true labels for accuracy computation
        all_predictions.extend(predictions)
        all_labels.extend(labels.numpy())

# Step 4: Evaluate the model
accuracy = accuracy_score(all_labels, all_predictions)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

Test Accuracy: 83.36%
