In [25]:
from torchvision import datasets, transforms

# Define transformation
transform = transforms.ToTensor()

# Load the training dataset
train_dataset = datasets.FashionMNIST(root='./data', train=True, transform=transform, download=True)

# Load the test dataset separately
test_dataset = datasets.FashionMNIST(root='./data', train=False, transform=transform, download=True)

In [26]:
# 1.Dense Layer
# 2.Normalization: Batch Normalization
# 3.Activation: ReLU
# 4.Regularization: Dropout
# 5.Optimization: Adaptive Moment Estimation (Adam)
# 6.Regression: Softmax for Multi-class Classification

In [27]:
import torch
from torchvision import datasets, transforms
import tensorflow as tf
from tensorflow.keras import layers, models
import pickle
from tqdm import tqdm

In [28]:
# Convert datasets to NumPy arrays for TensorFlow
x_train = train_dataset.data.numpy().astype('float32') / 255.0
y_train = train_dataset.targets.numpy()
x_test = test_dataset.data.numpy().astype('float32') / 255.0
y_test = test_dataset.targets.numpy()

# Flatten the images (28x28 -> 784)
x_train = x_train.reshape(-1, 28 * 28)
x_test = x_test.reshape(-1, 28 * 28)

# Hyperparameters
hidden_size = 256
num_classes = 10
batch_size = 64
learning_rate = 0.001
num_epochs = 10
dropout_rate = 0.5



In [29]:

import numpy as np

# 1. Dense Layer
class Dense:
    def __init__(self, input_size, output_size):
        self.weights = np.random.randn(input_size, output_size) * 0.01
        self.bias = np.zeros((1, output_size))

    def forward(self, X):
        self.input = X
        self.output = np.dot(X, self.weights) + self.bias
        return self.output

    def backward(self, d_output, learning_rate):
        d_weights = np.dot(self.input.T, d_output)
        d_bias = np.sum(d_output, axis=0, keepdims=True)
        d_input = np.dot(d_output, self.weights.T)
        
        # Update weights and bias
        self.weights -= learning_rate * d_weights
        self.bias -= learning_rate * d_bias
        return d_input


# 2. Batch Normalization
class BatchNormalization:
    def __init__(self, size, epsilon=1e-5, momentum=0.9):
        self.gamma = np.ones((1, size))
        self.beta = np.zeros((1, size))
        self.epsilon = epsilon
        self.momentum = momentum
        self.running_mean = np.zeros((1, size))
        self.running_var = np.zeros((1, size))

    def forward(self, X, training=True):
        if training:
            mean = np.mean(X, axis=0)
            var = np.var(X, axis=0)
            self.X_centered = X - mean
            self.std_inv = 1.0 / np.sqrt(var + self.epsilon)
            self.X_norm = self.X_centered * self.std_inv
            self.output = self.gamma * self.X_norm + self.beta

            # Update running mean and variance
            self.running_mean = self.momentum * self.running_mean + (1 - self.momentum) * mean
            self.running_var = self.momentum * self.running_var + (1 - self.momentum) * var
        else:
            self.X_norm = (X - self.running_mean) / np.sqrt(self.running_var + self.epsilon)
            self.output = self.gamma * self.X_norm + self.beta
        return self.output

    def backward(self, d_output, learning_rate):
        m = d_output.shape[0]
        d_gamma = np.sum(d_output * self.X_norm, axis=0)
        d_beta = np.sum(d_output, axis=0)
        d_X_norm = d_output * self.gamma
        d_var = np.sum(d_X_norm * self.X_centered, axis=0) * -0.5 * self.std_inv**3
        d_mean = np.sum(d_X_norm * -self.std_inv, axis=0) + d_var * np.mean(-2 * self.X_centered, axis=0)
        d_input = (d_X_norm * self.std_inv) + (d_var * 2 * self.X_centered / m) + (d_mean / m)

        # Update gamma and beta
        self.gamma -= learning_rate * d_gamma
        self.beta -= learning_rate * d_beta
        return d_input


# 3. ReLU Activation
class ReLU:
    def forward(self, X):
        self.input = X
        return np.maximum(0, X)

    def backward(self, d_output):
        return d_output * (self.input > 0)


# 4. Dropout
class Dropout:
    def __init__(self, dropout_rate):
        self.dropout_rate = dropout_rate

    def forward(self, X, training=True):
        if training:
            self.mask = np.random.binomial(1, 1 - self.dropout_rate, size=X.shape)
            return X * self.mask * (1.0 / (1 - self.dropout_rate))
        return X

    def backward(self, d_output):
        return d_output * self.mask


# 5. Adam Optimizer
class Adam:
    def __init__(self, params, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8):
        self.params = params
        self.learning_rate = learning_rate
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.m = [np.zeros_like(p) for p in params]
        self.v = [np.zeros_like(p) for p in params]
        self.t = 0

    def update(self, grads):
        self.t += 1
        updated_params = []
        
        for i, (param, grad) in enumerate(zip(self.params, grads)):
            self.m[i] = self.beta1 * self.m[i] + (1 - self.beta1) * grad
            self.v[i] = self.beta2 * self.v[i] + (1 - self.beta2) * (grad**2)
            
            m_hat = self.m[i] / (1 - self.beta1**self.t)
            v_hat = self.v[i] / (1 - self.beta2**self.t)
            
            param_update = self.learning_rate * m_hat / (np.sqrt(v_hat) + self.epsilon)
            param -= param_update
            updated_params.append(param)
        
        return updated_params


# 6. Softmax for Multi-class Classification
class Softmax:
    def forward(self, X):
        exps = np.exp(X - np.max(X, axis=1, keepdims=True))
        return exps / np.sum(exps, axis=1, keepdims=True)

    def backward(self, d_output, y_true):
        m = y_true.shape[0]
        d_output[range(m), y_true] -= 1
        return d_output / m


In [30]:
"""
# Build the model
model = models.Sequential([
    layers.Input(shape=(28 * 28,)),
    layers.BatchNormalization(),
    layers.Dense(hidden_size, activation='relu'),
    layers.Dropout(dropout_rate),
    layers.Dense(num_classes, activation='softmax')
])

# Compile the model
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Training loop with tqdm for progress tracking
for epoch in range(num_epochs):
    print(f'Epoch {epoch + 1}/{num_epochs}')
    model.fit(x_train, y_train, batch_size=batch_size, epochs=1, verbose=1)

# Model evaluation
test_loss, test_accuracy = model.evaluate(x_test, y_test, verbose=2)
print(f'Accuracy of the model on the test set: {test_accuracy * 100:.2f}%')

# Save the trained model
model.save('fnn_fashionmnist.h5')
"""

"\n# Build the model\nmodel = models.Sequential([\n    layers.Input(shape=(28 * 28,)),\n    layers.BatchNormalization(),\n    layers.Dense(hidden_size, activation='relu'),\n    layers.Dropout(dropout_rate),\n    layers.Dense(num_classes, activation='softmax')\n])\n\n# Compile the model\nmodel.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),\n              loss='sparse_categorical_crossentropy',\n              metrics=['accuracy'])\n\n# Training loop with tqdm for progress tracking\nfor epoch in range(num_epochs):\n    print(f'Epoch {epoch + 1}/{num_epochs}')\n    model.fit(x_train, y_train, batch_size=batch_size, epochs=1, verbose=1)\n\n# Model evaluation\ntest_loss, test_accuracy = model.evaluate(x_test, y_test, verbose=2)\nprint(f'Accuracy of the model on the test set: {test_accuracy * 100:.2f}%')\n\n# Save the trained model\nmodel.save('fnn_fashionmnist.h5')\n"

In [31]:
# Cross-Entropy Loss function
def cross_entropy_loss(y_true, y_pred):
    m = y_true.shape[0]
    log_likelihood = -np.log(y_pred[range(m), y_true])
    loss = np.sum(log_likelihood) / m
    return loss

# Accuracy function
def accuracy(y_true, y_pred):
    return np.mean(y_true == y_pred)


In [32]:

# Hyperparameters
input_size = 28 * 28
hidden_size = 256
num_classes = 10
batch_size = 64
learning_rate = 0.001
num_epochs = 10
dropout_rate = 0.5


# Initialize layers
dense1 = Dense(input_size, hidden_size)
batch_norm1 = BatchNormalization(hidden_size)
relu = ReLU()
dropout = Dropout(dropout_rate)
dense2 = Dense(hidden_size, num_classes)
softmax = Softmax()

# Initialize optimizer
params = [dense1.weights, dense1.bias, batch_norm1.gamma, batch_norm1.beta, dense2.weights, dense2.bias]
optimizer = Adam(params, learning_rate=learning_rate)

# Forward and backward pass for one batch
def forward_pass(X, training=True):
    out = dense1.forward(X)
    out = batch_norm1.forward(out, training)
    out = relu.forward(out)
    out = dropout.forward(out, training)
    out = dense2.forward(out)
    out = softmax.forward(out)
    return out

# Compute loss and accuracy function
def compute_loss_and_accuracy(X, y):
    logits = forward_pass(X, training=False)
    loss = cross_entropy_loss(y, logits)
    predictions = np.argmax(logits, axis=1)
    acc = accuracy(y, predictions)  # Renamed 'accuracy' variable to 'acc' to avoid conflict
    return loss, acc

# Training loop
for epoch in range(num_epochs):
    permutation = np.random.permutation(x_train.shape[0])
    x_train_shuffled = x_train[permutation]
    y_train_shuffled = y_train[permutation]
    
    epoch_loss = 0
    epoch_accuracy = 0
    for i in tqdm(range(0, x_train.shape[0], batch_size), desc=f"Epoch {epoch + 1}/{num_epochs}"):
        X_batch = x_train_shuffled[i:i + batch_size]
        y_batch = y_train_shuffled[i:i + batch_size]
        
        # Forward pass
        logits = forward_pass(X_batch)
        
        # Loss calculation
        loss = cross_entropy_loss(y_batch, logits)
        epoch_loss += loss * X_batch.shape[0]
        
        # Calculate accuracy
        predictions = np.argmax(logits, axis=1)
        batch_accuracy = accuracy(y_batch, predictions)
        epoch_accuracy += batch_accuracy * X_batch.shape[0]
        
        # Backward pass
        d_logits = softmax.backward(logits, y_batch)
        d_out = dense2.backward(d_logits, learning_rate=0)  # Pass gradients, no lr as Adam will update it
        
        d_out = dropout.backward(d_out)
        d_out = relu.backward(d_out)
        
        d_out = batch_norm1.backward(d_out, learning_rate=0)
        dense1.backward(d_out, learning_rate=0)  # Use learning_rate=0 here as Adam handles it
        
        # Update parameters using Adam
        grads = [dense1.weights, dense1.bias, batch_norm1.gamma, batch_norm1.beta, dense2.weights, dense2.bias]
        optimizer.update(grads)
    
    epoch_loss /= x_train.shape[0]
    epoch_accuracy /= x_train.shape[0]
    
    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy * 100:.2f}%")

# Model evaluation on the test set
test_loss, test_accuracy = compute_loss_and_accuracy(x_test, y_test)
print(f"Test set accuracy: {test_accuracy * 100:.2f}%")

Epoch 1/10: 100%|██████████| 938/938 [00:14<00:00, 63.97it/s]


Epoch 1/10, Loss: 2.3026, Accuracy: 9.88%


Epoch 2/10: 100%|██████████| 938/938 [00:14<00:00, 63.87it/s]


Epoch 2/10, Loss: 2.3026, Accuracy: 9.96%


Epoch 3/10: 100%|██████████| 938/938 [00:22<00:00, 42.48it/s]


Epoch 3/10, Loss: 2.3026, Accuracy: 9.96%


Epoch 4/10: 100%|██████████| 938/938 [00:14<00:00, 63.10it/s]


Epoch 4/10, Loss: 2.3026, Accuracy: 9.89%


Epoch 5/10: 100%|██████████| 938/938 [00:16<00:00, 56.09it/s]


Epoch 5/10, Loss: 2.3026, Accuracy: 10.11%


Epoch 6/10: 100%|██████████| 938/938 [00:14<00:00, 65.57it/s]


Epoch 6/10, Loss: 2.3026, Accuracy: 9.98%


Epoch 7/10: 100%|██████████| 938/938 [00:14<00:00, 63.24it/s]


Epoch 7/10, Loss: 2.3026, Accuracy: 10.00%


Epoch 8/10: 100%|██████████| 938/938 [00:17<00:00, 53.58it/s]


Epoch 8/10, Loss: 2.3026, Accuracy: 10.00%


Epoch 9/10: 100%|██████████| 938/938 [00:15<00:00, 58.87it/s]


Epoch 9/10, Loss: 2.3026, Accuracy: 10.00%


Epoch 10/10: 100%|██████████| 938/938 [00:15<00:00, 60.22it/s]


Epoch 10/10, Loss: 2.3026, Accuracy: 10.00%
Test set accuracy: 10.00%


In [33]:
# Save the trained model weights using pickle
model_weights = model.get_weights()  # Get the model weights
with open('fnn_fashionmnist_weights.pkl', 'wb') as f:
    pickle.dump(model_weights, f)

# To load the weights back into the model later
with open('fnn_fashionmnist_weights.pkl', 'rb') as f:
    loaded_weights = pickle.load(f)

model.set_weights(loaded_weights)  # Load the weights back into the model

In [34]:
import numpy as np

# Run inference on the test data
predictions = model.predict(x_test)

# Get the predicted classes
predicted_classes = np.argmax(predictions, axis=1)

# Evaluate the model's performance on the test data
test_loss, test_accuracy = model.evaluate(x_test, y_test, verbose=2)
print(f'Accuracy of the model on the test set: {test_accuracy * 100:.2f}%')

# Optionally, compare predicted classes with true labels
correct_predictions = np.sum(predicted_classes == y_test)
total_predictions = y_test.shape[0]
print(f'Correct predictions: {correct_predictions}/{total_predictions}')

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
313/313 - 1s - 2ms/step - accuracy: 0.8635 - loss: 0.3728
Accuracy of the model on the test set: 86.35%
Correct predictions: 8635/10000
