### Download the Fashion_MNIST dataset by using PyTorch's torchvision.datasets module.

In [None]:
from torchvision import datasets
import numpy as np

### Forward pass function

In [None]:
from torchvision import datasets
import numpy as np
class DenseLayer:
    """Fully connected layer with a built-in activation function.

    The layer owns its parameters and updates them itself inside
    ``backward`` using plain gradient descent.  Optional features:

    * dropout on the layer output (inverted dropout, so no rescaling is
      needed at inference time);
    * an L1 or L2 penalty added to the weight and bias gradients.

    Args:
        input_dim: Number of input features.
        output_dim: Number of units in the layer.
        activation: One of ``'relu'``, ``'sigmoid'`` or ``'softmax'``.
        lambda_reg: Strength of the regularisation penalty.
        reg_type: ``None`` for no penalty, ``"L1"`` (case-insensitive) for an
            L1 penalty; any other truthy value is treated as L2.
        dropout_rate: Probability of dropping a unit during training, in
            ``[0, 1)``.  ``None`` or ``0`` disables dropout.

    Raises:
        ValueError: If ``activation`` is unknown or ``dropout_rate`` is
            outside ``[0, 1)``.
    """

    def __init__(self, input_dim, output_dim, activation, lambda_reg=0.1, reg_type=None, dropout_rate=None):
        # A rate of 1 would divide by zero in the inverted-dropout scaling.
        if dropout_rate is not None and not 0 <= dropout_rate < 1:
            raise ValueError('dropout_rate must be in the range [0, 1)')

        self.input_dim = input_dim
        self.output_dim = output_dim
        # Small random weights, zero biases.
        self.weights = np.random.randn(input_dim, output_dim) * 0.01
        self.biases = np.zeros((1, output_dim))
        self.activation_name = activation
        self.lambda_reg = lambda_reg
        self.output = None
        self.input = None
        # Pre-activation values cached by forward() for use in backward().
        self.z = None
        self.reg_type = reg_type
        self.dropout_mask = None
        self.dropout_rate = dropout_rate

        if activation == 'relu':
            self.activation = self.relu
            self.activation_prime = self.relu_prime
        elif activation == 'sigmoid':
            self.activation = self.sigmoid
            self.activation_prime = self.sigmoid_prime
        elif activation == 'softmax':
            self.activation = self.softmax
            # Softmax gradients are computed directly in backward().
            self.activation_prime = None
        else:
            raise ValueError('activation function is not defined')

    def __str__(self):
        return f"""DenseLayer(input_dim:{self.input_dim}, output_dim:{self.output_dim}, activation:{self.activation_name})"""

    def forward(self, input_data, is_training=False):
        """Compute the layer output for a batch.

        Args:
            input_data: Array whose last dimension equals ``input_dim``.
            is_training: When true and a dropout rate is set, a fresh dropout
                mask is sampled and applied to the activations.

        Returns:
            The activated (and, in training, dropout-masked) output, also
            stored in ``self.output``.
        """
        self.input = input_data
        self.z = np.dot(self.input, self.weights) + self.biases
        activated = self.activation(self.z)

        if is_training and self.dropout_rate:
            # Inverted dropout: surviving units are scaled by 1 / keep_prob so
            # the expected activation matches what inference (no mask) sees.
            keep_prob = 1.0 - self.dropout_rate
            self.dropout_mask = (np.random.rand(*activated.shape) > self.dropout_rate) / keep_prob
            self.output = activated * self.dropout_mask
        else:
            # Drop any mask left over from an earlier training pass so that
            # backward() cannot apply a stale one.
            self.dropout_mask = None
            self.output = activated
        return self.output

    def backward(self, dA, learning_rate, y=None, is_training=False):
        """Backpropagate through the layer and update its parameters.

        Args:
            dA: Gradient of the loss with respect to this layer's output.
                Ignored for a softmax layer (see below).
            learning_rate: Step size for the gradient-descent update.
            y: Integer class labels for the batch.  Required when the
                activation is softmax, unused otherwise.
            is_training: When true, the dropout mask sampled by the last
                training-mode ``forward`` call is applied to ``dA``.

        Returns:
            Gradient of the loss with respect to this layer's input.

        Raises:
            ValueError: If the activation is softmax and ``y`` is ``None``.
        """
        if is_training and self.dropout_mask is not None:
            # Build a new array instead of ``dA *= mask`` so the caller's
            # gradient (possibly another layer's output) is left untouched.
            dA = dA * self.dropout_mask

        if self.activation_name == 'softmax':
            if y is None:
                raise ValueError('true labels y are required to backpropagate through a softmax layer')
            # Gradient with respect to the softmax inputs, averaged over the
            # batch.  This treats the layer as the network output: the
            # incoming dA is not used.
            y_one_hot = np.zeros_like(self.output)
            y_one_hot[np.arange(len(y)), y] = 1
            dZ = (self.output - y_one_hot) / len(y)
        else:
            # The activation derivative is taken at the pre-activation
            # values, not at the already-activated output.
            dZ = dA * self.activation_prime(self.z)

        # Input gradient uses the weights from before the update below.
        dA_prev = np.dot(dZ, self.weights.T)
        dW = np.dot(self.input.T, dZ)
        db = np.sum(dZ, axis=0, keepdims=True)

        if self.reg_type:
            if self.reg_type.upper() == "L1":
                weights_reg = self.lambda_reg * np.sign(self.weights)
                biases_reg = self.lambda_reg * np.sign(self.biases)
            else:
                weights_reg = self.lambda_reg * self.weights
                biases_reg = self.lambda_reg * self.biases
            self.weights -= learning_rate * (dW + weights_reg)
            self.biases -= learning_rate * (db + biases_reg)
        else:
            self.weights -= learning_rate * dW
            self.biases -= learning_rate * db

        return dA_prev

    def relu(self, x):
        """Element-wise max(0, x)."""
        return np.maximum(0, x)

    def relu_prime(self, x):
        """Derivative of ReLU: 1 where x > 0, else 0."""
        return np.where(x > 0, 1, 0)

    def sigmoid(self, x):
        """Element-wise logistic function."""
        return 1 / (1 + np.exp(-x))

    def sigmoid_prime(self, x):
        """Derivative of the logistic function evaluated at pre-activation x."""
        s = self.sigmoid(x)
        return s * (1 - s)

    def softmax(self, Z):
        """Row-wise softmax; rows are shifted by their max before exponentiating."""
        Z_shift = Z - np.max(Z, axis=1, keepdims=True)
        exp_scores = np.exp(Z_shift)
        return exp_scores / (np.sum(exp_scores, axis=1, keepdims=True) + 1e-9)
        


In [None]:
class NeuralNetwork:
    """Sequential stack of layers trained with mini-batch gradient descent.

    Layers are any objects exposing ``forward(X, is_training)`` and
    ``backward(dA, learning_rate, y, is_training=...)``.  Per-epoch metrics
    are accumulated in ``self.history`` under the keys ``'train_loss'``,
    ``'val_loss'``, ``'train_acc'`` and ``'val_acc'``.
    """

    def __init__(self):
        self.layers = []
        self.history = {'train_loss': [], 'val_loss': [], 'train_acc': [], 'val_acc': []}

    def add_layer(self, layer):
        """Append ``layer`` to the end of the stack."""
        self.layers.append(layer)

    def forward(self, X, is_training):
        """Run ``X`` through every layer in order and return the final output."""
        for layer in self.layers:
            X = layer.forward(X, is_training)
        return X

    def predict(self, X):
        """Return the index of the largest output per row (inference mode)."""
        output = self.forward(X, is_training=False)
        return np.argmax(output, axis=1)

    def cross_entropy_loss(self, y, output):
        """Mean negative log-likelihood of the true classes.

        Args:
            y: Integer class labels, one per row of ``output``.
            output: Per-class scores (probabilities) for each sample.

        Returns:
            The loss averaged over the batch.  ``1e-9`` is added inside the
            log to avoid ``log(0)``.
        """
        m = y.shape[0]
        log_likelihood = -np.log(output[np.arange(m), y] + 1e-9)
        return np.sum(log_likelihood) / m

    def train(self, train_data, train_labels, val_data, val_labels, epochs, batch_size, learning_rate,
              decay_rate=0.02, decay_every=50):
        """Train the network and record per-epoch metrics in ``self.history``.

        Args:
            train_data: Training inputs, one sample per row.
            train_labels: Integer class labels for ``train_data``.
            val_data: Validation inputs.
            val_labels: Integer class labels for ``val_data``.
            epochs: Number of passes over the training set.
            batch_size: Number of samples per gradient step.
            learning_rate: Initial step size.
            decay_rate: Factor used in the learning-rate decay formula.
            decay_every: Apply the decay every this many epochs (including
                epoch 0, where it is a no-op).  A falsy value disables decay.

        The caller's arrays are not modified; shuffling works on local copies.
        """
        num_samples = train_data.shape[0]
        for epoch in range(epochs):
            # Reshuffle each epoch so mini-batches differ between epochs.
            permutation = np.random.permutation(num_samples)
            train_data = train_data[permutation]
            train_labels = train_labels[permutation]

            # Decrease the learning rate every `decay_every` epochs.
            # NOTE(review): the decay is applied to the already-decayed rate,
            # so successive decays compound; kept as in the original schedule.
            if decay_every and epoch % decay_every == 0:
                learning_rate = learning_rate / (1 + decay_rate * epoch)
                print(f"Learning rate :{learning_rate} and decay_rate :{decay_rate}")

            for start in range(0, num_samples, batch_size):
                X_batch = train_data[start:start + batch_size]
                y_batch = train_labels[start:start + batch_size]
                output = self.forward(X_batch, is_training=True)
                self.backward(output, learning_rate, y_batch, is_training=True)

            # Evaluate in inference mode so training-only behaviour such as
            # dropout does not distort the reported metrics; each forward
            # pass is reused for both loss and accuracy.
            train_output = self.forward(train_data, is_training=False)
            train_loss = self.cross_entropy_loss(train_labels, train_output)
            train_acc = np.mean(np.argmax(train_output, axis=1) == train_labels)

            val_output = self.forward(val_data, is_training=False)
            val_loss = self.cross_entropy_loss(val_labels, val_output)
            val_accuracy = np.mean(np.argmax(val_output, axis=1) == val_labels)

            self.history['train_loss'].append(train_loss)
            self.history['val_loss'].append(val_loss)
            self.history['train_acc'].append(train_acc)
            self.history['val_acc'].append(val_accuracy)
            print(f'Epoch {epoch+1}, Training Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}')

    def backward(self, output, learning_rate, y_train_batch, is_training=True):
        """Backpropagate from the last layer to the first.

        ``output`` is handed to the last layer as its incoming gradient and
        each layer's return value is passed to the layer before it.  The
        ``is_training`` flag is forwarded to every layer.
        """
        for layer in reversed(self.layers):
            output = layer.backward(output, learning_rate, y_train_batch, is_training=is_training)

In [None]:
import os
import gzip
import urllib.request
import numpy as np

# Remote location of the Fashion-MNIST archives and the local folder they are
# cached in, followed by the archive name for each split.
DATASET_BASE_URL = 'http://fashion-mnist.s3-website.eu-central-1.amazonaws.com'
DATASET_BASE_FOLDER = './data/FashionMNIST/raw'
DATA_FILENAME = dict(
    train_images='train-images-idx3-ubyte.gz',
    train_labels='train-labels-idx1-ubyte.gz',
    test_images='t10k-images-idx3-ubyte.gz',
    test_labels='t10k-labels-idx1-ubyte.gz',
)

def one_hot(y, num_classes):
    """Return one-hot rows for the class indices in ``y``.

    Each index selects the matching row of a ``num_classes`` x ``num_classes``
    identity matrix, so the result has one extra trailing axis of length
    ``num_classes``.
    """
    identity = np.identity(num_classes)
    return identity[y]
    
# Helper function to download and extract the dataset
def download_and_extract(filename, is_image=False):
    """Return the contents of one gzipped dataset file as a NumPy array.

    The archive is looked up in ``DATASET_BASE_FOLDER`` and downloaded from
    ``DATASET_BASE_URL`` only when it is not already cached there.

    Args:
        filename: Archive name, relative to both the base URL and the cache
            folder.
        is_image: When true, the payload after a 16-byte header is reshaped to
            rows of ``28 * 28`` values and scaled to ``[0, 1]``.  Otherwise
            the payload after an 8-byte header is returned as raw ``uint8``
            values.

    Returns:
        A float array of shape ``(n, 784)`` for images, or a ``uint8`` array
        for labels.
    """
    local_path = os.path.join(DATASET_BASE_FOLDER, filename)
    if not os.path.exists(local_path):
        os.makedirs(DATASET_BASE_FOLDER, exist_ok=True)
        # Download to a temporary name and move it into place only once the
        # transfer has finished; otherwise an interrupted download would
        # leave a truncated archive that later runs mistake for a cached one.
        partial_path = local_path + '.part'
        try:
            urllib.request.urlretrieve('/'.join([DATASET_BASE_URL, filename]), partial_path)
            os.replace(partial_path, local_path)
        finally:
            if os.path.exists(partial_path):
                os.remove(partial_path)

    with gzip.open(local_path, 'rb') as f:
        raw = f.read()

    if is_image:
        return np.frombuffer(raw, np.uint8, offset=16).reshape(-1, 28 * 28) / 255.0
    return np.frombuffer(raw, np.uint8, offset=8)

# Fetch (or load from the local cache) the four dataset arrays.
train_images = download_and_extract(DATA_FILENAME['train_images'], is_image=True)
test_images = download_and_extract(DATA_FILENAME['test_images'], is_image=True)
train_labels = download_and_extract(DATA_FILENAME['train_labels'])
test_labels = download_and_extract(DATA_FILENAME['test_labels'])

# Hold out the last 20% of the training set for validation.
num_train = int(0.8 * len(train_images))
train_data, val_data = np.split(train_images, [num_train])
train_labels, val_labels = np.split(train_labels, [num_train])

print(f'Training data shape   : Images - {train_data.shape} | Labels - {train_labels.shape}')
print(f'Validation data shape : Images - {val_data.shape} | Labels - {val_labels.shape}')
print(f'Test data shape       : Images - {test_images.shape} | Labels - {test_labels.shape}')

# One-hot versions of the integer labels (10 classes).
val_labels_one_hot = one_hot(val_labels, 10)
train_labels_one_hot = one_hot(train_labels, 10)
print(f"train_labels_one_hot: {train_labels_one_hot.shape}, val_labels_one_hot {val_labels_one_hot.shape}  ")


In [None]:
# Hyperparameters
input_size = 28 * 28  # one flattened 28x28 image per row
hidden_size = 128
output_size = 10
epochs = 200
learning_rate = 0.01

# Network: input -> hidden ReLU layer with 50% dropout -> softmax output.
# Notes carried over from earlier experiments: reg_type="L2", a sigmoid
# activation, and a second hidden ReLU layer (dropout_rate=.2) did not help.
nn = NeuralNetwork()
nn.add_layer(DenseLayer(input_dim=input_size, output_dim=hidden_size, activation='relu',
                        reg_type=None, dropout_rate=.5))
nn.add_layer(DenseLayer(input_dim=hidden_size, output_dim=output_size, activation='softmax',
                        reg_type=None, dropout_rate=None))
nn.train(train_data, train_labels, val_data, val_labels,
         epochs=epochs, batch_size=64, learning_rate=learning_rate)


In [None]:
import matplotlib.pyplot as plt

# Loss curves: one value per epoch taken from nn.history.
plt.title('Training and validation loss')
plt.xlabel('Epochs')
plt.ylabel('loss')
plt.plot(nn.history['train_loss'], label='Training loss')
plt.plot(nn.history['val_loss'], label='Validation loss')
plt.legend()
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Accuracy curves: one value per epoch taken from nn.history.
plt.title('Training and validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('accuracy')
plt.plot(nn.history['train_acc'], label='Training acc')
plt.plot(nn.history['val_acc'], label='Validation acc')
plt.legend()
plt.show()

In [None]:
 test_acc = np.mean(nn.predict(test_images) == test_labels)
test_acc