In [1]:
import numpy as np
import struct

def read_idx(filename):
    """Read an IDX-format file (the container used by MNIST) into a NumPy array.

    The header is a 4-byte magic (two zero bytes, a type code, the number of
    dimensions) followed by one big-endian unsigned 32-bit size per dimension.
    The remaining bytes are the array data, stored big-endian.

    Parameters
    ----------
    filename : str or path-like
        Path of the IDX file to read.

    Returns
    -------
    numpy.ndarray
        Read-only array (it is a view over the bytes read from the file) with
        the shape given in the header and the dtype given by the type code.

    Raises
    ------
    ValueError
        If the header is truncated, the magic prefix is not zero, the type
        code is unknown, or the payload size does not match the header shape.
    """
    # IDX type codes mapped to the matching big-endian NumPy dtypes.
    idx_dtypes = {
        0x08: np.dtype(np.uint8),
        0x09: np.dtype(np.int8),
        0x0B: np.dtype('>i2'),
        0x0C: np.dtype('>i4'),
        0x0D: np.dtype('>f4'),
        0x0E: np.dtype('>f8'),
    }

    with open(filename, 'rb') as f:
        magic = f.read(4)
        if len(magic) != 4:
            raise ValueError(f'{filename!r}: truncated IDX header')
        zero, data_type, dims = struct.unpack('>HBB', magic)

        if zero != 0:
            raise ValueError(f'{filename!r}: not an IDX file (bad magic prefix {zero:#06x})')
        if data_type not in idx_dtypes:
            raise ValueError(f'{filename!r}: unsupported IDX data type {data_type:#04x}')

        # All dimension sizes are read in one go instead of one read per dimension.
        size_bytes = f.read(4 * dims)
        if len(size_bytes) != 4 * dims:
            raise ValueError(f'{filename!r}: truncated IDX dimension table')
        shape = struct.unpack(f'>{dims}I', size_bytes)

        # reshape raises ValueError itself when the payload does not fit `shape`.
        return np.frombuffer(f.read(), dtype=idx_dtypes[data_type]).reshape(shape)

In [2]:
def load_mnist(image_path, label_path):
    """Read one MNIST split from its two IDX files.

    Parameters
    ----------
    image_path : path of the IDX file holding the images.
    label_path : path of the IDX file holding the labels.

    Returns
    -------
    tuple
        ``(images, labels)`` exactly as returned by ``read_idx`` for each file.
    """
    # Tuple elements are evaluated left to right, so images are still read first.
    return read_idx(image_path), read_idx(label_path)

# Dataset locations, relative to the notebook's working directory.
# Forward slashes are used because they are accepted on Windows as well as on
# POSIX systems, whereas backslash separators only resolve on Windows.
train_image_path = 'input/train-images.idx3-ubyte'
train_label_path = 'input/train-labels.idx1-ubyte'
test_image_path = 'input/t10k-images.idx3-ubyte'
test_label_path = 'input/t10k-labels.idx1-ubyte'

In [3]:


# Parse both dataset splits from disk.
train_images, train_labels = load_mnist(train_image_path, train_label_path)
test_images, test_labels = load_mnist(test_image_path, test_label_path)

# Print every array's shape as a quick sanity check on the parsed files.
for caption, array in (
    ('Train images', train_images),
    ('Train labels', train_labels),
    ('Test images', test_images),
    ('Test labels', test_labels),
):
    print(f'{caption} shape: {array.shape}')


Train images shape: (60000, 28, 28)
Train labels shape: (60000,)
Test images shape: (10000, 28, 28)
Test labels shape: (10000,)


In [13]:
import numpy as np

class Layer:
    """One fully connected layer: weights, bias, activation name and caches.

    Parameters
    ----------
    no_of_neurons : int
        Number of units in this layer (columns of the weight matrix).
    input_no : int
        Number of inputs feeding the layer (rows of the weight matrix).
    activation : str
        Name of the activation; it is stored as given and interpreted by the
        network that owns the layer.
    """

    def __init__(self, no_of_neurons, input_no, activation):
        # Uniform initialisation in [-0.5, 0.5); weights are drawn before the
        # bias so a seeded global NumPy RNG yields the same values as before.
        self.weights = np.random.rand(input_no, no_of_neurons) - 0.5
        self.bias = np.random.rand(1, no_of_neurons) - 0.5
        self.activation = activation
        # Caches filled in by the owning network; None until first used.
        self.z = None          # pre-activation values
        self.out_vals = None   # post-activation values
        self.delta = None      # error term assigned during backpropagation

class NeuralNetwork:
    """Feed-forward network of dense layers trained one sample at a time.

    Parameters
    ----------
    input_para : int
        Number of input features fed to the first layer.
    layers : sequence
        One entry per layer; ``entry[0]`` is the number of neurons and
        ``entry[1]`` the activation name ('relu', 'sigmoid', 'tanh',
        'softmax'; any other value means identity).

    Notes
    -----
    ``compute_loss`` and the output-layer error in ``Backpropagation`` index
    the network output by the integer class label, so labels must be integers
    in ``range(number of output neurons)``.
    """

    # Lower bound applied to a predicted probability before taking its log,
    # so a probability that underflowed to 0 yields a large finite loss
    # instead of infinity.
    _MIN_PROB = 1e-12

    def __init__(self, input_para, layers):
        self.ListOfLayers = []
        prev_no = input_para

        # Each layer's input width is the previous layer's neuron count.
        for layer in layers:
            temp_obj = Layer(layer[0], prev_no, layer[1])
            self.ListOfLayers.append(temp_obj)
            prev_no = layer[0]

    def apply_activation(self, values, activation_fun):
        """Apply the named activation element-wise (softmax: along the last axis).

        Any name other than 'relu', 'sigmoid', 'tanh' or 'softmax' returns
        ``values`` unchanged.
        """
        if activation_fun == 'relu':
            return np.maximum(0, values)
        elif activation_fun == 'sigmoid':
            return 1 / (1 + np.exp(-values))
        elif activation_fun == 'tanh':
            return np.tanh(values)
        elif activation_fun == 'softmax':
            # Subtracting the row maximum keeps exp() from overflowing.
            e_x = np.exp(values - np.max(values, axis=-1, keepdims=True))
            return e_x / np.sum(e_x, axis=-1, keepdims=True)
        else:
            return values  # No activation (identity)

    def apply_activation_derivative(self, values, activation_fun):
        """Derivative of the named activation, evaluated at pre-activations ``values``.

        Only 'relu', 'sigmoid' and 'tanh' have a dedicated branch. Every other
        name, including 'softmax', yields an array of ones.
        """
        if activation_fun == 'relu':
            return np.where(values > 0, 1, 0)
        elif activation_fun == 'sigmoid':
            sigmoid = self.apply_activation(values, 'sigmoid')
            return sigmoid * (1 - sigmoid)
        elif activation_fun == 'tanh':
            return 1 - np.tanh(values) ** 2
        else:
            return np.ones_like(values)  # No activation (identity)

    def train(self, X, y, epochs=1000, learning_rate=0.01,printEvery=1):
        """Run per-sample gradient descent over ``X`` for ``epochs`` passes.

        Parameters
        ----------
        X : indexable collection of input samples.
        y : indexable collection of integer class labels, aligned with ``X``.
        epochs : number of full passes over ``X``.
        learning_rate : step size used for every update.
        printEvery : print the loss every this many epochs; a falsy value
            (0 or None) disables the report.
        """
        for epoch in range(epochs):
            for i in range(len(X)):
                self.Backpropagation(X[i], y[i], learning_rate)
            # The truthiness guard avoids `epoch % 0` when reporting is disabled.
            if printEvery and epoch % printEvery == 0:
                loss = self.compute_loss(X, y)
                print(f'Epoch {epoch}, Loss: {loss}')

    def compute_loss(self, X, y):
        """Mean negative log of the probability assigned to each true label.

        Returns 0.0 for an empty ``X``. Calling this runs ``forward`` on every
        sample, so the per-layer caches are overwritten.
        """
        m = len(X)
        if m == 0:
            return 0.0

        total_loss = 0
        for i in range(m):
            output = self.forward(X[i])
            # Floor the probability so log() never receives an exact zero.
            prob = np.maximum(output[0, y[i]], self._MIN_PROB)
            total_loss -= np.log(prob)
        return total_loss / m

    def forward(self, x):
        """Propagate ``x`` through every layer and return the last layer's output.

        Each layer's pre-activation (``z``) and output (``out_vals``) are
        cached on the layer for use by ``Backpropagation``.
        """
        output = x
        for layer in self.ListOfLayers:
            pre = np.dot(output, layer.weights) + layer.bias
            output = self.apply_activation(pre, layer.activation)
            layer.z = pre
            layer.out_vals = output
        return output

    def Backpropagation(self, x, y, learning_rate):
        """Do one forward/backward pass for ``(x, y)`` and update all parameters.

        The output-layer error is ``output - one_hot(y)``, which is the
        gradient of the cross-entropy loss with respect to the pre-activations
        of a softmax output layer; the derivative of the output layer's own
        activation is never applied.
        """
        m = 1  # stochastic gradient descent
        self.forward(x)

        # Output layer error
        output_layer = self.ListOfLayers[-1]
        n_outputs = len(output_layer.out_vals[0])
        delta = output_layer.out_vals - np.eye(n_outputs)[y]
        output_layer.delta = delta

        # Backward pass: every delta is computed before any weight changes,
        # so each one uses the next layer's pre-update weights.
        for i in reversed(range(len(self.ListOfLayers) - 1)):
            layer = self.ListOfLayers[i]
            next_layer = self.ListOfLayers[i + 1]
            delta = np.dot(next_layer.delta, next_layer.weights.T) * self.apply_activation_derivative(layer.z, layer.activation)
            layer.delta = delta

        # Update weights and biases
        for i in range(len(self.ListOfLayers)):
            layer = self.ListOfLayers[i]
            # atleast_2d turns a 1-D sample into a row vector so a_prev.T is a column.
            a_prev = np.atleast_2d(x if i == 0 else self.ListOfLayers[i - 1].out_vals)
            layer.weights -= learning_rate * np.dot(a_prev.T, layer.delta) / m
            layer.bias -= learning_rate * np.sum(layer.delta, axis=0, keepdims=True) / m

    def addCustomWeights(self, weights, bias, ind):
        """Replace the weights and bias of layer ``ind`` with the given arrays."""
        self.ListOfLayers[ind].weights = weights
        self.ListOfLayers[ind].bias = bias

    def WeightShape(self):
        """Print the shape of every layer's weight matrix."""
        for i, layer in enumerate(self.ListOfLayers):
            print(f'weight{i} - {layer.weights.shape}')


In [None]:
#to compare the performance
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten

# Flatten each image into a 784-element row so it can feed the dense layers.
train_images = train_images.reshape(len(train_images), 784)
test_images = test_images.reshape(len(test_images), 784)

# Baseline architecture: two 16-unit ReLU layers followed by a 10-unit softmax.
baseline_layers = [
    Dense(16, activation='relu', input_shape=(784,)),
    Dense(16, activation='relu'),
    Dense(10, activation='softmax'),
]
model = Sequential(baseline_layers)

# Plain SGD with a loss that accepts integer class labels.
model.compile(
    optimizer='sgd',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Show the architecture and parameter counts.
model.summary()

# Fit on the training split, holding out 20% of it for validation.
model.fit(
    train_images,
    train_labels,
    epochs=50,
    batch_size=32,
    validation_split=0.2,
)

# Score the fitted model on the test split.
test_loss, test_acc = model.evaluate(test_images, test_labels)
print(f'Test accuracy: {test_acc}')


In [10]:
layers = [(16,'relu'),(16,'relu'),(10,'softmax')]

# Flatten the images here so this cell does not rely on another cell having
# already reshaped `train_images`; the reshape is a no-op on flat input.
train_inputs = train_images.reshape(len(train_images), -1)

# Layer weights are drawn from NumPy's global RNG; seed it so that a fresh
# "Restart & Run All" reproduces the same initial weights and loss curve.
np.random.seed(42)

mymodel = NeuralNetwork(train_inputs.shape[1], layers)
mymodel.train(train_inputs, train_labels, epochs=10, learning_rate=0.01)

Epoch 0, Loss: 0.35439571961563665
Epoch 1, Loss: 0.28293043751114016
Epoch 2, Loss: 0.2805918929513196
Epoch 3, Loss: 0.23798009708583917
Epoch 4, Loss: 0.22582139560457132
Epoch 5, Loss: 0.24428329802722393
Epoch 6, Loss: 0.23048055662673356
Epoch 7, Loss: 0.21181354367324415
Epoch 8, Loss: 0.1966570743519448
Epoch 9, Loss: 0.1977799976971104


In [None]:
# Optional: copy the trained Keras weights and biases into the custom network
# for layer_index, keras_layer in enumerate(model.layers):
#     weights, biases = keras_layer.get_weights()
#     print(weights.shape) # debugging
#     mymodel.addCustomWeights(weights, biases, layer_index)


In [18]:
# mymodel.WeightShape()
# Spot-check one training sample: print its true label, then show the class
# the custom network predicts for it as the cell's output.
sample_index = 76
lable = train_labels[sample_index]
in_img = train_images[sample_index].reshape(1, 784)
print(lable)
np.argmax(mymodel.forward(in_img))



2


2