# Basic Implementation

In [18]:
import numpy as np

In [51]:
class NeuralNetwork:
    """Fully connected feed-forward network (forward pass only).

    Every layer except the last one applies ReLU; the last layer applies a
    row-wise softmax, so each output row sums to 1.

    Attributes:
        input_size: Number of features per input sample.
        hidden_layers: List with the width of each hidden layer (may be empty).
        output_size: Number of output classes.
        weights: List of arrays; weights[i] has shape (fan_in, fan_out) of layer i.
        biases: List of arrays; biases[i] has shape (1, fan_out) of layer i.
    """

    def __init__(self, input_size=784, hidden_layers=(512, 512), output_size=10):
        """Create randomly initialised weights and zero biases for every layer.

        Args:
            input_size: Number of features per input sample.
            hidden_layers: Iterable with the width of each hidden layer. An
                empty iterable builds a single input -> output layer.
            output_size: Number of output classes.
        """
        self.input_size = input_size
        # Copy into a fresh list so instances never share (or mutate) the
        # default argument or the caller's own list.
        self.hidden_layers = list(hidden_layers)
        self.output_size = output_size
        self.weights = []
        self.biases = []

        # Consecutive pairs of this list are the (fan_in, fan_out) of each layer:
        # input -> hidden[0] -> ... -> hidden[-1] -> output.
        layer_sizes = [input_size, *self.hidden_layers, output_size]
        for fan_in, fan_out in zip(layer_sizes[:-1], layer_sizes[1:]):
            # NOTE(review): np.random.rand samples from [0, 1), so every
            # initial weight is non-negative. Kept as-is to preserve existing
            # behaviour; consider a zero-centred init before training.
            self.weights.append(0.01 * np.random.rand(fan_in, fan_out))
            self.biases.append(np.zeros((1, fan_out)))

    def forward(self, inputs):
        """Run a forward pass and return the softmax output of the last layer.

        Args:
            inputs: Array whose last dimension matches the first dimension of
                weights[0]; the notebook passes (batch, input_size).

        Returns:
            Array with one row of class probabilities per input sample.
        """
        activation = inputs
        output_index = len(self.weights) - 1

        for i, layer_weights in enumerate(self.weights):
            # Affine step: activation @ W + b
            pre_activation = np.dot(activation, layer_weights) + self.biases[i]

            # Activation Functions (ReLU + SoftMax)
            if i == output_index:
                # Subtract the row maximum before exponentiating so exp() cannot overflow.
                exponentials = np.exp(
                    pre_activation - np.max(pre_activation, axis=1, keepdims=True)
                )
                activation = exponentials / np.sum(exponentials, axis=1, keepdims=True)
            else:
                activation = np.maximum(0, pre_activation)

        return activation


In [20]:
# LossCategoricalCrossEntropy implementation
def LossCategoricalCrossEntropy(yPred, yTrue):
    """Mean categorical cross-entropy over a batch.

    Args:
        yPred: Predicted probabilities, one row per sample.
        yTrue: Target weights per class with the same layout as yPred
            (e.g. one-hot rows).

    Returns:
        The per-sample losses averaged over the batch.
    """
    # Keep probabilities away from exactly 0 and 1 so log() stays finite.
    clipped = np.clip(yPred, 1e-10, 1 - 1e-10)
    log_probs = np.log(clipped)

    # One loss value per sample: negative sum of target-weighted log-probabilities.
    per_sample = -np.sum(yTrue * log_probs, axis=1)

    # Report the batch average, not the batch total.
    return np.mean(per_sample)


In [21]:
# Build a network with the default architecture (784 -> 512 -> 512 -> 10).
myNeuralNet = NeuralNetwork()

In [23]:
# Smoke test: push one random 784-feature sample through the untrained network.
# NOTE(review): the saved output below shows 3 probabilities, but the default
# output_size is 10 — the output looks stale; re-run this cell.
result = myNeuralNet.forward(np.random.rand(1, 784))
result

array([[0.43584075, 0.33492869, 0.22923056]])

# Get Weights from PyTorch Model

In [52]:
# One hidden layer of 256 units (784 -> 256 -> 10), so the layer shapes line up
# with the transposed weight matrices imported in the following cells.
modelMNIST = NeuralNetwork(hidden_layers=[256])

In [53]:
# Load the exported weight matrices and bias vectors from disk,
# weights first and then biases, each in layer order.
modelWeights = [
    np.load(weight_path)
    for weight_path in (
        "./modelWeights/layer_stack.1.weight.npy",
        "./modelWeights/layer_stack.3.weight.npy",
    )
]
modelBiases = [
    np.load(bias_path)
    for bias_path in (
        "./modelWeights/layer_stack.1.bias.npy",
        "./modelWeights/layer_stack.3.bias.npy",
    )
]

In [66]:
# Compare bias shapes: as loaded, after adding a leading axis, and as stored in modelMNIST.
for i, loaded_bias in enumerate(modelBiases):
    print(
        f"Shapes PytTorchModel: {loaded_bias.shape}",
        f"Shapes PytTorchModel Transformed: {np.expand_dims(loaded_bias, axis=0).shape}",
        f"Shapes MyModel: {modelMNIST.biases[i].shape}",
        "",
        sep="\n",
    )

# Compare weight shapes: as loaded, after transposing, and as stored in modelMNIST.
for i, loaded_weight in enumerate(modelWeights):
    print(
        f"Shapes PytTorchModel: {loaded_weight.shape}",
        f"Shapes PytTorchModel Transformed: {loaded_weight.T.shape}",
        f"Shapes MyModel: {modelMNIST.weights[i].shape}",
        "",
        sep="\n",
    )

Shapes PytTorchModel: (256,)
Shapes PytTorchModel Transformed: (1, 256)
Shapes MyModel: (1, 256)

Shapes PytTorchModel: (10,)
Shapes PytTorchModel Transformed: (1, 10)
Shapes MyModel: (1, 10)

Shapes PytTorchModel: (256, 784)
Shapes PytTorchModel Transformed: (784, 256)
Shapes MyModel: (784, 256)

Shapes PytTorchModel: (10, 256)
Shapes PytTorchModel Transformed: (256, 10)
Shapes MyModel: (256, 10)



In [67]:
# Overwrite the model's biases with the loaded ones, adding a leading axis
# so each bias becomes a single row.
for i, loaded_bias in enumerate(modelBiases):
    modelMNIST.biases[i] = loaded_bias[np.newaxis, ...]

# Overwrite the model's weights with the transposed loaded matrices.
for i, loaded_weight in enumerate(modelWeights):
    modelMNIST.weights[i] = np.transpose(loaded_weight)

In [69]:
import gzip

# Load MNIST dataset
def extract_images(filename):
    """Read a gzip-compressed IDX3 image file (the MNIST image format).

    Args:
        filename: Path to the ``.gz`` file.

    Returns:
        ``uint8`` array of shape ``(num, rows, cols)`` as declared by the header.

    Raises:
        ValueError: If the header's magic number is not 2051, or if the payload
            size does not match the dimensions declared in the header.
    """
    with gzip.open(filename, 'rb') as f:
        # The header is four big-endian 32-bit integers. Reading them as '>u4'
        # is correct on any host, whereas uint32 + byteswap() is only correct
        # on little-endian machines.
        magic, num, rows, cols = (
            int(field) for field in np.frombuffer(f.read(16), dtype='>u4', count=4)
        )
        if magic != 2051:
            raise ValueError(
                f"{filename}: unexpected magic number {magic}, expected 2051 (IDX3 images)"
            )
        images = np.frombuffer(f.read(), dtype=np.uint8).reshape(num, rows, cols)
    return images

def extract_labels(filename):
    """Read a gzip-compressed IDX1 label file (the MNIST label format).

    Args:
        filename: Path to the ``.gz`` file.

    Returns:
        1-D ``uint8`` array with one label per item.

    Raises:
        ValueError: If the header's magic number is not 2049, or if the number
            of label bytes differs from the count declared in the header.
    """
    with gzip.open(filename, 'rb') as f:
        # The header is two big-endian 32-bit integers. Reading them as '>u4'
        # is correct on any host, whereas uint32 + byteswap() is only correct
        # on little-endian machines.
        magic, num = (
            int(field) for field in np.frombuffer(f.read(8), dtype='>u4', count=2)
        )
        if magic != 2049:
            raise ValueError(
                f"{filename}: unexpected magic number {magic}, expected 2049 (IDX1 labels)"
            )
        labels = np.frombuffer(f.read(), dtype=np.uint8)
        # A mismatch means the file is truncated or has trailing data.
        if labels.size != num:
            raise ValueError(
                f"{filename}: header declares {num} labels but file contains {labels.size}"
            )
    return labels

# Training split: images, then the matching labels.
train_images, train_labels = (
    extract_images("./MNISTdata/train-images-idx3-ubyte.gz"),
    extract_labels("./MNISTdata/train-labels-idx1-ubyte.gz"),
)
# Test split (the "t10k" files): images, then the matching labels.
test_images, test_labels = (
    extract_images("./MNISTdata/t10k-images-idx3-ubyte.gz"),
    extract_labels("./MNISTdata/t10k-labels-idx1-ubyte.gz"),
)

In [98]:
# Flatten the first test image into one row so it forms a batch of size 1.
first_test_pixels = test_images[0].flatten()
testImage1 = first_test_pixels[np.newaxis, :]
print(f"Test Label: {test_labels[0]}")
# forward() returns the full probability row, so that is what gets printed here.
print(f"Predicted Label: {modelMNIST.forward(testImage1)}")

Test Label: 7
Predicted Label: [[0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]]


In [101]:
# Classify every test image one at a time and tally hits and misses.
correct = 0
incorrect = 0
for i, pixels in enumerate(test_images):
    # One flattened image as a batch of size 1.
    image = pixels.flatten()[np.newaxis, :]
    predicted_digit = int(modelMNIST.forward(image).argmax())

    if predicted_digit == int(test_labels[i]):
        correct += 1
    else:
        incorrect += 1

# Accuracy: fraction of test images classified correctly.
correct / (correct + incorrect)

0.9753

# Notes

(batchSize[32], inputs[784]) * (inputs[784], neurons[512]) = (32, 512)

- Implement GitHub Repo - DONE

Forward pass - Train 
    - Neurons + - DONE
    - Activation function + - DONE
    - Softmax (get probabilities) + - DONE
    - loss - DONE
    - Test Forward Pass with Other Model Weights - DONE

Implement backpropagation
    - Explain how to do and everything
    - Start training with basic 80-20

Introduce regularization
Batch Normalization

Optimizer

Visualize gradients

Visualize MNIST with UMAP or T-SNE