In [48]:
# mnist = tf.keras.datasets.mnist
# (x_train, y_train), (x_test, y_test) = mnist.load_data()
# x_train, x_test = x_train / 255.0, x_test / 255.0

# alpha = 0.1;
# trainingId = 0;

# def sigmoid(x):
#     return 1.0 / (1.0 + np.exp(-x))

# X = x_train[trainingId].reshape(-1,1)
# Y = np.empty((10, 1));
# W3 = np.random.rand(Y.size, X.size);
# B3 = np.random.rand(Y.size, 1);
# A3 = W3 @ X + B3;
# Y = sigmoid(A3);
# correctY = np.zeros((Y.size,1));
# correctY[y_train[trainingId]] = 1;
# dY = correctY - Y;
# dW3 = 2/Y.size * W3 * dY * X.T;
# W3 = W3 * alpha * dW3;

In [49]:
# Necessary imports
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import random as rng
# Load dataset
mnist = tf.keras.datasets.mnist
data = mnist.load_data()
# Split into train set and test set
train = data[0]
test = data[1]
# Getting array containing dataset matrices with actual pixel values (each 28 x 28)
x_train = train[0]
x_test = test[0]
# Getting array containing dataset labels
y_train = train[1]
y_test = test[1]
# Normalize the values (pixels take values from 0 to 255, neural network works better with values from 0 to 1)
x_train = x_train / 255
x_test = x_test / 255

In [50]:
# Select data sample (for representations only)
# index = int(input(f'Which data sample should be displayed (range from 0 to {len(x_train) - 1}): '))

In [51]:
# Terminal representation of the sample digit from x_train dataset ('.' is being displayed when there is 0 in the dataset, otherwise 'o' is being displayed)
def term_repr(index, labels, values):
    print(labels[index], end = '\n')
    for m in range(len(values[index])):
        for n in range(len(values[index][m])):
            if (values[index][m][n] == 0.0):
                print(f'.', end = ' ')
            else:
                print(f'o', end = ' ')
        print(f'', end = '\n')

In [52]:
# Graphical representation of the sample digit from x_train dataset
def grap_repr(index, values):
    fig = plt.figure()
    fig.patch.set_facecolor((0, 0, 0))
    plt.axis('off')
    im = plt.imshow(values[index], cmap='Greys_r')
    cbar = plt.colorbar(im)
    cbar.set_label('Pixel value intensity [%]', color='#999', fontweight='bold', labelpad=20)
    cbar.ax.tick_params(labelcolor='#999')
    plt.show()

In [53]:
# Flatten train dataset
def flatten(dataset, samples):
    temp = np.zeros((samples, (dataset[0].shape[0] * dataset[0].shape[1])))
    for m in range(samples):
        for n in range(dataset[m].shape[0]):
            for b in range(dataset[m].shape[1]):
                temp[m][n * dataset[m].shape[0] + b] = dataset[m][n][b]
    return np.transpose(temp)

In [54]:
# One hot encoding of labels
def one_hot(dataset, samples):
    temp = np.zeros((10, samples))
    for m in range(samples):
        temp[dataset[m]][m] = 1
    return temp

In [55]:
# Parameter initialization
def param_init(inputs, hiddens):
    w1 = np.zeros((hiddens, inputs))
    for m in range(len(w1)):
        for n in range(len(w1[m])):
            w1[m][n] = (rng.random() * 2) - 1
    b1 = np.zeros((hiddens, 1))
    return w1, b1

In [56]:
# Sigmoid activation function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

In [1]:
# Forward propagation
def forward_propagation(sample, parameters):
    # retrieve the parameters
    w1, b1 = parameters
    # compute the activation of the hidden layer
    z1 = np.dot(w1, sample) + b1
    a1 = sigmoid(z1)
    print(a1)
    return a1

In [58]:
# Mean square error loss function
def mean_square_error(a1, label, samples):
    loss_arr = np.zeros((10, samples))
    for m in range(len(a1)):
        loss_arr[m] += ((a1[m] - label[m]) * (a1[m] - label[m]))
    return loss_arr.mean()

In [59]:
# Backward propagation
def backward_propagation(sample, label, parameters, a1, learning_rate):
    # Retrieve parameters
    w1, b1 = parameters
    # Compute the gradient of the loss with respect to the hidden layer activation
    dL_da1 = a1 - label
    # Compute the gradient of the activation with respect to the linear combination (z1)
    da1_dz1 = a1 * (1 - a1)
    # Compute the gradient of the loss with respect to z1
    dL_dz1 = dL_da1 * da1_dz1
    # Compute the gradient of the loss with respect to w1 and b1
    dL_dw1 = np.dot(dL_dz1, sample.T)
    dL_db1 = np.sum(dL_dz1)
    # Update the weights and biases 
    w1 -= dL_dw1 * learning_rate
    b1 -= dL_db1 * learning_rate
    return w1, b1

In [60]:
# Train the neural network
def train(X, y, num_iterations, learning_rate):
    samples = np.shape(X)[1]
    # Initialize the weights and biases
    parameters = param_init(784, 10)
    for i in range(num_iterations):
        # Forward propagation
        a1 = forward_propagation(X, parameters)
        # Compute the loss
        loss = mean_square_error(a1, y, samples)
        # Backward propagation
        newparams = backward_propagation(X, y, parameters, a1, learning_rate)
        # Update the parameters
        parameters = newparams
        # Display performance gain
        print(f"Iteration {i}: loss = {loss}")
    return parameters

In [61]:
def test_neural_network(X_test, y_test, parameters):
    predictions = forward_propagation(X_test, parameters)
    predicted_labels = np.argmax(predictions, axis=0)
    actual_labels = np.argmax(y_test, axis=0)
    accuracy = np.mean(predicted_labels == actual_labels)
    print(f"Accuracy: {accuracy * 100:.2f}%")

In [62]:
# Train the neural network with the training data (using more samples for better training)
train_samples = 1
parameters = train(flatten(x_train, train_samples), one_hot(y_train, train_samples), 10, 0.1)
# Test the neural network with the test data
# test_samples = 100
# test_neural_network(flatten(x_test, test_samples), one_hot(y_test, test_samples), parameters)

[[3.19616014e-02]
 [9.13611667e-01]
 [7.38061586e-06]
 [9.98874043e-01]
 [9.96720327e-01]
 [9.99944931e-01]
 [9.95577430e-01]
 [6.26495505e-01]
 [8.30531070e-02]
 [4.82594753e-04]]
Iteration 0: loss = 0.4217477677276674
[[3.09708399e-02]
 [8.42424420e-01]
 [7.20934927e-06]
 [9.98835449e-01]
 [9.96541599e-01]
 [9.99943623e-01]
 [9.95288751e-01]
 [3.00389313e-01]
 [7.70709555e-02]
 [4.71400378e-04]]
Iteration 1: loss = 0.378817910259429
[[3.01545011e-02]
 [6.53796894e-01]
 [7.07321884e-06]
 [9.98800413e-01]
 [9.96363343e-01]
 [9.99942538e-01]
 [9.94990436e-01]
 [1.91362098e-01]
 [7.22943779e-02]
 [4.62502283e-04]]
Iteration 2: loss = 0.34505539628651133
[[2.93644064e-02]
 [3.24017105e-01]
 [6.93795055e-06]
 [9.98763627e-01]
 [9.96168875e-01]
 [9.99941418e-01]
 [9.94657847e-01]
 [1.50460179e-01]
 [6.81430902e-02]
 [4.53660452e-04]]
Iteration 3: loss = 0.31123567513323225
[[2.88496920e-02]
 [1.98731237e-01]
 [6.86501262e-06]
 [9.98736369e-01]
 [9.95992053e-01]
 [9.99940795e-01]
 [9.9433586