In [1]:
import numpy as np
import matplotlib.pyplot as plt


Matplotlib is building the font cache; this may take a moment.


## TASK 1

In [2]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def sigmoid_derivative(x):
    return x * (1 - x)

In [3]:
class FeedForwardNN:
    def __init__(self, input_size, input_layer_size, hidden_size, output_size):
        # Initialize weights and biases
        # self.W_ii = np.random.rand(input_layer_size, input_size) / 100000
        # self.B_i = np.random.rand(input_layer_size, 1)/ 100000
        # self.W_ih = np.random.rand(hidden_size, input_layer_size) / 100000
        # self.B_h = np.random.rand(hidden_size, 1)/ 100000
        # self.W_ho = np.random.rand(output_size, hidden_size) / 100000
        # self.B_o = np.random.rand(output_size, 1)/ 100000

        self.W_ii = np.zeros((input_layer_size, input_size))
        self.B_i = np.zeros((input_layer_size, 1))
        self.W_ih = np.zeros((hidden_size, input_layer_size))
        self.B_h = np.zeros((hidden_size, 1))
        self.W_ho = np.zeros((output_size, hidden_size))
        self.B_o = np.zeros((output_size, 1))

    def feedforward(self, input_data):
        input_input = np.dot(self.W_ii, input_data.T) + self.B_i
        input_output = sigmoid(input_input)
        hidden_input = np.dot(self.W_ih, input_output) + self.B_h
        hidden_output = sigmoid(hidden_input)
        output_hidden = np.dot(self.W_ho, hidden_output) + self.B_o
        output = sigmoid(output_hidden).T
        return output


## Task 2

In [4]:
import gzip
import pickle

with gzip.open('mnist.pkl.gz', 'rb') as ff:
    training_data, validation_data, test_data = pickle.load(
        ff, encoding='bytes')
print(training_data[0].shape)
print(validation_data[0].shape)
print(test_data[0].shape)

(50000, 784)
(10000, 784)
(10000, 784)


In [5]:
def to_onehot(labels):
    one_hot_labels = np.zeros((len(labels), 10))
    for i, label in enumerate(labels):
        one_hot_labels[i, label] = 1
    return one_hot_labels

 ## TASK 4

In [6]:
def backpropagation(model, input_data, target_output, learning_rate):
    input_input = np.dot(model.W_ii, input_data.T) + model.B_i
    input_output = sigmoid(input_input)  # 50 bat
    hidden_input = np.dot(model.W_ih, input_output) + model.B_h
    hidden_output = sigmoid(hidden_input)  # 30 bat
    output_hidden = np.dot(model.W_ho, hidden_output) + model.B_o
    output = sigmoid(output_hidden).T  # bat 10

    # Calculate the error at the output layer
    loss = output - target_output
    output_delta = loss * sigmoid_derivative(output)  # bat 10

    # Calculate the error at the hidden layer
    hidden_error = output_delta.dot(model.W_ho)  # bat 30
    hidden_delta = hidden_error * sigmoid_derivative(hidden_output).T  # bat 30

    # Calculate the error at the input layer
    input_error = hidden_delta.dot(model.W_ih)  # bat 50
    input_delta = input_error * sigmoid_derivative(input_output.T)  # bat 50

    # Update W and Bes for each layer
    model.W_ho -= (hidden_output.dot(output_delta)).T * learning_rate / len(input_delta)
    model.B_o -= np.sum(output_delta, axis=0, keepdims=True).T * learning_rate / len(input_delta)
    model.W_ih -= (input_output.dot(hidden_delta)).T * learning_rate / len(input_delta)
    model.B_h -= np.sum(hidden_delta, axis=0, keepdims=True).T * learning_rate / len(input_delta)
    model.W_ii -= input_delta.T.dot(input_data) * learning_rate / len(input_delta)
    model.B_i -= np.sum(input_delta, axis=0, keepdims=True).T * learning_rate / len(input_delta)



## TASK 3

In [7]:
def quadratic_loss(model, inputs, labels):
    output = model.feedforward(inputs)
    loss = 0.5 * np.linalg.norm(output - labels, axis=1) ** 2
    return np.mean(loss)

In [8]:
def evaluate(model, inputs, labels):
    outputs = model.feedforward(inputs)
    correct_predictions = np.sum(np.argmax(outputs, axis=1) == np.argmax(labels, axis=1))
    return correct_predictions / len(inputs)

In [9]:
def SGD(model, data, mini_batch_size, learning_rate, epochs):
    n = len(data)
    inputs, labels = data[0], data[1]
    labels = to_onehot(labels)
    loss_history = []

    for epoch in range(epochs):
        mini_batches = [(inputs[k:k + mini_batch_size], labels[k:k + mini_batch_size]) for k in
                        range(0, n, mini_batch_size)]
        for x, y in mini_batches:
            backpropagation(model, x, y, learning_rate)

        # Print learning success per epoch
        accuracy = evaluate(model, inputs, labels)
        loss_history.append(quadratic_loss(model, inputs, labels))
        print(f"Epoch {epoch + 1}/{epochs}: Accuracy {accuracy * 100:.2f}%, Loss {loss_history[-1]:.4f}")

    # Plot the learning curve
    plt.plot(range(epochs), loss_history)
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Learning Curve')
    plt.show()

## Task 5

In [11]:
input_size = 784
input_layer_size = 785
hidden_size = 31
output_size = 10
learning_rate = 0.1
mini_batch_size = 32
epochs = 1000
model = FeedForwardNN(input_size, input_layer_size, hidden_size, output_size)

In [None]:
SGD(model, training_data, mini_batch_size, learning_rate, epochs)

Epoch 1/1000: Accuracy 11.36%, Loss 1.1649
Epoch 2/1000: Accuracy 11.36%, Loss 1.0877
Epoch 3/1000: Accuracy 11.36%, Loss 1.0159
Epoch 4/1000: Accuracy 11.36%, Loss 0.9482
Epoch 5/1000: Accuracy 11.36%, Loss 0.8846
Epoch 6/1000: Accuracy 11.36%, Loss 0.8253
Epoch 7/1000: Accuracy 11.36%, Loss 0.7713
Epoch 8/1000: Accuracy 11.36%, Loss 0.7231
Epoch 9/1000: Accuracy 11.36%, Loss 0.6811
Epoch 10/1000: Accuracy 11.36%, Loss 0.6452
Epoch 11/1000: Accuracy 11.36%, Loss 0.6150
Epoch 12/1000: Accuracy 11.36%, Loss 0.5898
Epoch 13/1000: Accuracy 11.36%, Loss 0.5690
Epoch 14/1000: Accuracy 11.36%, Loss 0.5518
Epoch 15/1000: Accuracy 11.36%, Loss 0.5375
Epoch 16/1000: Accuracy 11.36%, Loss 0.5258
Epoch 17/1000: Accuracy 11.36%, Loss 0.5160
Epoch 18/1000: Accuracy 11.36%, Loss 0.5078
Epoch 19/1000: Accuracy 11.36%, Loss 0.5009
Epoch 20/1000: Accuracy 11.36%, Loss 0.4951
Epoch 21/1000: Accuracy 11.36%, Loss 0.4902
Epoch 22/1000: Accuracy 11.36%, Loss 0.4860
Epoch 23/1000: Accuracy 11.36%, Loss 0.48

In [12]:
test_input, test_labels = test_data[0], to_onehot(test_data[1])
test_accuracy = evaluate(model, test_input, test_labels)
test_loss = quadratic_loss(model, test_input, test_labels)
print(f"test data:  Accuracy {test_accuracy * 100:.2f}%, Loss {test_loss:.4f}")


test data:  Accuracy 11.35%, Loss 0.4574
