In [45]:
import numpy as np
import numpy.typing as npt
import matplotlib.pyplot as plt
from nn import *
from losses import *
from optimizers import Optimizer
from utils import *

In [46]:
from keras.datasets import fashion_mnist

In [47]:
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

In [48]:
(x_train, y_train), (x_valid, y_valid) = train_valid_split(x_train, y_train)
print(f"x_train.shape = {x_train.shape},\ty_train.shape = {y_train.shape}")
print(f"x_valid.shape = {x_valid.shape},\t\ty_valid.shape = {y_valid.shape}")
print(f"x_test.shape = {x_test.shape},\t\ty_test.shape = {y_test.shape}")

x_train.shape = (54000, 28, 28),	y_train.shape = (54000,)
x_valid.shape = (6000, 28, 28),		y_valid.shape = (6000,)
x_test.shape = (10000, 28, 28),		y_test.shape = (10000,)


In [49]:
X_train, Y_train = flatten_image_to_vector(x_train), one_hot_encoder(y_train)
print(f"X_train.shape = {X_train.shape}, Y_train.shape = {Y_train.shape}")

X_train.shape = (54000, 784, 1), Y_train.shape = (54000, 10, 1)


In [50]:
Y_train[0]

array([[0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.]])

In [76]:
nn1 = NeuralNetwork(weight_init="xavier",
                    layer_sizes=[784,16], activations=["tanh"],
                    optimizer="rmsprop", learning_rate=0.01, beta=0.5, epsilon=1e-6)#, beta1=0.5, beta2=0.5, )

In [77]:
nn1.add_layer(input_size=16, output_size=10, weight_init="xavier", activation="softmax")

In [78]:
for i in range(len(nn1.layers)):
    print(f"Layer_{i} has input neurons size = {nn1.layers[i].input_size}, has W.shape = {nn1.layers[i].W.shape}")
    print(f"Layer_{i} has output neurons size = {nn1.layers[i].output_size}")

Layer_0 has input neurons size = 784, has W.shape = (16, 784)
Layer_0 has output neurons size = 16
Layer_1 has input neurons size = 16, has W.shape = (10, 16)
Layer_1 has output neurons size = 10


In [79]:
np.mean(X_train, axis=0, keepdims=True).shape

(1, 784, 1)

In [None]:
normalizedX_train = (X_train - np.mean(X_train, axis=0, keepdims=True))/np.std(X_train, axis=0, keepdims=True)

In [81]:
nn1.train(X_train=normalizedX_train[:200].copy(), Y_train=Y_train[:200].copy(), batch_size=20, epochs=300)

Epoch 10, Loss = 38.718081763147694
Epoch 20, Loss = 3.6234441419666954
Epoch 30, Loss = 0.3566106906002351
Epoch 40, Loss = 0.057268640390292024
Epoch 50, Loss = 0.01772163223518858
Epoch 60, Loss = 0.008707483164222953
Epoch 70, Loss = 0.005747967401096969
Epoch 80, Loss = 0.004299241844821331
Epoch 90, Loss = 0.0034048679557219714
Epoch 100, Loss = 0.0028254959543899936
Epoch 110, Loss = 0.002424633915406222
Epoch 120, Loss = 0.0021259839708736967
Epoch 130, Loss = 0.0018939747612812196
Epoch 140, Loss = 0.0017079630091877446
Epoch 150, Loss = 0.0015545102271618456
Epoch 160, Loss = 0.0014213475302013322
Epoch 170, Loss = 0.001300554974073468
Epoch 180, Loss = 0.0012087401150385967
Epoch 190, Loss = 0.0011303974792761218
Epoch 200, Loss = 0.0010618690149781448
Epoch 210, Loss = 0.0010013601026114972
Epoch 220, Loss = 0.0009475115903335487
Epoch 230, Loss = 0.0008992387063792371
Epoch 240, Loss = 0.0008556744963785872
Epoch 250, Loss = 0.0008161134403354908
Epoch 260, Loss = 0.000779

In [83]:
X_test, Y_test = flatten_image_to_vector(x_test), one_hot_encoder(y_test)
normalizedX_test = (X_test - np.mean(X_test, axis=0, keepdims=True))/np.std(X_test, axis=0, keepdims=True)

In [85]:
total = 0
correct = 0
for i in range(len(X_test)):
    y_pred = nn1.forward(normalizedX_test[i])
    y_pred = np.int8((y_pred == np.max(y_pred)))
    pred_label = np.argwhere(y_pred == 1)[0][0]
    real_label = np.argwhere(Y_test[i] == 1)[0][0]
    # print(f"true label: {real_label}, predicted label: {pred_label}")
    total += 1
    if pred_label == real_label:
        correct += 1

print(f"Correctly classified {correct} out of {total} images.")

Correctly classified 7245 out of 10000 images.


In [None]:
# y_pred = nn1.forward(X_train[0])
# y_pred

array([[0.07018606],
       [0.05122388],
       [0.16703672],
       [0.29941741],
       [0.08799515],
       [0.08832081],
       [0.12671645],
       [0.02774696],
       [0.04200582],
       [0.03935074]])

In [None]:
# d_theta_tuple_of_lists = nn1.backward(Y_train[0], y_pred=y_pred)
# len(d_theta_tuple_of_lists[0])

In [None]:
# len(d_theta_tuple_of_lists[0])

4

In [None]:
# for i in range(4):
#     print(d_theta_tuple_of_lists[0][i].shape)

(128, 784)
(64, 128)
(32, 64)
(10, 32)


In [None]:
# for i in range(4):
#     print(d_theta_tuple_of_lists[1][i].shape)

(128, 1)
(64, 1)
(32, 1)
(10, 1)
