In [1]:
import numpy as np
import numpy.typing as npt
import matplotlib.pyplot as plt
from nn import *
from losses import *
from optimizers import Optimizer
from utils import *

In [2]:
from keras.datasets import fashion_mnist

In [3]:
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

In [4]:
(x_train, y_train), (x_valid, y_valid) = train_valid_split(x_train, y_train)
print(f"x_train.shape = {x_train.shape},\ty_train.shape = {y_train.shape}")
print(f"x_valid.shape = {x_valid.shape},\t\ty_valid.shape = {y_valid.shape}")
print(f"x_test.shape = {x_test.shape},\t\ty_test.shape = {y_test.shape}")

x_train.shape = (54000, 28, 28),	y_train.shape = (54000,)
x_valid.shape = (6000, 28, 28),		y_valid.shape = (6000,)
x_test.shape = (10000, 28, 28),		y_test.shape = (10000,)


In [5]:
X_train, Y_train = flatten_image_to_vector(x_train), one_hot_encoder(y_train)
print(f"X_train.shape = {X_train.shape}, Y_train.shape = {Y_train.shape}")

X_train.shape = (54000, 784, 1), Y_train.shape = (54000, 10, 1)


In [6]:
Y_train[0]

array([[0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.]])

In [13]:
nn1 = NeuralNetwork(weight_init="xavier",
                    layer_sizes=[784, 32, 16], activations=["tanh"]*2,
                    optimizer="adam", learning_rate=0.001, weight_decay=0.0005, beta1=0.5, beta2=0.5, epsilon=1e-6)# beta=0.5, )

In [14]:
nn1.add_layer(input_size=16, output_size=10, weight_init="xavier", activation="softmax")

In [15]:
for i in range(len(nn1.layers)):
    print(f"Layer_{i} has input neurons size = {nn1.layers[i].input_size}, has W.shape = {nn1.layers[i].W.shape}")
    print(f"Layer_{i} has output neurons size = {nn1.layers[i].output_size}")

Layer_0 has input neurons size = 784, has W.shape = (32, 784)
Layer_0 has output neurons size = 32
Layer_1 has input neurons size = 32, has W.shape = (16, 32)
Layer_1 has output neurons size = 16
Layer_2 has input neurons size = 16, has W.shape = (10, 16)
Layer_2 has output neurons size = 10


In [16]:
np.mean(X_train, axis=0, keepdims=True).shape

(1, 784, 1)

In [17]:
normalizedX_train = (X_train - np.mean(X_train, axis=0, keepdims=True))/np.std(X_train, axis=0, keepdims=True)

In [18]:
nn1.train(X_train=normalizedX_train.copy(), Y_train=Y_train.copy(), batch_size=20, epochs=3)

Epoch 1, Loss = 0.5252933004705117
Epoch 2, Loss = 0.4006943800594066
Epoch 3, Loss = 0.3745451506928033


In [20]:
X_valid, Y_valid = flatten_image_to_vector(x_valid), one_hot_encoder(y_valid)
normalizedX_valid = (X_valid - np.mean(X_valid, axis=0, keepdims=True))/np.std(X_valid, axis=0, keepdims=True)

In [21]:
total = 0
correct = 0
for i in range(len(X_valid)):
    y_pred = nn1.forward(normalizedX_valid[i])
    y_pred = np.int8((y_pred == np.max(y_pred)))
    pred_label = np.argwhere(y_pred == 1)[0][0]
    real_label = np.argwhere(Y_valid[i] == 1)[0][0]
    # print(f"true label: {real_label}, predicted label: {pred_label}")
    total += 1
    if pred_label == real_label:
        correct += 1

print(f"Correctly classified {correct} out of {total} images.")

Correctly classified 5170 out of 6000 images.


In [None]:
# y_pred = nn1.forward(X_train[0])
# y_pred

array([[0.07018606],
       [0.05122388],
       [0.16703672],
       [0.29941741],
       [0.08799515],
       [0.08832081],
       [0.12671645],
       [0.02774696],
       [0.04200582],
       [0.03935074]])

In [None]:
# d_theta_tuple_of_lists = nn1.backward(Y_train[0], y_pred=y_pred)
# len(d_theta_tuple_of_lists[0])

In [None]:
# len(d_theta_tuple_of_lists[0])

4

In [None]:
# for i in range(4):
#     print(d_theta_tuple_of_lists[0][i].shape)

(128, 784)
(64, 128)
(32, 64)
(10, 32)


In [None]:
# for i in range(4):
#     print(d_theta_tuple_of_lists[1][i].shape)

(128, 1)
(64, 1)
(32, 1)
(10, 1)
