In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import tqdm

In [None]:
# Load MNIST; load_data() returns a (train, test) pair of (images, labels) tuples.
_train_split, _test_split = tf.keras.datasets.mnist.load_data()
x_train, y_train = _train_split
x_test, y_test = _test_split

In [None]:
# Rescale pixel intensities from 0..255 to float32 values in [0.0, 1.0].
# NOTE: this rebinds x_train/x_test, so re-running the cell divides again.
x_train, x_test = (
    pixels.astype(np.float32) / 255.0 for pixels in (x_train, x_test)
)

In [None]:
# Turn the class labels into one-hot float32 vectors of length 10 (one slot per digit).
y_train, y_test = (
    tf.one_hot(class_ids, depth=10, dtype=tf.float32)
    for class_ids in (y_train, y_test)
)

In [None]:
# Draw dataset examples: a num_row x num_col grid of training images with their labels
num_row = 3
num_col = 5

num = num_row*num_col
images = x_train[:num]
labels = y_train[:num]

fig, axes = plt.subplots(num_row, num_col, figsize=(1.5*num_col,2*num_row))
for i in range(num):
    ax = axes[i//num_col, i%num_col]
    ax.imshow(images[i], cmap='gray')
    # The labels are one-hot encoded at this point, so show the class index
    # (argmax) rather than the raw 10-element vector.
    ax.set_title('Label: {}'.format(np.argmax(labels[i])))
plt.tight_layout()
plt.show()

In [None]:
# Create neural network architecture: 28x28 image -> flatten -> 2 x Dense(100, relu) -> Dense(10)
# NOTE: `input` shadows the Python builtin; the name is kept because the
# cell that builds the model refers to it.
input = tf.keras.layers.Input(shape=(28, 28))
layer = tf.keras.layers.Flatten()(input)
layer = tf.keras.layers.Dense(100, activation=tf.keras.activations.relu)(layer)
layer = tf.keras.layers.Dense(100, activation=tf.keras.activations.relu)(layer)
# Softmax (not sigmoid): the ten classes are mutually exclusive and the
# categorical cross-entropy loss expects the outputs to form a probability
# distribution over them.
output = tf.keras.layers.Dense(10, activation=tf.keras.activations.softmax)(layer)

In [None]:
# Wrap the layer graph (input tensor -> output tensor) into a trainable model.
model = tf.keras.models.Model(
    inputs=input,
    outputs=output,
)

In [None]:
# Print the model summary as a quick sanity check of the architecture.
model.summary()

In [None]:
# Training: Keras
# Configure the built-in training loop: Adam optimizer, categorical
# cross-entropy on the one-hot targets, and accuracy as the reported metric.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    # `metrics` is documented as a list of metrics; pass a list rather than
    # a bare metric object.
    metrics=[tf.keras.metrics.CategoricalAccuracy()],
)

In [None]:
# Train with the built-in Keras loop; the test split is used as validation data.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=100,
    validation_data=(x_test, y_test),
)

In [None]:
# Training: manual
def loss_fn(ground_truth, predicted):
    """Return the mean squared error between targets and predictions.

    The squared element-wise differences are averaged over all elements,
    giving a single scalar.
    """
    residual = ground_truth - predicted
    squared_error = tf.math.square(residual)
    return tf.math.reduce_mean(squared_error)

# Optimizer used by the manual training step (gradient_descent).
optimizer = tf.keras.optimizers.Adam(
    learning_rate=1e-3,
)


#@tf.function()
def gradient_descent(model: tf.keras.Model, input, ground_truth):
    """Run one optimization step on a single batch and return its loss.

    Uses the module-level ``loss_fn`` and ``optimizer``.

    Args:
        model: Keras model whose trainable weights are updated.
        input: Batch of inputs fed to the model (the name shadows the
            builtin ``input``; kept so keyword callers keep working).
        ground_truth: Targets compared against the model output by
            ``loss_fn``.

    Returns:
        The loss value for this batch, computed before the weight update.
    """
    # Only the forward pass and the loss need to be recorded on the tape.
    with tf.GradientTape() as tape:
        predicted = model(input, training=True)
        loss = loss_fn(ground_truth, predicted)
    # Differentiate and apply the update outside the tape context so the
    # backward pass and the optimizer's ops are not recorded as well.
    gradients = tape.gradient(loss, model.trainable_weights)
    optimizer.apply_gradients(zip(gradients, model.trainable_weights))
    return loss


def iterate_batches(dataset, batch_size: int):
    """Yield consecutive slices of ``dataset`` holding up to ``batch_size`` items.

    ``dataset`` must support ``len()`` and slicing. The final slice is
    shorter when the length is not a multiple of ``batch_size``.
    """
    yield from (
        dataset[offset:offset + batch_size]
        for offset in range(0, len(dataset), batch_size)
    )


epochs = 100
batch_size = 32
dataset_size = 10000
indices = np.arange(dataset_size)

# Take the training subset once, as NumPy arrays, instead of re-slicing the
# full arrays for every single sample in every epoch.
x_subset = np.asarray(x_train[:dataset_size])
y_subset = np.asarray(y_train[:dataset_size])

for epoch in tqdm.tqdm(range(epochs)):
    # New sample order each epoch; the same permutation is applied to inputs
    # and targets so the pairs stay aligned.
    np.random.shuffle(indices)
    x = iterate_batches(x_subset[indices], batch_size)
    y = iterate_batches(y_subset[indices], batch_size)
    print(f"Epoch #{epoch}")
    batch_losses = []
    for (inputs, ground_truths) in zip(x, y):
        loss = gradient_descent(model, inputs, ground_truths)
        batch_losses.append(float(loss))
    # Report the mean loss over all batches of the epoch, not just the last one.
    print(np.mean(batch_losses))

In [None]:
# Evaluate neural network: show test images titled with the true class (L)
# and the predicted class (P)
num_row = 3
num_col = 5

num = num_row*num_col
images = x_test[:num]
labels = y_test[:num]

# One batched forward pass for all displayed images instead of one model
# call per image inside the plotting loop.
predictions = np.asarray(model(images, training=False))
# Labels are one-hot and predictions are per-class scores: argmax over the
# class axis gives the class index for each image.
true_classes = np.argmax(np.asarray(labels), axis=1)
predicted_classes = np.argmax(predictions, axis=1)

fig, axes = plt.subplots(num_row, num_col, figsize=(1.5*num_col,2*num_row))
for i in range(num):
    ax = axes[i//num_col, i%num_col]
    ax.imshow(images[i], cmap='gray')
    ax.set_title(f'L: {true_classes[i]}, P: {predicted_classes[i]}')
plt.tight_layout()
plt.show()