In [12]:
import tensorflow as tf
import keras

from keras.models import Sequential
from keras.layers import Input, Dense, Flatten
from keras.utils import to_categorical

import numpy as np
import pandas as pd

from sklearn.metrics import classification_report

import plotly.express as px
import plotly.io as pio
# Render every plotly figure in this notebook with the dark template.
pio.templates.default = 'plotly_dark'

# This notebook compares weight-initialisation schemes, so the runs have to be
# repeatable: seed Python, NumPy and the Keras backend once, before any model is
# built. (Some GPU kernels may still be non-deterministic.)
SEED = 42
keras.utils.set_random_seed(SEED)

In [13]:
# Load MNIST and unpack the two (images, labels) splits.
mnist_train, mnist_test = keras.datasets.mnist.load_data()
X_train, y_train = mnist_train
X_test, y_test = mnist_test

# Cell output: shapes of the train and test image arrays.
X_train.shape, X_test.shape

((60000, 28, 28), (10000, 28, 28))

In [14]:
# Lay the first seven training digits side by side and show them without a colour bar.
digits_strip = np.hstack(X_train[:7])
original_fig = px.imshow(digits_strip, title="Original size")
original_fig.update_coloraxes(showscale=False)

In [15]:
new_size = (14, 14)


def _shrink(images):
    """Resize a stack of 2-D images to `new_size`, returning them without a channel axis."""
    # A trailing axis is added for the resize call and indexed away again afterwards.
    with_channel = images[..., np.newaxis]
    return tf.image.resize(with_channel, size=new_size)[..., 0]


X_train_resize = _shrink(X_train)
X_test_resize = _shrink(X_test)
print(X_train_resize.shape)

# Same seven-digit strip as before, now at the reduced resolution.
resized_strip = np.hstack(X_train_resize[: 7])
px.imshow(resized_strip, title="Resized").update_coloraxes(showscale=False)

(60000, 14, 14)


In [16]:
# Divide both resized splits by the largest pixel value found in the original
# (un-resized) training images, so train and test share one scale factor.
max_val = np.max(X_train)
X_train_norm, X_test_norm = (
    split / max_val for split in (X_train_resize, X_test_resize)
)

In [17]:
# One-hot encode the digit labels for the categorical cross-entropy loss.
# The class count is passed explicitly: left to itself, to_categorical derives the
# width from the largest label present in each array, so the two splits are only
# guaranteed to share a width (and to match the 10-unit softmax output) when the
# count is fixed here.
num_classes = 10
y_train_bnr = to_categorical(y_train, num_classes=num_classes)
y_test_bnr = to_categorical(y_test, num_classes=num_classes)

## 1. Default weight initialization (Keras default: Glorot uniform)

In [18]:
# Baseline network: flatten the resized image, one 32-unit ELU hidden layer, then a
# 10-way softmax. No kernel_initializer is passed, so the layers keep their defaults.
baseline_layers = [
    Input(shape=new_size),
    Flatten(),
    Dense(units=32, activation='elu'),
    Dense(units=10, activation='softmax'),
]
mlp = Sequential(baseline_layers)

mlp.summary()

mlp.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [19]:
# Train the baseline model for 50 epochs on the first GPU; the test split is used as
# validation data, so val_* metrics are reported after every epoch.
baseline_fit_args = dict(
    epochs=50,
    batch_size=256,
    validation_data=(X_test_norm, y_test_bnr),
)
with tf.device("/GPU:0"):
    history = mlp.fit(x=X_train_norm, y=y_train_bnr, **baseline_fit_args)

Epoch 1/50
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.5661 - loss: 1.5160 - val_accuracy: 0.8809 - val_loss: 0.4864
Epoch 2/50
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 14ms/step - accuracy: 0.8793 - loss: 0.4554 - val_accuracy: 0.9055 - val_loss: 0.3453
Epoch 3/50
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 14ms/step - accuracy: 0.9028 - loss: 0.3511 - val_accuracy: 0.9133 - val_loss: 0.3049
Epoch 4/50
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 14ms/step - accuracy: 0.9137 - loss: 0.3090 - val_accuracy: 0.9178 - val_loss: 0.2856
Epoch 5/50
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.9170 - loss: 0.2921 - val_accuracy: 0.9208 - val_loss: 0.2737
Epoch 6/50
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.9201 - loss: 0.2778 - val_accuracy: 0.9256 - val_loss: 0.2587
Epoch 7/50
[1m235/235

In [20]:
# Per-epoch metrics recorded by the fit call whose result is currently in `history`.
curves = history.history

loss_data = pd.DataFrame({'train': curves['loss'], 'valid': curves['val_loss']})
accuracy_data = pd.DataFrame({'train': curves['accuracy'], 'valid': curves['val_accuracy']})

# One line chart per metric, with train and validation as separate traces.
for frame, heading in ((loss_data, 'loss'), (accuracy_data, 'accuracy')):
    px.line(frame, title=heading).show()

In [22]:
# Turn the softmax outputs into hard labels by taking the most probable class per row.
class_probs = mlp.predict(X_test_norm)
y_pred = class_probs.argmax(axis=1)

# Per-digit precision / recall / F1 against the integer test labels.
report = classification_report(y_test, y_pred)
print(report)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
              precision    recall  f1-score   support

           0       0.97      0.98      0.98       980
           1       0.98      0.99      0.99      1135
           2       0.96      0.97      0.96      1032
           3       0.97      0.96      0.96      1010
           4       0.97      0.96      0.96       982
           5       0.96      0.96      0.96       892
           6       0.97      0.97      0.97       958
           7       0.97      0.97      0.97      1028
           8       0.94      0.96      0.95       974
           9       0.96      0.95      0.96      1009

    accuracy                           0.97     10000
   macro avg       0.97      0.97      0.97     10000
weighted avg       0.97      0.97      0.97     10000



## 2. Initialize weights with **zeros**

In [31]:
# Same architecture as the baseline, but both Dense kernels start at exactly zero.
# NOTE(review): with all-zero kernels the hidden activations are zero too, so this
# model is expected to stall near chance accuracy, as the training log below shows.
zero_kernel = {'kernel_initializer': 'zeros'}

mlp_zeros = Sequential([
    Input(shape=new_size),
    Flatten(),
    Dense(32, activation='elu', **zero_kernel),
    Dense(10, activation='softmax', **zero_kernel),
])

mlp_zeros.summary()

mlp_zeros.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [32]:
# Train the zero-initialised model for 10 epochs with the test split as validation data.
# Note that `history` is rebound here and no longer refers to the earlier fit.
zeros_fit_args = {
    'epochs': 10,
    'batch_size': 256,
    'validation_data': (X_test_norm, y_test_bnr),
}
with tf.device("/GPU:0"):
    history = mlp_zeros.fit(X_train_norm, y_train_bnr, **zeros_fit_args)

Epoch 1/10
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.1065 - loss: 2.3021 - val_accuracy: 0.1135 - val_loss: 2.3012
Epoch 2/10
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 15ms/step - accuracy: 0.1134 - loss: 2.3011 - val_accuracy: 0.1135 - val_loss: 2.3010
Epoch 3/10
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 15ms/step - accuracy: 0.1104 - loss: 2.3015 - val_accuracy: 0.1135 - val_loss: 2.3010
Epoch 4/10
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 14ms/step - accuracy: 0.1129 - loss: 2.3012 - val_accuracy: 0.1135 - val_loss: 2.3010
Epoch 5/10
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 14ms/step - accuracy: 0.1147 - loss: 2.3010 - val_accuracy: 0.1135 - val_loss: 2.3010
Epoch 6/10
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 15ms/step - accuracy: 0.1106 - loss: 2.3017 - val_accuracy: 0.1135 - val_loss: 2.3010
Epoch 7/10
[1m235/235

## 3. Initialize weights with **ones**

In [52]:
# Same architecture again, this time with every kernel weight initialised to one,
# so all hidden units start out identical.
ones_layers = [
    Input(shape=new_size),
    Flatten(),
    Dense(units=32, activation='elu', kernel_initializer='ones'),
    Dense(units=10, activation='softmax', kernel_initializer='ones'),
]
mlp_ones = Sequential(ones_layers)

mlp_ones.summary()

mlp_ones.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [53]:
# Train the ones-initialised model: 50 epochs, batch size 512, test split as validation.
# `history` is rebound to this run's training log.
ones_validation = (X_test_norm, y_test_bnr)
gpu_scope = tf.device("/GPU:0")
with gpu_scope:
    history = mlp_ones.fit(
        X_train_norm,
        y_train_bnr,
        batch_size=512,
        epochs=50,
        validation_data=ones_validation,
    )

Epoch 1/50
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 18ms/step - accuracy: 0.1002 - loss: 2.3193 - val_accuracy: 0.0980 - val_loss: 2.3081
Epoch 2/50
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 16ms/step - accuracy: 0.1022 - loss: 2.2908 - val_accuracy: 0.1268 - val_loss: 2.2910
Epoch 3/50
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 15ms/step - accuracy: 0.1147 - loss: 2.2736 - val_accuracy: 0.1210 - val_loss: 2.2548
Epoch 4/50
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 15ms/step - accuracy: 0.1291 - loss: 2.2524 - val_accuracy: 0.1762 - val_loss: 2.2251
Epoch 5/50
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 15ms/step - accuracy: 0.1618 - loss: 2.2125 - val_accuracy: 0.1822 - val_loss: 2.1746
Epoch 6/50
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 16ms/step - accuracy: 0.1944 - loss: 2.1618 - val_accuracy: 0.2420 - val_loss: 2.1043
Epoch 7/50
[1m118/118

In [54]:
# Training log of the most recent fit; when the cells run in order that is mlp_ones.
latest_log = history.history

loss_data = pd.DataFrame(dict(train=latest_log['loss'], valid=latest_log['val_loss']))
accuracy_data = pd.DataFrame(
    dict(train=latest_log['accuracy'], valid=latest_log['val_accuracy'])
)

# Build both figures first, then display loss followed by accuracy.
loss_fig = px.line(loss_data, title='loss')
accuracy_fig = px.line(accuracy_data, title='accuracy')
loss_fig.show()
accuracy_fig.show()