In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist

### Loading Dataset
***

In [2]:
# Load the MNIST train/test splits; each split is an (images, labels) pair.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Sanity check: container type, then the image and label array shapes,
# one per line.
print(type(x_train), x_train.shape, y_train.shape, sep="\n")

<class 'numpy.ndarray'>
(60000, 28, 28)
(60000,)


In [3]:
def _flatten_and_scale(images):
    """Flatten each 28x28 image into a 784-vector and divide pixels by 255.

    Safe to apply to its own result: an array that is already 2-D float32 is
    returned unchanged, so re-running this cell does not divide the pixel
    values by 255 a second time.
    """
    already_prepared = images.ndim == 2 and images.dtype == "float32"
    if already_prepared:
        return images
    return images.reshape(-1, 28 * 28).astype("float32") / 255.0


x_train = _flatten_and_scale(x_train)
x_test = _flatten_and_scale(x_test)

In [4]:
# Optional: x_train = tf.convert_to_tensor(x_train)
# Not required here, because Keras accepts NumPy arrays directly.

### Sequential API
***
Very convenient, but not very flexible: a `Sequential` model is a single linear stack of layers with one input and one output.

In [5]:
# Plain stack of fully connected layers: two ReLU hidden layers followed by a
# 10-unit output layer with no activation, i.e. it emits raw logits.
# No input shape is declared, so the model is only built once it sees data.
model = keras.Sequential([
    layers.Dense(512, activation="relu"),
    layers.Dense(256, activation="relu"),
    layers.Dense(10),
])

In [6]:
# Configure training: which optimizer to use, which loss to minimise and
# which metrics to report.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    # from_logits=True -> the model outputs raw, un-normalised scores (its
    #   last Dense layer has no softmax), so the loss applies softmax itself.
    # "Sparse" -> the labels are plain integer class ids; the non-sparse
    #   CategoricalCrossentropy would need one-hot encoded labels instead.
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"],
)

In [7]:
# Train the network: 5 passes over the training set in mini-batches of 32.
# verbose=2 prints one summary line per epoch; verbose=1 would show a
# progress bar instead.
model.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=5,
    verbose=2,
)

Epoch 1/5
1875/1875 - 14s - 7ms/step - accuracy: 0.9452 - loss: 0.1846
Epoch 2/5
1875/1875 - 11s - 6ms/step - accuracy: 0.9758 - loss: 0.0805
Epoch 3/5
1875/1875 - 10s - 5ms/step - accuracy: 0.9834 - loss: 0.0528
Epoch 4/5
1875/1875 - 10s - 5ms/step - accuracy: 0.9865 - loss: 0.0431
Epoch 5/5
1875/1875 - 11s - 6ms/step - accuracy: 0.9894 - loss: 0.0329


<keras.src.callbacks.history.History at 0x1367a6141a0>

In [8]:
# Evaluate our model on the test data; the cell displays the values returned
# for the loss and the compiled "accuracy" metric.
model.evaluate(
    x_test,
    y_test,
    batch_size=32,
    verbose=2,
)

313/313 - 0s - 1ms/step - accuracy: 0.9773 - loss: 0.0850


[0.08495482802391052, 0.9772999882698059]

### Model Summary
***

In [9]:
# summary() needs a built model. Declaring the input shape up front builds it
# immediately; otherwise you would have to go through the fit process (or
# otherwise feed the model data) before a summary is available.
model = keras.Sequential(
    [
        keras.Input(shape=(28*28,)),
        layers.Dense(512, activation='relu'),
        layers.Dense(256, activation='relu'),
        layers.Dense(10)
    ]
)
# summary() prints the table itself and returns None, so wrapping it in
# print() only adds a stray "None" line to the cell output.
model.summary()

None


### Adding layers in Sequential API
***

In [10]:
# Same architecture as before, assembled incrementally: start from an empty
# Sequential model and append one layer at a time with add().
model = keras.Sequential()
model.add(keras.Input(shape=(784,)))
model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dense(10))

# summary() prints the table itself and returns None, so wrapping it in
# print() only adds a stray "None" line to the cell output.
model.summary()

None


### Functional API
***
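A bit more flexible: layers are called like functions on tensors, so models with multiple inputs or outputs, or with shared layers, can be expressed.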

In [11]:
# Functional API: start from a symbolic input and call each layer on the
# tensor produced by the previous one. The two hidden layers get explicit
# names so they can be looked up by name later.
inputs = keras.Input(shape=(784,))
x = layers.Dense(
    units=512,
    activation="relu",
    name="first_layer",
)(inputs)
x = layers.Dense(
    units=256,
    activation="relu",
    name="second_layer",
)(x)
# Output layer: 10 units, no activation (raw logits), auto-generated name.
outputs = layers.Dense(units=10)(x)

In [12]:
# Tie the symbolic input and output tensors together into a Model, then
# print its layer-by-layer summary.
model = keras.Model(
    inputs=inputs,
    outputs=outputs,
)
model.summary()

### Debugging Model 
***

In [13]:
# Debugging aid: a second model that shares the original model's inputs but
# ends at the second-to-last layer, exposing that layer's output.
# model.get_layer('second_layer').output also works for selecting it by name.
model_dash = keras.Model(
    inputs=model.inputs,
    outputs=[model.layers[-2].output],
)

In [14]:
# Run the truncated debug model over the training inputs to obtain the
# intermediate layer's output, then print its shape as a quick check.
feature = model_dash.predict(x_train)
print(feature.shape)

[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 970us/step
(60000, 256)


In [15]:
# Rebuild the debug model so that it returns the output of every layer in
# `model` rather than just one of them.
model_dash = keras.Model(
    inputs=model.inputs,
    outputs=[layer.output for layer in model.layers],
)

# predict() now yields one array per requested layer output.
features = model_dash.predict(x_train)

# Print each layer's output shape. The loop variable rebinds the `feature`
# name that the single-layer check assigned.
for feature in features:
    print(feature.shape)

[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step
(60000, 784)
(60000, 512)
(60000, 256)
(60000, 10)
