In [1]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, Flatten, Input
from tensorflow.keras.models import Sequential, Model

In [2]:
mnist = tf.keras.datasets.mnist

In [3]:
(x_train, y_train), (x_valid, y_valid) = mnist.load_data()

In [4]:
x_train.shape, x_valid.shape

((60000, 28, 28), (10000, 28, 28))

In [5]:
y_train.shape, y_valid.shape

((60000,), (10000,))

## Sequential API

In [6]:
model = Sequential([
    Flatten(input_shape=(28, 28)),
    Dense(256, activation='relu'),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(10, activation='softmax')
])

In [7]:
model.summary(0)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 784)               0         
_________________________________________________________________
dense (Dense)                (None, 256)               200960    
_________________________________________________________________
dense_1 (Dense)              (None, 128)               32896     
_________________________________________________________________
dense_2 (Dense)              (None, 64)                8256      
_________________________________________________________________
dense_3 (Dense)              (None, 32)                2080      
_________________________________________________________________
dense_4 (Dense)              (None, 10)                330       
Total params: 244,522
Trainable params: 244,522
Non-trainable params: 0
__________________________________________________

In [8]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['acc'])

model.fit(x_train, y_train,
         validation_data=(x_valid, y_valid),
         epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x214bd738f70>

## Functional API

In [11]:
input_ = Input(shape=(28, 28))

In [12]:
x1 = Flatten()(input_)
x2 = Dense(256, activation='relu')(x1)
x3 = Dense(128, activation='relu')(x2)
x4 = Dense(64, activation='relu')(x3)
x5 = Dense(32, activation='relu')(x4)
output_ = Dense(10, activation='softmax')(x5)

In [13]:
model = Model(input_, output_)

In [14]:
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 28, 28)]          0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 784)               0         
_________________________________________________________________
dense_5 (Dense)              (None, 256)               200960    
_________________________________________________________________
dense_6 (Dense)              (None, 128)               32896     
_________________________________________________________________
dense_7 (Dense)              (None, 64)                8256      
_________________________________________________________________
dense_8 (Dense)              (None, 32)                2080      
_________________________________________________________________
dense_9 (Dense)              (None, 10)                330   

## Sub-Classing

In [15]:
class MyModel(Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.flatten = Flatten()
        self.dense1 = Dense(256, activation='relu')
        self.dense2 = Dense(128, activation='relu')
        self.dense3 = Dense(64, activation='relu')
        self.dense4 = Dense(32, activation='relu')
        self.output_ = Dense(10, activation='softmax')
    
    def call(self, input_):
        x = self.flatten(input_)
        x = self.dense1(x)
        x = self.dense2(x)
        x = self.dense3(x)
        x = self.dense4(x)
        x = self.output_(x)
        return x

In [16]:
model = MyModel()

In [17]:
model(input_)

<KerasTensor: shape=(None, 10) dtype=float32 (created by layer 'my_model')>

In [18]:
model.summary()

Model: "my_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_3 (Flatten)          multiple                  0         
_________________________________________________________________
dense_10 (Dense)             multiple                  200960    
_________________________________________________________________
dense_11 (Dense)             multiple                  32896     
_________________________________________________________________
dense_12 (Dense)             multiple                  8256      
_________________________________________________________________
dense_13 (Dense)             multiple                  2080      
_________________________________________________________________
dense_14 (Dense)             multiple                  330       
Total params: 244,522
Trainable params: 244,522
Non-trainable params: 0
____________________________________________________

In [19]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['acc'])

model.fit(x_train, y_train,
         validation_data=(x_valid, y_valid),
         epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x214e4167bb0>

## Layer Custom

In [20]:
class MyDense(tf.keras.layers.Layer):
    def __init__(self, nodes):
        super(MyDense, self).__init__()
        self.dense1 = Dense(nodes, activation='relu')
        self.dense2 = Dense(nodes // 2, activation='relu')
        self.dense3 = Dense(nodes // 4, activation='relu')
        self.dense4 = Dense(nodes // 8, activation='relu')
    
    def call(self, x):
        x = self.dense1(x)
        x = self.dense2(x)
        x = self.dense3(x)
        x = self.dense4(x)
        return x

In [21]:
class MyModel(Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.flatten = Flatten()
        self.dense = MyDense(256)
        self.output_ = Dense(10, activation='softmax')
    
    def call(self, input_):
        x = self.flatten(input_)
        x = self.dense(x)
        x = self.output_(x)
        return x

In [22]:
model = MyModel()
input_ = Input(shape=(28, 28))
model(input_)

<KerasTensor: shape=(None, 10) dtype=float32 (created by layer 'my_model_1')>

In [23]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['acc'])

model.fit(x_train, y_train,
         validation_data=(x_valid, y_valid),
         epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x215a77dfe50>

In [24]:
train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(1000).batch(32)
valid_data = tf.data.Dataset.from_tensor_slices((x_valid, y_valid)).shuffle(1000).batch(32)

In [25]:
optimizer = tf.keras.optimizers.Adam()

In [26]:
loss_function = tf.keras.losses.SparseCategoricalCrossentropy()

In [27]:
train_loss = tf.keras.metrics.Mean()
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy() 
valid_loss = tf.keras.metrics.Mean()
valid_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()

In [28]:
@tf.function
def train_step(images, labels):
    with tf.GradientTape() as tape:
        predictions = model(images, training=True)
        loss = loss_function(labels, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    train_loss(loss)
    train_accuracy(labels, predictions)

In [29]:
@tf.function
def valid_step(images, labels):
    predictions = model(images, training=False)
    loss = loss_function(labels, predictions)
    
    valid_loss(loss)
    valid_accuracy(labels, predictions)

In [30]:
train_loss.reset_states()
train_accuracy.reset_states()
valid_loss.reset_states()
valid_accuracy.reset_states()

for epoch in range(10):
    for images, labels in train_data:
        train_step(images, labels)
    
    for images, labels in valid_data:
        valid_step(images, labels)
    
    template = 'epoch: {}, loss: {:.3f}, acc: {:.3f}, val_loss: {:.3f}, val_accuracy: {:.3f}'
    print(template.format(epoch+1, train_loss.result(), train_accuracy.result(), valid_loss.result(), valid_accuracy.result()))

epoch: 1, loss: 0.075, acc: 0.984, val_loss: 0.159, val_accuracy: 0.974
epoch: 2, loss: 0.062, acc: 0.986, val_loss: 0.148, val_accuracy: 0.974
epoch: 3, loss: 0.056, acc: 0.986, val_loss: 0.150, val_accuracy: 0.974
epoch: 4, loss: 0.053, acc: 0.987, val_loss: 0.154, val_accuracy: 0.974
epoch: 5, loss: 0.052, acc: 0.987, val_loss: 0.146, val_accuracy: 0.975
epoch: 6, loss: 0.050, acc: 0.988, val_loss: 0.141, val_accuracy: 0.975
epoch: 7, loss: 0.048, acc: 0.988, val_loss: 0.147, val_accuracy: 0.974
epoch: 8, loss: 0.046, acc: 0.989, val_loss: 0.145, val_accuracy: 0.974
epoch: 9, loss: 0.045, acc: 0.989, val_loss: 0.148, val_accuracy: 0.974
epoch: 10, loss: 0.044, acc: 0.989, val_loss: 0.148, val_accuracy: 0.974
