In [None]:
import os
# Expose only GPU 0 to this process. This is set before TensorFlow is imported
# (next cell) -- presumably so the restriction is in place when CUDA initializes.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [None]:
import tensorflow as tf
import time

In [None]:
# Show the installed TensorFlow version; the rest of this notebook uses the TF 2 API.
print(tf.__version__)

# A brief summary of major changes

- API cleanup. Removes redundant APIs, makes APIs more consistent.
- Eager execution. Operations run immediately by default. Decorate a Python function with `tf.function` to mark it for JIT compilation, so that TensorFlow runs it as a single graph.
- No more "globals". If you lose track of a `tf.Variable`, it gets garbage collected.

See https://www.tensorflow.org/guide/effective_tf2 for details.

# Data pipeline

You can use NumPy arrays or the `tf.data` API for data pipelining. Generally, for larger datasets, you want your data as a `tf.data.Dataset` object. A Dataset object can be **created** from data in memory or on disk, and can be **transformed** into another Dataset.

See https://www.tensorflow.org/guide/data for details.

In [None]:
# Load MNIST data from `tf.keras.datasets`.
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()  # These are NumPy arrays.

# Rescale pixel values from [0, 255] to [0, 1].
# Cast to float32 first: dividing the uint8 arrays by a Python float would
# produce float64, which doubles memory use and makes Keras layers (float32 by
# default) warn that they are casting their input.
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

# Add a channel dimension.
x_train = x_train[..., tf.newaxis]
x_test = x_test[..., tf.newaxis]

In [None]:
# Wrap the in-memory arrays as `tf.data.Dataset` pipelines.
# Training data: shuffle with a 1000-element buffer, then group into batches of 32.
train_dataset = (
    tf.data.Dataset
    .from_tensor_slices((x_train, y_train))
    .shuffle(1000)
    .batch(32)
)
# Test data: batched only, no shuffling.
test_dataset = (
    tf.data.Dataset
    .from_tensor_slices((x_test, y_test))
    .batch(32)
)

# Model

Model architecture APIs (High-level to low-level):
- Sequential model. Data goes through a sequence of layers.
- Functional API. More flexible than Sequential model.
- Layer subclassing. Subclass `tf.keras.layers.Layer` to create a custom layer (a custom computation block).
- Model subclassing. Subclass `tf.keras.Model`. Like layer subclassing, but allows you to use the `.fit()`, `.evaluate()`, and `.predict()` methods.

## Sequential

In [None]:
# Sequential API: a plain stack of layers, applied in the order listed.
model = tf.keras.Sequential(
    layers=[
        tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
        tf.keras.layers.Dense(units=128, activation='relu'),
        tf.keras.layers.Dropout(rate=0.2),
        tf.keras.layers.Dense(units=10, activation='softmax'),
    ],
    name='mnist_sequential',
)

# Configure the built-in training loop: optimizer, loss and reported metric.
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy'],
)

Model summary

In [None]:
# Print the layer stack of the model defined above.
model.summary()

Callbacks

In [None]:
# Callbacks are passed to `model.fit`; this one writes TensorBoard logs under ./logs.
callbacks = [tf.keras.callbacks.TensorBoard(log_dir='logs')]

Train from NumPy array

In [None]:
# Train straight from the in-memory NumPy arrays (Keras does the batching) and
# time the whole run. TensorBoard logging is enabled through `callbacks`.
start = time.time()
model.fit(x_train, y_train, batch_size=32, epochs=5, validation_data=(x_test, y_test), callbacks=callbacks)
print(f'Time taken : {time.time() - start} sec')

Train from `tf.data.Dataset`

In [None]:
# Train from the pre-batched `tf.data.Dataset` objects instead of NumPy arrays.
# NOTE: `model` is the same instance fitted in the previous cell, so this run
# continues from the already-trained weights rather than starting fresh; the
# accuracy and timing are therefore not a like-for-like comparison.
start = time.time()
model.fit(train_dataset, epochs=5, validation_data=test_dataset)
print(f'Time taken : {time.time() - start} sec')

Evaluate

In [None]:
# Compute the compiled loss and metrics over the test dataset.
model.evaluate(test_dataset)

Predict

In [None]:
# Run inference on the test images; the final layer is a 10-unit softmax, so
# each prediction is a vector of 10 class probabilities.
y_pred = model.predict(x_test)

## Functional

In [None]:
# Functional API: layers are called on tensors, wiring the graph explicitly.
# This builds the same architecture as the Sequential example above.
inputs = tf.keras.Input(shape=(28, 28, 1))
x = tf.keras.layers.Flatten()(inputs)
x = tf.keras.layers.Dense(128, activation='relu')(x)
x = tf.keras.layers.Dropout(0.2)(x)
outputs = tf.keras.layers.Dense(10, activation='softmax')(x)

# A functional model is defined by its input and output tensors.
model = tf.keras.Model(inputs=inputs, outputs=outputs, name='mnist_functional')

# Same loss and metric as the Sequential example, optimized with RMSprop here.
model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=1e-3),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(),
              metrics=['accuracy'])

In [None]:
# Print the layer stack of the functional model.
model.summary()

In [None]:
# Train the functional model for 5 epochs on the batched dataset and time it.
start = time.time()
model.fit(train_dataset, epochs=5, validation_data=test_dataset)
print(f'Time taken : {time.time() - start} sec')

### Non-sequential example: ResNet-style residual connection

In [None]:
# Stem: two convolutions followed by max pooling. Its output is kept under its
# own name because it is used twice below (branch input and skip connection).
inputs = tf.keras.Input(shape=(28, 28, 1))
x = tf.keras.layers.Conv2D(32, 3, activation='relu')(inputs)
x = tf.keras.layers.Conv2D(64, 3, activation='relu')(x)
block_1_output = tf.keras.layers.MaxPooling2D(3)(x)

# Residual branch: both convolutions use 64 filters with padding='same', so the
# result can be added element-wise to `block_1_output`. This add is what makes
# the graph non-sequential.
x = tf.keras.layers.Conv2D(64, 3, activation='relu', padding='same')(block_1_output)
x = tf.keras.layers.Conv2D(64, 3, activation='relu', padding='same')(x)
x = tf.keras.layers.add([x, block_1_output])

# Classifier head.
x = tf.keras.layers.Flatten()(x)
x = tf.keras.layers.Dropout(0.5)(x)
outputs = tf.keras.layers.Dense(10, activation='softmax')(x)

model = tf.keras.Model(inputs=inputs, outputs=outputs, name='resnet_functional')

model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=1e-3),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(),
              metrics=['accuracy'])

In [None]:
# Print the layer stack of the residual model.
model.summary()

In [None]:
# Train the residual model for 5 epochs on the batched dataset and time it.
start = time.time()
model.fit(train_dataset, epochs=5, validation_data=test_dataset)
print(f'Time taken : {time.time() - start} sec')

## Layer subclassing

Create a custom "layer"

In [None]:
class ResidualBlock(tf.keras.layers.Layer):
    """Two 3x3 convolutions wrapped in an identity skip connection.

    Computes ``conv2(conv1(inputs)) + inputs``. Both convolutions use 64
    filters with ``padding='same'``, so the input must also have 64 channels
    for the addition to be shape-compatible.

    Args:
        **kwargs: Standard ``tf.keras.layers.Layer`` keyword arguments
            (e.g. ``name``, ``dtype``, ``trainable``), forwarded to the base
            class.
    """

    def __init__(self, **kwargs):
        # Forward base-layer kwargs so the block can be given a name and so
        # that `from_config` (which calls `cls(**config)`) can re-create it.
        super().__init__(**kwargs)
        self.conv1 = tf.keras.layers.Conv2D(64, 3, activation='relu', padding='same')
        self.conv2 = tf.keras.layers.Conv2D(64, 3, activation='relu', padding='same')

    def call(self, inputs):
        """Apply both convolutions, then add the block input back on."""
        x = self.conv1(inputs)
        x = self.conv2(x)
        return x + inputs

In [None]:
# Same stem as the residual example, but the two-convolution residual branch is
# now a single custom layer.
inputs = tf.keras.Input(shape=(28, 28, 1))
x = tf.keras.layers.Conv2D(32, 3, activation='relu')(inputs)
x = tf.keras.layers.Conv2D(64, 3, activation='relu')(x)
x = tf.keras.layers.MaxPooling2D(3)(x)

# The skip connection lives inside `ResidualBlock.call`, so this graph is a
# simple chain of layers.
x = ResidualBlock()(x)

# Classifier head.
x = tf.keras.layers.Dropout(0.5)(x)
x = tf.keras.layers.Flatten()(x)
outputs = tf.keras.layers.Dense(10, activation='softmax')(x)

model = tf.keras.Model(inputs=inputs, outputs=outputs, name='custom_layer_functional')

model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=1e-3),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(),
              metrics=['accuracy'])

In [None]:
# Now the model looks "sequential": the residual connection is hidden inside
# the custom `ResidualBlock` layer.
model.summary()

## Model subclassing

In [None]:
class CustomModel(tf.keras.Model):
    """MNIST classifier written by subclassing ``tf.keras.Model``.

    Forward pass: Conv2D(32) -> Conv2D(64) -> MaxPooling2D(3) -> ResidualBlock
    -> Dropout(0.5) -> Flatten -> Dense(10, softmax).

    Args:
        **kwargs: Standard ``tf.keras.Model`` keyword arguments (e.g. ``name``),
            forwarded to the base class.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)  # Initialize base class.

        self.conv1 = tf.keras.layers.Conv2D(32, 3, activation='relu')
        self.conv2 = tf.keras.layers.Conv2D(64, 3, activation='relu')
        self.pool = tf.keras.layers.MaxPooling2D(3)
        self.residual = ResidualBlock()
        self.dropout = tf.keras.layers.Dropout(0.5)
        self.flatten = tf.keras.layers.Flatten()
        self.dense = tf.keras.layers.Dense(10, activation='softmax')

    def call(self, inputs, training=None):
        """Compute class probabilities for a batch of images.

        Args:
            inputs: Batch of input images.
            training: Whether the call is in training mode. Only the dropout
                layer uses it.

        Returns:
            Output of the final 10-unit softmax layer.
        """
        x = self.conv1(inputs)
        x = self.conv2(x)
        x = self.pool(x)
        x = self.residual(x)
        # Dropout only drops units when `training` is true and is an identity
        # otherwise, so no Python-level `if training:` guard is needed.
        x = self.dropout(x, training=training)
        x = self.flatten(x)
        x = self.dense(x)

        return x
    
# Instantiate the subclassed model and compile it for the built-in training loop.
model = CustomModel()
model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=1e-3),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(),
              metrics=['accuracy'])

In [None]:
# Train the subclassed model for 5 epochs with `.fit()` and time it.
start = time.time()
model.fit(train_dataset, epochs=5, validation_data=test_dataset)
print(f'Time taken : {time.time() - start} sec')

# Training

Training APIs (High-level to low-level):
- Built-in training loops. (`model.compile(...)`, then `model.fit(...)`).
- Writing training loops from scratch with `tf.GradientTape`.

## Custom training

In [None]:
# Load MNIST data from `tf.keras.datasets`.
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()  # These are NumPy arrays.

# Rescale pixel values from [0, 255] to [0, 1].
# Cast to float32 first: dividing the uint8 arrays by a Python float would
# produce float64, which doubles memory use and makes Keras layers (float32 by
# default) warn that they are casting their input.
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

# Add a channel dimension.
x_train = x_train[..., tf.newaxis]
x_test = x_test[..., tf.newaxis]

# Create dataset, then shuffle and batch them.
BUFFER = 1000      # Shuffle buffer size (number of elements).
BATCH_SIZE = 32    # Examples per batch.
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(BUFFER).batch(BATCH_SIZE)
test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(BATCH_SIZE)

In [None]:
class ResidualBlock(tf.keras.layers.Layer):
    """Two 3x3 convolutions wrapped in an identity skip connection.

    Computes ``conv2(conv1(inputs)) + inputs``. Both convolutions use 64
    filters with ``padding='same'``, so the input must also have 64 channels
    for the addition to be shape-compatible.

    Args:
        **kwargs: Standard ``tf.keras.layers.Layer`` keyword arguments
            (e.g. ``name``, ``dtype``, ``trainable``), forwarded to the base
            class.
    """

    def __init__(self, **kwargs):
        # Forward base-layer kwargs so the block can be given a name and so
        # that `from_config` (which calls `cls(**config)`) can re-create it.
        super().__init__(**kwargs)
        self.conv1 = tf.keras.layers.Conv2D(64, 3, activation='relu', padding='same')
        self.conv2 = tf.keras.layers.Conv2D(64, 3, activation='relu', padding='same')

    def call(self, inputs):
        """Apply both convolutions, then add the block input back on."""
        x = self.conv1(inputs)
        x = self.conv2(x)
        return x + inputs
    
class CustomModel(tf.keras.Model):
    """MNIST classifier written by subclassing ``tf.keras.Model``.

    Forward pass: Conv2D(32) -> Conv2D(64) -> MaxPooling2D(3) -> ResidualBlock
    -> Dropout(0.5) -> Flatten -> Dense(10, softmax).

    Args:
        **kwargs: Standard ``tf.keras.Model`` keyword arguments (e.g. ``name``),
            forwarded to the base class.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)  # Initialize base class.

        self.conv1 = tf.keras.layers.Conv2D(32, 3, activation='relu')
        self.conv2 = tf.keras.layers.Conv2D(64, 3, activation='relu')
        self.pool = tf.keras.layers.MaxPooling2D(3)
        self.residual = ResidualBlock()
        self.dropout = tf.keras.layers.Dropout(0.5)
        self.flatten = tf.keras.layers.Flatten()
        self.dense = tf.keras.layers.Dense(10, activation='softmax')

    def call(self, inputs, training=None):
        """Compute class probabilities for a batch of images.

        Args:
            inputs: Batch of input images.
            training: Whether the call is in training mode. Only the dropout
                layer uses it.

        Returns:
            Output of the final 10-unit softmax layer.
        """
        x = self.conv1(inputs)
        x = self.conv2(x)
        x = self.pool(x)
        x = self.residual(x)
        # Dropout only drops units when `training` is true and is an identity
        # otherwise, so no Python-level `if training:` guard is needed.
        x = self.dropout(x, training=training)
        x = self.flatten(x)
        x = self.dense(x)

        return x

# Not compiled: this section drives training with a hand-written loop instead
# of `.fit()`.
model = CustomModel()

In [None]:
# TensorBoard writers
# Training and validation summaries go to separate directories.
train_log_dir = 'logs/basic/train'
val_log_dir = 'logs/basic/val'
train_summary_writer = tf.summary.create_file_writer(train_log_dir)
val_summary_writer = tf.summary.create_file_writer(val_log_dir)

# Optimizer, loss function and metric objects. The training functions defined
# below read these (and `model`) as notebook-level globals.
optimizer = tf.keras.optimizers.RMSprop(learning_rate=1e-3)
loss = tf.keras.losses.SparseCategoricalCrossentropy()
metrics = [tf.keras.metrics.SparseCategoricalAccuracy()]
# Running mean of the per-batch loss values.
loss_metric = tf.keras.metrics.Mean('loss')

Functions to train and test one batch. Note that we're decorating the functions with `tf.function` to mark them for JIT compilation, so that TensorFlow runs each as a single graph. We also create a function to perform the training loop.

In [None]:
@tf.function
def train_on_batch(x, y):
    """Run one optimization step on the batch ``(x, y)``.

    Uses the notebook-level ``model``, ``loss``, ``optimizer``, ``metrics`` and
    ``loss_metric``. Updates the model weights and accumulates the batch
    results into the metric objects. Returns nothing.
    """
    # Forward pass under the tape so the operations are recorded for autodiff.
    with tf.GradientTape() as tape:
        predictions = model(x, training=True)
        batch_loss = loss(y, predictions)

    # Read the weight list only after the forward pass has run.
    weights = model.trainable_weights
    # Gradient of the loss w.r.t. each trainable weight.
    gradients = tape.gradient(batch_loss, weights)
    # Let the optimizer update every weight with its gradient.
    optimizer.apply_gradients(zip(gradients, weights))

    # Accumulate this batch into the running metrics and running mean loss.
    for metric in metrics:
        metric(y, predictions)
    loss_metric(batch_loss)
        
@tf.function
def test_on_batch(x, y):
    """Evaluate the batch ``(x, y)`` without updating any weights.

    Uses the notebook-level ``model``, ``loss``, ``metrics`` and
    ``loss_metric``. Accumulates the batch results into the metric objects.
    Returns nothing.
    """
    # Forward pass in inference mode.
    predictions = model(x, training=False)
    batch_loss = loss(y, predictions)

    # Accumulate this batch into the running metrics and running mean loss.
    for metric in metrics:
        metric(y, predictions)
    loss_metric(batch_loss)
    
def _log_and_reset_metrics(writer, epoch, prefix=''):
    """Write the accumulated loss/metrics to TensorBoard, reset them, and return them.

    Args:
        writer: Summary writer that receives the scalars.
        epoch: Epoch index, used as the TensorBoard step.
        prefix: String prepended to every key of the returned dict (e.g.
            ``'val_'``). TensorBoard tags are not prefixed because training and
            validation use separate writers.

    Returns:
        Dict mapping ``'<prefix>loss'`` and ``'<prefix><metric name>'`` to the
        values accumulated since the last reset.
    """
    values = {f'{prefix}loss': loss_metric.result().numpy()}
    with writer.as_default():
        tf.summary.scalar('loss', loss_metric.result(), step=epoch)
        for metric in metrics:
            tf.summary.scalar(metric.name, metric.result(), step=epoch)
            values[f'{prefix}{metric.name}'] = metric.result().numpy()

    # Reset so the next pass (validation, or the next epoch) starts from zero.
    for metric in metrics:
        metric.reset_states()
    loss_metric.reset_states()
    return values


def train(train_data, epochs, validation_data=None):
    """Perform training loop.

    For every epoch: train on each batch of ``train_data``, optionally evaluate
    on each batch of ``validation_data``, write the epoch's loss and metrics to
    TensorBoard and print a one-line summary.

    Uses the notebook-level ``train_on_batch``, ``test_on_batch``, ``metrics``,
    ``loss_metric``, ``train_summary_writer`` and ``val_summary_writer``.

    Args:
        train_data: Iterable of ``(x, y)`` training batches.
        epochs: Number of passes over ``train_data``.
        validation_data: Optional iterable of ``(x, y)`` validation batches.
    """
    for epoch in range(epochs):
        start = time.time()

        # Iterate through the training dataset and train on each batch.
        for x, y in train_data:
            train_on_batch(x, y)
        epoch_values = _log_and_reset_metrics(train_summary_writer, epoch)

        if validation_data is not None:
            # Iterate through the validation dataset and evaluate each batch.
            for x, y in validation_data:
                test_on_batch(x, y)
            # Prefix validation keys so they are distinguishable from the
            # training values in the printed summary.
            epoch_values.update(
                _log_and_reset_metrics(val_summary_writer, epoch, prefix='val_'))

        # `epoch` is the 0-based index, so the last line reads e.g. "Epoch 4/5".
        message = [f'Epoch {epoch}/{epochs}', f'{(time.time() - start):.2f} sec']
        message += [f'{k}: {v:.4f}' for k, v in epoch_values.items()]
        print(' - '.join(message))

In [None]:
# Shorter variant that uses only the first 30 batches of each dataset:
# train(train_dataset.take(30), epochs=5, validation_data=test_dataset.take(30))
# Full run: 5 epochs over the training dataset, validating on the test dataset.
train(train_dataset, epochs=5, validation_data=test_dataset)

```
WARNING:tensorflow:Layer custom_model_20 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2.  The layer has dtype float32 because it's dtype defaults to floatx.

If you intended to run this layer in float32, you can safely ignore this warning. If in doubt, this warning is likely only an issue if you are porting a TensorFlow 1.X model to TensorFlow 2.

To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Epoch 0/5 - 9.96 sec - loss: 0.1264 - sparse_categorical_accuracy: 0.9621 - val_loss: 0.0426 - val_sparse_categorical_accuracy: 0.9855
Epoch 1/5 - 9.03 sec - loss: 0.0457 - sparse_categorical_accuracy: 0.9862 - val_loss: 0.0319 - val_sparse_categorical_accuracy: 0.9898
Epoch 2/5 - 9.09 sec - loss: 0.0372 - sparse_categorical_accuracy: 0.9894 - val_loss: 0.0259 - val_sparse_categorical_accuracy: 0.9911
Epoch 3/5 - 9.06 sec - loss: 0.0327 - sparse_categorical_accuracy: 0.9910 - val_loss: 0.0263 - val_sparse_categorical_accuracy: 0.9908
Epoch 4/5 - 9.12 sec - loss: 0.0290 - sparse_categorical_accuracy: 0.9916 - val_loss: 0.0239 - val_sparse_categorical_accuracy: 0.9923
```

# Test: simplified model trained eagerly (without `tf.function`)

In [None]:
class CustomModel(tf.keras.Model):
    """Cut-down variant of the classifier: Conv2D(32) -> Flatten -> Dense(10, softmax).

    All layers of the full model are still constructed, but `call` bypasses
    everything between the first convolution and the flatten step.
    """

    def __init__(self):
        super().__init__()  # Initialize base class.

        layers = tf.keras.layers
        self.conv1 = layers.Conv2D(32, 3, activation='relu')
        self.conv2 = layers.Conv2D(64, 3, activation='relu')
        self.pool = layers.MaxPooling2D(3)
        self.residual = ResidualBlock()
        self.dropout = layers.Dropout(0.5)
        self.flatten = layers.Flatten()
        self.dense = layers.Dense(10, activation='softmax')

    def call(self, x, training=None):
        """Return class probabilities for the batch `x`; `training` is unused here."""
        features = self.conv1(x)
        # Disabled stages, in the position they occupy in the full model:
        #   conv2 -> pool -> residual -> dropout (training only)
        return self.dense(self.flatten(features))
    
# `@tf.function` is left commented out, so this variant runs eagerly.
# @tf.function
def train_on_batch(x, y):
    """Run one optimization step on the batch ``(x, y)``.

    Uses the notebook-level ``model``, ``loss``, ``optimizer``, ``metrics`` and
    ``loss_metric``. Updates the model weights and accumulates the batch
    results into the metric objects. Returns nothing.
    """
    # Forward pass under the tape so the operations are recorded for autodiff.
    with tf.GradientTape() as tape:
        predictions = model(x, training=True)
        batch_loss = loss(y, predictions)

    # Read the weight list only after the forward pass has run.
    weights = model.trainable_weights
    # Gradient of the loss w.r.t. each trainable weight.
    gradients = tape.gradient(batch_loss, weights)
    # Let the optimizer update every weight with its gradient.
    optimizer.apply_gradients(zip(gradients, weights))

    # Accumulate this batch into the running metrics and running mean loss.
    for metric in metrics:
        metric(y, predictions)
    loss_metric(batch_loss)
        
# @tf.function
def test_on_batch(x, y):
    """Test one batch of (x, y)"""
    # Compute loss.
    y_pred = model(x, training=False)
    loss_value = loss(y, y_pred)
    
    # Compute metrics. Metrics will accumulate values.
    for metric in metrics:
        metric(y, y_pred)
        
    # Record loss
    loss_metric(loss_value)
    
def train(train_data, epochs, validation_data=None):
    """Run the epoch loop and print one summary line per epoch (no TensorBoard logging).

    Uses the notebook-level ``train_on_batch``, ``test_on_batch``, ``metrics``
    and ``loss_metric``.

    Args:
        train_data: Iterable of ``(x, y)`` training batches.
        epochs: Number of passes over ``train_data``.
        validation_data: Optional iterable of ``(x, y)`` validation batches.
    """

    def render(values):
        # "name: 0.1234 - other: 0.5678"
        return ' - '.join(f'{k}: {v:.4f}' for k, v in values.items())

    def drain(prefix):
        # Read the accumulated values, then reset the metric objects so the
        # next pass starts from zero. Returns [loss text, metrics text].
        metric_values = {f'{prefix}{m.name}': m.result().numpy() for m in metrics}
        loss_values = {f'{prefix}loss': loss_metric.result().numpy()}
        for m in metrics:
            m.reset_states()
        loss_metric.reset_states()
        return [render(loss_values), render(metric_values)]

    for epoch in range(epochs):
        tic = time.time()

        # Training pass: one optimization step per batch.
        for x, y in train_data:
            train_on_batch(x, y)
        report = drain('')

        if validation_data is not None:
            # Validation pass: evaluate each batch, no weight updates.
            for x, y in validation_data:
                test_on_batch(x, y)
            report += drain('val_')

        header = [f'Epoch {epoch}/{epochs}', f'{(time.time() - tic):.2f} sec']
        print(' - '.join(header + report))
        
# Fresh optimizer, loss and metric objects for this experiment. Adam is used
# here; the RMSprop setting from the previous section is kept for reference.
# optimizer = tf.keras.optimizers.RMSprop(learning_rate=1e-3)
optimizer = tf.keras.optimizers.Adam()
loss = tf.keras.losses.SparseCategoricalCrossentropy()
metrics = [tf.keras.metrics.SparseCategoricalAccuracy()]
loss_metric = tf.keras.metrics.Mean('loss')

# New, untrained instance of the `CustomModel` defined in this cell.
model = CustomModel()

# Shorter variant that uses only the first 30 batches of each dataset:
# train(train_dataset.take(30), epochs=5, validation_data=test_dataset.take(30))
train(train_dataset, epochs=5, validation_data=test_dataset)