<a href="https://colab.research.google.com/github/shenzhun/machine-learning-prep/blob/master/tensorflow/day22_three_ways_of_training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import *

In [2]:
# Every article is padded/truncated to MAX_LEN token ids; batches hold BATCH_SIZE articles.
MAX_LEN = 300
BATCH_SIZE = 32

(x_train, y_train), (x_test, y_test) = datasets.reuters.load_data()
x_train = preprocessing.sequence.pad_sequences(x_train, maxlen=MAX_LEN)
x_test = preprocessing.sequence.pad_sequences(x_test, maxlen=MAX_LEN)

# Size the vocabulary and label space from BOTH splits so that no id seen at
# validation time can fall outside the Embedding table or the softmax layer.
MAX_WORDS = max(x_train.max(), x_test.max()) + 1
CAT_NUM = max(y_train.max(), y_test.max()) + 1

# cache() comes first so only the raw examples are memoised; shuffle() then
# draws a new order every epoch, and prefetch() stays last so the input
# pipeline overlaps with the training step.
ds_train = tf.data.Dataset.from_tensor_slices((x_train, y_train)) \
          .cache().shuffle(buffer_size=1000).batch(BATCH_SIZE) \
          .prefetch(tf.data.experimental.AUTOTUNE)

# Evaluation metrics do not depend on example order, so the test split is
# not shuffled; its batches are identical every epoch and can be cached as-is.
ds_test = tf.data.Dataset.from_tensor_slices((x_test, y_test)) \
          .batch(BATCH_SIZE).cache() \
          .prefetch(tf.data.experimental.AUTOTUNE)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/reuters.npz


  x_train, y_train = np.array(xs[:idx]), np.array(labels[:idx])
  x_test, y_test = np.array(xs[idx:]), np.array(labels[idx:])


1. Pre-defined fit method

In [4]:
# Reset Keras' global state before building this section's model.
tf.keras.backend.clear_session()

def create_model():
  """Build the (uncompiled) 1-D convolutional text classifier.

  The stack is: an Embedding of MAX_WORDS token ids into 7-dimensional
  vectors over sequences of length MAX_LEN, two Conv1D + MaxPool1D stages,
  a Flatten, and a softmax Dense layer with CAT_NUM outputs.

  Returns:
    The assembled `Sequential` model.
  """
  return models.Sequential([
      layers.Embedding(MAX_WORDS, 7, input_length=MAX_LEN),
      layers.Conv1D(filters=64, kernel_size=5, activation="relu"),
      layers.MaxPool1D(2),
      layers.Conv1D(filters=32, kernel_size=3, activation="relu"),
      layers.MaxPool1D(2),
      layers.Flatten(),
      layers.Dense(CAT_NUM, activation="softmax"),
  ])

def compile_model(model):
  """Compile `model` in place for sparse-label multi-class training.

  Uses a Nadam optimizer, sparse categorical cross-entropy as the loss, and
  tracks sparse categorical accuracy plus sparse top-5 accuracy.

  Args:
    model: the Keras model to compile.

  Returns:
    The same `model` object, now compiled.
  """
  nadam = optimizers.Nadam()
  xent = losses.SparseCategoricalCrossentropy()
  tracked = [
      metrics.SparseCategoricalAccuracy(),
      metrics.SparseTopKCategoricalAccuracy(5),
  ]
  model.compile(optimizer=nadam, loss=xent, metrics=tracked)
  return model

# Build the network, print its layer/parameter table, then attach the
# optimizer, loss and metrics (compile_model returns the model it was given).
model = create_model()
model.summary()
model = compile_model(model)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 300, 7)            216874    
_________________________________________________________________
conv1d (Conv1D)              (None, 296, 64)           2304      
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 148, 64)           0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 146, 32)           6176      
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 73, 32)            0         
_________________________________________________________________
flatten (Flatten)            (None, 2336)              0         
_________________________________________________________________
dense (Dense)                (None, 46)                1

In [5]:
# Way 1: the built-in loop. fit() trains on ds_train for 10 epochs and
# evaluates on ds_test (passed as validation data).
history = model.fit(ds_train, validation_data=ds_test, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


2. Pre-defined train_on_batch method

In [9]:
# Reset Keras' global state so this section starts from a fresh model.
tf.keras.backend.clear_session()

def create_model():
  """Assemble the uncompiled Conv1D text classifier used in this section.

  Layers, in order: Embedding (MAX_WORDS ids -> 7-d vectors, sequence length
  MAX_LEN), Conv1D(64, 5) + MaxPool1D(2), Conv1D(32, 3) + MaxPool1D(2),
  Flatten, and a softmax Dense layer with CAT_NUM units.

  Returns:
    The assembled `Sequential` model.
  """
  stack = [
      layers.Embedding(MAX_WORDS, 7, input_length=MAX_LEN),
      layers.Conv1D(filters=64, kernel_size=5, activation="relu"),
      layers.MaxPool1D(2),
      layers.Conv1D(filters=32, kernel_size=3, activation="relu"),
      layers.MaxPool1D(2),
      layers.Flatten(),
      layers.Dense(CAT_NUM, activation="softmax"),
  ]

  model = models.Sequential()
  for layer in stack:
    model.add(layer)
  return model

def compile_model(model):
  """Attach optimizer, loss and metrics to `model` and hand it back.

  Configuration: Nadam optimizer, sparse categorical cross-entropy loss,
  and two metrics (sparse categorical accuracy, sparse top-5 accuracy).

  Args:
    model: the Keras model to compile (modified in place).

  Returns:
    The same `model` object.
  """
  compile_kwargs = dict(
      optimizer=optimizers.Nadam(),
      loss=losses.SparseCategoricalCrossentropy(),
      metrics=[
          metrics.SparseCategoricalAccuracy(),
          metrics.SparseTopKCategoricalAccuracy(5),
      ],
  )
  model.compile(**compile_kwargs)
  return model

# Fresh model for the train_on_batch experiment: build, show the summary,
# then compile (train_on_batch / test_on_batch need a compiled model).
model = create_model()
model.summary()
model = compile_model(model)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 300, 7)            216874    
_________________________________________________________________
conv1d (Conv1D)              (None, 296, 64)           2304      
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 148, 64)           0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 146, 32)           6176      
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 73, 32)            0         
_________________________________________________________________
flatten (Flatten)            (None, 2336)              0         
_________________________________________________________________
dense (Dense)                (None, 46)                1

In [10]:
def train_model(model, ds_train, ds_valid, epoches):
  for epoch in tf.range(1, epoches+1):
    model.reset_metrics()

    if epoch == 5:
      model.optimizer.lr.assign(model.optimizer.lr/2.0)
      tf.print("Lowering optimizer Learning Rate...\n\n")
    
    for x, y in ds_train:
      train_result = model.train_on_batch(x, y)
    
    for x, y in ds_valid:
      valid_result = model.test_on_batch(x, y, reset_metrics=False)
    
    if epoch % 1 == 0:
      tf.print("epoch = ", epoch)
      print("train:", dict(zip(model.metrics_names, train_result)))
      print("valid:", dict(zip(model.metrics_names, valid_result)))
      print("")


In [11]:
# Way 2: run the train_on_batch loop for 10 epochs, validating on ds_test.
train_model(model, ds_train, ds_test, 10)

epoch =  1
train: {'loss': 1.448559045791626, 'sparse_categorical_accuracy': 0.6363636255264282, 'sparse_top_k_categorical_accuracy': 0.8181818127632141}
valid: {'loss': 1.6617642641067505, 'sparse_categorical_accuracy': 0.5672306418418884, 'sparse_top_k_categorical_accuracy': 0.7622439861297607}

epoch =  2
train: {'loss': 1.1990635395050049, 'sparse_categorical_accuracy': 0.6363636255264282, 'sparse_top_k_categorical_accuracy': 0.9090909361839294}
valid: {'loss': 1.5186762809753418, 'sparse_categorical_accuracy': 0.609082818031311, 'sparse_top_k_categorical_accuracy': 0.7983080744743347}

epoch =  3
train: {'loss': 0.9579431414604187, 'sparse_categorical_accuracy': 0.6818181872367859, 'sparse_top_k_categorical_accuracy': 1.0}
valid: {'loss': 1.560197353363037, 'sparse_categorical_accuracy': 0.6139804124832153, 'sparse_top_k_categorical_accuracy': 0.7996438145637512}

epoch =  4
train: {'loss': 0.6208112239837646, 'sparse_categorical_accuracy': 0.7727272510528564, 'sparse_top_k_catego

3. Customized Training Loop

In [12]:
# Reset Keras' global state before building the model for the custom loop.
tf.keras.backend.clear_session()

def create_model():
  """Return the uncompiled Conv1D classifier trained by the custom loop.

  Token ids (vocabulary size MAX_WORDS, sequence length MAX_LEN) are embedded
  into 7-d vectors, passed through two Conv1D + MaxPool1D stages, flattened,
  and mapped to CAT_NUM softmax probabilities.

  Returns:
    The assembled `Sequential` model.
  """
  embed = layers.Embedding(MAX_WORDS, 7, input_length=MAX_LEN)
  conv_wide = layers.Conv1D(filters=64, kernel_size=5, activation="relu")
  pool_wide = layers.MaxPool1D(2)
  conv_narrow = layers.Conv1D(filters=32, kernel_size=3, activation="relu")
  pool_narrow = layers.MaxPool1D(2)
  flatten = layers.Flatten()
  classifier = layers.Dense(CAT_NUM, activation="softmax")

  return models.Sequential(
      [embed, conv_wide, pool_wide, conv_narrow, pool_narrow, flatten, classifier])

# No compile() here: the optimizer, loss and metrics for this section are
# created by hand in the next cell.
model = create_model()
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 300, 7)            216874    
_________________________________________________________________
conv1d (Conv1D)              (None, 296, 64)           2304      
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 148, 64)           0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 146, 32)           6176      
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 73, 32)            0         
_________________________________________________________________
flatten (Flatten)            (None, 2336)              0         
_________________________________________________________________
dense (Dense)                (None, 46)                1

In [17]:
# Loss and optimizer used by the train/valid step functions defined below.
loss_func = losses.SparseCategoricalCrossentropy()
optimizer = optimizers.Nadam()

# Per-epoch trackers: a running mean of the batch losses for each split ...
train_loss = metrics.Mean(name='train_loss')
valid_loss = metrics.Mean(name='valid_loss')

# ... and a running sparse categorical accuracy for each split.
train_metric = metrics.SparseCategoricalAccuracy(name='train_accuracy')
valid_metric = metrics.SparseCategoricalAccuracy(name='valid_accuracy')

@tf.function
def train_step(model, features, labels):
  """Run one optimisation step on a single batch.

  Performs the forward pass in training mode under a gradient tape, applies
  the gradients of the loss with the module-level `optimizer`, and feeds the
  batch loss and predictions into `train_loss` / `train_metric`.

  Args:
    model: the Keras model being trained.
    features: input batch passed to `model`.
    labels: target batch passed to `loss_func` and `train_metric`.
  """
  with tf.GradientTape() as tape:
    outputs = model(features, training=True)
    batch_loss = loss_func(labels, outputs)

  # Read the variable list after the forward pass, then differentiate and
  # update with respect to that same list.
  weights = model.trainable_variables
  grads = tape.gradient(batch_loss, weights)
  optimizer.apply_gradients(zip(grads, weights))

  train_loss.update_state(batch_loss)
  train_metric.update_state(labels, outputs)

@tf.function
def valid_step(model, features, labels):
  """Evaluate one validation batch and record its loss and accuracy.

  Runs the forward pass in inference mode and feeds the result into the
  module-level `valid_loss` and `valid_metric` trackers. No weights are
  updated.

  Args:
    model: the Keras model being evaluated.
    features: input batch passed to `model`.
    labels: target batch passed to `loss_func` and `valid_metric`.
  """
  # training=False mirrors the explicit training=True in train_step, so any
  # layer that behaves differently in training cannot affect validation.
  predictions = model(features, training=False)
  batch_loss = loss_func(labels, predictions)
  valid_loss.update_state(batch_loss)
  valid_metric.update_state(labels, predictions)

def train_model(model, ds_train, ds_valid, epochs):
  """Drive the custom training loop and print one summary line per epoch.

  Every epoch calls `train_step` on each batch of `ds_train`, then
  `valid_step` on each batch of `ds_valid`, prints the accumulated loss and
  accuracy of both splits, and finally clears the four trackers so the next
  epoch starts from zero.

  Args:
    model: the Keras model to train.
    ds_train: iterable of `(features, labels)` training batches.
    ds_valid: iterable of `(features, labels)` validation batches.
    epochs: number of passes over `ds_train`.
  """
  log_template = 'Epoch={}, Loss:{}, Accuracy:{}, Valid Loss:{}, Valid Accuracy:{}'
  trackers = (train_loss, valid_loss, train_metric, valid_metric)

  for epoch in tf.range(1, epochs + 1):
    for batch_x, batch_y in ds_train:
      train_step(model, batch_x, batch_y)

    for batch_x, batch_y in ds_valid:
      valid_step(model, batch_x, batch_y)

    if epoch % 1 == 0:
      epoch_summary = (
          epoch,
          train_loss.result(),
          train_metric.result(),
          valid_loss.result(),
          valid_metric.result(),
      )
      tf.print(tf.strings.format(log_template, epoch_summary))
      tf.print("")

    # Clear the running statistics so each epoch is reported on its own.
    for tracker in trackers:
      tracker.reset_states()

# Way 3: run the hand-written loop for 10 epochs, validating on ds_test.
train_model(model, ds_train, ds_test, 10)


Epoch=1, Loss:1.53299952, Accuracy:0.606101096, Valid Loss:1.53616142, Valid Accuracy:0.612644672

Epoch=2, Loss:1.26489651, Accuracy:0.66444, Valid Loss:1.51128232, Valid Accuracy:0.636687458

Epoch=3, Loss:1.00085247, Accuracy:0.73101759, Valid Loss:1.63053751, Valid Accuracy:0.645146906

Epoch=4, Loss:0.721742511, Accuracy:0.812402606, Valid Loss:1.83259153, Valid Accuracy:0.644701719

Epoch=5, Loss:0.510541916, Accuracy:0.872968137, Valid Loss:2.08863878, Valid Accuracy:0.643811226

Epoch=6, Loss:0.390402853, Accuracy:0.905922949, Valid Loss:2.27636456, Valid Accuracy:0.644701719

Epoch=7, Loss:0.31842044, Accuracy:0.92351371, Valid Loss:2.40933776, Valid Accuracy:0.642475486

Epoch=8, Loss:0.27190575, Accuracy:0.93509239, Valid Loss:2.53042626, Valid Accuracy:0.642030299

Epoch=9, Loss:0.239299655, Accuracy:0.941327095, Valid Loss:2.65123105, Valid Accuracy:0.634016037

Epoch=10, Loss:0.216556847, Accuracy:0.944778442, Valid Loss:2.7426393, Valid Accuracy:0.627337515

