In [0]:
!pip install tf-nightly-gpu-2.0-preview
import tensorflow as tf
print(tf.__version__)

In [0]:
# An example of simple Layer class, without best practices

class Linear(tf.keras.layers.Layer):
  """Affine layer computing `matmul(inputs, w) + b`.

  Both variables are created eagerly in the constructor, so the input
  dimension must be supplied up front.

  Args:
    units: Number of output features (second dimension of `w`).
    input_dim: Number of input features (first dimension of `w`).
  """

  def __init__(self, units=32, input_dim=32):
    super(Linear, self).__init__()

    # Kernel of shape (input_dim, units), drawn from a random-normal
    # initializer with its default arguments.
    kernel_initializer = tf.random_normal_initializer()
    kernel_value = kernel_initializer(shape=(input_dim, units), dtype='float32')
    self.w = tf.Variable(initial_value=kernel_value, trainable=True)

    # Bias of shape (units,), starting at zero.
    bias_initializer = tf.zeros_initializer()
    bias_value = bias_initializer(shape=(units,), dtype='float32')
    self.b = tf.Variable(initial_value=bias_value, trainable=True)

  def call(self, inputs):
    """Returns `inputs` multiplied by `w`, plus `b`."""
    projected = tf.matmul(inputs, self.w)
    return projected + self.b

# Build a layer that maps 2 input features to 4 output units
linear_layer = Linear(units=4, input_dim=2)

# A layer is callable like a plain python function
y = linear_layer(tf.ones(shape=(10, 2)))
assert y.shape == (10, 4)

# Variables set as attributes show up in the `weights` property
assert linear_layer.weights == [linear_layer.w, linear_layer.b]

In [83]:
# Same example with best practices 
# 1) We use .add_weight() shortcut 
# 2) We use .build(), which is called lazily the first time our layer is called,
# so we do not need to know the input dims at init time
class Linear(tf.keras.layers.Layer):
  """Affine layer computing `matmul(inputs, w) + b`, with lazy weights.

  Only the output width is given at construction time; the weights are
  created in `build()` from the last dimension of the input shape.

  Args:
    units: Number of output features.
  """

  def __init__(self, units=32):
    super(Linear, self).__init__()
    self.units = units

  def build(self, input_shape):
    """Creates `w` and `b`, sizing `w` from `input_shape[-1]`."""
    print("**Building layer weigths**")
    input_dim = input_shape[-1]
    self.w = self.add_weight(
        shape=(input_dim, self.units),
        initializer='random_normal',
        trainable=True)
    self.b = self.add_weight(
        shape=(self.units,),
        initializer='random_normal',
        trainable=True)

  def call(self, inputs):
    """Returns `inputs` multiplied by `w`, plus `b`."""
    projected = tf.matmul(inputs, self.w)
    return projected + self.b

# Instantiate a Linear Layer
linear_layer = Linear(4)
# Call the layer (call() will invoke build() on first invocation)
y = linear_layer(tf.ones((3, 3)))

**Building layer weigths**


In [0]:
# Prepare a dataset: each image is flattened to 784 float32 values and divided
# by 255, then examples are shuffled and grouped into batches of 64
(x_train, y_train), _ = tf.keras.datasets.mnist.load_data()
dataset = tf.data.Dataset.from_tensor_slices(
    (x_train.reshape(60000, 784).astype('float32') / 255, y_train))
dataset = dataset.shuffle(buffer_size=1024).batch(64)

# Instantiate a Linear layer with 10 units
linear_layer = Linear(10)

# Instantiate a logistic loss function expecting logits before softmax layer
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# Instantiate an optimizer
optimizer = tf.keras.optimizers.SGD(learning_rate=1e-3)

# Iterate over batches of the dataset
for step, (x, y) in enumerate(dataset):

  # Open a GradientTape; only the forward pass and the loss are recorded
  with tf.GradientTape() as tape:

    # Forward pass through our layer
    logits = linear_layer(x)

    # Calculate loss
    loss = loss_fn(y, logits)

  # Calculate gradients of the loss wrt the trainable weights. This is done
  # after leaving the tape context so the backward pass is not itself recorded
  # (recording it is only useful for higher-order derivatives).
  grads = tape.gradient(loss, linear_layer.trainable_weights)

  # Apply gradients
  optimizer.apply_gradients(zip(grads, linear_layer.trainable_weights))

  # Logging
  if step % 100 == 0:
    print(step, float(loss))

In [10]:
# We can reuse this layer to compose more complex objects

class MLP(tf.keras.layers.Layer):
  """Three stacked `Linear` layers (32, 32 and 10 units) with ReLU in between."""

  def __init__(self):
    super(MLP, self).__init__()
    self.layer_1 = Linear(32)
    self.layer_2 = Linear(32)
    self.layer_3 = Linear(10)

  def call(self, inputs):
    """Runs `inputs` through the stack; no activation after the last layer."""
    hidden = tf.nn.relu(self.layer_1(inputs))
    hidden = tf.nn.relu(self.layer_2(hidden))
    return self.layer_3(hidden)
  
mlp = MLP()

# The first call builds every sub-layer from the input it receives
y = mlp(tf.ones(shape=(1, 6)))

# Sub-layer weights are collected recursively: 3 layers, each with w and b
assert len(mlp.trainable_weights) == 6

**Building layer weigths**
**Building layer weigths**
**Building layer weigths**


In [0]:
# Layers can create losses on their forward pass, great for regularization

class ActivityRegularization(tf.keras.layers.Layer):
  """Layer that creates an activity sparsity regularization loss.

  The layer is an identity on its inputs; as a side effect of each call it
  registers `rate * reduce_sum(inputs)` through `add_loss`.

  Args:
    rate: Scaling factor applied to the summed activations.
  """

  def __init__(self, rate=1e-2):
    super(ActivityRegularization, self).__init__()
    self.rate = rate

  def call(self, inputs):
    """Registers the scaled activity loss and returns `inputs` unchanged."""
    # Scale by `rate`; without it the constructor argument has no effect and
    # the raw activation sum is added to the loss at full strength.
    self.add_loss(self.rate * tf.reduce_sum(inputs))
    return inputs

class SparseMLP(tf.keras.layers.Layer):
  """Two `Linear` layers (32 and 10 units) with an activity penalty between.

  The ReLU output of the first layer is passed through
  `ActivityRegularization` before reaching the second layer.
  """

  def __init__(self):
    super(SparseMLP, self).__init__()
    self.linear_1 = Linear(32)
    self.regularization = ActivityRegularization()
    self.linear_2 = Linear(10)

  def call(self, inputs):
    """Returns the output of the second linear layer."""
    hidden = tf.nn.relu(self.linear_1(inputs))
    hidden = self.regularization(hidden)
    return self.linear_2(hidden)

mlp = SparseMLP()
y = mlp(tf.ones(shape=(10, 10)))

# `losses` holds what the layers registered during the last forward pass
print(mlp.losses)


In [0]:
# An example of using internal and external (to the network) losses to train
(x_train, y_train), _ = tf.keras.datasets.mnist.load_data()
dataset = tf.data.Dataset.from_tensor_slices(
    (x_train.reshape(60000, 784).astype('float32') / 255, y_train))
dataset = dataset.shuffle(buffer_size=1024).batch(32)

mlp = SparseMLP()

# Same optimizer class as the other training cells, spelled the same way
optimizer = tf.keras.optimizers.SGD(learning_rate=1e-3)
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

for step, (x, y) in enumerate(dataset):

  with tf.GradientTape() as tape:

    # Forward pass
    logits = mlp(x)

    # External loss plus the regularization terms the layers registered
    # during this forward pass
    loss = loss_fn(y, logits)
    loss += sum(mlp.losses)

  # Calculate gradients of the loss wrt the trainable weights. This is done
  # after leaving the tape context so the backward pass is not itself recorded.
  grads = tape.gradient(loss, mlp.trainable_weights)

  # Apply gradients
  optimizer.apply_gradients(zip(grads, mlp.trainable_weights))

  if step % 100 == 0:
    print(step, float(loss))

In [0]:
# Let's use tf.function to wrap a python function in a graph
# Prepare our layer, loss, and optimizer.
# Model
mlp = MLP()
# Loss computed on raw logits
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
# Plain SGD
optimizer = tf.keras.optimizers.SGD(learning_rate=0.001)

# Create a training step function.

@tf.function  # Make it fast.
def train_on_batch(x, y):
  """Runs one optimization step on a single batch and returns its loss.

  Uses the module-level `mlp`, `loss_fn` and `optimizer`.

  Args:
    x: Batch of inputs fed to `mlp`.
    y: Batch of targets passed to `loss_fn` together with the logits.

  Returns:
    The loss computed for this batch, before the weight update.
  """
  with tf.GradientTape() as tape:
    logits = mlp(x)
    loss = loss_fn(y, logits)
  # Differentiate after leaving the tape context so the backward pass is not
  # itself recorded on the tape.
  gradients = tape.gradient(loss, mlp.trainable_weights)
  optimizer.apply_gradients(zip(gradients, mlp.trainable_weights))
  return loss

# Load the training split, flatten each image to 784 float32 values divided
# by 255, then shuffle and batch.
(x_train, y_train), _ = tf.keras.datasets.mnist.load_data()
dataset = (
    tf.data.Dataset.from_tensor_slices(
        (x_train.reshape((60000, 784)).astype('float32') / 255, y_train))
    .shuffle(buffer_size=1024)
    .batch(64))

# One compiled training step per batch; report the loss every 100 steps.
for step, (x, y) in enumerate(dataset):
  loss = train_on_batch(x, y)
  if not step % 100:
    print(step, float(loss))

In [86]:
# Let's write Layers with functionality that differs in training vs. inference
class Dropout(tf.keras.layers.Layer):
  """Applies `tf.nn.dropout` when `training` is truthy, else is an identity.

  Args:
    rate: Value forwarded as the `rate` argument of `tf.nn.dropout`.
  """

  def __init__(self, rate):
    super(Dropout, self).__init__()
    self.rate = rate

  @tf.function
  def call(self, inputs, training=None):
    """Returns `inputs`, with dropout applied only when `training` is truthy."""
    # Inference (or `training` left unset): pass the inputs through untouched.
    if not training:
      return inputs
    return tf.nn.dropout(inputs, rate=self.rate)

class MLPWithDropout(tf.keras.layers.Layer):
  """Two `Linear` layers (32 and 10 units) with ReLU and dropout in between."""

  def __init__(self):
    super(MLPWithDropout, self).__init__()
    self.linear1 = Linear(32)
    self.dropout = Dropout(0.5)
    self.linear2 = Linear(10)

  def call(self, inputs, training=None):
    """Runs the forward pass.

    Args:
      inputs: Input tensor fed to the first linear layer.
      training: Forwarded to the dropout layer; dropout is only applied when
        this is truthy.

    Returns:
      The output of the second linear layer.
    """
    x = self.linear1(inputs)
    x = tf.nn.relu(x)
    # Forward the flag explicitly; otherwise the dropout layer always sees its
    # default `training=None` and never drops anything.
    x = self.dropout(x, training=training)
    return self.linear2(x)

mlp = MLPWithDropout()
# Call the same layer once in training mode and once in inference mode
y_train = mlp(tf.ones(shape=(10, 10)), training=True)
y_test = mlp(tf.ones(shape=(10, 10)), training=False)

**Building layer weigths**
**Building layer weigths**


In [92]:
# An `Input` object describes the shape and dtype of the input data: the
# deep-learning equivalent of a type in a regular programming language
inputs = tf.keras.Input(shape=(16,))

# Calling a layer on such a "type" object yields a new one (new shape/dtype)
x = Linear(32)(inputs)
x = Dropout(0.5)(x)
outputs = Linear(10)(x)

# Any inputs/outputs pair defines a functional model, which is itself a layer
model = tf.keras.Model(inputs, outputs)

# The input spec was known up front, so the weights already exist
assert len(model.weights) == 4

# The `training` argument given to the model is passed down to the Dropout layer
y = model(tf.ones(shape=(6, 16)), training=True)
assert y.shape == (6, 10)

**Building layer weigths**
**Building layer weigths**
