# Initializers

In [3]:
import tensorflow as tf

# Dense layers use Glorot initialization unless told otherwise.
dense_glorot_initializer = tf.keras.layers.Dense(units=50, activation="relu")

# Same layer, but with He-normal kernel initialization requested by name.
dense_he_initializer = tf.keras.layers.Dense(
    units=50,
    activation="relu",
    kernel_initializer="he_normal",
)

# Hand-built initializer: VarianceScaling exposes the scale, the fan mode and
# the sampling distribution, here configured as scale 2.0 / fan_avg / uniform.
he_avg_init = tf.keras.initializers.VarianceScaling(
    scale=2.0,
    mode="fan_avg",
    distribution="uniform",
)
dense_custom_he_avg = tf.keras.layers.Dense(
    units=50,
    activation="sigmoid",
    kernel_initializer=he_avg_init,
)

# Better activation functions

In [11]:
# Leaky ReLU
# Keras 3 renamed the slope argument from `alpha` to `negative_slope`; the old
# spelling is still accepted there but emits a deprecation warning.
leaky_relu = tf.keras.layers.LeakyReLU(negative_slope=0.2)
dense = tf.keras.layers.Dense(50, activation=leaky_relu, kernel_initializer="he_normal")

# Alternatively, leave the Dense layer without an activation function and add
# LeakyReLU as its own layer right after it.
model = tf.keras.models.Sequential([
    # more layers
    tf.keras.layers.Dense(50, kernel_initializer="he_normal"),  # no activation
    tf.keras.layers.LeakyReLU(negative_slope=0.2),  # activation as a separate layer
    # more layers
])

# Batch normalization

In [12]:
# Batch normalization after every layer (including right after the input).
# The input shape is declared with an explicit Input object: passing
# `input_shape` to the first layer of a Sequential model triggers a Keras
# UserWarning. The Input object is not listed in `model.layers`, so layer
# indices are unchanged (model.layers[1] is still the first BatchNormalization).
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=[28, 28]),
    tf.keras.layers.Flatten(),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(300, activation="relu", kernel_initializer="he_normal"),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(100, activation="relu", kernel_initializer="he_normal"),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(10, activation="softmax")
])

  super().__init__(**kwargs)
I0000 00:00:1760227354.694376    7647 gpu_device.cc:2020] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 3482 MB memory:  -> device: 0, name: NVIDIA GeForce GTX 1050, pci bus id: 0000:01:00.0, compute capability: 6.1


In [13]:
# Print the summary of the batch-normalized model built in the previous cell.
model.summary()

In [15]:
# Pair each variable of the first BatchNormalization layer (model.layers[1])
# with its `trainable` flag. In the recorded output, gamma and beta are
# trainable while moving_mean and moving_variance are not.
[
    (variable.name, variable.trainable)
    for variable in model.layers[1].variables
]

[('gamma', True),
 ('beta', True),
 ('moving_mean', False),
 ('moving_variance', False)]

In [16]:
# Batch normalization before the activation function.
# Each hidden Dense layer has no activation of its own; it is followed by a
# BatchNormalization layer and then a separate Activation layer.
# use_bias=False: the BatchNormalization layer that follows has its own learned
# offset (beta), so a bias term in the Dense layer would be redundant.
# The input shape is declared with an explicit Input object, because passing
# `input_shape` to the first layer of a Sequential model triggers a Keras
# UserWarning.
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=[28, 28]),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(300, kernel_initializer="he_normal", use_bias=False),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation("relu"),
    tf.keras.layers.Dense(100, kernel_initializer="he_normal", use_bias=False),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation("relu"),
    tf.keras.layers.Dense(10, activation="softmax"),
])

In [17]:
# Print the summary of the model that applies batch normalization before each activation.
model.summary()

# Faster optimizers

In [21]:
# Momentum optimization: plain SGD with a momentum term of 0.9.
optimizer = tf.keras.optimizers.SGD(
    learning_rate=0.001,
    momentum=0.9,
)

In [22]:
# Nesterov Accelerated Gradient (NAG): the same SGD + momentum configuration
# with the Nesterov variant switched on.
optimizer = tf.keras.optimizers.SGD(
    learning_rate=0.001,
    momentum=0.9,
    nesterov=True,
)

In [None]:
# RMSProp