<a href="https://colab.research.google.com/github/mdzikrim/Hands-on_DL/blob/main/Chapter_12.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [23]:
import tensorflow as tf
from tensorflow.keras import layers

class CustomLayerNorm(layers.Layer):
    def build(self, input_shape):
        self.alpha = self.add_weight(name="alpha", shape=[input_shape[-1]], initializer="ones", trainable=True)
        self.beta = self.add_weight(name="beta", shape=[input_shape[-1]], initializer="zeros", trainable=True)

    def call(self, inputs):
        mean, var = tf.nn.moments(inputs, axes=-1, keepdims=True)
        epsilon = 1e-5
        norm = (inputs - mean) / tf.sqrt(var + epsilon)
        return self.alpha * norm + self.beta

In [24]:
from tensorflow.keras.layers import LayerNormalization
import numpy as np

# Test layer
sample = tf.constant([[1., 2., 3.], [4., 5., 6.]])

custom_ln = CustomLayerNorm()
keras_ln = LayerNormalization()

out1 = custom_ln(sample)
out2 = keras_ln(sample)

print("Custom LayerNorm Output:\n", out1.numpy())
print("Keras LayerNorm Output:\n", out2.numpy())


Custom LayerNorm Output:
 [[-1.2247356  0.         1.2247356]
 [-1.2247356  0.         1.2247356]]
Keras LayerNorm Output:
 [[-1.2238274  0.         1.2238274]
 [-1.2238274  0.         1.2238274]]


In [25]:
(X_train_full, y_train_full), (X_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()
X_train_full = X_train_full / 255.0
X_train, X_valid = X_train_full[:50000], X_train_full[50000:]
y_train, y_valid = y_train_full[:50000], y_train_full[50000:]

X_train = X_train[..., tf.newaxis]
X_valid = X_valid[..., tf.newaxis]
X_test = X_test[..., tf.newaxis]


In [26]:
class MyModel(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.flatten = layers.Flatten()
        self.d1 = layers.Dense(256, activation="relu")
        self.d2 = layers.Dense(10)

    def call(self, x):
        x = self.flatten(x)
        x = self.d1(x)
        return self.d2(x)


In [27]:
model = MyModel()
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.Adam()

train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(1000).batch(32)
val_ds = tf.data.Dataset.from_tensor_slices((X_valid, y_valid)).batch(32)

train_loss = tf.keras.metrics.Mean()
train_acc = tf.keras.metrics.SparseCategoricalAccuracy()

val_acc = tf.keras.metrics.SparseCategoricalAccuracy()

EPOCHS = 5
for epoch in range(EPOCHS):
    print(f"\nEpoch {epoch+1}")
    for X_batch, y_batch in train_ds:
        with tf.GradientTape() as tape:
            logits = model(X_batch)
            loss = loss_fn(y_batch, logits)
        grads = tape.gradient(loss, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))

        train_loss(loss)
        train_acc(y_batch, logits)

    for X_batch, y_batch in val_ds:
        val_logits = model(X_batch)
        val_acc.update_state(y_batch, val_logits)

    print(f"Train loss: {train_loss.result():.4f} | Train acc: {train_acc.result():.4f} | Val acc: {val_acc.result():.4f}")
    train_loss.reset_state()
    train_acc.reset_state()
    val_acc.reset_state()



Epoch 1
Train loss: 0.4980 | Train acc: 0.8236 | Val acc: 0.8282

Epoch 2
Train loss: 0.3751 | Train acc: 0.8640 | Val acc: 0.8681

Epoch 3
Train loss: 0.3352 | Train acc: 0.8778 | Val acc: 0.8744

Epoch 4
Train loss: 0.3081 | Train acc: 0.8869 | Val acc: 0.8777

Epoch 5
Train loss: 0.2869 | Train acc: 0.8926 | Val acc: 0.8812


In [28]:
# Scheduler: low LR for early layers, high for last
optimizer = tf.keras.optimizers.SGD(learning_rate=1e-2, momentum=0.9)
model = MyModel()

@tf.function
def train_step(X_batch, y_batch):
    with tf.GradientTape() as tape:
        logits = model(X_batch)
        loss = loss_fn(y_batch, logits)
    grads = tape.gradient(loss, model.trainable_weights)
    optimizer.apply_gradients(zip(grads, model.trainable_weights))
    return loss

# Reuse train_ds from earlier
for epoch in range(3):
    print(f"\nEpoch {epoch+1}")
    for step, (X_batch, y_batch) in enumerate(train_ds):
        loss = train_step(X_batch, y_batch)
        if step % 200 == 0:
            print(f"Step {step}: Loss = {loss:.4f}")



Epoch 1
Step 0: Loss = 2.4138
Step 200: Loss = 0.6160
Step 400: Loss = 0.6867
Step 600: Loss = 0.6339
Step 800: Loss = 0.4408
Step 1000: Loss = 0.5159
Step 1200: Loss = 0.5663
Step 1400: Loss = 0.2888

Epoch 2
Step 0: Loss = 0.4457
Step 200: Loss = 0.6214
Step 400: Loss = 0.5594
Step 600: Loss = 0.3266
Step 800: Loss = 0.4707
Step 1000: Loss = 0.3948
Step 1200: Loss = 0.2817
Step 1400: Loss = 0.4058

Epoch 3
Step 0: Loss = 0.5149
Step 200: Loss = 0.3599
Step 400: Loss = 0.4600
Step 600: Loss = 0.3892
Step 800: Loss = 0.2241
Step 1000: Loss = 0.4088
Step 1200: Loss = 0.2698
Step 1400: Loss = 0.2415
