# <font color="#418FDE" size="6.5" uppercase>**Modules and Layers**</font>

>Last update: 20260128.
    
By the end of this Lecture, you will be able to:
- Define custom neural network components by subclassing nn.Module and implementing forward methods. 
- Use common nn layers such as Linear, Conv2d, and Dropout to assemble simple models. 
- Inspect and manage model parameters, including initialization and parameter counting. 


## **1. Building Custom Modules**

### **1.1. Creating Custom Modules**

<img src="https://cdn.jsdelivr.net/gh/mhrafiei/contents@main/LFF/Master PyTorch 2.10.0/Module_02/Lecture_B/image_01_01.jpg?v=1769659961" width="250">



>* Custom modules bundle computations, parameters, and configuration
>* They enable reusable, understandable, testable neural components

>* Decide what parameters and layers to store
>* Define how inputs are transformed into outputs

>* Custom modules plug into training like built-ins
>* Internal design can change while interface stays



In [None]:
#@title Python Code - Creating Custom Modules

# This script shows creating simple custom modules.
# We use TensorFlow Keras layers to build modules.
# Focus is on subclassing and defining call methods.

# !pip install tensorflow.

# Import required standard libraries.
import os
import random
import numpy as np

# Import TensorFlow and Keras components.
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Seed Python, NumPy, and TensorFlow with one shared value so repeated
# runs of this cell produce the same numbers.
seed_value = 42
for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
    seed_fn(seed_value)

# Report the TensorFlow version in use.
print("TensorFlow version:", tf.__version__)

# Dimensions of the tiny synthetic regression dataset.
num_samples = 8
input_dim = 4

# Features are drawn before targets so the seeded NumPy stream is
# consumed in the same order on every run.
x_data = np.random.randn(num_samples, input_dim).astype("float32")
y_data = np.random.randn(num_samples, 1).astype("float32")

# Define a custom dense block as a reusable module.
class DenseBlock(keras.Model):
    """Dense -> activation -> dropout block usable as a sub-module.

    Args:
        units: Number of output features of the dense layer.
        activation: Name or callable understood by
            ``keras.activations.get``; ``None`` keeps the output linear.
        dropout_rate: Fraction of units dropped when ``training=True``.
        expected_input_dim: Optional size the last input axis must have.
            When ``None`` (the default) no size check is performed.

    Raises:
        ValueError: From ``call`` when ``expected_input_dim`` is set and
            the last input axis has a different size.
    """

    # Initialize internal layers and configuration.
    def __init__(
        self,
        units,
        activation="relu",
        dropout_rate=0.1,
        expected_input_dim=None,
    ):
        super().__init__()
        self.units = int(units)
        self.activation = activation
        # The expected size is stored on the instance instead of being read
        # from a module-level global, so the block works with any input
        # width (for example when several blocks are stacked).
        self.expected_input_dim = (
            None if expected_input_dim is None else int(expected_input_dim)
        )
        self.linear = layers.Dense(self.units)
        # Resolve the activation once so every supported name is honoured,
        # not only "relu".
        self.activation_fn = keras.activations.get(activation)
        self.dropout = layers.Dropout(dropout_rate)

    # Define the forward computation for this module.
    def call(self, inputs, training=False):
        if (
            self.expected_input_dim is not None
            and inputs.shape[-1] != self.expected_input_dim
        ):
            raise ValueError("Unexpected input feature size.")
        x = self.linear(inputs)
        x = self.activation_fn(x)
        # Dropout receives the training flag from the caller.
        x = self.dropout(x, training=training)
        return x

# Compose one DenseBlock with a single-unit output head.
class SmallRegressor(keras.Model):
    """Regressor built from a hidden ``DenseBlock`` and a ``Dense(1)`` head.

    Args:
        hidden_units: Width passed to the hidden ``DenseBlock``.
    """

    def __init__(self, hidden_units):
        super().__init__()
        self.block = DenseBlock(hidden_units)
        self.out_layer = layers.Dense(1)

    def call(self, inputs, training=False):
        """Run the hidden block, then map its features to one output."""
        hidden = self.block(inputs, training=training)
        return self.out_layer(hidden)

# Instantiate the custom model with chosen hidden size.
model = SmallRegressor(hidden_units=6)

# Call the model once on sample data so its layers create their weights.
_ = model(x_data[:2])

# Compile the model with simple optimizer and loss.
model.compile(optimizer="adam", loss="mse")

# Train briefly with silent output for speed.
history = model.fit(x_data, y_data, epochs=5, verbose=0)

# Count only the trainable weights so the number matches the label printed
# below; model.count_params() reports all weights, trainable or not.
param_count = sum(int(np.prod(w.shape)) for w in model.trainable_weights)

# Print a few informative lines about the custom modules.
print("Custom model trainable parameters:", param_count)
print("DenseBlock internal layers:", len(model.block.layers))
print("Example prediction shape:", model(x_data[:1]).shape)
print("Final training loss:", float(history.history["loss"][-1]))




### **1.2. Init and forward basics**

<img src="https://cdn.jsdelivr.net/gh/mhrafiei/contents@main/LFF/Master PyTorch 2.10.0/Module_02/Lecture_B/image_01_02.jpg?v=1769660011" width="250">



>* Initialization defines layers, parameters, and configuration
>* Forward reuses this setup to process inputs repeatedly

>* Forward pass defines operations transforming inputs to outputs
>* Must be clear and deterministic for debugging and gradients

>* Init builds the module; forward runs data
>* Separation improves reuse, clarity, and integration



In [None]:
#@title Python Code - Init and forward basics

# This script shows basic module initialization concepts.
# It uses TensorFlow to mimic PyTorch style modules.
# Focus on __init__ and call forward style methods.

# !pip install tensorflow==2.20.0.

# Import required standard libraries.
import os
import random
import numpy as np

# Import TensorFlow and Keras layers.
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Apply one seed to the Python, NumPy, and TensorFlow generators.
seed_value = 42
for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
    seed_fn(seed_value)

# Report the TensorFlow version in use.
print("TensorFlow version:", tf.__version__)

# Define a simple custom dense block class.
class SimpleDenseBlock(keras.Model):
    """Two-layer block: a ReLU hidden layer followed by a linear output.

    Args:
        input_dim: Required size of the last axis of the inputs.
        hidden_dim: Number of units in the hidden layer.
        output_dim: Number of units in the output layer.

    Raises:
        ValueError: If any of the three dimensions is not positive, or
            (from ``call``) if the inputs' last axis differs from
            ``input_dim``.
    """

    # Initialize layers and configuration here.
    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.input_dim = int(input_dim)
        self.hidden_dim = int(hidden_dim)
        self.output_dim = int(output_dim)

        # Validate all three dimensions, including output_dim, so the error
        # message applies to every argument it claims to cover.
        if min(self.input_dim, self.hidden_dim, self.output_dim) <= 0:
            raise ValueError("Dimensions must be positive integers")

        # Define internal dense layers once in __init__.
        self.hidden_layer = layers.Dense(
            units=self.hidden_dim,
            activation="relu",
            name="hidden_layer",
        )

        # Define output dense layer without activation.
        self.output_layer = layers.Dense(
            units=self.output_dim,
            activation=None,
            name="output_layer",
        )

    # Define the forward computation using call.
    def call(self, inputs, training=False):
        # Accept array-likes as well as tensors.
        inputs = tf.convert_to_tensor(inputs)
        if inputs.shape[-1] != self.input_dim:
            raise ValueError("Last dimension must match input_dim")
        x = self.hidden_layer(inputs)
        outputs = self.output_layer(x)
        return outputs

# Instantiate the block for 4 input features, 3 hidden units, 2 outputs.
model = SimpleDenseBlock(input_dim=4, hidden_dim=3, output_dim=2)

# A single all-zero row is enough to make the layers create their weights.
dummy_input = tf.zeros(shape=(1, 4), dtype=tf.float32)
_ = model(dummy_input, training=False)

# Print the layer and parameter overview.
model.summary(expand_nested=False)

# Two hand-written example rows with four features each.
example_rows = [
    [1.0, 0.0, -1.0, 2.0],
    [0.5, 0.5, 0.5, 0.5],
]
example_batch = tf.constant(example_rows, dtype=tf.float32)

# Forward pass on the example rows.
outputs = model(example_batch, training=False)

# Report shapes first, then the raw output values.
for label, tensor in (("Input shape:", example_batch), ("Output shape:", outputs)):
    print(label, tensor.shape)
print("Model outputs:\n", outputs.numpy())




### **1.3. Mastering super in Modules**

<img src="https://cdn.jsdelivr.net/gh/mhrafiei/contents@main/LFF/Master PyTorch 2.10.0/Module_02/Lecture_B/image_01_03.jpg?v=1769660072" width="250">



>* super ensures standard module setup and registration
>* Misusing super breaks training, saving, and devices

>* super registers nested modules and parameters correctly
>* Ensures saving, moving, and freezing models works

>* Use super in forward to extend layers
>* Reuse proven behavior while adding custom logic



In [None]:
#@title Python Code - Mastering super in Modules

# This script shows mastering super in modules.
# We use TensorFlow to mimic PyTorch style modules.
# Focus on subclassing and calling parent constructors.

# !pip install tensorflow==2.20.0.

# Import required standard libraries.
import os
import random
import numpy as np

# Import TensorFlow and Keras layers.
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Seed the Python, NumPy, and TensorFlow generators with the same value.
seed_value = 42
for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
    seed_fn(seed_value)

# Report the TensorFlow version in use.
print("TensorFlow version:", tf.__version__)

# Define a simple custom dense like layer.
class MyDense(layers.Layer):
    """Hand-written dense layer computing ``relu(inputs @ kernel + bias)``.

    Args:
        units: Number of output features.
        **kwargs: Forwarded unchanged to ``layers.Layer.__init__``.
    """

    # Initialize the custom layer correctly.
    def __init__(self, units, **kwargs):
        # Call parent constructor using super.
        super().__init__(**kwargs)
        # Store configuration for later use.
        self.units = int(units)

    # Build layer weights when input shape is known.
    def build(self, input_shape):
        """Create ``kernel`` and ``bias`` for the given input shape.

        Raises:
            ValueError: If the last input dimension is unknown or not
                positive.
        """
        # Use an explicit exception instead of ``assert`` so the check is
        # not stripped under ``python -O`` and an unknown (None) dimension
        # gives a clear message rather than a TypeError from ``int(None)``.
        last_dim = input_shape[-1]
        if last_dim is None or int(last_dim) <= 0:
            raise ValueError(
                "MyDense needs a known, positive last input dimension."
            )
        last_dim = int(last_dim)

        # Create kernel weight using add_weight.
        self.kernel = self.add_weight(
            shape=(last_dim, self.units),
            initializer="glorot_uniform",
            trainable=True,
            name="kernel",
        )

        # Create bias weight using add_weight.
        self.bias = self.add_weight(
            shape=(self.units,),
            initializer="zeros",
            trainable=True,
            name="bias",
        )

        # Call parent build to finalize.
        super().build(input_shape)

    # Define the forward computation call.
    def call(self, inputs, training=False):
        # ``training`` is accepted for a uniform call signature but is not
        # used: this layer computes the same result in both modes.
        outputs = tf.linalg.matmul(inputs, self.kernel)

        # Add bias term to outputs.
        outputs = outputs + self.bias

        # Return activated outputs using relu.
        return tf.nn.relu(outputs)

# Stack two MyDense layers with dropout in between.
class MyBlock(keras.Model):
    """``MyDense`` -> ``Dropout`` -> ``MyDense`` block.

    Args:
        units: Width used for both ``MyDense`` layers.
        dropout_rate: Rate passed to the ``Dropout`` layer.
    """

    def __init__(self, units, dropout_rate=0.2):
        # The parent constructor runs before any sub-layer is assigned.
        super().__init__()
        self.dense_one = MyDense(units)
        self.dropout = layers.Dropout(dropout_rate)
        self.dense_two = MyDense(units)

    def call(self, inputs, training=False):
        """Apply the three sub-layers in order, forwarding ``training``."""
        hidden = self.dense_one(inputs, training=training)
        hidden = self.dropout(hidden, training=training)
        return self.dense_two(hidden, training=training)

# Build a small block and an all-ones dummy batch of shape (2, 3).
model = MyBlock(units=4, dropout_rate=0.1)
dummy_inputs = tf.ones(shape=(2, 3), dtype=tf.float32)

# The first call makes the sub-layers create their weights.
outputs = model(dummy_inputs, training=False)

# Two rows in, two rows out, each with `units` features.
assert outputs.shape == (2, 4)

# Add up the element counts of every trainable variable.
trainable_vars = model.trainable_variables
param_count = np.sum([np.prod(var.shape) for var in trainable_vars])

# Summarize shapes, the parameter total, and each registered variable.
print("Dummy input shape:", dummy_inputs.shape)
print("Output shape:", outputs.shape)
print("Trainable parameter count:", int(param_count))
print("Registered variable names:")
for v in trainable_vars:
    print(" ", v.name, "shape", v.shape)




## **2. Core Neural Layers**

### **2.1. Linear layers and activations**

<img src="https://cdn.jsdelivr.net/gh/mhrafiei/contents@main/LFF/Master PyTorch 2.10.0/Module_02/Lecture_B/image_02_01.jpg?v=1769660142" width="250">



>* Linear layers map input vectors to outputs
>* Weights and bias learn feature importance for tasks

>* Activation functions add nonlinearity beyond stacked linear layers
>* They enable hierarchies of complex features and decision boundaries

>* Choose layer counts, sizes, and activations thoughtfully
>* Linear layers combine features; activations add complexity



In [None]:
#@title Python Code - Linear layers and activations

# This script shows linear layers and activations.
# It uses TensorFlow dense layers for clarity.
# Run cells to see shapes and simple outputs.

# !pip install tensorflow==2.20.0.

# Import required standard libraries.
import os
import random
import numpy as np

# Import TensorFlow and Keras layers.
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Seed the Python, NumPy, and TensorFlow generators with the same value.
seed_value = 42
for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
    seed_fn(seed_value)

# Report the TensorFlow version in use.
print("TensorFlow version:", tf.__version__)

# Four hand-written samples with three features each.
num_samples = 4
num_features = 3
feature_rows = [
    [0.2, 0.5, 0.1],
    [0.9, 0.1, 0.3],
    [0.4, 0.7, 0.8],
    [0.0, 0.2, 0.9],
]
x_input = np.array(feature_rows, dtype=np.float32)

# Guard against a mismatch between the data and the declared sizes.
assert x_input.shape == (num_samples, num_features)

# Linear (5 units) -> ReLU -> linear (1 unit); the activation is its own
# layer so the Dense layers stay purely linear.
layer_stack = [
    layers.Input(shape=(num_features,)),
    layers.Dense(5, activation=None),
    layers.Activation("relu"),
    layers.Dense(1, activation=None),
]
model = keras.Sequential(layer_stack)

# One forward call on a single row before inspecting the model.
_ = model(x_input[:1], training=False)

# Print the layer-by-layer summary.
model.summary(print_fn=lambda x: print(x))

# Full-batch forward pass.
outputs = model(x_input, training=False)

# One prediction per sample is expected.
assert outputs.shape == (num_samples, 1)

# Compare input and output shapes.
print("Input features shape:", x_input.shape)
print("Output predictions shape:", outputs.shape)

# Kernel and bias of the first Dense layer.
first_dense = model.layers[0]
weights, biases = first_dense.get_weights()
print("First dense weights shape:", weights.shape)
print("First dense biases shape:", biases.shape)

# Total number of trainable scalars across the model.
param_count = sum(int(np.prod(var.shape)) for var in model.trainable_variables)
print("Total trainable parameters:", int(param_count))



### **2.2. Convolutions and Pooling**

<img src="https://cdn.jsdelivr.net/gh/mhrafiei/contents@main/LFF/Master PyTorch 2.10.0/Module_02/Lecture_B/image_02_02.jpg?v=1769660221" width="250">



>* Convolution layers scan locally to detect patterns
>* Stacked convolutions learn complex features across modalities

>* Pooling downsamples feature maps, keeping key information
>* Convolution plus pooling builds robust, compressed hierarchies

>* Stack conv, activation, pooling blocks for features
>* These blocks enable robust, simple models across domains



In [None]:
#@title Python Code - Convolutions and Pooling

# This script shows basic convolutions and pooling.
# We use TensorFlow to build tiny image layers.
# Focus is on shapes not on serious training.

# !pip install tensorflow==2.20.0.

# Import required standard libraries.
import os
import random
import numpy as np

# Import TensorFlow and Keras layers.
import tensorflow as tf
from tensorflow.keras import layers

# Seed the Python, NumPy, and TensorFlow generators with the same value.
seed_value = 42
for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
    seed_fn(seed_value)

# Report the TensorFlow version in use.
print("TensorFlow version:", tf.__version__)

# Image geometry: 8x8 pixels, one channel.
img_height = 8
img_width = 8
img_channels = 1

# Two random images laid out as (batch, height, width, channels).
batch_size = 2
image_shape = (batch_size, img_height, img_width, img_channels)
images = np.random.rand(*image_shape).astype("float32")

# The generated batch must have exactly the requested shape.
assert images.shape == image_shape

# One 3x3 "same"-padded convolution with 4 filters, then 2x2 max pooling.
model = tf.keras.Sequential([
    layers.Input(shape=(img_height, img_width, img_channels)),
    layers.Conv2D(
        filters=4,
        kernel_size=(3, 3),
        padding="same",
        activation="relu",
    ),
    layers.MaxPooling2D(pool_size=(2, 2)),
])

# Print the layer-by-layer summary.
model.summary(print_fn=lambda x: print(x))

# Forward pass on the image batch.
conv_pooled = model(images)

# The output is asserted to have half the input height and width.
expected_height, expected_width = img_height // 2, img_width // 2
assert conv_pooled.shape[1] == expected_height
assert conv_pooled.shape[2] == expected_width

# Compare input and output shapes.
print("Input batch shape:", images.shape)
print("Output batch shape:", conv_pooled.shape)

# Values at position (0, 0) of the first image, one per output channel.
sample_tensor = conv_pooled[0, 0, 0]
print("One pooled location values:", sample_tensor.numpy())




### **2.3. Regularization with Dropout BatchNorm**

<img src="https://cdn.jsdelivr.net/gh/mhrafiei/contents@main/LFF/Master PyTorch 2.10.0/Module_02/Lecture_B/image_02_03.jpg?v=1769660269" width="250">



>* Dropout randomly disables neurons to reduce overfitting
>* Encourages robust, shared representations in dense layers

>* BatchNorm normalizes layer outputs within each batch
>* Stabilizes training, improves gradients, adds mild regularization

>* Place BatchNorm after layers, dropout near output
>* This combo builds stable, robust, generalizing models



In [None]:
#@title Python Code - Regularization with Dropout BatchNorm

# This script shows dropout and batchnorm together.
# We use TensorFlow to build a tiny model.
# Focus on regularization layers and simple outputs.

# !pip install tensorflow==2.20.0.

# Import required standard libraries.
import os
import random
import numpy as np

# Import TensorFlow and Keras layers.
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Set deterministic random seeds.
seed_value = 42
random.seed(seed_value)
np.random.seed(seed_value)
tf.random.set_seed(seed_value)

# Print TensorFlow version briefly.
print("TensorFlow version:", tf.__version__)

# Create a small synthetic classification dataset.
num_samples = 256
num_features = 20
num_classes = 3

# Generate random input features.
X = np.random.randn(num_samples, num_features).astype("float32")

# Generate random integer labels (independent of X, so there is no real
# signal to learn; the point is to compare the two model variants).
y_int = np.random.randint(num_classes, size=num_samples)

# One hot encode labels for training.
y = keras.utils.to_categorical(y_int, num_classes=num_classes)

# Validate shapes before building the model.
assert X.shape == (num_samples, num_features)
assert y.shape == (num_samples, num_classes)

# Build a simple model without regularization.
inputs = keras.Input(shape=(num_features,))
x_plain = layers.Dense(32, activation="relu")(inputs)
outputs_plain = layers.Dense(num_classes, activation="softmax")(x_plain)
model_plain = keras.Model(inputs, outputs_plain, name="plain_model")

# Build a similar model with BatchNorm and Dropout. The Dense layer is
# created without a bias and BatchNormalization follows it directly.
inputs_reg = keras.Input(shape=(num_features,))
x_reg = layers.Dense(32, use_bias=False)(inputs_reg)
x_reg = layers.BatchNormalization()(x_reg)
x_reg = layers.Activation("relu")(x_reg)

# Apply dropout for regularization.
x_reg = layers.Dropout(0.5)(x_reg)
outputs_reg = layers.Dense(num_classes, activation="softmax")(x_reg)
model_reg = keras.Model(inputs_reg, outputs_reg, name="reg_model")

# Compile both models with same settings; each gets its own optimizer
# instance.
optimizer_plain = keras.optimizers.Adam(learning_rate=0.01)
optimizer_reg = keras.optimizers.Adam(learning_rate=0.01)
loss_fn = keras.losses.CategoricalCrossentropy()
metrics_list = ["accuracy"]

# Compile plain model.
model_plain.compile(optimizer=optimizer_plain, loss=loss_fn, metrics=metrics_list)

# Compile regularized model.
model_reg.compile(optimizer=optimizer_reg, loss=loss_fn, metrics=metrics_list)

# Train both models briefly and silently.
history_plain = model_plain.fit(
    X,
    y,
    epochs=10,
    batch_size=32,
    verbose=0,
)

# Train the regularized model.
history_reg = model_reg.fit(
    X,
    y,
    epochs=10,
    batch_size=32,
    verbose=0,
)

# Evaluate both models on the same (training) data.
plain_loss, plain_acc = model_plain.evaluate(
    X,
    y,
    verbose=0,
)

# Evaluate regularized model.
reg_loss, reg_acc = model_reg.evaluate(
    X,
    y,
    verbose=0,
)

# Print a short comparison of results.
print("Plain model accuracy:", round(float(plain_acc), 3))
print("Reg model accuracy:", round(float(reg_acc), 3))
print("Plain model loss:", round(float(plain_loss), 3))
print("Reg model loss:", round(float(reg_loss), 3))

# Run the same rows through the regularized model in training and
# inference mode.
sample_batch = X[:3]
plain_pred = model_plain(sample_batch, training=False)
reg_pred_train = model_reg(sample_batch, training=True)
reg_pred_eval = model_reg(sample_batch, training=False)

# Softmax rows sum to about 1 in every mode, so these sums are only a
# sanity check on the outputs.
print("Plain predictions sum row0:", float(tf.reduce_sum(plain_pred[0])))
print("Reg train predictions sum row0:", float(tf.reduce_sum(reg_pred_train[0])))
print("Reg eval predictions sum row0:", float(tf.reduce_sum(reg_pred_eval[0])))

# The row sums cannot reveal the effect of the training flag, so also
# report how far the two sets of predictions are from each other.
train_eval_gap = tf.reduce_max(tf.abs(reg_pred_train - reg_pred_eval))
print("Reg max |train - eval| prediction gap:", float(train_eval_gap))



## **3. Managing Model Parameters**

### **3.1. Iterating Model Parameters**

<img src="https://cdn.jsdelivr.net/gh/mhrafiei/contents@main/LFF/Master PyTorch 2.10.0/Module_02/Lecture_B/image_03_01.jpg?v=1769660353" width="250">



>* Systematically step through every model weight and bias
>* Use parameter iteration for insight, debugging, customization

>* Parameter names and shapes reveal model structure
>* This info aids debugging, design checks, and counting

>* Use iteration to freeze or tune layers
>* Group parameters for custom training, monitoring, protection



In [None]:
#@title Python Code - Iterating Model Parameters

# This script shows how to inspect model parameters.
# We use TensorFlow Keras layers to build a model.
# Focus on iterating parameters and counting them.

# !pip install tensorflow==2.20.0.

# Import required TensorFlow and NumPy modules.
import tensorflow as tf
import numpy as np

# Set a deterministic random seed for reproducibility.
tf.random.set_seed(42)

# Print TensorFlow version in one concise line.
print("TensorFlow version:", tf.__version__)

# Define a simple sequential model with common layers.
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(16,)),
    tf.keras.layers.Dense(8, activation="relu"),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(4, activation="relu"),
    tf.keras.layers.Dense(2, activation="softmax"),
])

# Run one forward pass on dummy data. The result is kept under an explicit
# name (not `_`) because its shape is printed at the end of the cell.
dummy_input = tf.zeros((1, 16), dtype=tf.float32)
dummy_output = model(dummy_input)

# Pair every layer name with the number of trainable scalars it owns.
layer_param_counts = [
    (
        layer.name,
        sum(int(np.prod(var.shape)) for var in layer.trainable_variables),
    )
    for layer in model.layers
]

# Compute total number of trainable parameters.
total_params = sum(count for _, count in layer_param_counts)

# Print a clear header for parameter listing.
print("\nTrainable parameters by layer:")

# Print each layer name and its parameter count.
for name, count in layer_param_counts:
    print(f"Layer {name:15s} -> {count:4d} params")

# Print the total number of trainable parameters.
print("\nTotal trainable parameters:", total_params)

# `model.layers` of a Sequential model does not include the Input entry,
# so index 0 is the first Dense layer (index 1 would be the Dropout layer,
# which has no trainable variables).
first_layer = model.layers[0]

# Safely access the first trainable variable of this layer.
if first_layer.trainable_variables:
    first_weight = first_layer.trainable_variables[0]
    print("\nFirst layer weight shape:", first_weight.shape)

# Confirm that model output shape matches expectation.
print("Model output shape on dummy input:", dummy_output.shape)




### **3.2. Initializing Model Parameters**

<img src="https://cdn.jsdelivr.net/gh/mhrafiei/contents@main/LFF/Master PyTorch 2.10.0/Module_02/Lecture_B/image_03_02.jpg?v=1769660407" width="250">



>* Initialization strongly affects learning speed and quality
>* Control weight initialization to stabilize and improve training

>* Default inits keep activations and gradients stable
>* Complex architectures may need custom initialization choices

>* Use custom initialization to encode task knowledge
>* Initialization choices strongly influence training stability, performance



In [None]:
#@title Python Code - Initializing Model Parameters

# This script shows parameter initialization basics.
# We use TensorFlow dense layers for clarity.
# Focus on inspecting and customizing layer weights.

# !pip install tensorflow.

# Import required standard libraries.
import os
import random
import numpy as np

# Import TensorFlow and Keras layers.
import tensorflow as tf
from tensorflow import keras

# Report the TensorFlow version in use.
print("TensorFlow version:", tf.__version__)

# Seed the Python, NumPy, and TensorFlow generators with the same value.
seed_value = 42
for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
    seed_fn(seed_value)

# Factory for the one-layer model used to compare initializers.

def build_model(initializer):
    """Return a Sequential model: 4 inputs -> Dense(3, relu).

    Args:
        initializer: Kernel initializer for the Dense layer; the bias is
            always initialized with "zeros".
    """
    # The input spec is created before the Dense layer, matching the order
    # in which they appear in the model.
    input_spec = keras.layers.Input(shape=(4,))
    dense = keras.layers.Dense(
        3,
        activation="relu",
        kernel_initializer=initializer,
        bias_initializer="zeros",
    )
    return keras.Sequential([input_spec, dense])

# Two kernel initializers to compare: a narrow Gaussian and Glorot uniform.
init_random_normal = keras.initializers.RandomNormal(mean=0.0, stddev=0.05)
init_glorot_uniform = keras.initializers.GlorotUniform()

# One model per initializer, built in the same order as above.
model_normal = build_model(init_random_normal)
model_glorot = build_model(init_glorot_uniform)

# Kernel and bias arrays of the Dense layer in each model.
dense_normal = model_normal.layers[0]
dense_glorot = model_glorot.layers[0]
weights_normal, biases_normal = dense_normal.get_weights()
weights_glorot, biases_glorot = dense_glorot.get_weights()

# Both kernels map 4 inputs to 3 units.
expected_kernel_shape = (4, 3)
assert weights_normal.shape == expected_kernel_shape
assert weights_glorot.shape == expected_kernel_shape

# Mean and standard deviation of each kernel.
mean_normal = float(np.mean(weights_normal))
std_normal = float(np.std(weights_normal))
mean_glorot = float(np.mean(weights_glorot))
std_glorot = float(np.std(weights_glorot))

# Side-by-side statistics for the two initializers.
print("\nDense layer kernel statistics by initializer:")
print("RandomNormal -> mean:", round(mean_normal, 5), "std:", round(std_normal, 5))
print("GlorotUniform -> mean:", round(mean_glorot, 5), "std:", round(std_glorot, 5))

# First kernel row of each model as a concrete sample.
print("\nFirst row weights (RandomNormal):", weights_normal[0])
print("First row weights (GlorotUniform):", weights_glorot[0])

# Bias vectors, both created with the "zeros" initializer.
print("\nBias vector (RandomNormal):", biases_normal)
print("Bias vector (GlorotUniform):", biases_glorot)




### **3.3. Counting model size**

<img src="https://cdn.jsdelivr.net/gh/mhrafiei/contents@main/LFF/Master PyTorch 2.10.0/Module_02/Lecture_B/image_03_03.jpg?v=1769660467" width="250">



>* Model size means counting all scalar parameters
>* Parameter count affects memory, speed, and deployment

>* Different layers contribute parameters in specific ways
>* Sum all parameter tensors to estimate model size

>* Relate parameter counts to hardware and goals
>* Use counts to compare, control, and refine models



In [None]:
#@title Python Code - Counting model size

# This script explores counting model parameters.
# It uses TensorFlow to build simple models.
# Focus on understanding model size and parameters.

# !pip install tensorflow==2.20.0.

# Import required standard libraries.
import os
import random
import numpy as np

# Import TensorFlow and Keras layers.
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Seed the Python, NumPy, and TensorFlow generators with the same value.
seed_value = 42
for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
    seed_fn(seed_value)

# Report the TensorFlow version in use.
print("TensorFlow version:", tf.__version__)

# Helper returning (trainable, non-trainable) parameter totals.
def count_parameters(model):
    """Count the scalar parameters held by ``model``.

    Args:
        model: Object exposing ``trainable_weights`` and
            ``non_trainable_weights``, each an iterable of items with a
            ``shape`` attribute.

    Returns:
        A ``(trainable, non_trainable)`` tuple of ints.
    """
    def _total(weights):
        # Product of each shape gives that tensor's element count.
        sizes = [np.prod(weight.shape) for weight in weights]
        return int(np.sum(sizes))

    trainable = _total(model.trainable_weights)
    non_trainable = _total(model.non_trainable_weights)
    return trainable, non_trainable

# Small network: 16 inputs -> 8 relu units -> 4 softmax outputs.
small_layers = [
    layers.Input(shape=(16,)),
    layers.Dense(8, activation="relu"),
    layers.Dense(4, activation="softmax"),
]
small_model = keras.Sequential(small_layers)

# Larger network: two hidden layers of 32 relu units, same input and output.
large_layers = [
    layers.Input(shape=(16,)),
    layers.Dense(32, activation="relu"),
    layers.Dense(32, activation="relu"),
    layers.Dense(4, activation="softmax"),
]
large_model = keras.Sequential(large_layers)

# One dummy forward pass through each model, small first.
dummy_input = np.zeros((1, 16), dtype=np.float32)
for net in (small_model, large_model):
    _ = net(dummy_input)

# Parameter totals for each model.
small_trainable, small_non_trainable = count_parameters(small_model)
large_trainable, large_non_trainable = count_parameters(large_model)

# Report the totals for both models.
print("Small model trainable parameters:", small_trainable)
print("Small model non_trainable parameters:", small_non_trainable)
print("Large model trainable parameters:", large_trainable)
print("Large model non_trainable parameters:", large_non_trainable)

# Size ratio; the max(..., 1) guard avoids dividing by zero.
ratio = large_trainable / max(small_trainable, 1)
print("Large model has", round(ratio, 2), "times more parameters.")




# <font color="#418FDE" size="6.5" uppercase>**Modules and Layers**</font>


In this lecture, you learned to:
- Define custom neural network components by subclassing nn.Module and implementing forward methods. 
- Use common nn layers such as Linear, Conv2d, and Dropout to assemble simple models. 
- Inspect and manage model parameters, including initialization and parameter counting. 

In the next Module (Module 3), we will go over 'Training Workflow'.