<a href="https://colab.research.google.com/github/rallm/IUST-DL-Fall2025/blob/main/HW4/helper/hw4_p1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import math
import pandas as pd

def calculate_layer_metrics(layers_config, input_size=(128, 128, 3)):
    """Compute per-layer output shape, parameters, FLOPs and receptive field.

    Walks ``layers_config`` in order, propagating the feature-map size
    (height, width, channels) from one layer to the next, and prints the
    input size plus the total parameter and FLOP counts.

    Args:
        layers_config: Iterable of dicts, one per layer. Required keys are
            ``'name'`` and ``'type'``. Recognised types are ``'conv'``,
            ``'pool'``, ``'linear'`` and ``'flatten'``; any other type is
            treated as a shape-preserving layer with no parameters or FLOPs.
            Optional keys and their defaults: ``'k'`` (kernel, 1), ``'s'``
            (stride, 1), ``'p'`` (padding, 0), ``'d'`` (dilation, 1) and
            ``'c_out'`` (output channels/features, defaults to the layer's
            input channels).
        input_size: ``(height, width, channels)`` of the network input.

    Returns:
        pandas.DataFrame with one row per layer and the columns ``Layer``,
        ``Input``, ``Output Shape``, ``Params``, ``FLOPs (M)`` and ``RF``
        (``"-"`` for layers that are not conv/pool).

    Raises:
        ValueError: If a conv/pool layer would produce a spatial output
            smaller than 1x1 (kernel larger than the padded input).
    """
    results = []

    # Running state carried from layer to layer.
    h, w, c_in = input_size
    current_rf = 1  # Receptive field of a single input pixel is 1
    cum_stride = 1  # Product of all strides seen so far (the "jump" J)

    total_params = 0
    total_flops = 0

    print(f"Input Image Size: {h}x{w}x{c_in}")
    print("-" * 60)

    for layer in layers_config:
        name = layer['name']
        l_type = layer['type']
        is_spatial = l_type in ('conv', 'pool')

        # Hyper-parameters, with defaults for keys the layer does not set.
        k = layer.get('k', 1)
        s = layer.get('s', 1)
        p = layer.get('p', 0)
        d = layer.get('d', 1)
        c_out = layer.get('c_out', c_in)  # e.g. pooling keeps the channel count

        # Effective kernel extent once dilation gaps are included.
        k_eff = k + (k - 1) * (d - 1)

        # --- 1. Output dimensions ---
        if is_spatial:
            h_out = math.floor((h - k_eff + 2 * p) / s) + 1
            w_out = math.floor((w - k_eff + 2 * p) / s) + 1
            if h_out < 1 or w_out < 1:
                # Fail loudly instead of propagating negative sizes into
                # the FLOP totals and every following layer.
                raise ValueError(
                    f"Layer '{name}': output size {h_out}x{w_out} is not "
                    f"positive (input {h}x{w}, k_eff={k_eff}, s={s}, p={p})"
                )
        elif l_type == 'linear':
            h_out, w_out = 1, 1  # c_out comes from the config
        elif l_type == 'flatten':
            h_out, w_out = 1, 1
            c_out = h * w * c_in
        else:
            # Unrecognised layer types (dropout, activations, ...) leave the
            # spatial size unchanged. Made explicit so that such a layer also
            # works in first position instead of hitting an unbound h_out.
            h_out, w_out = h, w

        # --- 2. Parameters (pool / flatten / other layers have none) ---
        params = 0
        if l_type == 'conv':
            # Weights: k*k*c_in*c_out, bias: c_out
            params = (k * k * c_in * c_out) + c_out
        elif l_type == 'linear':
            # Weights: c_in*c_out, bias: c_out
            params = (c_in * c_out) + c_out

        # --- 3. FLOPs (approximation: 2 * MACs) ---
        flops = 0
        if l_type == 'conv':
            flops = 2 * h_out * w_out * (k * k * c_in) * c_out
        elif l_type == 'linear':
            flops = 2 * c_in * c_out
        elif l_type == 'pool':
            # One comparison/addition per window element.
            flops = h_out * w_out * c_out * k * k

        # --- 4. Receptive field ---
        # RF_new = RF_old + (k_eff - 1) * cumulative stride *before* this layer,
        # so the stride product is updated only after the RF.
        if is_spatial:
            current_rf = current_rf + (k_eff - 1) * cum_stride
            cum_stride *= s

        is_linear = l_type == 'linear'
        results.append({
            'Layer': name,
            'Input': f"{c_in}" if is_linear else f"{h}x{w}x{c_in}",
            'Output Shape': f"{c_out}" if is_linear else f"{h_out}x{w_out}x{c_out}",
            'Params': params,
            'FLOPs (M)': flops / 1e6,  # Reported in millions
            'RF': current_rf if is_spatial else "-"
        })

        total_params += params
        total_flops += flops

        # This layer's output is the next layer's input.
        c_in = c_out
        h, w = h_out, w_out

    df = pd.DataFrame(results)

    print(f"Total Parameters: {total_params:,}")
    print(f"Total FLOPs: {total_flops / 1e6:.2f} Million")

    return df

In [2]:
# --- Network architecture (taken from the assignment description) ---
# One dict per layer, in forward order. Keys a layer leaves out
# (k, s, p, d, c_out) use the defaults applied by calculate_layer_metrics.
architecture = [
    # L1: 7x7 convolution, stride 1
    dict(name='L1: Conv2d', type='conv', k=7, s=1, p=0, c_out=32),
    # L2: 5x5 convolution, stride 2
    dict(name='L2: Conv2d', type='conv', k=5, s=2, p=0, c_out=64),
    # L3: 2x2 max pooling, stride 2
    dict(name='L3: MaxPool', type='pool', k=2, s=2),
    # L4: 3x3 convolution with dilation 2 (effective kernel 5)
    dict(name='L4: Conv2d (d=2)', type='conv', k=3, s=1, p=0, d=2, c_out=128),
    # L5: 3x3 convolution, stride 1
    dict(name='L5: Conv2d', type='conv', k=3, s=1, p=0, c_out=128),
    # L6: 2x2 max pooling, stride 2
    dict(name='L6: MaxPool', type='pool', k=2, s=2),
    # L7: 3x3 convolution, stride 1
    dict(name='L7: Conv2d', type='conv', k=3, s=1, p=0, c_out=256),
    # L8: 2x2 average pooling, stride 2
    dict(name='L8: AvgPool', type='pool', k=2, s=2),
    # Flatten the feature map into a vector
    dict(name='Flatten', type='flatten'),
    # L9, first of the "two consecutive linear layers": Linear(N, 1024)
    dict(name='L9_1: Linear', type='linear', c_out=1024),
    # L9, second of the two: Linear(1024, 1024)
    dict(name='L9_2: Linear', type='linear', c_out=1024),
    # L12: Linear(1024, 10); layer 11 is Dropout and is left out of the calculation
    dict(name='L12: Linear', type='linear', c_out=10),
]

# Compute the per-layer metrics table for this architecture
df_results = calculate_layer_metrics(architecture)

Input Image Size: 128x128x3
------------------------------------------------------------
Total Parameters: 5,827,786
Total FLOPs: 803.23 Million


In [3]:
# Render the per-layer metrics DataFrame produced by calculate_layer_metrics
# in the previous cell as rich notebook output.
display(df_results)

Unnamed: 0,Layer,Input,Output Shape,Params,FLOPs (M),RF
0,L1: Conv2d,128x128x3,122x122x32,4736,140.028672,7
1,L2: Conv2d,122x122x32,59x59x64,51264,356.4544,11
2,L3: MaxPool,59x59x64,29x29x64,0,0.215296,13
3,L4: Conv2d (d=2),29x29x64,25x25x128,73856,92.16,29
4,L5: Conv2d,25x25x128,23x23x128,147584,156.008448,37
5,L6: MaxPool,23x23x128,11x11x128,0,0.061952,41
6,L7: Conv2d,11x11x128,9x9x256,295168,47.775744,57
7,L8: AvgPool,9x9x256,4x4x256,0,0.016384,65
8,Flatten,4x4x256,1x1x4096,0,0.0,-
9,L9_1: Linear,4096,1024,4195328,8.388608,-
