In [1]:
# Load IPython's autoreload extension so edited project modules are picked up
# without restarting the kernel.
%load_ext autoreload
# Mode 2: reload all imported modules before each cell is executed.
%autoreload 2

### Register Custom Modules
The GoogLeNet YAML file relies on two custom modules: ***DepthConcat*** and ***Conv2d***. ***DepthConcat*** concatenates the outputs of the parallel branches inside each Inception module along the depth (channel) dimension, while ***Conv2d*** is a 2D convolutional layer followed by batch normalization and a ReLU activation.

This illustrates that users can implement bottom-level modules at whatever level of granularity is appropriate for their application.

In [None]:
import torch
from model_composer import ModuleRegistry


@ModuleRegistry.register("DepthConcat")
class DepthConcat(torch.nn.Module):
    """Join any number of tensors into a single tensor along one dimension.

    Registered with ``ModuleRegistry`` under the name ``"DepthConcat"``.

    Args:
        dim: Dimension along which the inputs are concatenated. Defaults to 1.
    """

    def __init__(self, dim=1):
        super().__init__()
        self.dim = dim

    def forward(self, *inputs):
        """Return the positional ``inputs`` concatenated along ``self.dim``."""
        concat_dim = self.dim
        # ``inputs`` is the tuple of positional tensors; torch.cat accepts it directly.
        return torch.cat(inputs, dim=concat_dim)


@ModuleRegistry.register("Conv2d")
class Conv2d(torch.nn.Module):
    """Convolution block: ``torch.nn.Conv2d`` -> ``BatchNorm2d`` -> ``ReLU``.

    Despite sharing its name with ``torch.nn.Conv2d``, this is a composite
    block, not a bare convolution. It is registered with ``ModuleRegistry``
    under the name ``"Conv2d"``.

    Args:
        in_channels: Number of channels in the input tensor.
        out_channels: Number of channels produced by the convolution; also
            the number of features normalized by the batch-norm layer.
        kernel_size: Kernel size, forwarded to ``torch.nn.Conv2d``.
        stride: Convolution stride, forwarded to ``torch.nn.Conv2d``.
            Defaults to 1, matching ``torch.nn.Conv2d``.
        padding: Convolution padding, forwarded to ``torch.nn.Conv2d``.
            Defaults to 0, matching ``torch.nn.Conv2d``.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0):
        super().__init__()
        # The attribute names (conv2d / bn / relu) determine the state_dict
        # keys, so they are kept unchanged.
        # NOTE(review): the convolution keeps its default bias even though a
        # BatchNorm layer follows; left as-is so parameter shapes and any
        # existing checkpoints stay compatible.
        self.conv2d = torch.nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
        )
        self.bn = torch.nn.BatchNorm2d(out_channels)
        self.relu = torch.nn.ReLU()

    def forward(self, x):
        """Apply convolution, then batch normalization, then ReLU to ``x``."""
        return self.relu(self.bn(self.conv2d(x)))

### Build the Full Model

In [3]:
from model_composer import read_config, ComposableModel

# Read the GoogLeNet model definition from its YAML file (relative path, so
# presumably resolved against the notebook's working directory).
cfg = read_config("example/yaml/GoogLeNet/model.yaml")
# The config entries are passed to ComposableModel as keyword arguments.
# Construction logs a "Building module ..." line per module, including the
# modules nested inside each inception block.
model = ComposableModel(**cfg)

Building module GoogLeNet.input
Building module GoogLeNet.convolution_1
Building module GoogLeNet.maxpool_1
Building module GoogLeNet.convolution_2_reduce
Building module GoogLeNet.convolution_2
Building module GoogLeNet.maxpool_2
Building module GoogLeNet.inception_3a
Building module inception_3a.input
Building module inception_3a.conv1x1_3x3reduce
Building module inception_3a.conv1x1_5x5reduce
Building module inception_3a.maxpool
Building module inception_3a.conv1x1
Building module inception_3a.conv3x3
Building module inception_3a.conv5x5
Building module inception_3a.conv1x1_pool_proj
Building module inception_3a.concat
Building module inception_3a.output
Building module GoogLeNet.inception_3b
Building module inception_3b.input
Building module inception_3b.conv1x1_3x3reduce
Building module inception_3b.conv1x1_5x5reduce
Building module inception_3b.maxpool
Building module inception_3b.conv1x1
Building module inception_3b.conv3x3
Building module inception_3b.conv5x5
Building module in

Examine the input sources and output destinations of each module in the model:

In [4]:
# Private attribute (leading underscore): for each module name, the list of
# names (model inputs or other modules) that it takes its inputs from.
model._inp_src

{'input': ['x'],
 'convolution_1': ['x'],
 'maxpool_1': ['convolution_1'],
 'convolution_2_reduce': ['maxpool_1'],
 'convolution_2': ['convolution_2_reduce'],
 'maxpool_2': ['convolution_2'],
 'inception_3a': ['maxpool_2'],
 'inception_3b': ['inception_3a'],
 'maxpool_3': ['inception_3b'],
 'inception_4a': ['maxpool_3'],
 'inception_4b': ['inception_4a'],
 'inception_4c': ['inception_4b'],
 'inception_4d': ['inception_4c'],
 'inception_4e': ['inception_4d'],
 'maxpool_4': ['inception_4e'],
 'inception_5a': ['maxpool_4'],
 'inception_5b': ['inception_5a'],
 'avgpool': ['inception_5b'],
 'dropout': ['avgpool'],
 'flatten': ['dropout'],
 'linear': ['flatten'],
 'softmax': ['linear'],
 'auxiliary_classifier_4a': ['inception_4a'],
 'auxiliary_classifier_4b': ['inception_4d'],
 'output': ['auxiliary_classifier_4a', 'auxiliary_classifier_4b', 'softmax']}

In [5]:
# Private attribute (leading underscore): for each producer name, the list of
# "<module>.input.<index>" slots that consume its output.
model._des

{'x': ['convolution_1.input.0'],
 'convolution_1': ['maxpool_1.input.0'],
 'maxpool_1': ['convolution_2_reduce.input.0'],
 'convolution_2_reduce': ['convolution_2.input.0'],
 'convolution_2': ['maxpool_2.input.0'],
 'maxpool_2': ['inception_3a.input.0'],
 'inception_3a': ['inception_3b.input.0'],
 'inception_3b': ['maxpool_3.input.0'],
 'maxpool_3': ['inception_4a.input.0'],
 'inception_4a': ['inception_4b.input.0', 'auxiliary_classifier_4a.input.0'],
 'inception_4b': ['inception_4c.input.0'],
 'inception_4c': ['inception_4d.input.0'],
 'inception_4d': ['inception_4e.input.0', 'auxiliary_classifier_4b.input.0'],
 'inception_4e': ['maxpool_4.input.0'],
 'maxpool_4': ['inception_5a.input.0'],
 'inception_5a': ['inception_5b.input.0'],
 'inception_5b': ['avgpool.input.0'],
 'avgpool': ['dropout.input.0'],
 'dropout': ['flatten.input.0'],
 'flatten': ['linear.input.0'],
 'linear': ['softmax.input.0']}

Verify the output shapes:

In [6]:
from loguru import logger
import torch

# Random dummy batch of shape (1, 3, 224, 224), i.e. (batch, channels, height, width).
# NOTE(review): `input` shadows the Python builtin of the same name for the
# rest of the notebook.
input = torch.randn(1, 3, 224, 224)
# print_output_shape=True makes the forward pass log each module's output shape.
# NOTE(review): there is no model.eval() / torch.no_grad() here, so this pass
# presumably runs in training mode with autograd enabled — confirm that is
# intended for a pure shape check.
output = model(input, print_output_shape=True)
# The model returns several tensors; log the shape of each one.
for k, out in enumerate(output):
    logger.info(f"Output {k} shape: {out.shape}")

[32m2025-05-11 09:21:03.327[0m | [1mINFO    [0m | [36mmodel_composer.composer[0m:[36mforward[0m:[36m228[0m - [1mModule convolution_1 output shape: [torch.Size([1, 64, 112, 112])][0m
[32m2025-05-11 09:21:03.334[0m | [1mINFO    [0m | [36mmodel_composer.composer[0m:[36mforward[0m:[36m228[0m - [1mModule maxpool_1 output shape: [torch.Size([1, 64, 56, 56])][0m
[32m2025-05-11 09:21:03.340[0m | [1mINFO    [0m | [36mmodel_composer.composer[0m:[36mforward[0m:[36m228[0m - [1mModule convolution_2_reduce output shape: [torch.Size([1, 64, 56, 56])][0m
[32m2025-05-11 09:21:03.350[0m | [1mINFO    [0m | [36mmodel_composer.composer[0m:[36mforward[0m:[36m228[0m - [1mModule convolution_2 output shape: [torch.Size([1, 192, 56, 56])][0m
[32m2025-05-11 09:21:03.355[0m | [1mINFO    [0m | [36mmodel_composer.composer[0m:[36mforward[0m:[36m228[0m - [1mModule maxpool_2 output shape: [torch.Size([1, 192, 28, 28])][0m
[32m2025-05-11 09:21:03.375[0m | [