In [2]:
import os
import sys

# Make the project's ``src`` directory importable so the ``models`` package
# resolves. The path is relative to the current working directory.
_src_dir = os.path.abspath("./../src")
# Guard against duplicates: re-running this cell must not keep growing sys.path.
if _src_dir not in sys.path:
    sys.path.append(_src_dir)

In [3]:
from models.moat import MOAT
from models.swin_transformer import SwinTransformer
from models.coatnet import MyCoAtNet
from torchinfo import summary
from fvcore.nn import FlopCountAnalysis, flop_count_table
import torch
from ptflops import get_model_complexity_info


def analyze_model(model, input_size=(1, 1, 64, 64)):
    """Print a layer summary and a FLOP estimate for ``model``.

    The model is moved to CUDA when available (CPU otherwise). A torchinfo
    summary is printed first. FLOPs are then reported by ptflops (as
    ``2 * MACs``) for models whose class is named ``MOAT``, and as an fvcore
    ``flop_count_table`` for every other model.

    Args:
        model: The ``torch.nn.Module`` to inspect. It is moved to the selected
            device and, on the MOAT path, switched to eval mode.
        input_size: Full input shape, including the leading batch dimension.
            The same shape drives the summary and both FLOP counters.

    Returns:
        None. All results are printed.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")
    model = model.to(device)

    print("Model Summary:")
    print(summary(model, input_size=input_size, device=device))

    print("\nFLOPs:")
    if type(model).__name__ == "MOAT":
        # ptflops takes the per-sample shape, i.e. input_size without the
        # batch dimension. Deriving it here keeps this branch consistent with
        # the summary above instead of silently assuming (1, 64, 64).
        per_sample_shape = tuple(input_size[1:])
        model.eval()
        with torch.no_grad():
            macs, _ = get_model_complexity_info(
                model,
                per_sample_shape,
                as_strings=False,
                print_per_layer_stat=False,
                verbose=False,
            )
        print(f"FLOPs: {2*macs}")
    else:
        # The dummy input is only needed by fvcore, so build it on this path only.
        input_tensor = torch.randn(*input_size).to(device)
        flops = FlopCountAnalysis(model, input_tensor)
        print(flop_count_table(flops))
    print("=" * 50)

  from .autonotebook import tqdm as notebook_tqdm


## Swin Transformer

In [None]:
# Keyword arguments common to every Swin variant analysed in this cell.
# Each original entry also carried a commented-out ``patch_norm=True``;
# that argument is still not passed here.
_swin_shared = dict(
    num_classes=36,
    patch_size=4,
    mlp_ratio=4.0,
    qkv_bias=True,
    ape=False,
)

# Per-variant settings: (embed_dim, depths, num_heads, window_size).
_swin_variants = {
    "swin_small": (96, [2, 3, 2], [3, 8, 12], 4),
    "swin_smaller": (48, [2, 2, 2], [3, 6, 8], 2),
    "swin_default": (96, [2, 2, 6, 2], [3, 6, 12, 24], 4),
    "swin_big": (128, [2, 4, 18, 2], [4, 8, 16, 32], 8),
    "swin_bigger": (192, [2, 4, 12, 2], [4, 8, 16, 24], 16),
}

# Instantiate the models in the order listed above.
architectures = {
    label: SwinTransformer(
        embed_dim=width,
        depths=stage_depths,
        num_heads=stage_heads,
        window_size=window,
        **_swin_shared,
    )
    for label, (width, stage_depths, stage_heads, window) in _swin_variants.items()
}

# Report the summary and FLOPs of each variant in turn.
for name, model in architectures.items():
    print(f"Analyzing {name}...")
    analyze_model(model)

Analyzing swin_small...
Using device: cpu
Model Summary:
Layer (type:depth-idx)                             Output Shape              Param #
SwinTransformer                                    [1, 36]                   --
├─PatchEmbed: 1-1                                  [1, 256, 96]              --
│    └─Conv2d: 2-1                                 [1, 96, 16, 16]           1,632
│    └─LayerNorm: 2-2                              [1, 256, 96]              192
├─Dropout: 1-2                                     [1, 256, 96]              --
├─ModuleList: 1-3                                  --                        --
│    └─BasicLayer: 2-3                             [1, 64, 192]              --
│    │    └─ModuleList: 3-1                        --                        229,638
│    │    └─PatchMerging: 3-2                      [1, 64, 192]              74,112
│    └─BasicLayer: 2-4                             [1, 16, 384]              --
│    │    └─ModuleList: 3-3                  

## CoAtNet

In [4]:
# Per-variant settings: (nums_blocks, layer_out_channels).
# Commented-out rows are variants that are currently disabled.
_coatnet_specs = {
    "coatnet_first": ([2, 2, 3, 2, 2], [64, 96, 192, 384, 768]),
    # "coatnet_second": ([2, 3, 4, 5, 2], [64, 128, 256, 512, 1024]),
    # "coatnet_third": ([3, 4, 6, 8, 3], [64, 128, 256, 512, 1024]),
    # "coatnet_fourth": ([2, 2, 4, 4, 2], [64, 96, 192, 384, 768]),
    "coatnet_default": ([2, 2, 3, 5, 2], [64, 96, 192, 384, 768]),
    "coatnet_bigger": ([4, 6, 8, 10, 2], [64, 192, 384, 512, 1024]),
    "coatnet_bigger2": ([3, 5, 7, 9, 3], [192, 256, 512, 768, 1024]),
}

# Instantiate the enabled variants in the order listed above.
coatnet_configs = {
    label: MyCoAtNet(
        num_classes=36,
        nums_blocks=blocks_per_stage,
        layer_out_channels=stage_channels,
    )
    for label, (blocks_per_stage, stage_channels) in _coatnet_specs.items()
}

# Report the summary and FLOPs of each variant in turn.
for name, model in coatnet_configs.items():
    print(f"Analyzing {name}...")
    analyze_model(model)

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


Analyzing coatnet_first...
Using device: cpu
Model Summary:
Layer (type:depth-idx)                                  Output Shape              Param #
MyCoAtNet                                               [36]                      --
├─Sequential: 1-1                                       [1, 64, 30, 30]           --
│    └─Stem: 2-1                                        [1, 64, 30, 30]           --
│    │    └─Conv2d: 3-1                                 [1, 64, 32, 32]           640
│    │    └─BatchNorm2d: 3-2                            [1, 64, 32, 32]           128
│    │    └─GELU: 3-3                                   [1, 64, 32, 32]           --
│    │    └─Conv2d: 3-4                                 [1, 64, 30, 30]           36,928
├─Sequential: 1-2                                       [1, 96, 15, 15]           --
│    └─DownsamplingMBConv: 2-2                          [1, 96, 15, 15]           --
│    │    └─MaxPool2d: 3-5                              [1, 64, 15, 15]        

## MOAT

In [12]:
# Keyword arguments common to every MOAT variant analysed in this cell.
_moat_shared = dict(
    in_channels=1,
    num_classes=36,
    img_size=64,
    attn_drop=0.3,
    drop=0.3,
)

# Backbone shapes that the variants below are built from.
# NOTE(review): the 396 in "smaller" is reproduced as-is from the original
# cell — confirm it is intentional and not a typo for 384.
_moat_backbones = {
    "default": dict(
        depths=(3, 5, 9, 3),
        channels=(128, 384, 512, 1024),
        embed_dim=128,
    ),
    "smaller": dict(
        depths=(2, 2, 4, 2),
        channels=(128, 256, 396, 512),
        embed_dim=128,
    ),
    "bigger": dict(
        depths=(3, 5, 9, 3),
        channels=(256, 512, 768, 1024),
        embed_dim=256,
    ),
    "bigger2": dict(
        depths=(3, 5, 7, 9, 3),
        channels=(128, 256, 512, 768, 1024),
        embed_dim=128,
    ),
}

# (variant name, backbone key, window size). A window size of None means the
# variant passes neither ``use_window`` nor ``window_size`` to the constructor.
_moat_variants = [
    ("moat_default", "default", None),
    ("moat_default_window", "default", 16),
    ("moat_smaller", "smaller", None),
    ("moat_smaller_window", "smaller", 8),
    ("moat_bigger", "bigger", None),
    ("moat_bigger_window", "bigger", 16),
    ("moat_bigger2", "bigger2", None),
    ("moat_bigger2_window8", "bigger2", 8),
    ("moat_bigger2_window16", "bigger2", 16),
]


def _moat_kwargs(backbone_key, window):
    """Assemble the constructor keyword arguments for one MOAT variant."""
    kwargs = {**_moat_shared, **_moat_backbones[backbone_key]}
    if window is not None:
        kwargs.update(use_window=True, window_size=window)
    return kwargs


# Instantiate the models in the order listed above.
moat_configs = {
    label: MOAT(**_moat_kwargs(backbone_key, window))
    for label, backbone_key, window in _moat_variants
}

# Report the summary and FLOPs of each variant in turn.
for name, model in moat_configs.items():
    print(f"Analyzing {name}...")
    analyze_model(model)

Analyzing moat_default...
Using device: cpu
Model Summary:
Layer (type:depth-idx)                                  Output Shape              Param #
MOAT                                                    [1, 36]                   --
├─Sequential: 1-1                                       [1, 128, 32, 32]          --
│    └─Conv2d: 2-1                                      [1, 128, 32, 32]          1,280
│    └─GELU: 2-2                                        [1, 128, 32, 32]          --
│    └─Conv2d: 2-3                                      [1, 128, 32, 32]          147,584
│    └─GELU: 2-4                                        [1, 128, 32, 32]          --
├─ModuleList: 1-2                                       --                        --
│    └─MBConvBlock: 2-5                                 [1, 128, 16, 16]          --
│    │    └─Sequential: 3-1                             [1, 128, 16, 16]          100,928
│    │    └─Sequential: 3-2                             [1, 128, 16, 16] 