In [1]:
import torch

from torchvision.models import resnet50
from torchvision.models import ResNet50_Weights
from models.model import get_model

from torch import nn
import torchinfo
from torchprofile import profile_macs

import timm

DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
timm.list_models(pretrained=True)

['bat_resnext26ts.ch_in1k',
 'beit_base_patch16_224.in22k_ft_in22k',
 'beit_base_patch16_224.in22k_ft_in22k_in1k',
 'beit_base_patch16_384.in22k_ft_in22k_in1k',
 'beit_large_patch16_224.in22k_ft_in22k',
 'beit_large_patch16_224.in22k_ft_in22k_in1k',
 'beit_large_patch16_384.in22k_ft_in22k_in1k',
 'beit_large_patch16_512.in22k_ft_in22k_in1k',
 'beitv2_base_patch16_224.in1k_ft_in1k',
 'beitv2_base_patch16_224.in1k_ft_in22k',
 'beitv2_base_patch16_224.in1k_ft_in22k_in1k',
 'beitv2_large_patch16_224.in1k_ft_in1k',
 'beitv2_large_patch16_224.in1k_ft_in22k',
 'beitv2_large_patch16_224.in1k_ft_in22k_in1k',
 'botnet26t_256.c1_in1k',
 'caformer_b36.sail_in1k',
 'caformer_b36.sail_in1k_384',
 'caformer_b36.sail_in22k',
 'caformer_b36.sail_in22k_ft_in1k',
 'caformer_b36.sail_in22k_ft_in1k_384',
 'caformer_m36.sail_in1k',
 'caformer_m36.sail_in1k_384',
 'caformer_m36.sail_in22k',
 'caformer_m36.sail_in22k_ft_in1k',
 'caformer_m36.sail_in22k_ft_in1k_384',
 'caformer_s18.sail_in1k',
 'caformer_s18.s

In [4]:

model = timm.create_model('convnextv2_large.fcmae_ft_in22k_in1k', pretrained=True)

Downloading model.safetensors:   0%|          | 0.00/792M [00:00<?, ?B/s]

In [5]:
model

ConvNeXt(
  (stem): Sequential(
    (0): Conv2d(3, 192, kernel_size=(4, 4), stride=(4, 4))
    (1): LayerNorm2d((192,), eps=1e-06, elementwise_affine=True)
  )
  (stages): Sequential(
    (0): ConvNeXtStage(
      (downsample): Identity()
      (blocks): Sequential(
        (0): ConvNeXtBlock(
          (conv_dw): Conv2d(192, 192, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=192)
          (norm): LayerNorm((192,), eps=1e-06, elementwise_affine=True)
          (mlp): GlobalResponseNormMlp(
            (fc1): Linear(in_features=192, out_features=768, bias=True)
            (act): GELU()
            (drop1): Dropout(p=0.0, inplace=False)
            (grn): GlobalResponseNorm()
            (fc2): Linear(in_features=768, out_features=192, bias=True)
            (drop2): Dropout(p=0.0, inplace=False)
          )
          (shortcut): Identity()
          (drop_path): Identity()
        )
        (1): ConvNeXtBlock(
          (conv_dw): Conv2d(192, 192, kernel_size=(7, 7), strid

In [6]:
torchinfo.summary(model, input_size=(1, 3, 224, 224))

Layer (type:depth-idx)                                       Output Shape              Param #
ConvNeXt                                                     [1, 1000]                 --
├─Sequential: 1-1                                            [1, 192, 56, 56]          --
│    └─Conv2d: 2-1                                           [1, 192, 56, 56]          9,408
│    └─LayerNorm2d: 2-2                                      [1, 192, 56, 56]          384
├─Sequential: 1-2                                            [1, 1536, 7, 7]           --
│    └─ConvNeXtStage: 2-3                                    [1, 192, 56, 56]          --
│    │    └─Identity: 3-1                                    [1, 192, 56, 56]          --
│    │    └─Sequential: 3-2                                  [1, 192, 56, 56]          922,176
│    └─ConvNeXtStage: 2-4                                    [1, 384, 28, 28]          --
│    │    └─Sequential: 3-3                                  [1, 384, 28, 28]         

In [3]:

sample_input = torch.randn(1, 3, 224, 224).to(DEVICE)

flops = profile_macs(model, sample_input)

print(flops / 1e9)

RuntimeError: Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same

In [2]:
model = get_model('ConvTransNeXtTiny', pretrained=True, num_classes=200, freeze=True).to(DEVICE)

sample_input = torch.randn(32, 3, 224, 224).to(DEVICE)

model(sample_input).shape



torch.Size([32, 200])

In [3]:
torchinfo.summary(model, input_size=(1, 3, 224, 224))

Layer (type:depth-idx)                             Output Shape              Param #
ConvTransNeXtTiny                                  [1, 200]                  --
├─ConvNeXt: 1-3                                    --                        (recursive)
│    └─Sequential: 2-1                             [1, 384, 14, 14]          --
│    │    └─Conv2dNormActivation: 3-1              [1, 96, 56, 56]           (4,896)
│    │    └─Sequential: 3-2                        [1, 96, 56, 56]           (237,888)
│    │    └─Sequential: 3-3                        [1, 192, 28, 28]          (74,112)
│    │    └─Sequential: 3-4                        [1, 192, 28, 28]          (918,144)
│    │    └─Sequential: 3-5                        [1, 384, 14, 14]          (295,680)
│    │    └─Sequential: 3-6                        [1, 384, 14, 14]          (10,817,280)
├─TransformerEncoder: 1-2                          [196, 1, 384]             --
│    └─ModuleList: 2-2                             --           

In [4]:
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
sample_input = torch.randn(1, 3, 224, 224).to(DEVICE)

flops = profile_macs(model, sample_input)

print(flops / 1e9)

4.784007569


