### EfficientViT-M2 Model

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from transformers import AutoFeatureExtractor, SwinForImageClassification, get_scheduler
from sklearn.metrics import confusion_matrix, precision_score, recall_score
import seaborn as sns

# Ensure the output directory exists. `exist_ok=True` makes this cell idempotent and
# avoids the check-then-create race of a separate `os.path.exists` test.
os.makedirs('./outputs', exist_ok=True)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the pre-trained EfficientViT-M2 model.
# NOTE(review): the previous comment attributed this checkpoint to Google, but the
# `classification.model.build` import path looks like the microsoft/Cream
# EfficientViT repository — confirm the provenance.
from classification.model.build import EfficientViT_M2

# Number of output classes for the replacement classifier layer.
NUM_CLASSES = 10

model_efficientViT_M2 = EfficientViT_M2(pretrained='efficientvit_m2')

# Replace the head's linear layer so its output width matches NUM_CLASSES,
# reusing the input width of the layer being replaced.
head_in_features = model_efficientViT_M2.head.l.in_features
model_efficientViT_M2.head.l = nn.Linear(head_in_features, NUM_CLASSES)


In [3]:
# NOTE(review): `summary` is not called in the cells visible here, and the name is
# re-bound by `from torchinfo import summary` further down — confirm this import is
# still needed.
from torchsummary import summary

In [4]:
# Dump the module hierarchy (submodule names and their layer configuration) as text.
architecture_text = str(model_efficientViT_M2)
print(architecture_text)

EfficientViT(
  (patch_embed): Sequential(
    (0): Conv2d_BN(
      (c): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): ReLU()
    (2): Conv2d_BN(
      (c): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (3): ReLU()
    (4): Conv2d_BN(
      (c): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (5): ReLU()
    (6): Conv2d_BN(
      (c): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (blocks1): Sequential(
    (0): EfficientViTBlock(
      (dw0): Residual(


In [6]:
from fvcore.nn import FlopCountAnalysis, parameter_count

model = model_efficientViT_M2
model.eval()  # Switch to evaluation mode

# Build the probe tensor on whatever device the model currently lives on, so this cell
# still runs after the model has been moved with `.to(device)`.
# It is named `dummy_input` rather than `input` to avoid shadowing the Python builtin.
model_device = next(model.parameters()).device
dummy_input = torch.randn(1, 3, 224, 224, device=model_device)

flops = FlopCountAnalysis(model, dummy_input)
params = parameter_count(model)

print(f"FLOPs: {flops.total()}")
# NOTE(review): the '' key is expected to hold the whole-model total — confirm against
# the fvcore `parameter_count` documentation.
print(f"Parameters: {params['']}")

Unsupported operator aten::add encountered 64 time(s)
Unsupported operator aten::mul encountered 20 time(s)
Unsupported operator aten::softmax encountered 16 time(s)
Unsupported operator aten::mean encountered 2 time(s)
Unsupported operator aten::sigmoid encountered 2 time(s)


FLOPs: 203533056
Parameters: 3964804


In [None]:
from ptflops import get_model_complexity_info

model = model_efficientViT_M2

# The per-layer report printed by this call is expressed in "Mac" units
# (multiply-accumulates), so the first return value is named and labelled as MACs
# rather than FLOPs.
macs, params = get_model_complexity_info(
    model,
    (3, 224, 224),
    as_strings=True,
    print_per_layer_stat=True,
)

print(f"MACs: {macs}")
print(f"Parameters: {params}")



EfficientViT(
  3.96 M, 99.985% Params, 201.23 MMac, 99.555% MACs, 
  (patch_embed): Sequential(
    97.68 k, 2.464% Params, 49.87 MMac, 24.674% MACs, 
    (0): Conv2d_BN(
      464, 0.012% Params, 5.82 MMac, 2.880% MACs, 
      (c): Conv2d(432, 0.011% Params, 5.42 MMac, 2.681% MACs, 3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(32, 0.001% Params, 401.41 KMac, 0.199% MACs, 16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): ReLU(0, 0.000% Params, 200.7 KMac, 0.099% MACs, )
    (2): Conv2d_BN(
      4.67 k, 0.118% Params, 14.65 MMac, 7.248% MACs, 
      (c): Conv2d(4.61 k, 0.116% Params, 14.45 MMac, 7.149% MACs, 16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(64, 0.002% Params, 200.7 KMac, 0.099% MACs, 32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (3): ReLU(0, 0.000% Params, 100.35 KMac, 0.050% MACs, )
    (4): Conv2d_BN(
      18.

In [None]:
from thop import clever_format, profile

model = model_efficientViT_M2

# Create the probe tensor on the model's current device so the cell does not fail with
# a device mismatch once the model has been moved with `.to(device)`.
# It is named `dummy_input` rather than `input` to avoid shadowing the Python builtin.
model_device = next(model.parameters()).device
dummy_input = torch.randn(1, 3, 224, 224, device=model_device)

# `profile` returns raw counts; `clever_format` turns them into human-readable strings.
macs, params = profile(model, inputs=(dummy_input,))
macs, params = clever_format([macs, params], "%.3f")

print(f"MACs: {macs}")
print(f"Parameters: {params}")


[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm1d'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
MACs: 203.052M
Parameters: 3.964M


In [None]:
import torch
from torchinfo import summary

# `model_efficientViT_M2` must already exist (created in an earlier cell).
# Default to the CPU and switch to the GPU only when CUDA is available.
device = torch.device("cpu")
if torch.cuda.is_available():
    device = torch.device("cuda")

model = model_efficientViT_M2.to(device)

# Per-layer output shapes and parameter counts for a single 3x224x224 input.
summary(model, input_size=(1, 3, 224, 224), device=device)


Layer (type:depth-idx)                                       Output Shape              Param #
EfficientViT                                                 [1, 10]                   --
├─Sequential: 1-1                                            [1, 128, 14, 14]          --
│    └─Conv2d_BN: 2-1                                        [1, 16, 112, 112]         --
│    │    └─Conv2d: 3-1                                      [1, 16, 112, 112]         432
│    │    └─BatchNorm2d: 3-2                                 [1, 16, 112, 112]         32
│    └─ReLU: 2-2                                             [1, 16, 112, 112]         --
│    └─Conv2d_BN: 2-3                                        [1, 32, 56, 56]           --
│    │    └─Conv2d: 3-3                                      [1, 32, 56, 56]           4,608
│    │    └─BatchNorm2d: 3-4                                 [1, 32, 56, 56]           64
│    └─ReLU: 2-4                                             [1, 32, 56, 56]           --
│