In [None]:
!pip install torchmetrics ptflops

Collecting torchmetrics
  Downloading torchmetrics-1.4.0.post0-py3-none-any.whl (868 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m868.8/868.8 kB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting ptflops
  Downloading ptflops-0.7.3-py3-none-any.whl (18 kB)
Collecting lightning-utilities>=0.8.0 (from torchmetrics)
  Downloading lightning_utilities-0.11.3.post0-py3-none-any.whl (26 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.10.0->torchmetrics)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.10.0->torchmetrics)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.10.0->torchmetrics)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=1.10.0->torchmetrics)
  U

In [None]:
import subprocess
import os
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models, datasets, transforms
from torchvision.models import mobilenet_v2, mobilenet_v3_large, mobilenet_v3_small
from torch.optim import SGD
from torch.utils.data import DataLoader
from PIL import Image
import numpy as np
import psutil
import torchmetrics
from sklearn.metrics import confusion_matrix

# Function to get initial GPU memory usage
def initial_gpu_memory():
    """Release cached CUDA blocks and report the memory still allocated, in MB.

    Returns:
        The value of ``torch.cuda.memory_allocated()`` converted to MB after
        ``torch.cuda.empty_cache()`` has run, or ``0`` when CUDA is unavailable.
    """
    if not torch.cuda.is_available():
        return 0

    # Drop cached-but-unused blocks first so the baseline is taken from a clean allocator.
    torch.cuda.empty_cache()
    allocated_bytes = torch.cuda.memory_allocated()
    return allocated_bytes / (1024 * 1024)

# Function to calculate final GPU memory usage
def final_gpu_memory():
    """Report the CUDA memory currently allocated, in MB.

    Returns:
        ``torch.cuda.memory_allocated()`` converted to MB, or ``0`` when CUDA
        is unavailable. Unlike ``initial_gpu_memory`` the cache is not emptied.
    """
    bytes_per_mb = 1024 * 1024
    return torch.cuda.memory_allocated() / bytes_per_mb if torch.cuda.is_available() else 0

# Function to get GPU power usage
def get_gpu_power_usage():
    """Return the mean instantaneous power draw (watts) over the GPUs listed by nvidia-smi.

    Runs ``nvidia-smi --query-gpu=power.draw`` and averages one reading per
    output line.

    Returns:
        float: mean of the numeric readings, or ``0.0`` when ``nvidia-smi``
        cannot be launched, exits with a non-zero status, or reports no
        numeric reading. This mirrors the GPU memory helpers, which return 0
        when no GPU is available instead of raising.
    """
    try:
        result = subprocess.run(
            ['nvidia-smi', '--query-gpu=power.draw', '--format=csv,noheader,nounits'],
            stdout=subprocess.PIPE, text=True
        )
    except OSError:
        # nvidia-smi is not installed or not executable (e.g. CPU-only runtime).
        return 0.0

    if result.returncode != 0:
        return 0.0

    readings = []
    for line in result.stdout.splitlines():
        try:
            readings.append(float(line.strip()))
        except ValueError:
            # Non-numeric output such as "[N/A]" or an empty line: skip this entry.
            continue

    if not readings:
        return 0.0
    return sum(readings) / len(readings)

class VOCSegmentationCustom(datasets.VOCSegmentation):
    """Pascal VOC segmentation dataset that yields ``(image, mask)`` pairs.

    Args:
        root: Directory that holds (or will receive) the VOC archive.
        year: Dataset year forwarded to ``VOCSegmentation``.
        image_set: Split name forwarded to ``VOCSegmentation``.
        transform: Optional callable applied to the RGB image only.
        target_transform: Optional callable applied to the mask only.
        transforms: Optional callable taking ``(image, mask)`` and returning
            the transformed pair; used when neither ``transform`` nor
            ``target_transform`` is set.
        download: Whether to let torchvision download/extract the archive.
            Defaults to ``True`` because the previous positional call passed
            ``transform`` into the parent's ``download`` slot, so datasets
            built with a transform were always downloaded.
    """

    def __init__(self, root, year='2012', image_set='train', transform=None, target_transform=None,
                 transforms=None, download=True):
        # Forward by keyword: the parent's 4th positional parameter is `download`,
        # so a positional call shifts every transform argument by one slot.
        super(VOCSegmentationCustom, self).__init__(
            root,
            year=year,
            image_set=image_set,
            download=download,
            transform=transform,
            target_transform=target_transform,
            transforms=transforms,
        )

    def __getitem__(self, index):
        """Load sample ``index`` and return ``(image, mask)`` after the configured transforms."""
        img = Image.open(self.images[index]).convert('RGB')
        target = Image.open(self.masks[index])

        if self.transform is not None or self.target_transform is not None:
            # Separate pipelines, read at call time so later reassignment of
            # `transform` / `target_transform` is still honoured.
            if self.transform is not None:
                img = self.transform(img)
            if self.target_transform is not None:
                target = self.target_transform(target)
        elif self.transforms is not None:
            # Joint transform: receives both items so spatial augmentations stay aligned.
            img, target = self.transforms(img, target)

        return img, target

# Transforms
# Channel statistics of ImageNet: the pretrained torchvision MobileNet weights
# loaded by this notebook expect inputs normalised with these values.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Index used by the VOC masks for "void"/boundary pixels.
VOID_LABEL = 255

# No RandomHorizontalFlip here: image and mask are transformed by two
# independent pipelines, so a random flip on the image alone would leave the
# mask un-flipped and misalign the labels (and it would also perturb the
# validation split, which shares this transform).
transformer = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD)
])


def mask_to_class_indices(mask):
    """Convert a VOC palette mask (PIL image) to an ``(H, W)`` int64 tensor of class indices.

    ``ToTensor`` must not be used for masks: it rescales uint8 data to
    ``[0, 1]``, so a following ``.long()`` collapses every class to 0.
    Void pixels (255) are folded into background (0) so that every label is a
    valid class index; use a loss and metrics configured with
    ``ignore_index=255`` instead if void pixels should be excluded.
    """
    labels = torch.from_numpy(np.array(mask, dtype=np.int64))
    labels[labels == VOID_LABEL] = 0
    return labels


target_transformer = transforms.Compose([
    # Nearest-neighbour keeps the resized mask restricted to existing class indices.
    transforms.Resize((224, 224), interpolation=transforms.InterpolationMode.NEAREST),
    transforms.Lambda(mask_to_class_indices)
])

# Initialize custom dataset
train_dataset = VOCSegmentationCustom(
    root='./data', year='2012', image_set='train', transform=transformer, target_transform=target_transformer)
test_dataset = VOCSegmentationCustom(
    root='./data', year='2012', image_set='val', transform=transformer, target_transform=target_transformer)

train_loader = DataLoader(train_dataset, batch_size=10, shuffle=True)
# Evaluation does not benefit from shuffling; a fixed order keeps runs comparable.
test_loader = DataLoader(test_dataset, batch_size=10, shuffle=False)

# Custom IntermediateLayerGetter
class IntermediateLayerGetter(nn.ModuleDict):
    """Run a model's direct children in order and collect selected intermediate outputs.

    Args:
        model: Module whose direct children are applied sequentially.
        return_layers: Mapping ``{child_name: output_name}`` naming the
            children whose outputs should be returned and the key to return
            each one under.

    Raises:
        ValueError: if a key of ``return_layers`` is not a direct child of ``model``.

    Every child up to and including the last requested one is kept, so layers
    that sit between two requested layers still run; children after the last
    requested layer are dropped.
    """

    def __init__(self, model, return_layers):
        if not set(return_layers).issubset([name for name, _ in model.named_children()]):
            raise ValueError("return_layers are not present in model")

        orig_return_layers = return_layers
        # Names still waiting to be reached while walking the children in order.
        pending = {str(k): str(v) for k, v in return_layers.items()}

        # Ordered (name, module) pairs: ModuleDict preserves the order of an
        # iterable of pairs, which is what forward() relies on.
        layers = []
        for name, module in model.named_children():
            if not pending:
                break
            layers.append((name, module))
            pending.pop(name, None)

        super(IntermediateLayerGetter, self).__init__(layers)
        self.return_layers = orig_return_layers

    def forward(self, x):
        """Feed ``x`` through the kept children; return ``{output_name: tensor}`` for requested layers."""
        out = {}
        for name, module in self.named_children():
            x = module(x)
            if name in self.return_layers:
                out_name = self.return_layers[name]
                out[out_name] = x
        return out

# ASPP (Atrous Spatial Pyramid Pooling) Module
class ASPP(nn.Module):
    """Atrous Spatial Pyramid Pooling head.

    Five parallel branches are applied to the input: a 1x1 convolution, three
    dilated 3x3 convolutions and a globally pooled branch. Each is followed by
    its own BatchNorm and ReLU6. The branches are concatenated on the channel
    axis and fused by a final 1x1 convolution + BatchNorm + ReLU6.

    Args:
        in_channels: Channels of the incoming feature map.
        out_channels: Channels produced by every branch and by the fused output.
        output_stride: ``16`` selects dilations (6, 12, 18); ``8`` selects (12, 24, 36).

    Raises:
        ValueError: if ``output_stride`` is neither 8 nor 16.
    """

    def __init__(self, in_channels, out_channels, output_stride):
        super(ASPP, self).__init__()
        self.act = nn.ReLU6()

        # One BatchNorm per branch (1-5) plus one for the fused projection (6).
        for branch_id in range(1, 7):
            setattr(self, f'bn_{branch_id}', nn.BatchNorm2d(out_channels))

        dilations_by_stride = {16: (6, 12, 18), 8: (12, 24, 36)}
        if output_stride not in dilations_by_stride:
            raise ValueError('Output stride must be 8 or 16')

        self.operation_1 = nn.Conv2d(in_channels, out_channels, kernel_size=1)
        # padding == dilation keeps the spatial size unchanged for a 3x3 kernel.
        for branch_id, rate in enumerate(dilations_by_stride[output_stride], start=2):
            dilated_conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=rate, dilation=rate)
            setattr(self, f'operation_{branch_id}', dilated_conv)

        self.pool = nn.AdaptiveAvgPool2d((1))
        self.conv_pool = nn.Conv2d(in_channels, out_channels, kernel_size=1)
        self.conv = nn.Conv2d(out_channels * 5, out_channels, kernel_size=1)

    def forward(self, x):
        """Return the fused ASPP features; spatial size matches ``x``."""
        conv_branches = (
            (self.operation_1, self.bn_1),
            (self.operation_2, self.bn_2),
            (self.operation_3, self.bn_3),
            (self.operation_4, self.bn_4),
        )
        branch_outputs = [self.act(norm(conv(x))) for conv, norm in conv_branches]

        # Image-level branch: pool to 1x1, project, then stretch back to the input size.
        pooled = self.act(self.bn_5(self.conv_pool(self.pool(x))))
        pooled = F.interpolate(pooled, size=x.size()[2:], mode='bilinear', align_corners=True)
        branch_outputs.append(pooled)

        fused = torch.cat(branch_outputs, dim=1)
        return self.act(self.bn_6(self.conv(fused)))

# Deeplab Decoder
class Deeplab(nn.Module):
    """Decoder that fuses ASPP-processed high-level features with reduced low-level features.

    Args:
        low_feat_ch: Channels of the low-level feature map.
        high_feat_ch: Channels of the high-level feature map fed to ASPP.
        num_classes: Number of output channels (one score map per class).
        output_stride: Forwarded to ``ASPP`` to choose its dilation rates.
    """

    def __init__(self, low_feat_ch, high_feat_ch, num_classes, output_stride):
        super(Deeplab, self).__init__()
        aspp_channels = 256
        reduced_low_channels = 48

        self.aspp = ASPP(high_feat_ch, aspp_channels, output_stride)
        # 1x1 projection that shrinks the low-level features before concatenation.
        self.low_conv = nn.Conv2d(low_feat_ch, reduced_low_channels, kernel_size=1)
        self.low_bn = nn.BatchNorm2d(reduced_low_channels)
        self.act = nn.ReLU6()

        head_layers = [
            # Input is the channel-wise concatenation: 256 + 48 = 304.
            nn.Conv2d(aspp_channels + reduced_low_channels, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, num_classes, kernel_size=1),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, low_features, high_features):
        """Return per-class score maps at the spatial size of ``low_features``."""
        context = self.aspp(high_features)
        detail = self.act(self.low_bn(self.low_conv(low_features)))
        # Bring the ASPP output up to the low-level resolution before concatenating.
        context = F.interpolate(context, size=detail.size()[2:], mode='bilinear', align_corners=True)
        merged = torch.cat([context, detail], dim=1)
        return self.classifier(merged)

# Backbone Loader
def backbone_loader(model_name):
    """Build a two-stage pretrained MobileNet feature extractor.

    The backbone's ``features`` stack is split into a low-level and a
    high-level stage, which are wrapped in an ``IntermediateLayerGetter`` that
    returns both outputs.

    Args:
        model_name: ``'mobilenet_v2'``, ``'mobilenet_v3_large'`` or
            ``'mobilenet_v3_small'``.

    Returns:
        tuple: ``(backbone, low_feat_ch, high_feat_ch)`` where ``backbone``
        maps an input to ``{'low_level': ..., 'out': ...}`` and the two
        integers are the channel counts configured for those outputs.

    Raises:
        ValueError: if ``model_name`` is not one of the supported names.
    """
    if model_name == 'mobilenet_v2':
        features = mobilenet_v2(pretrained=True).features
        low_level_features = features[:4]
        # The final block of `features` is left out of the high-level stage.
        high_level_features = features[4:-1]
        low_feat_ch = 24
        high_feat_ch = 320
    elif model_name == 'mobilenet_v3_large':
        features = mobilenet_v3_large(pretrained=True).features
        low_level_features = features[:7]
        high_level_features = features[7:]
        low_feat_ch = 40
        high_feat_ch = 960
    elif model_name == 'mobilenet_v3_small':
        features = mobilenet_v3_small(pretrained=True).features
        low_level_features = features[:4]
        high_level_features = features[4:]
        low_feat_ch = 24
        high_feat_ch = 576  # Adjust based on the backbone architecture
    else:
        # Fail with a clear message instead of an UnboundLocalError further down.
        raise ValueError(
            f"Unsupported backbone {model_name!r}; expected 'mobilenet_v2', "
            "'mobilenet_v3_large' or 'mobilenet_v3_small'"
        )

    return_layers = {'high_level_features': 'out', 'low_level_features': 'low_level'}
    # Ordered pairs make the execution order (low-level first) explicit.
    stages = nn.ModuleDict([
        ('low_level_features', low_level_features),
        ('high_level_features', high_level_features),
    ])
    backbone = IntermediateLayerGetter(stages, return_layers=return_layers)
    return backbone, low_feat_ch, high_feat_ch

# Segmentation Model
class SegmentationCustom(nn.Module):
    """Segmentation network: MobileNet feature extractor followed by the ``Deeplab`` decoder.

    Args:
        num_classes: Number of score maps produced per pixel.
        output_stride: Forwarded to the decoder (8 or 16).
        model_name: Backbone identifier understood by ``backbone_loader``.
    """

    def __init__(self, num_classes, output_stride, model_name):
        super().__init__()
        extractor, low_channels, high_channels = backbone_loader(model_name)
        self.feature_extractor = extractor
        self.deeplab = Deeplab(
            low_feat_ch=low_channels,
            high_feat_ch=high_channels,
            num_classes=num_classes,
            output_stride=output_stride,
        )

    def forward(self, x):
        """Return score maps resized to the height and width of ``x``."""
        input_hw = x.shape[2:]
        feature_maps = self.feature_extractor(x)
        score_maps = self.deeplab(feature_maps['low_level'], feature_maps['out'])
        # The decoder works at the low-level feature resolution; upsample back to the input size.
        return F.interpolate(score_maps, size=input_hw, mode='bilinear', align_corners=True)

# Build one segmentation network per backbone; all share the same head configuration.
_segmentation_kwargs = dict(num_classes=21, output_stride=16)
mobilenetv2_model = SegmentationCustom(model_name='mobilenet_v2', **_segmentation_kwargs)
mobilenetv3_large_model = SegmentationCustom(model_name='mobilenet_v3_large', **_segmentation_kwargs)
mobilenetv3_small_model = SegmentationCustom(model_name='mobilenet_v3_small', **_segmentation_kwargs)

# Move every network to the GPU when one is present.
if torch.cuda.is_available():
    for _net in (mobilenetv2_model, mobilenetv3_large_model, mobilenetv3_small_model):
        _net.cuda()

# Shared loss, and one SGD optimizer per network with identical hyper-parameters.
criterion = nn.CrossEntropyLoss()
_sgd_kwargs = dict(lr=0.001, momentum=0.9, weight_decay=1e-4)
optimizer_v2 = SGD(mobilenetv2_model.parameters(), **_sgd_kwargs)
optimizer_v3_large = SGD(mobilenetv3_large_model.parameters(), **_sgd_kwargs)
optimizer_v3_small = SGD(mobilenetv3_small_model.parameters(), **_sgd_kwargs)

# Function to calculate mIoU and PA
def calculate_metrics(predictions, labels, num_classes):
    """Compute the Jaccard index (reported as mIoU) and accuracy (reported as pixel accuracy).

    Both metrics are fresh torchmetrics multiclass objects with their default
    settings, moved to the device of ``predictions``.

    Args:
        predictions: Predicted class indices.
        labels: Ground-truth class indices, same shape as ``predictions``.
        num_classes: Number of classes passed to both metrics.

    Returns:
        tuple: ``(miou, pa)`` as returned by the two metric objects.
    """
    device = predictions.device
    jaccard = torchmetrics.JaccardIndex(task='multiclass', num_classes=num_classes).to(device)
    pixel_accuracy = torchmetrics.Accuracy(task='multiclass', num_classes=num_classes).to(device)
    return jaccard(predictions, labels), pixel_accuracy(predictions, labels)

# Train and Evaluate function with additional metrics and memory usage
def train_and_evaluate(model, optimizer, train_loader, test_loader, epochs=50, save_path=None,
                       num_classes=21):
    """Train ``model``, optionally save it, then evaluate it on ``test_loader``.

    Training uses the module-level ``criterion``. Progress, wall-clock times
    and a two-sample power estimate (mean of the readings taken before and
    after each phase) are printed.

    Args:
        model: Network to optimise; expected to already be on the GPU when CUDA is available.
        optimizer: Optimizer bound to ``model``'s parameters.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        test_loader: Iterable of ``(inputs, labels)`` evaluation batches.
        epochs: Number of passes over ``train_loader``.
        save_path: When truthy, the trained ``state_dict`` is written there.
        num_classes: Number of classes used for the mIoU / pixel-accuracy metrics.

    Returns:
        tuple: ``(accuracy, avg_inference_time, gpu_mem_usage, miou, pa)``.

        * ``accuracy`` - fraction of label elements predicted correctly.
        * ``avg_inference_time`` - mean forward-pass time per evaluation batch, in seconds.
        * ``gpu_mem_usage`` - allocated GPU memory after evaluation minus the
          value before it, in MB; predictions and labels are accumulated on
          the device, so they are part of this figure.
        * ``miou`` / ``pa`` - values returned by ``calculate_metrics``.
    """
    use_cuda = torch.cuda.is_available()

    def _timestamp():
        # CUDA work is queued asynchronously; wait for it to finish so the
        # timestamp reflects completed computation rather than kernel launches.
        if use_cuda:
            torch.cuda.synchronize()
        return time.time()

    # ---- Training ----
    model.train()
    train_start = _timestamp()
    initial_power = get_gpu_power_usage()
    for epoch in range(epochs):
        running_loss = 0.0
        for inputs, labels in train_loader:
            if use_cuda:
                inputs, labels = inputs.cuda(), labels.cuda()

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        avg_loss = running_loss / len(train_loader)
        print(f'Epoch {epoch + 1}, Loss: {avg_loss}')

    train_end = _timestamp()
    final_power = get_gpu_power_usage()
    print(f'Training Time: {train_end - train_start:.2f} seconds')
    print(f'Average Power Consumption during Training: {(initial_power + final_power) / 2:.2f} W')

    # Save the trained model
    if save_path:
        torch.save(model.state_dict(), save_path)
        print(f'Model saved to {save_path}')

    # ---- Evaluation ----
    model.eval()
    initial_gpu_mem = initial_gpu_memory()
    initial_power = get_gpu_power_usage()
    correct = 0
    total = 0
    total_time = 0
    all_predictions = []
    all_labels = []

    # Separate names for the whole-phase and per-batch timers: sharing one
    # variable made the reported evaluation time cover only the last batch.
    eval_start = _timestamp()
    with torch.no_grad():
        for inputs, labels in test_loader:
            if use_cuda:
                inputs, labels = inputs.cuda(), labels.cuda()

            batch_start = _timestamp()
            outputs = model(inputs)
            batch_end = _timestamp()
            total_time += (batch_end - batch_start)

            # Class with the highest score along the channel dimension.
            predicted = outputs.argmax(dim=1)
            total += labels.nelement()
            correct += (predicted == labels).sum().item()

            all_predictions.append(predicted)
            all_labels.append(labels)

    eval_end = _timestamp()
    final_power = get_gpu_power_usage()
    print(f'Evaluation Time: {eval_end - eval_start:.2f} seconds')
    print(f'Average Power Consumption during Evaluation: {(initial_power + final_power) / 2:.2f} W')

    # Calculate metrics
    accuracy = correct / total
    avg_inference_time = total_time / len(test_loader)
    gpu_mem_usage = final_gpu_memory() - initial_gpu_mem  # Final GPU memory usage

    all_predictions = torch.cat(all_predictions)
    all_labels = torch.cat(all_labels)

    miou, pa = calculate_metrics(all_predictions, all_labels, num_classes=num_classes)

    return accuracy, avg_inference_time, gpu_mem_usage, miou, pa

# Train and evaluate models
# Each call trains one network with its own optimizer, writes the resulting
# state_dict to `save_path`, and returns the tuple
# (accuracy, avg_inference_time, gpu_mem_usage, miou, pa).
# The runs are sequential; results of finished runs stay available if a later run is interrupted.
print("Training MobileNetV2...")
metrics_v2 = train_and_evaluate(mobilenetv2_model, optimizer_v2, train_loader, test_loader, save_path='mobilenetv2_model.pth')
print("Training MobileNetV3 Large...")
metrics_v3_large = train_and_evaluate(mobilenetv3_large_model, optimizer_v3_large, train_loader, test_loader, save_path='mobilenetv3_large_model.pth')
print("Training MobileNetV3 Small...")
metrics_v3_small = train_and_evaluate(mobilenetv3_small_model, optimizer_v3_small, train_loader, test_loader, save_path='mobilenetv3_small_model.pth')

# Print results including memory usage
# Tuple positions: [0] accuracy, [1] mean forward-pass time per evaluation batch,
# [2] allocated GPU memory after evaluation minus the value before it (MB),
# [3] mIoU, [4] pixel accuracy.
print(f'MobileNetV2 - Accuracy: {metrics_v2[0]}, Inference Time: {metrics_v2[1]}, Final GPU Memory Usage: {metrics_v2[2]} MB, mIoU: {metrics_v2[3]}, Pixel Accuracy: {metrics_v2[4]}')
print(f'MobileNetV3 Large - Accuracy: {metrics_v3_large[0]}, Inference Time: {metrics_v3_large[1]}, Final GPU Memory Usage: {metrics_v3_large[2]} MB, mIoU: {metrics_v3_large[3]}, Pixel Accuracy: {metrics_v3_large[4]}')
print(f'MobileNetV3 Small - Accuracy: {metrics_v3_small[0]}, Inference Time: {metrics_v3_small[1]}, Final GPU Memory Usage: {metrics_v3_small[2]} MB, mIoU: {metrics_v3_small[3]}, Pixel Accuracy: {metrics_v3_small[4]}')


Downloading http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar to ./data/VOCtrainval_11-May-2012.tar


100%|██████████| 1999639040/1999639040 [00:08<00:00, 244497070.88it/s]


Extracting ./data/VOCtrainval_11-May-2012.tar to ./data
Using downloaded and verified file: ./data/VOCtrainval_11-May-2012.tar
Extracting ./data/VOCtrainval_11-May-2012.tar to ./data


Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth
100%|██████████| 13.6M/13.6M [00:00<00:00, 168MB/s]
Downloading: "https://download.pytorch.org/models/mobilenet_v3_large-8738ca79.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v3_large-8738ca79.pth
100%|██████████| 21.1M/21.1M [00:00<00:00, 145MB/s]
Downloading: "https://download.pytorch.org/models/mobilenet_v3_small-047dcff4.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v3_small-047dcff4.pth
100%|██████████| 9.83M/9.83M [00:00<00:00, 140MB/s]


Training MobileNetV2...


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
  return F.conv2d(input, weight, bias, self.stride,


Epoch 1, Loss: 0.4977114103278335
Epoch 2, Loss: 0.22314808431531297
Epoch 3, Loss: 0.20432909162474328
Epoch 4, Loss: 0.19756684178600506
Epoch 5, Loss: 0.19191396646961875
Epoch 6, Loss: 0.18708985810782633
Epoch 7, Loss: 0.18285848083747488
Epoch 8, Loss: 0.18126241639763319
Epoch 9, Loss: 0.179455501540583
Epoch 10, Loss: 0.17614381562690345
Epoch 11, Loss: 0.17356617520658338
Epoch 12, Loss: 0.17257261808429444
Epoch 13, Loss: 0.170735413930854
Epoch 14, Loss: 0.17043988001184399
Epoch 15, Loss: 0.16803126848068367
Epoch 16, Loss: 0.16709178589841947
Epoch 17, Loss: 0.16609032264574855
Epoch 18, Loss: 0.16564019561624851
Epoch 19, Loss: 0.1638390339252089
Epoch 20, Loss: 0.16324953086116686
Epoch 21, Loss: 0.16314644412118562
Epoch 22, Loss: 0.16219873559110018
Epoch 23, Loss: 0.16212120794114612
Epoch 24, Loss: 0.15946678325635236
Epoch 25, Loss: 0.15966355162007467
Epoch 26, Loss: 0.15939698981590011
Epoch 27, Loss: 0.15746194017784937
Epoch 28, Loss: 0.1578534082794676
Epoch 29

  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
  return F.conv2d(input, weight, bias, self.stride,


Epoch 1, Loss: 0.464738916377632
Epoch 2, Loss: 0.22474534495347212
Epoch 3, Loss: 0.2065529067094634
Epoch 4, Loss: 0.19984484773103883
Epoch 5, Loss: 0.19497506984439836
Epoch 6, Loss: 0.19335129000499945
Epoch 7, Loss: 0.18864444562164293
Epoch 8, Loss: 0.18630787750490668
Epoch 9, Loss: 0.183396106522505
Epoch 10, Loss: 0.1813302665543394
Epoch 11, Loss: 0.17980891699288168
Epoch 12, Loss: 0.17718416300355172
Epoch 13, Loss: 0.1756835028934641
Epoch 14, Loss: 0.17331749139999858
Epoch 15, Loss: 0.17168548120325114
Epoch 16, Loss: 0.16997396677326995
Epoch 17, Loss: 0.1692543362780493
Epoch 18, Loss: 0.16728784123651025
Epoch 19, Loss: 0.16764896185625167
Epoch 20, Loss: 0.1664836503007785
Epoch 21, Loss: 0.1655741541766796
Epoch 22, Loss: 0.1655617095276612
Epoch 23, Loss: 0.16306705129187124
Epoch 24, Loss: 0.16274447282966303
Epoch 25, Loss: 0.16261317109575077
Epoch 26, Loss: 0.161859655512028
Epoch 27, Loss: 0.16069733336263772
Epoch 28, Loss: 0.16054123077465562
Epoch 29, Loss

  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
  return F.conv2d(input, weight, bias, self.stride,


Epoch 1, Loss: 0.5017459670297143
Epoch 2, Loss: 0.2269545438743773
Epoch 3, Loss: 0.2101893483274648
Epoch 4, Loss: 0.20235392324575763
Epoch 5, Loss: 0.1978967999722682
Epoch 6, Loss: 0.19401469922998324
Epoch 7, Loss: 0.19190215582953019
Epoch 8, Loss: 0.1905807167494378
Epoch 9, Loss: 0.18786780191522065
Epoch 10, Loss: 0.18639069547255835
Epoch 11, Loss: 0.18414665692720283
Epoch 12, Loss: 0.18249117719883823
Epoch 13, Loss: 0.18163041152110715
Epoch 14, Loss: 0.17916157892366655
Epoch 15, Loss: 0.17904339097186822
Epoch 16, Loss: 0.17763850903835426
Epoch 17, Loss: 0.17791026496157356
Epoch 18, Loss: 0.17567496645410044
Epoch 19, Loss: 0.17599069768068742
Epoch 20, Loss: 0.17374646009839312
Epoch 21, Loss: 0.17367673949116752
Epoch 22, Loss: 0.17370940871587415
Epoch 23, Loss: 0.17263982429796335
Epoch 24, Loss: 0.17101545454490752
Epoch 25, Loss: 0.17073241991250693
Epoch 26, Loss: 0.1690044801960997
Epoch 27, Loss: 0.16979936576213966
Epoch 28, Loss: 0.16787661956686553
Epoch 2

In [None]:
import torch

def count_parameters(model):
    """Return the total number of elements across the parameters of ``model`` that require gradients."""
    trainable_total = 0
    for param in model.parameters():
        if param.requires_grad:
            trainable_total += param.numel()
    return trainable_total

# Rebuild the MobileNetV3-Small network and restore the weights saved by the training cell.
model_path = 'mobilenetv3_small_model.pth'
model = SegmentationCustom(num_classes=21, output_stride=16, model_name='mobilenet_v3_small')

# The checkpoint is written from a model that is moved to the GPU whenever
# CUDA is available, so its tensors may be CUDA tensors. `model` lives on the
# CPU here; map_location keeps the load working on CPU-only runtimes too.
# NOTE(review): torch.load unpickles the file - only load checkpoints you trust.
state_dict = torch.load(model_path, map_location='cpu')
model.load_state_dict(state_dict)

total_params = count_parameters(model)
print(f"Total trainable parameters: {total_params}")

Total trainable parameters: 6243397




In [None]:
import torch
from ptflops import get_model_complexity_info

# Profile the MobileNetV2-backed segmentation network with ptflops.
model = SegmentationCustom(model_name='mobilenet_v2', output_stride=16, num_classes=21)

# Input size handed to ptflops (adjust as needed)
input_res = (3, 224, 224)

# Calculate FLOPs and parameters; the per-layer breakdown is printed as a side effect.
macs, params = get_model_complexity_info(
    model,
    input_res,
    as_strings=True,
    print_per_layer_stat=True,
    verbose=True,
)

for _label, _value in (('Computational complexity: ', macs), ('Number of parameters: ', params)):
    print('{:<30}  {:<8}'.format(_label, _value))

SegmentationCustom(
  5.23 M, 100.000% Params, 2.65 GMac, 99.967% MACs, 
  (feature_extractor): IntermediateLayerGetter(
    1.81 M, 34.657% Params, 298.7 MMac, 11.275% MACs, 
    (low_level_features): Sequential(
      15.79 k, 0.302% Params, 86.15 MMac, 3.252% MACs, 
      (0): Conv2dNormActivation(
        928, 0.018% Params, 12.04 MMac, 0.455% MACs, 
        (0): Conv2d(864, 0.017% Params, 10.84 MMac, 0.409% MACs, 3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(64, 0.001% Params, 802.82 KMac, 0.030% MACs, 32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU6(0, 0.000% Params, 401.41 KMac, 0.015% MACs, inplace=True)
      )
      (1): InvertedResidual(
        896, 0.017% Params, 11.64 MMac, 0.439% MACs, 
        (conv): Sequential(
          896, 0.017% Params, 11.64 MMac, 0.439% MACs, 
          (0): Conv2dNormActivation(
            352, 0.007% Params, 4.82 MMac, 0.182% MACs, 
            (0): Conv2d(

In [None]:
import torch
from ptflops import get_model_complexity_info

# Profile the MobileNetV3-Large-backed segmentation network with ptflops.
model = SegmentationCustom(model_name='mobilenet_v3_large', output_stride=16, num_classes=21)

# Input size handed to ptflops (adjust as needed)
input_res = (3, 224, 224)

# Calculate FLOPs and parameters; the per-layer breakdown is printed as a side effect.
macs, params = get_model_complexity_info(
    model,
    input_res,
    as_strings=True,
    print_per_layer_stat=True,
    verbose=True,
)

for _label, _value in (('Computational complexity: ', macs), ('Number of parameters: ', params)):
    print('{:<30}  {:<8}'.format(_label, _value))

Collecting ptflops
  Downloading ptflops-0.7.3-py3-none-any.whl (18 kB)
Installing collected packages: ptflops
Successfully installed ptflops-0.7.3
SegmentationCustom(
  11.14 M, 100.000% Params, 1.14 GMac, 99.718% MACs, 
  (feature_extractor): IntermediateLayerGetter(
    2.97 M, 26.678% Params, 225.8 MMac, 19.831% MACs, 
    (low_level_features): Sequential(
      61.12 k, 0.549% Params, 80.12 MMac, 7.037% MACs, 
      (0): Conv2dNormActivation(
        464, 0.004% Params, 5.82 MMac, 0.511% MACs, 
        (0): Conv2d(432, 0.004% Params, 5.42 MMac, 0.476% MACs, 3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(32, 0.000% Params, 401.41 KMac, 0.035% MACs, 16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
        (2): Hardswish(0, 0.000% Params, 0.0 Mac, 0.000% MACs, )
      )
      (1): InvertedResidual(
        464, 0.004% Params, 6.02 MMac, 0.529% MACs, 
        (block): Sequential(
          464, 0.004% Params, 6.02 MMa

In [None]:
import torch
from ptflops import get_model_complexity_info

# Profile the MobileNetV3-Small-backed segmentation network with ptflops.
model = SegmentationCustom(model_name='mobilenet_v3_small', output_stride=16, num_classes=21)

# Input size handed to ptflops (adjust as needed)
input_res = (3, 224, 224)

# Calculate FLOPs and parameters; the per-layer breakdown is printed as a side effect.
macs, params = get_model_complexity_info(
    model,
    input_res,
    as_strings=True,
    print_per_layer_stat=True,
    verbose=True,
)

for _label, _value in (('Computational complexity: ', macs), ('Number of parameters: ', params)):
    print('{:<30}  {:<8}'.format(_label, _value))

SegmentationCustom(
  6.24 M, 100.000% Params, 832.49 MMac, 99.876% MACs, 
  (feature_extractor): IntermediateLayerGetter(
    927.01 k, 14.848% Params, 58.48 MMac, 7.016% MACs, 
    (low_level_features): Sequential(
      10.49 k, 0.168% Params, 18.12 MMac, 2.174% MACs, 
      (0): Conv2dNormActivation(
        464, 0.007% Params, 5.82 MMac, 0.698% MACs, 
        (0): Conv2d(432, 0.007% Params, 5.42 MMac, 0.650% MACs, 3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(32, 0.001% Params, 401.41 KMac, 0.048% MACs, 16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
        (2): Hardswish(0, 0.000% Params, 0.0 Mac, 0.000% MACs, )
      )
      (1): InvertedResidual(
        744, 0.012% Params, 1.56 MMac, 0.187% MACs, 
        (block): Sequential(
          744, 0.012% Params, 1.56 MMac, 0.187% MACs, 
          (0): Conv2dNormActivation(
            176, 0.003% Params, 602.11 KMac, 0.072% MACs, 
            (0): Conv2d(144, 0.00