In [1]:
import torch
import torchvision.models as models
import torchprofile
import time
import pandas as pd
import matplotlib.pyplot as plt
import torch.nn as nn
from torchsummary import summary

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Set device to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
model = models.mobilenet_v2(pretrained=True)
model.classifier[1] = nn.Linear(in_features=1280, out_features=2)
model.to(device)
summary(model, (3, 256, 256), batch_size=64)



----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [64, 32, 128, 128]             864
       BatchNorm2d-2         [64, 32, 128, 128]              64
             ReLU6-3         [64, 32, 128, 128]               0
            Conv2d-4         [64, 32, 128, 128]             288
       BatchNorm2d-5         [64, 32, 128, 128]              64
             ReLU6-6         [64, 32, 128, 128]               0
            Conv2d-7         [64, 16, 128, 128]             512
       BatchNorm2d-8         [64, 16, 128, 128]              32
  InvertedResidual-9         [64, 16, 128, 128]               0
           Conv2d-10         [64, 96, 128, 128]           1,536
      BatchNorm2d-11         [64, 96, 128, 128]             192
            ReLU6-12         [64, 96, 128, 128]               0
           Conv2d-13           [64, 96, 64, 64]             864
      BatchNorm2d-14           [64, 96,

In [4]:
input_tensor = torch.randn(1, 3, 256, 256).to(device)

https://github.com/zhijian-liu/torchprofile

In [5]:
# Warm up the GPU
with torch.no_grad():
    for _ in range(10):
        _ = model(input_tensor)

# Measure FLOPs
with torch.no_grad():
    macs = torchprofile.profile_macs(model, args=(input_tensor,))
    flops = 2 * macs  # Convert MACs to FLOPs

# Measure inference time
with torch.no_grad():
    start_time = time.time()
    for _ in range(100):  # Run multiple inferences
        _ = model(input_tensor)
    end_time = time.time()

# Calculate elapsed time per inference and throughput
elapsed_time = (end_time - start_time) / 100  # Average inference time per run
tops = (flops / elapsed_time) / 1e12  # Throughput in TOPs

In [6]:
print(f"FLOPs: {flops / 1e9:.2f} GFLOPs, "
        f"Inference Time: {elapsed_time * 1000:.2f} ms, Throughput: {tops:.2f} TOPs")

FLOPs: 0.80 GFLOPs, Inference Time: 9.10 ms, Throughput: 0.09 TOPs
