<a href="https://colab.research.google.com/github/mobarakol/tutorial_notebooks/blob/main/flops.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Calculating FLOPS using ptflops:
reference: https://pypi.org/project/ptflops/

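Installing the ptflops library<br>
GFLOPs ≈ 2 × GMACs (one multiply–accumulate = one multiplication + one addition) <br>
MAC = multiply–accumulate operation <br>
FLOPs = floating-point operations (the count for one forward pass, which is what is measured here); not to be confused with FLOPS = floating-point operations per second, a hardware throughput rate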

In [1]:
# %pip installs into the environment of the running kernel; a plain `!pip`
# uses whichever pip is first on the shell PATH.
%pip install -q ptflops

  Building wheel for ptflops (setup.py) ... [?25l[?25hdone


In [49]:
import torchvision.models as models
import torch
from ptflops import get_model_complexity_info

# The model is created on the CPU and never moved to a GPU, so selecting CUDA
# device 0 is unnecessary; without that context this cell also runs on
# CPU-only runtimes.
net = models.densenet121()
flops, params = get_model_complexity_info(net, (3, 224, 224), as_strings=True,
                                          print_per_layer_stat=False, verbose=True)
# With as_strings=True the complexity comes back as a formatted string in MAC
# units (e.g. "2.88 GMac"); multiply the number by 2 for an approximate GFLOPs figure.
print('{:<30}  {:<8}'.format('Flops: ', flops))
print('{:<30}  {:<8}'.format('Number of parameters: ', params))

Flops:                          2.88 GMac
Number of parameters:           7.98 M  


Ref: https://arxiv.org/pdf/1905.11946.pdf

In [48]:
import torchvision.models as models
import torch
from ptflops import get_model_complexity_info

# Profile each architecture with the same code path instead of one copy-pasted
# block per model. The models stay on the CPU, so no CUDA device context is
# needed and the cell also runs on CPU-only runtimes.
for label, build_model in (('Densenet169', models.densenet169),
                           ('resnet50', models.resnet50)):
    net = build_model()
    flops, params = get_model_complexity_info(net, (3, 224, 224), as_strings=True,
                                              print_per_layer_stat=False, verbose=True)
    # Printed text is identical to the per-model format strings used before:
    # "<label> " followed by 'Flops: ' left-aligned in 22 columns.
    print('{} {:<22}  {:<8}'.format(label, 'Flops: ', flops))
    print('{:<30}  {:<8}'.format('Number of parameters: ', params))

Densenet169 Flops:                  3.42 GMac
Number of parameters:           14.15 M 
resnet50 Flops:                  4.12 GMac
Number of parameters:           25.56 M 


# Using fvcore:
reference: https://github.com/facebookresearch/fvcore/blob/main/docs/flop_count.md

In [30]:
# %pip installs into the environment of the running kernel; a plain `!pip`
# uses whichever pip is first on the shell PATH.
%pip install -q fvcore

[K     |████████████████████████████████| 55 kB 2.5 MB/s 
[K     |████████████████████████████████| 596 kB 27.5 MB/s 
[?25h  Building wheel for fvcore (setup.py) ... [?25l[?25hdone


In [42]:
from fvcore.nn import FlopCountAnalysis
net = models.densenet121()
# Named `dummy_input` so the builtin `input()` is not shadowed for the rest of
# the kernel session.
dummy_input = torch.rand([1, 3, 224, 224])
flops = FlopCountAnalysis(net, dummy_input)
# Divide by 10^9 to report the total in units of one billion operations.
print(flops.total()/1000000000)

Unsupported operator aten::max_pool2d encountered 1 time(s)
Unsupported operator aten::avg_pool2d encountered 3 time(s)


2.91254912


# Using thop:
ref: https://github.com/Lyken17/pytorch-OpCounter

In [34]:
# %pip installs into the environment of the running kernel; a plain `!pip`
# uses whichever pip is first on the shell PATH.
%pip install -q thop

In [41]:
import torchvision.models as models
from thop import profile
model = models.densenet121()
# Named `dummy_input` so the builtin `input()` is not shadowed for the rest of
# the kernel session.
dummy_input = torch.randn(1, 3, 224, 224)
# `profile` returns (macs, params); only the MAC count is reported below.
macs, params = profile(model, inputs=(dummy_input, ))
# Divide by 10^9 to report the count in GMACs.
print(macs/1000000000)

[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_bn() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[91m[WARN] Cannot find rule for <class 'torchvision.models.densenet._DenseLayer'>. Treat it as zero Macs and zero Params.[00m
[91m[WARN] Cannot find rule for <class 'torchvision.models.densenet._DenseBlock'>. Treat it as zero Macs and zero Params.[00m
[INFO] Register count_avgpool() for <class 'torch.nn.modules.pooling.AvgPool2d'>.
[91m[WARN] Cannot find rule for <class 'torchvision.models.densenet._Transition'>. Treat it as zero Macs and zero Params.[00m
[91m[WARN] Cannot find rule for <class 'torch.nn.modules.container.Sequential'>. Treat it as zero Macs and zero Params.[00m
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[91m[WARN] Cannot find

# Calculating FLOPS using torch.profiler (pynvml is imported below, but the measurement itself is done with torch.profiler):

In [4]:

from pynvml import *
import torch


def convert_bytes(bytes_number):
    """Convert a size in bytes to mebibytes, rounded to three decimals.

    The input is divided by 1024 twice (bytes -> KiB -> MiB). Only the number
    is returned; no unit suffix is attached.
    """
    kibibytes = bytes_number / 1024
    mebibytes = kibibytes / 1024.0
    return round(mebibytes, 3)


def profile_model(func, arg):
    """Run ``func(arg)`` under the PyTorch profiler and total its recorded cost.

    Profiling is restricted to CPU activity, with memory tracking and FLOP
    estimation switched on.

    Args:
        func: Callable invoked once as ``func(arg)`` inside the profiler.
        arg: The single argument forwarded to ``func``.

    Returns:
        tuple: ``(total_flops, total_cpu_memory, result)`` where the first two
        are sums over all profiler events (events whose value is ``None`` are
        skipped) and ``result`` is whatever ``func(arg)`` returned.
    """
    cpu_only = [torch.profiler.ProfilerActivity.CPU]
    with torch.profiler.profile(
        activities=cpu_only, profile_memory=True, with_flops=True
    ) as prof:
        result = func(arg)

    total_flops = 0
    for event in prof.events():
        if event.flops is not None:
            total_flops += event.flops

    total_cpu_memory = 0
    for event in prof.events():
        if event.cpu_memory_usage is not None:
            total_cpu_memory += event.cpu_memory_usage

    return total_flops, total_cpu_memory, result
    

import torchvision.models as models

@torch.no_grad()
def memory_densenet(arg):
    """Run one DenseNet-121 forward pass and return the saved model's file size.

    Args:
        arg: Unused; accepted so the function can be invoked as ``func(arg)``
            by ``profile_model``.

    Returns:
        int: Size in bytes of ``./temp.pth``, the model serialized to disk.
        This is an on-disk size, not the runtime memory used by the forward pass.
    """
    # Imported locally so the function does not depend on `os` happening to be
    # re-exported by the notebook's `from pynvml import *`.
    import os

    model = models.densenet121()
    model.eval()
    inp = torch.rand([1, 3, 224, 224])
    model(inp)  # forward pass only; the output itself is not needed

    # The checkpoint is left on disk at ./temp.pth after the call.
    torch.save(model, "./temp.pth")
    return os.path.getsize("./temp.pth")


# Profile one DenseNet-121 forward pass. The middle value (summed CPU memory
# from the profiler) is discarded; `memory_used` is the byte size of the saved
# model file returned by memory_densenet.
flops, _, memory_used = profile_model(memory_densenet, None)
row_format = '{:<30}  {:<8}'
for row_label, row_value in (('Flops: ', flops), ('Memory used: ', memory_used)):
    print(row_format.format(row_label, row_value))


Flops:                          8613052416
Memory used:                    32531347
