<a href="https://colab.research.google.com/github/majauhar/DL_MVA/blob/main/SISR_Efficiency_with_GPU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# For local environments:
# A local (editable) installation on Colab leaves the submodules unavailable
# !git clone https://github.com/majauhar/fvcore.git
# !pip install -e fvcore
# !pip install einops

In [9]:
"""
  For Colab environments:
  The original fvcore package doesn't work with Omni-SR because of a bug,
  which I have fixed in my fork.
"""
# %pip install fvcore -q
%pip install einops -q

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/44.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.6/44.6 kB[0m [31m1.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [1]:
"""
  Required to run in Colab
"""
!git clone https://github.com/majauhar/DL_MVA.git

Cloning into 'DL_MVA'...
remote: Enumerating objects: 107, done.[K
remote: Counting objects: 100% (107/107), done.[K
remote: Compressing objects: 100% (79/79), done.[K
remote: Total 107 (delta 39), reused 77 (delta 21), pack-reused 0[K
Receiving objects: 100% (107/107), 58.76 KiB | 2.03 MiB/s, done.
Resolving deltas: 100% (39/39), done.


In [2]:
cd DL_MVA/

/content/DL_MVA


In [10]:
import torch
import numpy as np
import einops
from time import perf_counter

In [2]:
"""
  Unsafe for Colab (for Omni-SR, works fine with LESRCNN)
"""
# from fvcore.nn import FlopCountAnalysis
# from fvcore.nn import flop_count_str
# from fvcore.nn import flop_count_table

'\n  Unsafe for Colab (for Omni-SR, works fine with LESRCNN)\n'

In [11]:
# Local imports
from utils.efficiency_results import get_model_flops, get_model_activation
from lesrcnn.model import Net
from omni.model import OmniSR

In [12]:
def cpu_inference(model, input):
    """
        Time a single forward pass of ``model`` on ``input``.

        The call runs under ``torch.no_grad()`` so that autograd bookkeeping
        is not part of the measurement. Only one inference is timed here;
        averaging over several runs is the caller's responsibility.

        Args:
            model: callable invoked as ``model(input)``.
            input: the value handed to ``model``.

        Returns:
            Elapsed wall-clock time in seconds (difference of two
            ``perf_counter`` readings).
    """
    with torch.no_grad():
        start_time = perf_counter()
        _ = model(input)
        end_time = perf_counter()

    return end_time - start_time

In [13]:
# model = Net() # LESRCNN
# Put the network in eval mode before benchmarking so that any layers with
# train/eval-dependent behaviour (e.g. dropout, batch norm) run as they would
# at inference time. `.eval()` returns the module itself.
model = OmniSR().eval() # Omni-SR network

window_size: 8
with_pe True
ffn_bias: 1
window_size: 8
with_pe True
ffn_bias: 1
window_size: 8
with_pe True
ffn_bias: 1
window_size: 8
with_pe True
ffn_bias: 1
window_size: 8
with_pe True
ffn_bias: 1


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


In [14]:
# Time 100 CPU forward passes, each on a fresh random (1, 3, 256, 256) tensor.
# The tensor is named `cpu_input` so the builtin `input` is not shadowed, and
# autograd is disabled because only the forward pass is being benchmarked.
deltas = []
with torch.no_grad():
    for _ in range(100):
        cpu_input = torch.randn(1, 3, 256, 256)
        deltas.append(cpu_inference(model, cpu_input))

# cpu_inference returns seconds; scale to milliseconds for display.
average_time = np.array(deltas).mean()
print("inference time: {:.2f} ms".format(average_time * 1e3))

In [19]:
# GPU inference time
# Reference for CUDA event timing:
# https://pytorch.org/docs/stable/generated/torch.cuda.Event.html
device = torch.device("cuda")

# Move the network, then a random float32 probe tensor, onto the GPU.
model = model.to(device)
input = torch.randn(1, 3, 256, 256, dtype=torch.float)
input = input.to(device)

In [None]:
# CUDA events that timestamp the start and end of each timed forward pass
starter = torch.cuda.Event(enable_timing=True)
ender = torch.cuda.Event(enable_timing=True)
iterations = 100
deltas = np.zeros((iterations, 1))

with torch.no_grad():
    # Untimed warm-up passes
    for _ in range(10):
        _ = model(input)

    # Timed passes: one elapsed-time sample per iteration
    for i in range(iterations):
        starter.record()
        _ = model(input)
        ender.record()

        # Wait for the queued GPU work to finish before reading the events
        torch.cuda.synchronize()
        delta = starter.elapsed_time(ender)
        deltas[i] = delta

average_time = deltas.sum() / iterations

print("Average GPU inference time: {:.2f} ms".format(average_time))

In [None]:
# To find the number of activations (plus Conv2d count, FLOPs and parameters).
# Model summary tools based on NTIRE challenge on efficient super-resolution:
# https://cvlai.net/ntire/2023/
input_dim = (3, 256, 256)

# Activations are reported in millions; the Conv2d count is reported as-is.
activations, num_conv = get_model_activation(model, input_dim)
activations = activations / 10 ** 6
print("{:>16s} : {:<.4f} [M]".format("#Activations", activations))
print("{:>16s} : {:<d}".format("#Conv2d", num_conv))


# FLOPs are reported in billions.
flops = get_model_flops(model, input_dim, False) / 10 ** 9
print("{:>16s} : {:<.4f} [G]".format("FLOPs", flops))

# Total number of parameter elements, reported in millions.
num_parameters = sum(p.numel() for p in model.parameters()) / 10 ** 6
print("{:>16s} : {:<.4f} [M]".format("#Params", num_parameters))

    #Activations : 173.7359 [M]
         #Conv2d : 26
           FLOPs : 80.1813 [G]
         #Params : 0.6263 [M]


In [None]:
"""
Alternative tool for calculating FLOPs: Fvcore by Facebook research
https://github.com/facebookresearch/fvcore
"""
# NOTE: needs `FlopCountAnalysis` from fvcore.nn to be imported.

# Create the dummy input on whatever device the model currently lives on.
# An earlier cell may have moved the model to the GPU, in which case a CPU
# tensor would raise a device-mismatch error during the forward pass.
# Falls back to CPU if the model exposes no parameters.
model_device = next((p.device for p in model.parameters()), torch.device("cpu"))
input = torch.randn(1, 3, 256, 256, device=model_device)
flops = FlopCountAnalysis(model, input)
print("FLOPs: {:.2f} [G]".format(flops.total() / 1e9))

Unsupported operator aten::add encountered 9 time(s)
Unsupported operator aten::pixel_shuffle encountered 2 time(s)


FLOPs: {.3f} [G] 80.026075136


In [None]:
# Layer-wise statistics: per-module parameter and FLOP counts rendered as a table.
# `flops` is the FlopCountAnalysis object created in the preceding fvcore cell;
# `flop_count_table` must be imported from fvcore.nn for this cell to run.
print(flop_count_table(flops))

| module                      | #parameters or shape   | #flops   |
|:----------------------------|:-----------------------|:---------|
| model                       | 0.626M                 | 80.026G  |
|  sub_mean.shifter           |  12                    |  0.59M   |
|   sub_mean.shifter.weight   |   (3, 3, 1, 1)         |          |
|   sub_mean.shifter.bias     |   (3,)                 |          |
|  add_mean.shifter           |  12                    |  2.359M  |
|   add_mean.shifter.weight   |   (3, 3, 1, 1)         |          |
|   add_mean.shifter.bias     |   (3,)                 |          |
|  conv1.0                    |  1.728K                |  0.113G  |
|   conv1.0.weight            |   (64, 3, 3, 3)        |          |
|  conv2.0                    |  36.864K               |  2.416G  |
|   conv2.0.weight            |   (64, 64, 3, 3)       |          |
|  conv3.0                    |  4.096K                |  0.268G  |
|   conv3.0.weight            |   (64, 64, 1, 1)

In [None]:
# Print the same FLOP analysis as an annotated module tree, with #params and
# #flops listed under each module. `flop_count_str` must be imported from
# fvcore.nn for this cell to run.
print(flop_count_str(flops))

Net(
  #params: 0.63M, #flops: 80.03G
  (sub_mean): MeanShift(
    #params: 12, #flops: 0.59M
    (shifter): Conv2d(
      3, 3, kernel_size=(1, 1), stride=(1, 1)
      #params: 12, #flops: 0.59M
    )
  )
  (add_mean): MeanShift(
    #params: 12, #flops: 2.36M
    (shifter): Conv2d(
      3, 3, kernel_size=(1, 1), stride=(1, 1)
      #params: 12, #flops: 2.36M
    )
  )
  (conv1): Sequential(
    #params: 1.73K, #flops: 0.11G
    (0): Conv2d(
      3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
      #params: 1.73K, #flops: 0.11G
    )
  )
  (conv2): Sequential(
    #params: 36.86K, #flops: 2.42G
    (0): Conv2d(
      64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
      #params: 36.86K, #flops: 2.42G
    )
    (1): ReLU(inplace=True)
  )
  (conv3): Sequential(
    #params: 4.1K, #flops: 0.27G
    (0): Conv2d(
      64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False
      #params: 4.1K, #flops: 0.27G
    )
  )
  (conv4): Sequential(
    