# Measuring inference time and contributions to it from each layer

In this recipe, you will learn how to compute the total number of floating point operations a network performs in a forward pass, as well as the amount of memory it consumes. This is useful when you want to understand the limitations of your model and reveal where exactly the bottlenecks are so that you can optimize it.

You can obtain the model FLOPs count and the amount of memory consumed using the `model.getFLOPS` and `model.getMemoryConsumption` methods. Both methods take as input the specified blob shape. Per-layer inference time statistics are available after the forward pass is performed and can be obtained via the `model.getPerfProfile` method, which returns total inference time and per-layer timings, all in ticks.


In [1]:
# Import all of the necessary modules
import cv2
import matplotlib.pyplot as plt
import numpy as np
import os

# Plotting setup via IPython magics.
# NOTE(review): `%pylab inline` also populates the interactive namespace from
# numpy and matplotlib (see the message it prints), which can shadow other names.
# Presumably it also overrides the backend picked by `%matplotlib auto` on the
# line before it -- confirm whether both magics are really needed.
%matplotlib auto
%pylab inline


def print_image(header,name,np_arr,start_First=0, end_First=1, start_Second=0, end_Second=2,start_3=0, end_3=5):
    """Print a one-line summary of an array followed by a small preview of its values.

    The summary line shows ``header`` (padded with dashes), the array shape,
    ``name`` and the dtype. A preview is then printed for 2-D and 3-D arrays only:

    * 2-D: one printed row per index ``i`` in ``[start_First, end_First)``,
      listing elements ``np_arr[i, k]`` for ``k`` in ``[start_Second, end_Second)``.
    * 3-D: one printed row per index ``i`` in ``[start_First, end_First)``,
      listing the slices ``np_arr[i, j, start_3:end_3]`` for ``j`` in
      ``[start_Second, end_Second)``.

    Arrays of any other rank get the summary line only.

    Args:
        header: Label shown at the start of the summary line.
        name: Label shown next to the shape in the summary line.
        np_arr: Array-like object exposing ``shape`` and ``dtype`` and
            supporting tuple indexing (e.g. a NumPy array).
        start_First, end_First: Half-open index range along axis 0.
        start_Second, end_Second: Half-open index range along axis 1.
        start_3, end_3: Half-open index range along axis 2 (3-D input only).

    Every ``end_*`` bound is clamped to the size of its axis.
    """
    print("------  {0:-<25}    Shape{1} {2}: {3:}".format(header, np_arr.shape, name, str(np_arr.dtype)) )
    shapes = np_arr.shape
    ndim = len(shapes)

    # Only 2-D and 3-D previews are implemented. Bail out before touching
    # shapes[0] / shapes[1], which do not exist for 0-D / 1-D input and used to
    # raise IndexError; higher ranks already printed nothing beyond the header.
    if ndim not in (2, 3):
        return

    # Clamp the requested ranges so they never run past the array bounds.
    end_First = min(end_First, shapes[0])
    end_Second = min(end_Second, shapes[1])

    if ndim == 3:
        end_3 = min(end_3, shapes[2])
        for i in range(start_First, end_First):
            print("[", sep='', end="")
            for j in range(start_Second, end_Second):
                print(np_arr[i, j, start_3:end_3], sep=' ', end=" ")
            print(']')
    else:
        for i in range(start_First, end_First):
            print("[", end=" ")
            # Elements are printed one at a time rather than as a slice so that
            # NumPy's own line wrapping never cuts the row.
            for k in range(start_Second, end_Second):
                print(np_arr[i, k], end=" ")
            print(']')
def plt_view_image(plt,list_images,figsize=(15,6), axis="off", cmap='gray'):
    """Show several titled images side by side on one figure.

    Args:
        plt: The pyplot-like module/object used for all drawing calls.
        list_images: Sequence of ``(title, image)`` pairs, drawn left to right
            in a single row.
        figsize: Figure size forwarded to ``plt.figure``.
        axis: Value forwarded to ``plt.axis`` for every subplot.
        cmap: Only the literal ``'gray'`` has an effect (the image is drawn
            with the gray colormap); any other value draws the image with
            ``imshow``'s defaults.
    """
    plt.figure(figsize=figsize)
    column_count = len(list_images)
    for position, (title, picture) in enumerate(list_images, start=1):
        plt.subplot(1, column_count, position)
        plt.axis(axis)
        plt.title(title)
        imshow_options = {'cmap': 'gray'} if cmap == 'gray' else {}
        plt.imshow(picture, **imshow_options)
    plt.show()
#help("modules")   
# Environment diagnostics: list the module search path one entry per line,
# then report the directory the kernel is currently running in.
import sys

print("\n".join(sys.path))
print(f"current folder == {os.getcwd()}")
#pip list"

Using matplotlib backend: TkAgg
Populating the interactive namespace from numpy and matplotlib
D:\HTML_DOC\Program\opencv\Packt\S05\env
C:\Program Files\Python38\python38.zip
C:\Program Files\Python38\DLLs
C:\Program Files\Python38\lib
C:\Program Files\Python38
d:\html_doc\program\opencv\packt\s05\env

d:\html_doc\program\opencv\packt\s05\env\lib\site-packages
d:\html_doc\program\opencv\packt\s05\env\lib\site-packages\pip-20.0.2-py3.8.egg
d:\html_doc\program\opencv\packt\s05\env\lib\site-packages\win32
d:\html_doc\program\opencv\packt\s05\env\lib\site-packages\win32\lib
d:\html_doc\program\opencv\packt\s05\env\lib\site-packages\Pythonwin
d:\html_doc\program\opencv\packt\s05\env\lib\site-packages\IPython\extensions
C:\Users\polit\.ipython
current folder == D:\HTML_DOC\Program\opencv\Packt\S05\env


In [2]:
# Build the GoogLeNet network from its Caffe definition (.prototxt) and
# weights (.caffemodel); both paths are relative to the current folder.
model = cv2.dnn.readNetFromCaffe(
    '../data/bvlc_googlenet.prototxt',
    '../data/bvlc_googlenet.caffemodel',
)

In [3]:
# FLOPs count for an input blob of shape (1, 3, 224, 224), scaled to billions.
print('gflops:', 1e-9 * model.getFLOPS((1, 3, 224, 224)))

gflops: 3.1904431360000003


In [4]:
# Memory needed for weights (w) and intermediate blobs (b) for the same input
# shape; shown both scaled by 1e-6 and as raw values.
# NOTE(review): the raw values are presumably bytes -- confirm against the OpenCV docs.
w, b = model.getMemoryConsumption((1, 3, 224, 224))
print(
    'weights (mb):{0}, blobs (mb):{1}     w={2}   b={3}'.format(
        w * 1e-6, b * 1e-6, w, b
    )
)

weights (mb):27.994208, blobs (mb):40.251072     w=27994208   b=40251072


In [5]:
# Run one forward pass on an all-zero 224x224 3-channel image so that the
# per-layer timing statistics become available.
tensor = cv2.dnn.blobFromImage(
    np.zeros(shape=(224, 224, 3), dtype=np.uint8),
    1,
    (224, 224),
)
model.setInput(tensor)
model.forward();  # trailing ';' keeps the notebook from echoing the result

# Preview the first item of the blob: 3 planes, 4 rows each, 6 values per row.
print_image(
    'tensor[0]', 'tensor[0]', tensor[0],
    start_First=0, end_First=3,
    start_Second=0, end_Second=4,
    start_3=0, end_3=6,
)

------  tensor[0]----------------    Shape(3, 224, 224) tensor[0]: float32
[[0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] ]
[[0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] ]
[[0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] ]


In [6]:
# getPerfProfile() reports the total inference time and per-layer timings of
# the last forward pass, all in ticks.
total, timings = model.getPerfProfile()
# Conversion factor from ticks to milliseconds.
tick2ms = 1e3/cv2.getTickFrequency()
# '{:.2f}' = two decimals, matching the per-layer table; the previous '{:2f}'
# set a minimum width of 2 and therefore printed six decimals.
print('inference (ms): {:.2f}'.format(total*tick2ms))

inference (ms): 30.434900


In [7]:
# Per-layer breakdown: one row per timing entry, with the layer name
# left-aligned in a 30-character column and the time converted to milliseconds.
layer_names = model.getLayerNames()
print('{: <30} {}'.format('LAYER', 'TIME (ms)'))
for i in range(len(timings)):
    print('{: <30} {:.2f}'.format(layer_names[i], timings[i][0] * tick2ms))

LAYER                          TIME (ms)
conv1/7x7_s2                   2.73
conv1/relu_7x7                 0.00
pool1/3x3_s2                   0.33
pool1/norm1                    0.46
conv2/3x3_reduce               0.24
conv2/relu_3x3_reduce          0.00
conv2/3x3                      2.75
conv2/relu_3x3                 0.00
conv2/norm2                    2.30
pool2/3x3_s2                   0.29
inception_3a/1x1               0.20
inception_3a/relu_1x1          0.00
inception_3a/3x3_reduce        0.19
inception_3a/relu_3x3_reduce   0.00
inception_3a/3x3               1.05
inception_3a/relu_3x3          0.00
inception_3a/5x5_reduce        0.11
inception_3a/relu_5x5_reduce   0.00
inception_3a/5x5               0.28
inception_3a/relu_5x5          0.00
inception_3a/pool              0.21
inception_3a/pool_proj         0.09
inception_3a/relu_pool_proj    0.00
inception_3a/output            0.00
inception_3b/1x1               0.34
inception_3b/relu_1x1          0.00
inception_3b/3x3_reduce