# Pytorch Profiler Sandbox 01

In [1]:
import torch
import torchvision.models as models
from torch.profiler import profile, record_function, ProfilerActivity

In [2]:
model = models.resnet18()
inputs = torch.randn(5, 3, 224, 224)

In [3]:
with profile(activities=[ProfilerActivity.CPU], record_shapes=True) as prof:
    with record_function("model_inference"):
        model(inputs)

In [4]:
print(prof.key_averages().table(sort_by="cpu_time_total", row_limit=10))

---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                  model_inference        11.12%      12.277ms        99.35%     109.642ms     109.642ms             1  
                     aten::conv2d         0.06%      66.000us        67.91%      74.940ms       3.747ms            20  
                aten::convolution         0.54%     598.000us        67.85%      74.874ms       3.744ms            20  
               aten::_convolution         0.14%     152.000us        67.30%      74.276ms       3.714ms            20  
         aten::mkldnn_convolution        67.05%      73.991ms        67.17%      74.124ms       3.706ms            20  
                 aten::batch_norm       

In [5]:
print(prof.key_averages().table(sort_by="self_cpu_time_total", row_limit=10))

---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
         aten::mkldnn_convolution        67.05%      73.991ms        67.17%      74.124ms       3.706ms            20  
                  model_inference        11.12%      12.277ms        99.35%     109.642ms     109.642ms             1  
          aten::native_batch_norm         7.15%       7.892ms         7.34%       8.104ms     405.200us            20  
    aten::max_pool2d_with_indices         7.07%       7.798ms         7.07%       7.798ms       7.798ms             1  
                 aten::clamp_min_         1.07%       1.183ms         1.07%       1.183ms      69.588us            17  
                      aten::addmm       

In [6]:
print(prof.key_averages(group_by_input_shape=True).table(sort_by="cpu_time_total", row_limit=10))

---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  --------------------------------------------------------------------------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls                                                                      Input Shapes  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  --------------------------------------------------------------------------------  
                  model_inference        11.12%      12.277ms        99.35%     109.642ms     109.642ms             1                                                                                []  
                     aten::conv2d         0.01%      15.000us        15.62%      17.239ms       4.310ms             4                             [[5, 64, 56, 56], [64, 64, 3, 3], [], [], [], 

In [8]:
with profile(activities=[ProfilerActivity.CPU], profile_memory=True, record_shapes=True) as prof:
    model(inputs)
    
print(prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=10))

---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                      aten::empty         0.47%     362.000us         0.47%     362.000us       1.810us      94.87 Mb      94.87 Mb           200  
    aten::max_pool2d_with_indices         9.99%       7.772ms         9.99%       7.772ms       7.772ms      11.48 Mb      11.48 Mb             1  
                      aten::addmm         0.18%     139.000us         0.20%     153.000us     153.000us      19.53 Kb      19.53 Kb             1  
                       aten::mean         0.01%       8.000us         0.07%      56.000us      56.000us      10.

In [9]:
prof.export_chrome_trace("trace.json")

In [12]:
with profile(activities=[ProfilerActivity.CPU], with_stack=True,) as prof:
    model(inputs)

print(prof.key_averages(group_by_stack_n=5).table(sort_by="self_cpu_time_total", row_limit=2))

---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ---------------------------------------------------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls  Source Location                                            
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ---------------------------------------------------------  
         aten::mkldnn_convolution        78.08%      76.149ms        78.20%      76.264ms       3.813ms            20  runpy.py(87): _run_code                                    
                                                                                                                       ipykernel_launcher.py(17): <module>                        
                                                                                                         

In [14]:
prof.export_stacks("profiler_stacks.txt", "self_cpu_time_total")

In [20]:
import ipyplot

ipyplot.plot_images(
    ['/Users/L029235/local/repos/torch-playground/profiler/perf_viz.svg'], # images should be passed in as an array
    img_width=250,
    force_b64=True # this is important to be able to render the image correctly on GitHub
)

UnidentifiedImageError: cannot identify image file 'perf_viz.svg'