In [1]:
import torch

# Report the installed PyTorch version and whether each GPU backend
# (CUDA, MPS) reports itself as available.
print(f'PyTorch version: {torch.__version__}\n')
print('GPU availability:\n')
print(f'CUDA (NVIDIA):\t \t{torch.cuda.is_available()}')
print(f'MPS (Apple Silicon):\t{torch.backends.mps.is_available()}')

PyTorch verion: 2.0.1

GPU availablability:

CUDA (NVIDIA):	 	False
MPS (Apple Silicon):	True


In [2]:
# Choose the compute device in priority order: CUDA, then MPS, then CPU.
# The chained conditional short-circuits, so the MPS check only runs when
# CUDA is unavailable.
DEVICE = torch.device(
    'cuda' if torch.cuda.is_available()
    else 'mps' if torch.backends.mps.is_available()
    else 'cpu'
)

print(f'Using device: {DEVICE}')

Using device: mps


In [3]:
import torchvision.models as models
from torch.profiler import profile, ProfilerActivity

In [4]:
# CPU profiling

# ResNet-34 built with default arguments, plus a random (5, 3, 224, 224) batch.
model = models.resnet34()
inputs = torch.randn((5, 3, 224, 224))

# One forward pass outside the profiler before measuring.
model(inputs)

# Record CPU activity with memory tracking and operator input shapes enabled.
with profile(
    record_shapes=True,
    profile_memory=True,
    activities=[ProfilerActivity.CPU],
) as prof:
    model(inputs)

# Print the averaged per-operator table, sorted by total CPU time, 10 rows.
print(
    prof.key_averages().table(row_limit=10, sort_by="cpu_time_total")
)

STAGE:2023-06-26 19:55:31 13361:511464 ActivityProfilerController.cpp:311] Completed Stage: Warm Up
STAGE:2023-06-26 19:55:32 13361:511464 ActivityProfilerController.cpp:317] Completed Stage: Collection
STAGE:2023-06-26 19:55:32 13361:511464 ActivityProfilerController.cpp:321] Completed Stage: Post Processing


---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                     aten::conv2d         0.07%     108.000us        80.74%     124.236ms       3.451ms      71.30 Mb           0 b            36  
                aten::convolution         0.23%     360.000us        80.66%     124.128ms       3.448ms      71.30 Mb           0 b            36  
               aten::_convolution         0.01%      17.000us        80.43%     123.768ms       3.438ms      71.30 Mb      -1.91 Mb            36  
                aten::thnn_conv2d         0.25%     380.000us        80.23%     123.463ms       3.430ms      71.

In [5]:
# GPU profiling

# Per-backend settings keyed by device type: (banner text, profiler activities).
# The CUDA entry records both CPU and CUDA activity; the MPS entry records CPU
# activity only.
# NOTE(review): with CPU-only activity the MPS table presumably reflects
# host-side time rather than GPU execution time -- confirm before comparing
# it against the CPU run.
GPU_PROFILE_SETTINGS = {
    'cuda': ('CUDA profiling\n', [ProfilerActivity.CPU, ProfilerActivity.CUDA]),
    'mps': ('MPS profiling\n', [ProfilerActivity.CPU]),
}

# Dispatch on DEVICE.type rather than comparing whole device objects, so an
# index-qualified device such as 'cuda:0' is also recognised.
if DEVICE.type in GPU_PROFILE_SETTINGS:
    banner, activities = GPU_PROFILE_SETTINGS[DEVICE.type]
    print(banner)

    # Same model and input shape as the CPU run, moved to the selected device.
    model = models.resnet34().to(DEVICE)
    inputs = torch.randn(5, 3, 224, 224).to(DEVICE)

    # One forward pass outside the profiler before measuring.
    model(inputs)

    with profile(activities=activities,
                 profile_memory=True, record_shapes=True) as prof:
        model(inputs)

    # Print the averaged per-operator table, sorted by total CPU time, 10 rows.
    print(prof.key_averages().table(sort_by="cpu_time_total", row_limit=10))

else:
    print('no GPU enabled')

MPS profiling

--------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                            Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
--------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                aten::batch_norm         2.49%     378.000us        44.18%       6.705ms     186.250us           0 b           0 b            36  
    aten::_batch_norm_impl_index         3.48%     528.000us        43.99%       6.677ms     185.472us           0 b           0 b            36  
         aten::native_batch_norm        43.53%       6.607ms        43.53%       6.607ms     183.528us           0 b           0 b            36  
                    aten::conv2d         0.28%      43.000us        19.38%       2.941ms      81.694us 

STAGE:2023-06-26 19:55:32 13361:511464 ActivityProfilerController.cpp:311] Completed Stage: Warm Up
STAGE:2023-06-26 19:55:32 13361:511464 ActivityProfilerController.cpp:317] Completed Stage: Collection
STAGE:2023-06-26 19:55:32 13361:511464 ActivityProfilerController.cpp:321] Completed Stage: Post Processing
