In [3]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [4]:
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

Using cpu device


In [5]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [6]:
model = NeuralNetwork().to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [7]:
X = torch.rand(1, 28, 28, device=device)
logits = model(X)
pred_probab = nn.Softmax(dim=1)(logits)
y_pred = pred_probab.argmax(1)
print(f"Predicted class: {y_pred}")

Predicted class: tensor([5])


In [8]:
from torch.profiler import profile, record_function, ProfilerActivity

In [9]:
with profile(activities=[ProfilerActivity.CPU], record_shapes=True) as prof:
    with record_function("model_inference"):
        model(X)

STAGE:2024-06-05 22:51:37 33:33 ActivityProfilerController.cpp:312] Completed Stage: Warm Up
STAGE:2024-06-05 22:51:37 33:33 ActivityProfilerController.cpp:318] Completed Stage: Collection
STAGE:2024-06-05 22:51:37 33:33 ActivityProfilerController.cpp:322] Completed Stage: Post Processing


In [10]:
print(prof.key_averages().table(sort_by="cpu_time_total", row_limit=10))

----------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                  Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls  
----------------------  ------------  ------------  ------------  ------------  ------------  ------------  
       model_inference        37.73%       3.025ms       100.00%       8.018ms       8.018ms             1  
          aten::linear        11.11%     891.000us        30.71%       2.462ms     820.667us             3  
         aten::flatten        22.35%       1.792ms        22.55%       1.808ms       1.808ms             1  
           aten::addmm        18.18%       1.458ms        18.73%       1.502ms     500.667us             3  
            aten::relu         8.72%     699.000us         9.02%     723.000us     361.500us             2  
               aten::t         0.46%      37.000us         0.86%      69.000us      23.000us             3  
       aten::transp

In [11]:
with profile(activities=[
        ProfilerActivity.CPU, ProfilerActivity.CUDA], record_shapes=True) as prof:
    with record_function("model_inference"):
        model(X)

print(prof.key_averages().table(sort_by="cuda_time_total", row_limit=10))

----------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                  Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls  
----------------------  ------------  ------------  ------------  ------------  ------------  ------------  
       model_inference        55.44%     622.000us       100.00%       1.122ms       1.122ms             1  
         aten::flatten         1.25%      14.000us         2.85%      32.000us      32.000us             1  
            aten::view         1.60%      18.000us         1.60%      18.000us      18.000us             1  
          aten::linear         2.05%      23.000us        36.81%     413.000us     137.667us             3  
               aten::t         3.03%      34.000us         5.17%      58.000us      19.333us             3  
       aten::transpose         1.69%      19.000us         2.14%      24.000us       8.000us             3  
      aten::as_stri

  warn("CUDA is not available, disabling CUDA profiling")
STAGE:2024-06-05 22:51:37 33:33 ActivityProfilerController.cpp:312] Completed Stage: Warm Up
STAGE:2024-06-05 22:51:37 33:33 ActivityProfilerController.cpp:318] Completed Stage: Collection
STAGE:2024-06-05 22:51:37 33:33 ActivityProfilerController.cpp:322] Completed Stage: Post Processing


In [12]:
with profile(activities=[ProfilerActivity.CPU],
        profile_memory=True, record_shapes=True) as prof:
    model(X)

print(prof.key_averages())


----------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                  Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
----------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
         aten::flatten         2.15%      10.000us         4.94%      23.000us      23.000us           0 b           0 b             1  
            aten::view         2.79%      13.000us         2.79%      13.000us      13.000us           0 b           0 b             1  
          aten::linear         3.65%      17.000us        86.27%     402.000us     134.000us       4.04 Kb           0 b             3  
               aten::t         6.44%      30.000us        10.94%      51.000us      17.000us           0 b           0 b             3  
       aten::transpose         3.43%     

STAGE:2024-06-05 22:51:37 33:33 ActivityProfilerController.cpp:312] Completed Stage: Warm Up
STAGE:2024-06-05 22:51:37 33:33 ActivityProfilerController.cpp:318] Completed Stage: Collection
STAGE:2024-06-05 22:51:37 33:33 ActivityProfilerController.cpp:322] Completed Stage: Post Processing


In [13]:
m=1024
n=4096
k=1

print((m*n*k)/(m*n+n*k+k*m))

0.9987807851743478
