In [1]:
import torchvision
import torch
from time import perf_counter
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import torch
from time import perf_counter

def timer(f, *args):
    """Return the wall-clock time, in milliseconds, of one call to ``f(*args)``.

    When CUDA is available, the device is synchronized before the clock starts
    and again after the call, so asynchronously queued GPU work is included in
    the measurement. When CUDA is not available the synchronization is skipped,
    which lets CPU-only callables be timed without raising.

    Args:
        f: Callable to time.
        *args: Positional arguments forwarded to ``f``.

    Returns:
        Elapsed time in milliseconds as a float. The return value of ``f`` is
        discarded.
    """
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        torch.cuda.synchronize()  # make sure all previously queued GPU work has finished
    start = perf_counter()
    f(*args)
    if use_cuda:
        torch.cuda.synchronize()  # wait for the GPU work launched by f to finish
    return 1000 * (perf_counter() - start)

In [18]:
# Eager-mode CPU baseline: a ResNet-101 built with pretrained=True, switched to
# eval mode, fed a random batch of 32 inputs of size 3x224x224.
# NOTE(review): autograd is not disabled around the timed calls — confirm
# whether that is intended for this benchmark.
model_ft = torchvision.models.resnet101(pretrained=True).eval()
x_ft = torch.rand(32, 3, 224, 224)

# Ten timed forward passes; timer() returns milliseconds per call.
cpu_times_ms = [timer(model_ft, x_ft) for _ in range(10)]
print(f'pytorch cpu: {np.mean(cpu_times_ms)}')

pytorch cpu: 888.9345609059092


In [4]:
# GPU counterparts of the CPU objects: a device copy of the input batch and a
# separately constructed ResNet-101 moved to the GPU.
x_ft_gpu = x_ft.to("cuda")
model_ft_gpu = torchvision.models.resnet101(pretrained=True)
model_ft_gpu = model_ft_gpu.to("cuda")
# eval() returns the module itself; as the last expression of the cell it makes
# the notebook display the network architecture.
model_ft_gpu.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [16]:
# Eager-mode GPU timing: mean of ten forward passes, in milliseconds.
gpu_times_ms = [timer(model_ft_gpu, x_ft_gpu) for _ in range(10)]
print(f'pytorch gpu: {np.mean(gpu_times_ms)}')

pytorch gpu: 52.905369095969945


In [7]:
# Compile the CPU model with TorchScript. torch.jit.script compiles from the
# module's source and needs no example input (unlike torch.jit.trace), so only
# the module is passed; a second positional argument would be taken as the
# deprecated `optimize` parameter.
script_cell = torch.jit.script(model_ft)

# Ten timed forward passes of the scripted model; timer() returns milliseconds.
script_cpu_times_ms = [timer(script_cell, x_ft) for _ in range(10)]
print(f'torchscript cpu: {np.mean(script_cpu_times_ms)}')




torchscript cpu: 838.6985643010121


In [17]:
# Compile the GPU model with TorchScript. torch.jit.script needs no example
# input (unlike torch.jit.trace), so only the module is passed; a second
# positional argument would be taken as the deprecated `optimize` parameter.
script_cell_gpu = torch.jit.script(model_ft_gpu)

# Reuse the batch that already lives on the GPU (x_ft_gpu) instead of copying
# x_ft from host memory again for each of the 100 iterations.
script_gpu_times_ms = [timer(script_cell_gpu, x_ft_gpu) for _ in range(100)]
print(f'torchscript gpu: {np.mean(script_gpu_times_ms)}')

torchscript gpu: 50.96556553035043
