In [1]:
import torch
import torch.nn as nn

print(f"torch version: {torch.__version__}")

DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

print(f"torch cuda available: {torch.cuda.is_available()}")

import torch, torchtext
from torchtext.models import RobertaClassificationHead
from torchtext.functional import to_tensor
xlmr_large = torchtext.models.XLMR_LARGE_ENCODER
classifier_head = torchtext.models.RobertaClassificationHead(num_classes=2, input_dim = 1024)
model = xlmr_large.get_model(head=classifier_head)
transform = xlmr_large.transform()

torch version: 1.13.1+cu117
torch cuda available: True


Downloading: "https://download.pytorch.org/models/text/xlmr.large.encoder.pt" to /home/andy/.cache/torch/hub/checkpoints/xlmr.large.encoder.pt


  0%|          | 0.00/2.08G [00:00<?, ?B/s]

100%|██████████| 5.07M/5.07M [00:01<00:00, 3.18MB/s]
Downloading: "https://download.pytorch.org/models/text/xlmr.vocab.pt" to /home/andy/.cache/torch/hub/checkpoints/xlmr.vocab.pt


  0%|          | 0.00/4.85M [00:00<?, ?B/s]

In [2]:
small_input_batch = [
               "Hello world",
               "How are you!"
]
big_input_batch = [
               "Hello world",
               "How are you!",
               """`Well, Prince, so Genoa and Lucca are now just family estates of the
Buonapartes. But I warn you, if you don't tell me that this means war,
if you still try to defend the infamies and horrors perpetrated by
that Antichrist- I really believe he is Antichrist- I will have
nothing more to do with you and you are no longer my friend, no longer
my 'faithful slave,' as you call yourself! But how do you do? I see
I have frightened you- sit down and tell me all the news.`

It was in July, 1805, and the speaker was the well-known Anna
Pavlovna Scherer, maid of honor and favorite of the Empress Marya
Fedorovna. With these words she greeted Prince Vasili Kuragin, a man
of high rank and importance, who was the first to arrive at her
reception. Anna Pavlovna had had a cough for some days. She was, as
she said, suffering from la grippe; grippe being then a new word in
St. Petersburg, used only by the elite."""
]


In [3]:
input_batch=big_input_batch

model_input = to_tensor(transform(input_batch), padding_value=1)
output = model(model_input)
output.shape

torch.Size([3, 2])

In [5]:
ITERATIONS=10

print("slow path:")
print("==========")
with torch.autograd.profiler.profile(use_cuda=False) as prof:
  for i in range(ITERATIONS):
    output = model(model_input)
print(prof)

model.eval()

print("fast path:")
print("==========")
with torch.autograd.profiler.profile(use_cuda=False) as prof:
  with torch.no_grad():
    for i in range(ITERATIONS):
      output = model(model_input)
print(prof)

slow path:


STAGE:2023-04-27 19:50:07 1551743:1551743 ActivityProfilerController.cpp:294] Completed Stage: Warm Up
STAGE:2023-04-27 19:50:21 1551743:1551743 ActivityProfilerController.cpp:300] Completed Stage: Collection


---------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                       Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls  
---------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                   aten::eq         0.01%     922.000us         0.01%     922.000us     922.000us             1  
            aten::embedding         0.03%       4.252ms         0.06%       8.024ms       8.024ms             1  
              aten::reshape         0.00%      11.000us         0.00%      15.000us      15.000us             1  
       aten::_reshape_alias         0.00%       4.000us         0.00%       4.000us       4.000us             1  
         aten::index_select         0.01%     883.000us         0.02%       3.343ms       3.343ms             1  
                aten::empty         0.00%       4.000us         0.00%       4.000us     

STAGE:2023-04-27 19:50:23 1551743:1551743 ActivityProfilerController.cpp:294] Completed Stage: Warm Up
STAGE:2023-04-27 19:50:29 1551743:1551743 ActivityProfilerController.cpp:300] Completed Stage: Collection


-------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                       Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls  
-------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                   aten::eq         0.00%     141.000us         0.00%     141.000us     141.000us             1  
                            aten::embedding         0.00%      10.000us         0.01%     416.000us     416.000us             1  
                              aten::reshape         0.00%       6.000us         0.00%       8.000us       8.000us             1  
                       aten::_reshape_alias         0.00%       2.000us         0.00%       2.000us       2.000us             1  
                         aten::index_select         0.01%     391.000us         0.01%     

In [7]:
model.encoder.transformer.layers.enable_nested_tensor
model.encoder.transformer.layers.enable_nested_tensor=False

In [8]:
model.to(DEVICE)
model_input = model_input.to(DEVICE)

print("slow path:")
print("==========")
with torch.autograd.profiler.profile(use_cuda=True) as prof:
  for i in range(ITERATIONS):
    output = model(model_input)
print(prof)

model.eval()

print("fast path:")
print("==========")
with torch.autograd.profiler.profile(use_cuda=True) as prof:
  with torch.no_grad():
    for i in range(ITERATIONS):
      output = model(model_input)
print(prof)

slow path:


STAGE:2023-04-27 19:52:37 1551743:1551743 ActivityProfilerController.cpp:294] Completed Stage: Warm Up
STAGE:2023-04-27 19:52:39 1551743:1551743 ActivityProfilerController.cpp:300] Completed Stage: Collection


-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                               aten::eq         0.13%       2.492ms         0.13%       2.516ms       2.516ms       2.857ms         0.13%       2.857ms       2.857ms             1  
                                       cudaLaunchKernel         0.00%      24.000us         0.00%      24.000us      24.000us       0.000us         0.00%       0.000us       0.000us             1  
         

STAGE:2023-04-27 19:52:42 1551743:1551743 ActivityProfilerController.cpp:294] Completed Stage: Warm Up
STAGE:2023-04-27 19:52:43 1551743:1551743 ActivityProfilerController.cpp:300] Completed Stage: Collection


-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                               aten::eq         0.00%      37.000us         0.00%      51.000us      51.000us      21.000us         0.00%      21.000us      21.000us             1  
                                       cudaLaunchKernel         0.00%      14.000us         0.00%      14.000us      14.000us       0.000us         0.00%       0.000us       0.000us             1  
         

In [9]:
model.encoder.transformer.layers.enable_nested_tensor = True
model.to(DEVICE)
model_input = model_input.to(DEVICE)

print("slow path:")
print("==========")
with torch.autograd.profiler.profile(use_cuda=True) as prof:
  for i in range(ITERATIONS):
    output = model(model_input)
print(prof)

model.eval()

print("fast path:")
print("==========")
with torch.autograd.profiler.profile(use_cuda=True) as prof:
  with torch.no_grad():
    for i in range(ITERATIONS):
      output = model(model_input)
print(prof)

slow path:


STAGE:2023-04-27 19:53:39 1551743:1551743 ActivityProfilerController.cpp:294] Completed Stage: Warm Up
STAGE:2023-04-27 19:53:41 1551743:1551743 ActivityProfilerController.cpp:300] Completed Stage: Collection


-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                               aten::eq         0.00%      47.000us         0.01%      65.000us      65.000us     120.000us         0.01%     120.000us     120.000us             1  
                                       cudaLaunchKernel         0.00%      18.000us         0.00%      18.000us      18.000us       0.000us         0.00%       0.000us       0.000us             1  
         

STAGE:2023-04-27 19:53:43 1551743:1551743 ActivityProfilerController.cpp:294] Completed Stage: Warm Up
STAGE:2023-04-27 19:53:44 1551743:1551743 ActivityProfilerController.cpp:300] Completed Stage: Collection


-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                               aten::eq         0.01%      40.000us         0.01%      54.000us      54.000us      13.000us         0.00%      13.000us      13.000us             1  
                                       cudaLaunchKernel         0.00%      14.000us         0.00%      14.000us      14.000us       0.000us         0.00%       0.000us       0.000us             1  
         

In [10]:
import gc
gc.collect()

181