### **CMPE 255 - Data Mining Bonus Work 1 - Option 6: TorchScript**

Submitted by: Rutik Sanjay Sangle [016007589]

Pip commands to install required libraries


In [11]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


Importing all required libraries


In [12]:
from transformers import BertTokenizer, BertModel
import numpy as np
import torch
from time import perf_counter

In [13]:
def timer(f,*args):   
    
    start = perf_counter()
    f(*args)
    return (1000 * (perf_counter() - start))
    
script_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', torchscript=True)
script_model = BertModel.from_pretrained("bert-base-uncased", torchscript=True)


# Tokenizing input text
text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
tokenized_text = script_tokenizer.tokenize(text)

# Masking one of the input tokens
masked_index = 8

tokenized_text[masked_index] = '[MASK]'

indexed_tokens = script_tokenizer.convert_tokens_to_ids(tokenized_text)

segments_ids = [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1]

# Creating a dummy input
tokens_tensor = torch.tensor([indexed_tokens])
segments_tensors = torch.tensor([segments_ids])

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [14]:
# BERT on CPU
native_model = BertModel.from_pretrained("bert-base-uncased")
np.mean([timer(native_model,tokens_tensor,segments_tensors) for _ in range(100)])

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


84.76321981000129

In [15]:
# BERT on GPU
# Both sample data model need be on the GPU device for the inference to take place
native_gpu = native_model.cuda()
tokens_tensor_gpu = tokens_tensor.cuda()
segments_tensors_gpu = segments_tensors.cuda()
np.mean([timer(native_gpu,tokens_tensor_gpu,segments_tensors_gpu) for _ in range(100)])

10.940632870002673

Script mode is invoked by either torch.jit.trace or torch.jit.script. Here, I am using trace method.

In [16]:
# torch.jit.trace on CPU
traced_model = torch.jit.trace(script_model, [tokens_tensor, segments_tensors])
np.mean([timer(traced_model,tokens_tensor,segments_tensors) for _ in range(100)])

93.28815125000062

In [17]:
# torch.jit.trace on GPU
traced_model_gpu = torch.jit.trace(script_model.cuda(), [tokens_tensor.cuda(), segments_tensors.cuda()])
np.mean([timer(traced_model_gpu,tokens_tensor.cuda(),segments_tensors.cuda()) for _ in range(100)])

204.2789596200049

In [18]:
traced_model.code

'def forward(self,\n    input_ids: Tensor,\n    attention_mask: Tensor) -> Tuple[Tensor, Tensor]:\n  pooler = self.pooler\n  encoder = self.encoder\n  embeddings = self.embeddings\n  embeddings0 = self.embeddings\n  token_type_ids = embeddings0.token_type_ids\n  batch_size = ops.prim.NumToTensor(torch.size(input_ids, 0))\n  _0 = int(batch_size)\n  seq_length = ops.prim.NumToTensor(torch.size(input_ids, 1))\n  _1 = int(seq_length)\n  _2 = int(seq_length)\n  _3 = torch.slice(token_type_ids, 0, 0, 9223372036854775807)\n  buffered_token_type_ids = torch.slice(_3, 1, 0, _2)\n  input = torch.expand(buffered_token_type_ids, [_0, _1])\n  _4 = torch.slice(attention_mask, 0, 0, 9223372036854775807)\n  _5 = torch.unsqueeze(torch.unsqueeze(_4, 1), 2)\n  extended_attention_mask = torch.slice(_5, 3, 0, 9223372036854775807)\n  _6 = torch.rsub(torch.to(extended_attention_mask, 6), 1.)\n  attention_mask0 = torch.mul(_6, CONSTANTS.c0)\n  _7 = (embeddings).forward(input_ids, input, )\n  _8 = (encoder).fo

In [19]:
traced_model_gpu.code

'def forward(self,\n    input_ids: Tensor,\n    attention_mask: Tensor) -> Tuple[Tensor, Tensor]:\n  pooler = self.pooler\n  encoder = self.encoder\n  embeddings = self.embeddings\n  embeddings0 = self.embeddings\n  token_type_ids = embeddings0.token_type_ids\n  batch_size = ops.prim.NumToTensor(torch.size(input_ids, 0))\n  _0 = int(batch_size)\n  seq_length = ops.prim.NumToTensor(torch.size(input_ids, 1))\n  _1 = int(seq_length)\n  _2 = int(seq_length)\n  _3 = torch.slice(token_type_ids, 0, 0, 9223372036854775807)\n  buffered_token_type_ids = torch.slice(_3, 1, 0, _2)\n  input = torch.expand(buffered_token_type_ids, [_0, _1])\n  _4 = torch.slice(attention_mask, 0, 0, 9223372036854775807)\n  _5 = torch.unsqueeze(torch.unsqueeze(_4, 1), 2)\n  extended_attention_mask = torch.slice(_5, 3, 0, 9223372036854775807)\n  _6 = torch.rsub(torch.to(extended_attention_mask, 6), 1.)\n  attention_mask0 = torch.mul(_6, CONSTANTS.c0)\n  _7 = (embeddings).forward(input_ids, input, )\n  _8 = (encoder).fo

In [20]:
import torchvision
import torch
from time import perf_counter
import numpy as np

def timer(f,*args):   
    start = perf_counter()
    f(*args)
    return (1000 * (perf_counter() - start))

In [21]:
# ResNet on CPU
model_ft = torchvision.models.resnet18()
model_ft.eval()
x_ft = torch.rand(1,3, 224,224)
np.mean([timer(model_ft,x_ft) for _ in range(10)])

93.36902669999745

In [22]:
# ResNet on GPU
model_ft_gpu = torchvision.models.resnet18(pretrained=True).cuda()
x_ft_gpu = x_ft.cuda()
model_ft_gpu.eval()
np.mean([timer(model_ft_gpu,x_ft_gpu) for _ in range(10)])

  f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "


3.5253538999995726

Script mode is invoked by either torch.jit.trace or torch.jit.script. Here, I am using script method.

In [23]:
# torch.jit.script on CPU
script_cell = torch.jit.script(model_ft, (x_ft))
np.mean([timer(script_cell,x_ft) for _ in range(10)])

  "`optimize` is deprecated and has no effect. Use `with torch.jit.optimized_execution() instead"


78.72551280000835

In [24]:
# torch.jit.script on GPU
script_cell_gpu = torch.jit.script(model_ft_gpu, (x_ft_gpu))
np.mean([timer(script_cell_gpu,x_ft.cuda()) for _ in range(100)])

2.4173787899962917

In [25]:
script_cell.code

'def forward(self,\n    x: Tensor) -> Tensor:\n  return (self)._forward_impl(x, )\n'

In [26]:
script_cell_gpu.code

'def forward(self,\n    x: Tensor) -> Tensor:\n  return (self)._forward_impl(x, )\n'

In [27]:
torch.jit.save(traced_model,'traced_bert.pt')

In [28]:
loaded = torch.jit.load('traced_bert.pt')