https://pytorch.org/docs/stable/onnx.html#example-end-to-end-alexnet-from-pytorch-to-onnx

https://github.com/pytorch/vision/blob/master/torchvision/models/alexnet.py

https://pytorch.apachecn.org/docs/1.0/onnx.html

In [3]:
import torch

# Trace-based only

class LoopModel(torch.nn.Module):
    def forward(self, x, y):
        for i in range(y):
            x = x + i
        return x

model = LoopModel()

#offical example
# pass
#dummy_input = torch.ones(2, 3, dtype=torch.long)
#loop_count = torch.tensor(5, dtype=torch.long)
# my test
#also work , two parameters
dummy_input = torch.ones(2, 5, dtype=torch.long)
loop_count = torch.tensor(5, dtype=torch.long)


torch.onnx.export(model, (dummy_input, loop_count), 'loop.onnx', verbose=True)

graph(%0 : Long(2:5, 5:1, requires_grad=0, device=cpu)):
  %2 : Long(requires_grad=0, device=cpu) = onnx::Constant[value={0}]()
  %3 : Long(2:5, 5:1, requires_grad=0, device=cpu) = onnx::Add(%0, %2)
  %4 : Long(requires_grad=0, device=cpu) = onnx::Constant[value={1}]()
  %5 : Long(2:5, 5:1, requires_grad=0, device=cpu) = onnx::Add(%3, %4)
  %6 : Long(requires_grad=0, device=cpu) = onnx::Constant[value={2}]()
  %7 : Long(2:5, 5:1, requires_grad=0, device=cpu) = onnx::Add(%5, %6)
  %8 : Long(requires_grad=0, device=cpu) = onnx::Constant[value={3}]()
  %9 : Long(2:5, 5:1, requires_grad=0, device=cpu) = onnx::Add(%7, %8)
  %10 : Long(requires_grad=0, device=cpu) = onnx::Constant[value={4}]()
  %11 : Long(2:5, 5:1, requires_grad=0, device=cpu) = onnx::Add(%9, %10)
  return (%11)



https://pytorch.org/docs/stable/onnx.html#example-end-to-end-alexnet-from-pytorch-to-onnx
scrip-based exporter

In [5]:
# Mixing tracing and scripting

@torch.jit.script
def loop(x, y):
    for i in range(int(y)):
        x = x + i
    return x

class LoopModel2(torch.nn.Module):
    def forward(self, x, y):
        return loop(x, y)

model = LoopModel2()
dummy_input = torch.ones(2, 3, dtype=torch.long)
loop_count = torch.tensor(5, dtype=torch.long)
torch.onnx.export(model, (dummy_input, loop_count), 'loop.onnx', verbose=True,
                  input_names=['input_data', 'loop_range'])

graph(%input_data : Long(2:3, 3:1, requires_grad=0, device=cpu),
      %loop_range : Long(requires_grad=0, device=cpu),
      %10 : Bool(requires_grad=0, device=cpu)):
  %2 : Long(requires_grad=0, device=cpu) = onnx::Constant[value={1}]()
  %4 : Long(2:3, 3:1, requires_grad=0, device=cpu) = onnx::Loop(%loop_range, %10, %input_data) # <ipython-input-5-e5e16e1b7555>:5:4
    block0(%i.1 : Long(device=cpu), %cond : bool, %x.6 : Long(2:3, 3:1, requires_grad=0, device=cpu)):
      %8 : LongTensor = onnx::Add(%x.6, %i.1) # <ipython-input-5-e5e16e1b7555>:6:12
      %9 : bool = onnx::Cast[to=9](%2)
      -> (%9, %8)
  return (%4)



apply_chunking_to_forward test

In [32]:
import torch
from torch.utils.data import TensorDataset, DataLoader
from torch.utils.data.distributed import DistributedSampler

from transformers import DistilBertTokenizer
from transformers import DistilBertForSequenceClassification, AdamW, DistilBertConfig
from transformers import get_linear_schedule_with_warmup

#enc = BertTokenizer.from_pretrained("bert-base-uncased")
enc = DistilBertTokenizer.from_pretrained('distilbert-base-uncased', do_lower_case=True)

# Tokenizing input text
text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
#text = "[CLS] Who was Jim Henson ?"
tokenized_text = enc.tokenize(text)

# for deubg
print("tokenized_text: {}".format(tokenized_text))

# Masking one of the input tokens
masked_index = 8
tokenized_text[masked_index] = '[MASK]'
indexed_tokens = enc.convert_tokens_to_ids(tokenized_text)

print("indexed_tokens: {}".format(indexed_tokens))
segments_ids = [0]

# Creating a dummy input
# but you need to move tensors to GPU
#https://github.com/huggingface/transformers/issues/227
# discuss convertion
#https://discuss.pytorch.org/t/best-way-to-convert-a-list-to-a-tensor/59949/2
#torch.tensor
#tokens_tensor = torch.tensor([indexed_tokens])
#segments_tensors = torch.tensor([segments_ids])
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("create input for device: {}".format(device))
tokens_tensor = torch.tensor([indexed_tokens]).to(device)
segments_tensors = torch.tensor([segments_ids]).to(device)
dummy_input = tokens_tensor


# for debug
print("tokens_tensor shape for chunk: {}".format(tokens_tensor[0].shape[0])) 
for token_tensor in tokens_tensor:
    print("token_tensor shape for chunk: {}".format(token_tensor.shape[0]))


# for deubg
# 14 tokens for output
print("tokens_tensor shape: {}".format(tokens_tensor.shape))
print("segments_tensor shape: {}".format(segments_tensors.shape))

print("tokens_tensor: {}".format(tokens_tensor))
print("segments_tensor: {}".format(segments_tensors))


# Initializing the model with the torchscript flag
# Flag set to True even though it is not necessary as this model does not have an LM Head.
#config = BertConfig(vocab_size_or_config_json_file=32000, hidden_size=768,
#    num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072, torchscript=True)

# Instantiating the model
#model = BertModel(config)

# The model needs to be in evaluation mode
#model.eval()

# If you are instantiating the model with `from_pretrained` you can also easily set the TorchScript flag
#model = BertModel.from_pretrained("bert-base-uncased", torchscript=True)
# classfication only  0 and 1 so set ut to 2
num_labels = 2
model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=num_labels,
                                                            output_attentions=False, output_hidden_states=False)




torch.onnx.export(model, dummy_input, 'traced_distill_bert.onnx', verbose=True)





Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

In [None]:
alex net
https://medium.com/@whyaitchyou/alexnet-%E6%9E%B6%E6%A7%8B%E6%A6%82%E8%BF%B0-988113c06b4b
https://zhuanlan.zhihu.com/p/51387600
https://github.com/onnx/tutorials/blob/master/tutorials/PytorchOnnxExport.ipynb
https://github.com/pytorch/vision/blob/master/torchvision/models/alexnet.py

In [None]:
#using torchscript to output pytroch model
# original example

#output to onnx
#https://huggingface.co/transformers/serialization.html


In [16]:
from transformers import BertModel, BertTokenizer, BertConfig
import torch

enc = BertTokenizer.from_pretrained("bert-base-uncased")

# Tokenizing input text
text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
tokenized_text = enc.tokenize(text)

# for deubg
print("tokenized_text: {}".format(tokenized_text))

# Masking one of the input tokens
masked_index = 8
tokenized_text[masked_index] = '[MASK]'
indexed_tokens = enc.convert_tokens_to_ids(tokenized_text)
segments_ids = [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1]

# Creating a dummy input
tokens_tensor = torch.tensor([indexed_tokens])
segments_tensors = torch.tensor([segments_ids])
dummy_input = [tokens_tensor, segments_tensors]

# for deubg
# 14 tokens for output
print("tokens_tensor: {}".format(tokens_tensor.shape))
print("segments_tensor: {}".format(segments_tensors.shape))

# Initializing the model with the torchscript flag
# Flag set to True even though it is not necessary as this model does not have an LM Head.
config = BertConfig(vocab_size_or_config_json_file=32000, hidden_size=768,
    num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072, torchscript=True)

# Instantiating the model
#soruce code
#https://huggingface.co/transformers/_modules/transformers/models/bert/modeling_bert.html#BertModel
# but do not find @torch.jit.script_nmethod.... why it can works
model = BertModel(config)

# The model needs to be in evaluation mode
model.eval()

# If you are instantiating the model with `from_pretrained` you can also easily set the TorchScript flag
model = BertModel.from_pretrained("bert-base-uncased", torchscript=True)

# Creating the trace
#https://pytorch.org/docs/stable/generated/torch.jit.trace.html
traced_model = torch.jit.trace(model, [tokens_tensor, segments_tensors])

print(traced_model.graph)
# if want to want to download, uncomment it 
#torch.jit.save(traced_model, "traced_bert.pt")

tokenized_text: ['[CLS]', 'who', 'was', 'jim', 'henson', '?', '[SEP]', 'jim', 'henson', 'was', 'a', 'puppet', '##eer', '[SEP]']
tokens_tensor: torch.Size([1, 14])
segments_tensor: torch.Size([1, 14])
graph(%self.1 : __torch__.transformers.modeling_bert.___torch_mangle_1668.BertModel,
      %input_ids : Long(1:14, 14:1, requires_grad=0, device=cpu),
      %attention_mask.1 : Long(1:14, 14:1, requires_grad=0, device=cpu)):
  %3393 : __torch__.transformers.modeling_bert.___torch_mangle_1667.BertPooler = prim::GetAttr[name="pooler"](%self.1)
  %3388 : __torch__.transformers.modeling_bert.___torch_mangle_1664.BertEncoder = prim::GetAttr[name="encoder"](%self.1)
  %2979 : __torch__.transformers.modeling_bert.___torch_mangle_1458.BertEmbeddings = prim::GetAttr[name="embeddings"](%self.1)
  %634 : int = prim::Constant[value=0]() # C:\Users\chiecha.REDMOND\Miniconda3\envs\azureml\lib\site-packages\transformers\modeling_bert.py:795:0
  %635 : int = aten::size(%input_ids, %634) # C:\Users\chiecha

In [None]:
#using torchscript to output
# change to distillation bert

#output to onnx
#https://huggingface.co/transformers/serialization.html

In [20]:
from transformers import DistilBertTokenizer
from transformers import DistilBertForSequenceClassification, AdamW, DistilBertConfig
import torch


#enc = BertTokenizer.from_pretrained("bert-base-uncased")
enc = DistilBertTokenizer.from_pretrained('distilbert-base-uncased', do_lower_case=True)

# Tokenizing input text
text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
tokenized_text = enc.tokenize(text)

# for deubg
print("tokenized_text: {}".format(tokenized_text))

# Masking one of the input tokens
masked_index = 8
tokenized_text[masked_index] = '[MASK]'
indexed_tokens = enc.convert_tokens_to_ids(tokenized_text)
segments_ids = [0]

# Creating a dummy input
tokens_tensor = torch.tensor([indexed_tokens])
segments_tensors = torch.tensor([segments_ids])
dummy_input = [tokens_tensor, segments_tensors]

# for deubg
# 14 tokens for output
print("tokens_tensor shape: {}".format(tokens_tensor.shape))
print("segments_tensor shape: {}".format(segments_tensors.shape))

print("tokens_tensor: {}".format(tokens_tensor.shape))
print("segments_tensor: {}".format(segments_tensors.shape))


# Initializing the model with the torchscript flag
# Flag set to True even though it is not necessary as this model does not have an LM Head.
#config = BertConfig(vocab_size_or_config_json_file=32000, hidden_size=768,
#    num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072, torchscript=True)

# Instantiating the model
#model = BertModel(config)

# The model needs to be in evaluation mode
#model.eval()

# If you are instantiating the model with `from_pretrained` you can also easily set the TorchScript flag
#model = BertModel.from_pretrained("bert-base-uncased", torchscript=True)
# classfication only  0 and 1 so set ut to 2
num_labels = 2
#model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=num_labels,
#                                                            output_attentions=False, output_hidden_states=False)

model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=num_labels,
                                                            output_attentions=False, output_hidden_states=False,torchscript=True)


#torch.onnx.export(model, dummy_input, 'traced_distill_bert.onnx', verbose=True)

# Creating the trace
traced_model = torch.jit.trace(model, tokens_tensor)
print(traced_model.graph)

# if want to want to download, uncomment it 
#torch.jit.save(traced_model, "traced_distill_bert.pt")



tokenized_text: ['[CLS]', 'who', 'was', 'jim', 'henson', '?', '[SEP]', 'jim', 'henson', 'was', 'a', 'puppet', '##eer', '[SEP]']
tokens_tensor: torch.Size([1, 14])
segments_tensor: torch.Size([1, 1])
Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weigh

#remove torchscript
# change to distillation bert
#output to onnx
# this one seems to work but this should be cpu version
# if want , follow this code to move to gpu version to test
# https://github.com/huggingface/transformers/issues/227
#https://huggingface.co/transformers/serialization.html

In [23]:
from transformers import DistilBertTokenizer
from transformers import DistilBertForSequenceClassification, AdamW, DistilBertConfig
import torch


#enc = BertTokenizer.from_pretrained("bert-base-uncased")
enc = DistilBertTokenizer.from_pretrained('distilbert-base-uncased', do_lower_case=True)

# Tokenizing input text
text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
tokenized_text = enc.tokenize(text)

# for deubg
print("tokenized_text: {}".format(tokenized_text))

# Masking one of the input tokens
masked_index = 8
tokenized_text[masked_index] = '[MASK]'
indexed_tokens = enc.convert_tokens_to_ids(tokenized_text)
segments_ids = [0]

# Creating a dummy input
tokens_tensor = torch.tensor([indexed_tokens])
segments_tensors = torch.tensor([segments_ids])
dummy_input = tokens_tensor

# for deubg
# 14 tokens for output
print("tokens_tensor shape: {}".format(tokens_tensor.shape))
print("segments_tensor shape: {}".format(segments_tensors.shape))

print("tokens_tensor: {}".format(tokens_tensor))
print("segments_tensor: {}".format(segments_tensors))


# Initializing the model with the torchscript flag
# Flag set to True even though it is not necessary as this model does not have an LM Head.
#config = BertConfig(vocab_size_or_config_json_file=32000, hidden_size=768,
#    num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072, torchscript=True)

# Instantiating the model
#model = BertModel(config)

# The model needs to be in evaluation mode
#model.eval()

# If you are instantiating the model with `from_pretrained` you can also easily set the TorchScript flag
#model = BertModel.from_pretrained("bert-base-uncased", torchscript=True)
# classfication only  0 and 1 so set ut to 2
num_labels = 2
model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=num_labels,
                                                            output_attentions=False, output_hidden_states=False)




torch.onnx.export(model, dummy_input, 'traced_distill_bert.onnx', verbose=True)


# if want to want to download, uncomment it 
#torch.jit.save(traced_model, "traced_distill_bert.pt")

tokenized_text: ['[CLS]', 'who', 'was', 'jim', 'henson', '?', '[SEP]', 'jim', 'henson', 'was', 'a', 'puppet', '##eer', '[SEP]']
tokens_tensor shape: torch.Size([1, 14])
segments_tensor shape: torch.Size([1, 1])
tokens_tensor: tensor([[  101,  2040,  2001,  3958, 27227,  1029,   102,  3958,   103,  2001,
          1037, 13997, 11510,   102]])
segments_tensor: tensor([[0]])
Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification fr

#remove torchscript
# change to distillation bert
#output to onnx
# gpu version
# https://github.com/huggingface/transformers/issues/227
#https://huggingface.co/transformers/serialization.html

In [31]:
from transformers import DistilBertTokenizer
from transformers import DistilBertForSequenceClassification, AdamW, DistilBertConfig
import torch


#enc = BertTokenizer.from_pretrained("bert-base-uncased")
enc = DistilBertTokenizer.from_pretrained('distilbert-base-uncased', do_lower_case=True)

# Tokenizing input text
text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
#text = "[CLS] Who was Jim Henson ?"
tokenized_text = enc.tokenize(text)

# for deubg
print("tokenized_text: {}".format(tokenized_text))

# Masking one of the input tokens
masked_index = 8
tokenized_text[masked_index] = '[MASK]'
indexed_tokens = enc.convert_tokens_to_ids(tokenized_text)

print("indexed_tokens: {}".format(indexed_tokens))
segments_ids = [0]

# Creating a dummy input
# but you need to move tensors to GPU
#https://github.com/huggingface/transformers/issues/227
# discuss convertion
#https://discuss.pytorch.org/t/best-way-to-convert-a-list-to-a-tensor/59949/2
#torch.tensor
#tokens_tensor = torch.tensor([indexed_tokens])
#segments_tensors = torch.tensor([segments_ids])
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("create input for device: {}".format(device))
tokens_tensor = torch.tensor([indexed_tokens]).to(device)
segments_tensors = torch.tensor([segments_ids]).to(device)
dummy_input = tokens_tensor


# for debug
print("tokens_tensor shape for chunk: {}".format(tokens_tensor[0].shape[0])) 
for token_tensor in tokens_tensor:
    print("token_tensor shape for chunk: {}".format(token_tensor.shape[0]))


# for deubg
# 14 tokens for output
print("tokens_tensor shape: {}".format(tokens_tensor.shape))
print("segments_tensor shape: {}".format(segments_tensors.shape))

print("tokens_tensor: {}".format(tokens_tensor))
print("segments_tensor: {}".format(segments_tensors))


# Initializing the model with the torchscript flag
# Flag set to True even though it is not necessary as this model does not have an LM Head.
#config = BertConfig(vocab_size_or_config_json_file=32000, hidden_size=768,
#    num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072, torchscript=True)

# Instantiating the model
#model = BertModel(config)

# The model needs to be in evaluation mode
#model.eval()

# If you are instantiating the model with `from_pretrained` you can also easily set the TorchScript flag
#model = BertModel.from_pretrained("bert-base-uncased", torchscript=True)
# classfication only  0 and 1 so set ut to 2
num_labels = 2
model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=num_labels,
                                                            output_attentions=False, output_hidden_states=False)




torch.onnx.export(model, dummy_input, 'traced_distill_bert.onnx', verbose=True)


# if want to want to download, uncomment it 
#torch.jit.save(traced_model, "traced_distill_bert.pt")

tokenized_text: ['[CLS]', 'who', 'was', 'jim', 'henson', '?', '[SEP]', 'jim', 'henson', 'was', 'a', 'puppet', '##eer', '[SEP]']
indexed_tokens: [101, 2040, 2001, 3958, 27227, 1029, 102, 3958, 103, 2001, 1037, 13997, 11510, 102]
create input for device: cpu
tokens_tensor shape for chunk: 14
token_tensor shape for chunk: 14
tokens_tensor shape: torch.Size([1, 14])
segments_tensor shape: torch.Size([1, 1])
tokens_tensor: tensor([[  101,  2040,  2001,  3958, 27227,  1029,   102,  3958,   103,  2001,
          1037, 13997, 11510,   102]])
segments_tensor: tensor([[0]])
Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with a

apply_chunking_to_forward() test