# Configuring the environement

On a clean environment run the following script to set up proper neuron versions
```
./installation.sh
```

In [1]:
#!/usr/bin/env python

# USE conda_python3 environment!!!

import torch
import torch_neuron
import transformers
from transformers import BertTokenizer
from transformers import BertModel
import math
from transformers import AutoTokenizer, AutoModelForSequenceClassification

sentence1="If you set your goals ridiculously high and it's a failure, you will fail above everyone else's success."
sentence2="The greatest glory in living lies not in never falling, but in rising every time we fall."
sentence3="If you set your goals ridiculously high and it's a failure, you will fail above everyone else's success. The greatest glory in living lies not in never falling, but in rising every time we fall. If you set your goals ridiculously high and it's a failure, you will fail above everyone else's success."

tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')

encoded_sentence = tokenizer.encode_plus(sentence1, sentence3, max_length=128, pad_to_max_length=True, return_tensors="pt")

example_inputs = encoded_sentence['input_ids'], encoded_sentence['attention_mask'], encoded_sentence['token_type_ids']
model_neuron = torch.neuron.trace(model, example_inputs, compiler_args=['-O2'], verbose=1, compiler_workdir='./compile')

Truncation was not explicitely activated but `max_length` is provided a specific value, please use `truncation=True` to explicitely truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
  position_ids = self.position_ids[:, :seq_length]
  input_tensor.shape == tensor_shape for input_tensor in input_tensors
INFO:Neuron:Optimize = None
INFO:Neuron:Compiler args type is <class 'list'> value is ['-O2']


-- Use compile_from_neff function
-- Use create_runnable function


### Saving the model

In [None]:
model_neuron.save('neuron_compiled_bert_model.pt')

In [None]:
%%sh
tar -czvf model.tar.gz neuron_compiled_bert_model.pt
aws s3 cp model.tar.gz s3://inf1-compiled-bert-model/

In [41]:
import torch
import torch_neuron
import transformers
from transformers import BertTokenizer
from transformers import BertModel
import math
from transformers import AutoTokenizer, AutoModelForSequenceClassification

sentence1="If you set your goals ridiculously high and it's a failure, you will fail above everyone else's success."
sentence2="The greatest glory in living lies not in never falling, but in rising every time we fall."
sentence3="If you set your goals ridiculously high and it's a failure, you will fail above everyone else's success. The greatest glory in living lies not in never falling, but in rising every time we fall. If you set your goals ridiculously high and it's a failure, you will fail above everyone else's success."

sentence1="The animal didn't cross the street because it was too tired."

tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')
tokenize_sentence = tokenizer.tokenize(sentence1)

encoded_sentence = tokenizer.encode_plus(sentence1, sentence3, max_length=128, pad_to_max_length=True, return_tensors="pt")
example_inputs = encoded_sentence['input_ids'], encoded_sentence['attention_mask'], encoded_sentence['token_type_ids']


traced = torch.jit.trace( model, example_inputs )
torch._C._jit_pass_inline(traced.graph)
print( "=== Pre compile graph")
print( traced.graph )


Truncation was not explicitely activated but `max_length` is provided a specific value, please use `truncation=True` to explicitely truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


=== Pre compile graph
graph(%self.1 : __torch__.transformers.modeling_bert.___torch_mangle_1934.BertModel,
      %input_ids : Long(1, 128),
      %attention_mask.1 : Long(1, 128),
      %input.2 : Long(1, 128)):
  %3386 : __torch__.transformers.modeling_bert.___torch_mangle_1933.BertPooler = prim::GetAttr[name="pooler"](%self.1)
  %3381 : __torch__.transformers.modeling_bert.___torch_mangle_1930.BertEncoder = prim::GetAttr[name="encoder"](%self.1)
  %2972 : __torch__.transformers.modeling_bert.___torch_mangle_1724.BertEmbeddings = prim::GetAttr[name="embeddings"](%self.1)
  %641 : int = prim::Constant[value=0]() # /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/transformers/modeling_utils.py:258:0
  %642 : int = prim::Constant[value=0]() # /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/transformers/modeling_utils.py:258:0
  %643 : int = prim::Constant[value=9223372036854775807]() # /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/tr

In [42]:
model_neuron = torch.jit.load("neuron_compiled_bert_model.pt")
torch._C._jit_pass_inline(traced.graph)
traced = torch.jit.trace( model_neuron, example_inputs )
print( "=== Post compile graph")
print( traced.graph )

=== Pre compile graph
graph(%self : __torch__.torch_neuron.convert.AwsNeuronGraphModule,
      %tensor.1 : Tensor,
      %tensor0.1 : Tensor,
      %argument_3.1 : Tensor):
  %129 : Function = prim::Constant[name="neuron_function"]()
  %117 : Tensor = prim::Constant[value=<Tensor>]() # /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/torch_neuron/resolve_function.py:52:0
  %111 : Tensor = prim::Constant[value=<Tensor>]() # /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/torch_neuron/resolve_function.py:52:0
  %106 : Tensor = prim::Constant[value=<Tensor>]() # /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/torch_neuron/resolve_function.py:52:0
  %89 : Tensor = prim::Constant[value=<Tensor>]() # /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/torch_neuron/native_ops/aten.py:25:0
  %61 : Tensor = prim::Constant[value={-10000}]() # /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/torch_neuron/resolve_f

In [25]:
tokenize_sentence

['the',
 'animal',
 'didn',
 "'",
 't',
 'cross',
 'the',
 'street',
 'because',
 'it',
 'was',
 'too',
 'tired',
 '.']

In [26]:
encoded_sentence

tensor([[ 101, 1996, 4111, 2134, 1005, 1056, 2892, 1996, 2395, 2138, 2009, 2001,
         2205, 5458, 1012,  102,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0]])

In [27]:
encoded_sentence.shape


torch.Size([1, 128])

In [28]:
model = BertModel.from_pretrained('bert-base-uncased')
out = model(encoded_sentence,return_dict=True)

In [29]:
out.keys()

odict_keys(['last_hidden_state', 'pooler_output'])

In [30]:
out['last_hidden_state']

tensor([[[-0.5109,  0.4340,  0.3485,  ..., -0.4597,  0.4636, -0.7299],
         [-0.1760, -0.1786, -0.9261,  ...,  0.2803,  1.2572, -0.3424],
         [ 0.0323, -0.0284, -0.1806,  ..., -0.4487,  0.1431,  0.0424],
         ...,
         [ 0.3229, -0.2139,  0.8898,  ..., -0.8492, -0.0889, -1.1546],
         [ 0.2862, -0.0026,  0.8554,  ..., -0.7923, -0.2182, -1.1506],
         [ 0.2518,  0.1158,  0.7950,  ..., -0.7763, -0.2980, -1.3862]]],
       grad_fn=<NativeLayerNormBackward>)

In [31]:
out['last_hidden_state'].shape

torch.Size([1, 128, 768])