# Convert the TensorFlow-Trained BERT Model to a PyTorch Model

## Install TensorFlow and Transformers

In [1]:
!pip install -q tensorflow==2.1.0 --upgrade --ignore-installed 
!pip install -q transformers==2.8.0 

[31mERROR: After October 2020 you may experience errors when installing or updating packages. This is because pip will change the way that it resolves dependency conflicts.

We recommend you use --use-feature=2020-resolver to test your packages with the new resolver before it becomes the default.

spyder 4.1.2 requires pyqt5<5.13; python_version >= "3", which is not installed.
spyder 4.1.2 requires pyqtwebengine<5.13; python_version >= "3", which is not installed.
pytest-astropy 0.8.0 requires pytest-cov>=2.0, which is not installed.
pytest-astropy 0.8.0 requires pytest-filter-subpackage>=0.1, which is not installed.
jupyterlab 1.2.6 requires jupyterlab_server~=1.0.0, but you'll have jupyterlab-server 1.1.0 which is incompatible.
awswrangler 1.2.0 requires numpy~=1.18.0, but you'll have numpy 1.19.1 which is incompatible.
awscli 1.18.119 requires rsa<=4.5.0,>=3.1.2, but you'll have rsa 4.6 which is incompatible.[0m


### _It's OK if you see ERRORs ^^ above ^^.  Please ignore._

## Install PyTorch and TorchServe

TorchServe is a flexible and easy-to-use tool for serving PyTorch models:
* https://github.com/pytorch/serve
* https://github.com/pytorch/serve/blob/master/docs/README.md


In [2]:
!pip install -q torch==1.5.0 --upgrade --ignore-installed
!pip install -q torchserve==0.1.1
!pip install -q torch-model-archiver==0.1.1

[31mERROR: After October 2020 you may experience errors when installing or updating packages. This is because pip will change the way that it resolves dependency conflicts.

We recommend you use --use-feature=2020-resolver to test your packages with the new resolver before it becomes the default.

awswrangler 1.2.0 requires numpy~=1.18.0, but you'll have numpy 1.19.1 which is incompatible.[0m


### _It's OK if you see ERRORs ^^ above ^^.  Please ignore._

# Restart the Kernel

In [None]:
# Import HTML from the public `IPython.display` API rather than the internal
# `IPython.core.display` module path.
from IPython.display import HTML

# Emit a script that asks the notebook front end to restart the kernel so the
# freshly installed packages are picked up. The HTML object must be the last
# expression of the cell for it to be rendered (and the script executed).
# NOTE(review): this relies on the front end exposing a `Jupyter` JavaScript
# object — confirm it works in the notebook UI you are using.
HTML("<script>Jupyter.notebook.kernel.restart()</script>")

In [1]:
import boto3
import pandas as pd
import sagemaker

# SageMaker session, its default S3 bucket, and the execution role.
sess = sagemaker.Session()
bucket = sess.default_bucket()
role = sagemaker.get_execution_role()

# Region of a default boto3 session, reused for the low-level SageMaker client.
region = boto3.Session().region_name
sm = boto3.Session().client(
    service_name='sagemaker',
    region_name=region,
)

# PRE-REQUISITE: 

## You need to have successfully run the previous notebooks in the `TRAINING` section before you continue with this notebook. 

In [2]:
# Restore `training_job_name`, persisted with %store by an earlier notebook.
%store -r training_job_name

In [3]:
# Prerequisite check: `training_job_name` is only defined if the preceding
# `%store -r` actually restored it. Referencing the bare name raises
# NameError otherwise, which selects the STOP message.
banner = '+++++++++++++++++++++++++++++++'
try:
    training_job_name
except NameError:
    status = 'STOP: Please run the previous notebooks in the TRAIN section before you continue.'
else:
    status = 'SUCCESS. You can continue.'

print(banner)
print(status)
print(banner)

+++++++++++++++++++++++++++++++
SUCCESS. You can continue.
+++++++++++++++++++++++++++++++


In [4]:
# Show which training job's artifacts the rest of this notebook will use.
print('Previous training_job_name: {}'.format(training_job_name))

Previous training_job_name: tensorflow-training-2020-08-22-19-35-37-636


# Download the TensorFlow-Trained Model Locally

In [5]:
# Local directory that receives the downloaded and extracted model artifacts.
models_dir = './models'

In [6]:
# Download the model and output artifacts from AWS S3
!aws s3 cp s3://$bucket/$training_job_name/output/model.tar.gz $models_dir/model.tar.gz

download: s3://sagemaker-us-west-2-250107111215/tensorflow-training-2020-08-22-19-35-37-636/output/model.tar.gz to models/model.tar.gz


In [7]:
import tarfile
import pickle as pkl

# Unpack the downloaded archive into `models_dir`, next to the archive itself.
# The `with` block closes the archive even if extraction raises part-way.
# NOTE(review): extractall() trusts the member paths stored in the archive;
# only use it on archives from a source you control.
with tarfile.open('{}/model.tar.gz'.format(models_dir)) as tar:
    tar.extractall(path=models_dir)

In [8]:
# List the extracted contents of the models directory.
!ls -al $models_dir

total 487052
drwxrwxr-x  7 ec2-user ec2-user      4096 Aug 22 19:57 .
drwxrwxr-x 11 ec2-user ec2-user      4096 Aug 22 19:57 ..
drwxr-xr-x  2 ec2-user ec2-user      4096 Aug 22 19:44 code
drwxr-xr-x  2 ec2-user ec2-user      4096 Aug 22 19:44 metrics
-rw-rw-r--  1 ec2-user ec2-user 498712368 Aug 22 19:45 model.tar.gz
drwxr-xr-x  2 ec2-user ec2-user      4096 Aug 22 19:38 tensorboard
drwxr-xr-x  3 ec2-user ec2-user      4096 Aug 22 19:38 tensorflow
drwxr-xr-x  3 ec2-user ec2-user      4096 Aug 22 19:38 transformers


In [9]:
# Sub-directory of the extracted archive that is passed to `from_pretrained`
# further down in this notebook.
transformer_model_dir = '{}/transformers/fine-tuned/'.format(models_dir)

# List what that directory contains.
!ls -al $transformer_model_dir

total 261692
drwxr-xr-x 2 ec2-user ec2-user      4096 Aug 22 19:43 .
drwxr-xr-x 3 ec2-user ec2-user      4096 Aug 22 19:38 ..
-rw-r--r-- 1 ec2-user ec2-user      1358 Aug 22 19:43 config.json
-rw-r--r-- 1 ec2-user ec2-user 267959068 Aug 22 19:43 tf_model.h5


In [10]:
cat $transformer_model_dir/config.json

{
  "_num_labels": 5,
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "bad_words_ids": null,
  "bos_token_id": null,
  "decoder_start_token_id": null,
  "dim": 768,
  "do_sample": false,
  "dropout": 0.1,
  "early_stopping": false,
  "eos_token_id": null,
  "finetuning_task": null,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4"
  },
  "initializer_range": 0.02,
  "is_decoder": false,
  "is_encoder_decoder": false,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4
  },
  "length_penalty": 1.0,
  "max_length": 20,
  "max_position_embeddings": 512,
  "min_length": 0,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "no_repeat_ngram_size": 0,
  "num_beams": 1,
  "num_return_sequences": 1,
  "output_attentions": false,
  "output

# Convert the TensorFlow Model to PyTorch

In [11]:
from transformers import DistilBertForSequenceClassification # PyTorch version

# Mappings between class index (0-4) and label value (1-5), passed to
# `from_pretrained` as `id2label` / `label2id`.
# NOTE(review): labels 1-5 are presumably review star ratings — confirm against
# the training notebook.
class_index_to_label = {0: 1, 1: 2, 2: 3, 3: 4, 4: 5}
label_to_class_index = {1: 0, 2: 1, 3: 2, 4: 3, 5: 4}

# `from_tf=True` tells Transformers to read TensorFlow weights from
# `transformer_model_dir` and load them into the PyTorch model class.
loaded_pytorch_model = DistilBertForSequenceClassification.from_pretrained(
    transformer_model_dir,
    id2label=class_index_to_label,
    label2id=label_to_class_index,
    from_tf=True,
)

In [12]:
# Confirm the loaded object is the PyTorch model class, then dump its module structure.
print(type(loaded_pytorch_model))
print(loaded_pytorch_model)

<class 'transformers.modeling_distilbert.DistilBertForSequenceClassification'>
DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0): TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
   

# Predict with the PyTorch Model

In [13]:
from transformers import DistilBertTokenizer

# Device index handed to the inference pipeline (CPU: -1, GPU: 0).
inference_device = -1

# Tokenizer loaded from the 'distilbert-base-uncased' checkpoint.
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

print('inference_device {}'.format(inference_device))

inference_device -1


In [14]:
from transformers import TextClassificationPipeline

# Wrap the freshly converted model and the tokenizer in a text-classification
# pipeline using the PyTorch ('pt') framework on the selected device.
inference_pipeline = TextClassificationPipeline(
    model=loaded_pytorch_model,
    tokenizer=tokenizer,
    framework='pt',
    device=inference_device,
)

In [15]:
# Run one sample review through the pipeline and print it next to the prediction.
# NOTE(review): the recorded output for this cell shows a score of ~0.23 over
# 5 labels, which is close to uniform — verify that the classification head
# weights were actually carried over by the TF -> PyTorch conversion.
review = """I loved it!  I will recommend this to everyone."""
print(review, inference_pipeline(review))

I loved it!  I will recommend this to everyone. [{'label': 1, 'score': 0.229841}]


In [16]:
# Second sample review, printed next to its prediction.
review = """Really bad.  I hope they don't make this anymore."""
print(review, inference_pipeline(review))

Really bad.  I hope they don't make this anymore. [{'label': 2, 'score': 0.22936572}]


# Save The Transformer/PyTorch Model with `.save_pretrained()`

In [17]:
# Output directory for the converted PyTorch model. Derived from `models_dir`
# (like `transformer_model_dir`) so all local artifacts stay under one root
# instead of repeating the './models' literal.
pytorch_models_dir = '{}/transformers/pytorch'.format(models_dir)

In [18]:
# Create the output directory; `-p` makes this a no-op if it already exists.
!mkdir -p $pytorch_models_dir

In [19]:
# Write the converted model into `pytorch_models_dir`.
loaded_pytorch_model.save_pretrained(pytorch_models_dir)

In [20]:
# List the files written by save_pretrained().
!ls -al $pytorch_models_dir

total 261588
drwxrwxr-x 2 ec2-user ec2-user      4096 Aug 22 19:57 .
drwxr-xr-x 4 ec2-user ec2-user      4096 Aug 22 19:57 ..
-rw-rw-r-- 1 ec2-user ec2-user      1302 Aug 22 19:57 config.json
-rw-rw-r-- 1 ec2-user ec2-user 267852933 Aug 22 19:57 pytorch_model.bin


# Load and Predict

In [21]:
# Load the model back from the directory it was just saved to (no `from_tf` this time).
pytorch_model = DistilBertForSequenceClassification.from_pretrained(pytorch_models_dir)

In [22]:
from transformers import TextClassificationPipeline

# Rebuild the text-classification pipeline, this time around the model that was
# reloaded from disk, using the PyTorch ('pt') framework on the selected device.
inference_pipeline = TextClassificationPipeline(
    model=pytorch_model,
    tokenizer=tokenizer,
    framework='pt',
    device=inference_device,
)

In [23]:
# Run the same sample review through the pipeline built on the reloaded model.
review = """I loved it!  I will recommend this to everyone."""
print(review, inference_pipeline(review))

I loved it!  I will recommend this to everyone. [{'label': 1, 'score': 0.229841}]


# Upload Transformer/PyTorch Model to S3

In [24]:
# File name of the PyTorch weights; persisted with %store later in this notebook.
transformer_pytorch_model_name = 'pytorch_model.bin'

In [25]:
# S3 prefix in the session's bucket that will receive the converted model files.
transformer_pytorch_model_s3_uri = 's3://{}/models/transformer-pytorch/'.format(bucket)
print(transformer_pytorch_model_s3_uri)

s3://sagemaker-us-west-2-250107111215/models/transformer-pytorch/


In [26]:
# Upload every file in the local PyTorch model directory to the S3 prefix.
!aws s3 cp --recursive $pytorch_models_dir $transformer_pytorch_model_s3_uri

upload: models/transformers/pytorch/config.json to s3://sagemaker-us-west-2-250107111215/models/transformer-pytorch/config.json
upload: models/transformers/pytorch/pytorch_model.bin to s3://sagemaker-us-west-2-250107111215/models/transformer-pytorch/pytorch_model.bin


In [27]:
# List the S3 prefix to confirm the upload.
!aws s3 ls $transformer_pytorch_model_s3_uri

2020-08-22 19:57:18       1302 config.json
2020-08-22 19:57:18  267852933 pytorch_model.bin


In [28]:
# Persist the weights file name with %store so it can be restored via `%store -r`.
%store transformer_pytorch_model_name

Stored 'transformer_pytorch_model_name' (str)


In [29]:
# Persist the S3 prefix of the uploaded model with %store as well.
%store transformer_pytorch_model_s3_uri

Stored 'transformer_pytorch_model_s3_uri' (str)


In [30]:
# With no arguments, %store lists all persisted variables and their values.
%store

Stored variables and their in-db values:
autopilot_endpoint_name                          -> 'automl-dm-ep-22-16-47-12'
balance_dataset                                  -> True
comprehend_endpoint_arn                          -> 'arn:aws:comprehend:us-west-2:250107111215:documen
df_dataset_metrics                               ->         entity                   instance         
experiment_name                                  -> 'Amazon-Customer-Reviews-BERT-Experiment-159812492
header_train_s3_uri                              -> 's3://sagemaker-us-west-2-250107111215/data/amazon
max_seq_length                                   -> 64
noheader_train_s3_uri                            -> 's3://sagemaker-us-west-2-250107111215/data/amazon
prepare_trial_component_name                     -> 'TrialComponent-2020-08-22-193529-qhjz'
processed_test_data_s3_uri                       -> 's3://sagemaker-us-west-2-250107111215/sagemaker-s
processed_train_data_s3_uri                      -> 's3://

In [None]:
%%javascript
// Save a checkpoint of the notebook, then delete its session.
// (The %%javascript cell magic must remain the first line of the cell.)
Jupyter.notebook.save_checkpoint();
Jupyter.notebook.session.delete();