In [None]:
!pip show torch

In [None]:
!pip install 'sagemaker[local]' --upgrade

In [17]:
import os
import numpy as np
import pandas as pd
import sagemaker
from sagemaker.local import LocalSession

# S3 bucket and key prefix used for every upload in this notebook.
bucket = "xxxx"
prefix = "rucha/sagemaker/DEMO-pytorch-bert"

# Work through a LocalSession with the `local_code` option switched on
# (use sagemaker.Session() here instead to target the managed service).
sagemaker_session = LocalSession()
sagemaker_session.config = dict(local=dict(local_code=True))

# Execution role passed to the estimator and the model defined later on.
role = sagemaker.get_execution_role()

In [None]:
# Download data

if not os.path.exists("./cola_public_1.1.zip"):
    !curl -o ./cola_public_1.1.zip https://nyu-mll.github.io/CoLA/cola_public_1.1.zip
if not os.path.exists("./cola_public/"):
    !unzip cola_public_1.1.zip

In [6]:
# Get sentences and labels
# Read the in-domain training file and keep only the two columns we need:
# file column 1 (the label) and file column 3 (the sentence itself).

df = pd.read_csv(
    "./cola_public/raw/in_domain_train.tsv",
    names=["label", "sentence"],
    usecols=[1, 3],
    header=None,
    sep="\t",
)
labels = df["label"].values
sentences = df["sentence"].values

In [None]:
# Number of sentences read from the training file.
len(sentences)

In [None]:
# Peek at rows 20-24: the sentences first, then their labels.
preview = slice(20, 25)
for column in (sentences, labels):
    print(column[preview])

In [8]:
from sklearn.model_selection import train_test_split

# Fix the seed so that Restart & Run All reproduces the same train/test split
# (and therefore the same CSV files) instead of a fresh random one on every run.
train, test = train_test_split(df, random_state=42)
train.to_csv("./cola_public/train.csv", index=False)
test.to_csv("./cola_public/test.csv", index=False)

In [9]:
# Upload both CSV splits under the same bucket/prefix (train first, then test)
# and keep the values returned by upload_data for the training job.
inputs_train, inputs_test = [
    sagemaker_session.upload_data(csv_path, bucket=bucket, key_prefix=prefix)
    for csv_path in ("./cola_public/train.csv", "./cola_public/test.csv")
]

In [None]:
# Run training
# Training script
# Display code/train_deploy.py, the script the estimator uses as its entry point.

!pygmentize code/train_deploy.py

In [None]:
# Train on Amazon SageMaker
from sagemaker.pytorch import PyTorch

# One local container runs code/train_deploy.py for a single epoch.
# NOTE(review): no sagemaker_session is passed here, unlike the PyTorchModel
# created later in this notebook -- confirm that this is intentional.
estimator = PyTorch(
    source_dir="code",
    entry_point="train_deploy.py",
    role=role,
    py_version="py3",
    framework_version="1.5.0",
    instance_type="local",
    instance_count=1,  # the script supports distributed training on GPU instances only
    hyperparameters=dict(epochs=1, num_labels=2, backend="gloo"),
)

# Channel names mapped to the uploaded train/test CSV locations.
channels = {"training": inputs_train, "testing": inputs_test}
estimator.fit(channels)

In [19]:
# Host
# Deploy the trained model to one local endpoint. No accelerator is attached:
# accelerator_type='local_sagemaker_notebook' is deliberately left out here.

predictor = estimator.deploy(
    instance_type="local",
    initial_instance_count=1,
)


Attaching to tmpspwlmnz0_algo-1-903iv_1
[36malgo-1-903iv_1  |[0m Collecting regex
[36malgo-1-903iv_1  |[0m   Downloading regex-2020.11.13-cp36-cp36m-manylinux2014_x86_64.whl (723 kB)
[K     |████████████████████████████████| 723 kB 17.5 MB/s eta 0:00:01
[36malgo-1-903iv_1  |[0m [?25hCollecting sentencepiece
[36malgo-1-903iv_1  |[0m   Downloading sentencepiece-0.1.94-cp36-cp36m-manylinux2014_x86_64.whl (1.1 MB)
[K     |████████████████████████████████| 1.1 MB 77.7 MB/s eta 0:00:01
[36malgo-1-903iv_1  |[0m [?25hCollecting sacremoses
[36malgo-1-903iv_1  |[0m   Downloading sacremoses-0.0.43.tar.gz (883 kB)
[K     |████████████████████████████████| 883 kB 62.8 MB/s eta 0:00:01
[36malgo-1-903iv_1  |[0m [?25hCollecting transformers==2.3.0
[36malgo-1-903iv_1  |[0m   Downloading transformers-2.3.0-py3-none-any.whl (447 kB)
[K     |████████████████████████████████| 447 kB 60.3 MB/s eta 0:00:01
[36malgo-1-903iv_1  |[0m Building wheels for collected packages: sacremoses
[

In [20]:
# Exchange JSON with the endpoint in both directions. Serializer objects are
# used here in place of the json_serializer/json_deserializer helpers and the
# content_type/accept attributes.
predictor.deserializer = sagemaker.deserializers.JSONDeserializer()
predictor.serializer = sagemaker.serializers.JSONSerializer()

In [21]:
# Send one sentence to the endpoint and print the index of the largest value
# along axis 1 of the response.
result = predictor.predict("Somebody just left - guess who.")
predicted_labels = np.argmax(result, axis=1)
print(predicted_labels)

[36malgo-1-903iv_1  |[0m 2020-12-17 09:23:54,573 [INFO ] W-9004-model com.amazonaws.ml.mms.wlm.WorkerThread - Backend response time: 180
[36malgo-1-903iv_1  |[0m 2020-12-17 09:23:54,573 [INFO ] W-9004-model ACCESS_LOG - /172.18.0.1:45918 "POST /invocations HTTP/1.1" 200 183
[1]


In [None]:
# Tear down the endpoint created by estimator.deploy now that prediction is done.
predictor.delete_endpoint()

In [None]:
# Show where the training job stored the model artifact; a later cell copies it from there.
estimator.model_data

In [25]:
!rm -r model

In [None]:
%%sh -s $estimator.model_data
# Copy the trained model archive (location passed as the first argument) into
# ./model and unpack it there.
# Stop at the first failing command so tar never runs on a missing archive.
set -eu
# -p: do not fail when ./model already exists.
mkdir -p model
aws s3 cp "$1" model/
tar xvzf model/model.tar.gz --directory ./model

In [None]:
# The following code converts our model into the TorchScript format:
!pip install transformers==2.11.0
import subprocess
import torch
from transformers import BertForSequenceClassification

model_torchScript = BertForSequenceClassification.from_pretrained("model/", torchscript=True)
device = "cpu"
for_jit_trace_input_ids = [0] * 64
for_jit_trace_attention_masks = [0] * 64
for_jit_trace_input = torch.tensor([for_jit_trace_input_ids])
for_jit_trace_masks = torch.tensor([for_jit_trace_input_ids])

traced_model = torch.jit.trace(
    model_torchScript, [for_jit_trace_input.to(device), for_jit_trace_masks.to(device)]
)
torch.jit.save(traced_model, "traced_bert.pt")

subprocess.call(["tar", "-czvf", "traced_bert.tar.gz", "traced_bert.pt"])

In [None]:
# Display code/deploy_ei.py, the entry-point script used when deploying the traced model.
!pygmentize code/deploy_ei.py

In [None]:
# Inspect the execution role that is passed to PyTorchModel.
role

In [36]:
# Inspect the session object that is passed to PyTorchModel.
sagemaker_session

<sagemaker.local.local_session.LocalSession at 0x7f4e2e77c128>

In [None]:
# Upload the TorchScript archive to S3 and define the model that will be deployed
# with Elastic Inference. The accelerator is attached through `accelerator_type`;
# the local values below stand in for 'ml.m5.large' (instance type) and
# 'ml.eia2.xlarge' (accelerator type).

from sagemaker.pytorch import PyTorchModel

instance_type = "local"
accelerator_type = "local_sagemaker_notebook"

# Archive holding the traced TorchScript model.
tar_filename = "traced_bert.tar.gz"

print("Upload tarball to S3")
# upload_data returns the location of the uploaded archive.
model_data = sagemaker_session.upload_data(path=tar_filename, bucket=bucket, key_prefix=prefix)

# Endpoint name: fixed stem plus instance and accelerator type, with every "."
# and "_" stripped out.
endpoint_name = "-".join(["bert-ei-traced", instance_type, accelerator_type]).translate(
    str.maketrans("", "", "._")
)

pytorch = PyTorchModel(
    entry_point="deploy_ei.py",
    source_dir="code",
    model_data=model_data,
    role=role,
    py_version="py3",
    framework_version="1.3.1",
    sagemaker_session=sagemaker_session,
)


In [38]:
# Deploy the traced model to a single instance under the derived endpoint name,
# attaching the configured accelerator type; wait=True is passed to deploy.
predictor = pytorch.deploy(
    endpoint_name=endpoint_name,
    instance_type=instance_type,
    accelerator_type=accelerator_type,
    initial_instance_count=1,
    wait=True,
)


Attaching to tmp1nu51x07_algo-1-1h732_1
[36malgo-1-1h732_1  |[0m Collecting regex
[36malgo-1-1h732_1  |[0m   Downloading regex-2020.11.13-cp36-cp36m-manylinux2014_x86_64.whl (723 kB)
[K     |████████████████████████████████| 723 kB 19.9 MB/s eta 0:00:01
[36malgo-1-1h732_1  |[0m [?25hCollecting sentencepiece
[36malgo-1-1h732_1  |[0m   Downloading sentencepiece-0.1.94-cp36-cp36m-manylinux2014_x86_64.whl (1.1 MB)
[K     |████████████████████████████████| 1.1 MB 74.9 MB/s eta 0:00:01
[36malgo-1-1h732_1  |[0m [?25hCollecting sacremoses
[36malgo-1-1h732_1  |[0m   Downloading sacremoses-0.0.43.tar.gz (883 kB)
[K     |████████████████████████████████| 883 kB 83.0 MB/s eta 0:00:01
[36malgo-1-1h732_1  |[0m [?25hCollecting transformers==2.3.0
[36malgo-1-1h732_1  |[0m   Downloading transformers-2.3.0-py3-none-any.whl (447 kB)
[K     |████████████████████████████████| 447 kB 61.0 MB/s eta 0:00:01
[36malgo-1-1h732_1  |[0m Collecting click
[36malgo-1-1h732_1  |[0m   Downloa

In [None]:
# Query the traced-model endpoint with the same sentence and print the index of
# the largest value along axis 1 of the response.
result = predictor.predict("Somebody just left - guess who.")
traced_prediction = np.argmax(result, axis=1)
print(traced_prediction)

[36malgo-1-1h732_1  |[0m 2020-12-17 09:54:04,616 [WARN ] pool-2-thread-1 com.amazonaws.ml.mms.metrics.MetricCollector - worker pid is not available yet.
[36malgo-1-1h732_1  |[0m 2020-12-17 09:54:06,430 [INFO ] W-9000-model com.amazonaws.ml.mms.wlm.WorkerThread - Connecting to: /home/model-server/tmp/.mms.sock.9000
[36malgo-1-1h732_1  |[0m 2020-12-17 09:54:06,431 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Connection accepted: /home/model-server/tmp/.mms.sock.9000.
[36malgo-1-1h732_1  |[0m 2020-12-17 09:54:06,845 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - PyTorch version 1.3.1 available.
[36malgo-1-1h732_1  |[0m 2020-12-17 09:54:07,135 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Loading BERT tokenizer...
[36malgo-1-1h732_1  |[0m 2020-12-17 09:54:07,135 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - loading file https://s3.amazonaws.com/models.huggingface.co/bert/bert-bas

In [None]:
# Remove the endpoint created for the traced model.
predictor.delete_endpoint()