In [1]:
# https://github.com/huggingface/notebooks/blob/main/sagemaker/17_custom_inference_script/sagemaker-notebook.ipynb

In [2]:
%pip install sagemaker --upgrade
import sagemaker
print(f"sagemaker version: {sagemaker.__version__}")

Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
Collecting sagemaker
  Downloading sagemaker-2.111.0.tar.gz (577 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m577.4/577.4 KB[0m [31m46.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Collecting schema
  Downloading schema-0.7.5-py2.py3-none-any.whl (17 kB)
Collecting botocore<1.28.0,>=1.27.75
  Downloading botocore-1.27.89-py3-none-any.whl (9.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.2/9.2 MB[0m [31m31.0 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
Building wheels for collected packages: sagemaker
  Building wheel for sagemaker (setup.py) ... [?25ldone
[?25h  Created wheel for sagemaker: filename=sagemaker-2.111.0-py2.py3-none-any.whl size=793049 sha256=053abfae86ee9de438a78c1363a7c3c1cc84a581c779c77373a667903c288f47
  Stored in directory: /home/ec2-user/.cache/pip/wheels/45/89/ba/395399028fac032ce574184ddf



sagemaker version: 2.111.0


In [3]:
import sagemaker
import boto3

# Bucket used by the session; leaving this as None selects the session's
# default bucket below.
sagemaker_session_bucket = None
sess = sagemaker.Session()
if sess is not None and sagemaker_session_bucket is None:
    sagemaker_session_bucket = sess.default_bucket()

# Resolve the execution role ARN. If get_execution_role() raises ValueError,
# fall back to looking the role up by its name through IAM.
try:
    role = sagemaker.get_execution_role()
except ValueError:
    iam = boto3.client('iam')
    role_lookup = iam.get_role(RoleName='sagemaker_execution_role')
    role = role_lookup['Role']['Arn']

# Re-create the session so that it is bound to the bucket chosen above.
sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)

# Echo the resolved settings.
print(f"sagemaker role arn: {role}")
print(f"sagemaker bucket: {sess.default_bucket()}")
print(f"sagemaker session region: {sess.boto_region_name}")

sagemaker role arn: arn:aws:iam::700263173549:role/service-role/AmazonSageMaker-ExecutionRole-20221007T221519
sagemaker bucket: sagemaker-eu-central-1-700263173549
sagemaker session region: eu-central-1


In [4]:
!mkdir code

In [5]:
# Copy the local inference.py into the code/ staging directory; code/ is later
# copied into inference-model/ and packed into model.tar.gz.
!cp inference.py code/inference.py

In [6]:
%%writefile code/requirements.txt
git+https://github.com/facebookresearch/detectron2.git

Writing code/requirements.txt


In [55]:
# IMPORTANT: detectron2 should be installed from a pre-built wheel rather than built from git, see: https://discuss.huggingface.co/t/sagemaker-serverless-inference-for-layoutlmv2-model/14186

In [1]:
# Download the trained model files from S3 to disk: --recursive copies every
# object under the prefix into the local ./inference-model directory.
!aws s3 cp --recursive "s3://models-3it4j90/layoutxlm-2021-12-12T21-34-59/" ./inference-model

download: s3://models-3it4j90/layoutxlm-2021-12-12T21-34-59/training_args.bin to inference-model/training_args.bin
download: s3://models-3it4j90/layoutxlm-2021-12-12T21-34-59/config.json to inference-model/config.json
download: s3://models-3it4j90/layoutxlm-2021-12-12T21-34-59/pytorch_model.bin to inference-model/pytorch_model.bin


In [8]:
# Replace any previous inference-model/code with a fresh copy of code/, then
# drop the Jupyter .ipynb_checkpoints folder so it does not end up in the archive.
!rm -rf inference-model/code && cp -r code/ inference-model/code/ && rm -rf inference-model/code/.ipynb_checkpoints

In [9]:
# Pack the contents of inference-model/ (model files plus code/) into model.tar.gz.
# The old archive is deleted first so the `*` glob cannot pick it up and nest it
# inside the new one; running tar from inside the directory keeps paths relative.
!cd inference-model && rm -rf model.tar.gz && tar zcvf model.tar.gz *

code/
code/inference.py
code/requirements.txt
config.json
pytorch_model.bin
training_args.bin


In [10]:
# Upload the repacked archive to S3; this is the same URI that is passed as
# model_data when the model is created below.
!aws s3 cp inference-model/model.tar.gz "s3://models-3it4j90/layoutxlm-2021-12-12T21-34-59-inference/model.tar.gz"

upload: inference-model/model.tar.gz to s3://models-3it4j90/layoutxlm-2021-12-12T21-34-59-inference/model.tar.gz


In [11]:
from sagemaker.huggingface.model import HuggingFaceModel
from sagemaker.serverless import ServerlessInferenceConfig

# Model definition: the repacked archive on S3, the execution role, and the
# framework versions requested for the Hugging Face container.
model_settings = dict(
    model_data="s3://models-3it4j90/layoutxlm-2021-12-12T21-34-59-inference/model.tar.gz",
    role=role,
    transformers_version="4.17",
    pytorch_version="1.10",
    py_version='py38',
)
huggingface_model = HuggingFaceModel(**model_settings)

# option 1: without serverless
endpoint_settings = dict(
    initial_instance_count=1,
    instance_type="ml.t2.large",
)

# option 2: serverless (use instead of the instance settings above)
#endpoint_settings = dict(
#    serverless_inference_config=ServerlessInferenceConfig(
#        memory_size_in_mb=6144, # 6 GB; noted as only half of what it should be
#        max_concurrency=2,
#    ),
#)

# Create the endpoint and keep the predictor handle for later requests.
predictor = huggingface_model.deploy(endpoint_name="lolbalmodel", **endpoint_settings)

--------------!