In [18]:
%load_ext dotenv
%dotenv

The dotenv extension is already loaded. To reload it, use:
  %reload_ext dotenv


## Huggingface integration with Sagemaker

In [19]:
import sagemaker
import boto3
sess = sagemaker.Session()

iam = boto3.client('iam')
role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']

print(f"sagemaker role arn: {role}")

sagemaker role arn: arn:aws:iam::295763662460:role/sagemaker_execution_role


# LEGACY CODE: Deploying with HuggingFace container

In [20]:
model_id = 'yesidcanoc/image-captioning-swin-tiny-distilgpt2'
model_name=model_id.split("/")[-1]
s3_location=f"s3://{sess.default_bucket()}/custom_inference/{model_name}/model.tar.gz"
print(s3_location)

s3://sagemaker-us-east-1-295763662460/custom_inference/image-captioning-swin-tiny-distilgpt2/model.tar.gz


In [21]:
!tar zcvf model.tar.gz src

a src
a src/.DS_Store
a src/code
a src/code/__init__.py
a src/code/inference.py


In [22]:
!aws s3 cp model.tar.gz $s3_location

upload: ./model.tar.gz to s3://sagemaker-us-east-1-295763662460/custom_inference/image-captioning-swin-tiny-distilgpt2/model.tar.gz


In [27]:
from sagemaker.huggingface import HuggingFaceModel
from sagemaker.serializers import IdentitySerializer
from sagemaker.serverless import ServerlessInferenceConfig

# Hub Model configuration. https://huggingface.co/models

# The "HF_MODEL_ID" env var should cause the huggingface container to look
# for the model within HF's repository, rather than within our custom
# inference artifact

# This will allow us to override inference.py without needing to download
# the model locally and then re-upload it
env_vars = {
  'HF_MODEL_ID':model_id, # model_id from hf.co/models
  'HF_TASK':'image-to-text', # NLP task you want to use for predictions
  'DEVICE': 'cpu',
  'S3_MODEL_LOCATION': s3_location
}

# create Hugging Face Model Class
huggingface_model = HuggingFaceModel(
  #  env=env_vars,
   model_data=s3_location,
   role=role, # iam role with permissions to create an Endpoint
   transformers_version="4.26", # transformers version used
   pytorch_version="1.13", # pytorch version used
   py_version="py39", # python version of the DLC
)

In [28]:
content_type = "image/png"
serializer = IdentitySerializer(content_type=content_type)

serverless_config = ServerlessInferenceConfig(max_concurrency=4, memory_size_in_mb=3072)

# deploy model to SageMaker Serverless Inference
predictor = huggingface_model.deploy(
   serverless_inference_config=serverless_config,
   serializer=serializer
)

----!

In [29]:
from PIL import Image

In [30]:
# Trying with byte representation of images
# with open("data/img1_medium.png", "rb") as data_file:
#   image_data = data_file.read()
img_url = "https://huggingface.co/spaces/impira/docquery/resolve/2359223c1837a7587402bda0f2643382a6eefeab/invoice.png"
img = Image.open("data/img1_medium.png")
with open("data/img2.png", "rb") as image:
    f = image.read()
    res = predictor.predict(data=bytearray(f))

ModelError: An error occurred (ModelError) when calling the InvokeEndpoint operation: Received client error (400) from model with message "{
  "code": 400,
  "type": "InternalServerException",
  "message": "(\"You need to define one of the following [\u0027audio-classification\u0027, \u0027automatic-speech-recognition\u0027, \u0027feature-extraction\u0027, \u0027text-classification\u0027, \u0027token-classification\u0027, \u0027question-answering\u0027, \u0027table-question-answering\u0027, \u0027visual-question-answering\u0027, \u0027document-question-answering\u0027, \u0027fill-mask\u0027, \u0027summarization\u0027, \u0027translation\u0027, \u0027text2text-generation\u0027, \u0027text-generation\u0027, \u0027zero-shot-classification\u0027, \u0027zero-shot-image-classification\u0027, \u0027conversational\u0027, \u0027image-classification\u0027, \u0027image-segmentation\u0027, \u0027image-to-text\u0027, \u0027object-detection\u0027, \u0027zero-shot-object-detection\u0027, \u0027depth-estimation\u0027, \u0027video-classification\u0027] as env \u0027HF_TASK\u0027.\", 403)"
}
". See https://us-east-1.console.aws.amazon.com/cloudwatch/home?region=us-east-1#logEventViewer:group=/aws/sagemaker/Endpoints/huggingface-pytorch-inference-2023-10-09-21-01-35-947 in account 295763662460 for more information.

In [None]:
from transformers import pipeline

pipe = pipeline("image-to-text", model="yesidcanoc/image-captioning-swin-tiny-distilgpt2")

Downloading (…)lve/main/config.json: 100%|██████████| 78.4k/78.4k [00:00<00:00, 11.4MB/s]
Downloading pytorch_model.bin: 100%|██████████| 507M/507M [00:10<00:00, 47.3MB/s] 
Downloading (…)rocessor_config.json: 100%|██████████| 241/241 [00:00<00:00, 205kB/s]
Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.


In [None]:
pipe(img)



[{'generated_text': 'A mountain view with trees and mountains in the background.        '}]

In [None]:
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
url_image = Image.open(requests.get(url, stream=True).raw)
local_image = Image.open("data/img2.png")
text = "A picture of"

In [None]:
data = {
  "inputs": {
    "text": text,
    "images": url_image
  }
}

In [None]:
res = predictor.predict(data=local_image)

ValueError: Object of type <class 'PIL.PngImagePlugin.PngImageFile'> is not Data serializable.

In [None]:
print(local_image.size)