In [1]:
%load_ext dotenv
%dotenv

In [2]:
import os
print(os.environ.get("AWS_PROFILE"))

administrator_access


In [3]:
import boto3, json, sagemaker, time
import torch
from sagemaker import get_execution_role
from transformers import AutoProcessor

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
sess = boto3.Session()
sm = sess.client("sagemaker")
sm_client = boto3.client(service_name="sagemaker")
sagemaker_session = sagemaker.Session(boto_session=sess)
role = get_execution_role()
client = boto3.client("sagemaker-runtime")

In [8]:
triton_image_uri = "{account_id}.dkr.ecr.{region}.amazonaws.com/sagemaker-tritonserver:22.05-py3".format(
    account_id=os.environ["AWS_ACCOUNT_ID"], region="us-east-1"
)

In [10]:
# Our LLM, Llama, was downloaded via https://github.com/facebookresearch/llama?fbclid=IwAR3DJH6IFhcRGQJGRjUFFV1aXMjGJxXqMy5O_A9Hs1aNrl3Tdm5bUIHfSrA
# since it was easier to run Meta's provided shell script than download from Huggingface. The 7billion parameter model was chosen.

In [9]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

cpu


In [13]:
from transformers import BlipConfig, BlipForConditionalGeneration, AutoProcessor

processor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

file_path = "models/blip/1/model.pt"

torch.save(model, file_path)
# model = torch.load(file_path)

Downloading (…)okenizer_config.json: 100%|██████████| 506/506 [00:00<00:00, 389kB/s]
Downloading (…)solve/main/vocab.txt: 100%|██████████| 232k/232k [00:00<00:00, 4.95MB/s]
Downloading (…)/main/tokenizer.json: 100%|██████████| 711k/711k [00:00<00:00, 20.9MB/s]
Downloading (…)cial_tokens_map.json: 100%|██████████| 125/125 [00:00<00:00, 463kB/s]


In [18]:
# Archive model and upload to S3
! tar -C models -czf blip.tar.gz blip
sagemaker_session.upload_data(path="blip.tar.gz", key_prefix="models")

In [12]:
model_uri = "s3://sagemaker-us-east-1-295763662460/models/blip.tar.gz"

In [1]:
! aws ecr get-login-password --region us-east-1
! docker login --username AWS --password-stdin 295763662460.dkr.ecr.us-east-1.amazonaws.com


Unknown options: login,--username,AWS,--password-stdin,295763662460.dkr.ecr.us-east-1.amazonaws.com


In [None]:
! docker build -t inference . 
! docker push 295763662460.dkr.ecr.us-east-1.amazonaws.com/sagemaker-tritonserver_22.05-py3:0.0.0 

In [13]:
sm_model_name = "blip-pt-" + time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime())

container = {
    "Image": triton_image_uri,
    "ModelDataUrl": model_uri,
    "Environment": {"SAGEMAKER_TRITON_DEFAULT_MODEL_NAME": "blip"},
}

create_model_response = sm_client.create_model(
    ModelName=sm_model_name, ExecutionRoleArn=role, PrimaryContainer=container
)

print("Model Arn: " + create_model_response["ModelArn"])

ClientError: An error occurred (ValidationException) when calling the CreateModel operation: The execution role ARN "arn:aws:iam::295763662460:role/aws-reserved/sso.amazonaws.com/AWSReservedSSO_llm_img_captioning_bdddeec369a566a3" is invalid. Please ensure that the role exists and that its trust relationship policy allows the action "sts:AssumeRole" for the service principal "sagemaker.amazonaws.com".

In [20]:
## This part is commented out since the model is too big for 

# bs = 1
# width = 256
# height = 256
# channels = 3
# dummy_inputs = torch.rand(bs, channels, width, height).to(device)
# model = model.eval()
# model.to(device)

# traced_model = torch.jit.trace(model, dummy_inputs)

## Huggingface integration with Sagemaker

In [126]:
import boto3

iam = boto3.client('iam')
role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']

print(f"sagemaker role arn: {role}")

sagemaker role arn: arn:aws:iam::295763662460:role/sagemaker_execution_role


In [145]:
from sagemaker.huggingface import HuggingFaceModel
from sagemaker.serializers import DataSerializer, IdentitySerializer
from sagemaker.serverless import ServerlessInferenceConfig

# Hub Model configuration. https://huggingface.co/models
hub = {
  'HF_MODEL_ID':'yesidcanoc/image-captioning-swin-tiny-distilgpt2', # model_id from hf.co/models
  'HF_TASK':'image-to-text' # NLP task you want to use for predictions
}

# create Hugging Face Model Class
# TODO: Make this a realtime model
huggingface_model = HuggingFaceModel(
   env=hub,
   role=role, # iam role with permissions to create an Endpoint
   transformers_version="4.26", # transformers version used
   pytorch_version="1.13", # pytorch version used
   py_version="py39", # python version of the DLC
)

In [146]:
content_type = "image/png"
serializer = IdentitySerializer(content_type=content_type)

serverless_config = ServerlessInferenceConfig(max_concurrency=4, memory_size_in_mb=3072)

# deploy model to SageMaker Inference
predictor = huggingface_model.deploy(
   serverless_inference_config=serverless_config,
   serializer=serializer
)

----!

In [147]:
from PIL import Image

In [151]:
# Trying with byte representation of images
# with open("data/img1_medium.png", "rb") as data_file:
#   image_data = data_file.read()
img_url = "https://huggingface.co/spaces/impira/docquery/resolve/2359223c1837a7587402bda0f2643382a6eefeab/invoice.png"
img = Image.open("data/img1_medium.png")
with open("data/img2.png", "rb") as image:
    f = image.read()
    res = predictor.predict(data=bytearray(f))

ModelError: An error occurred (ModelError) when calling the InvokeEndpoint operation: Received client error (400) from model with message "{
  "code": 400,
  "type": "InternalServerException",
  "message": "\u0027str\u0027 object has no attribute \u0027decode\u0027"
}
". See https://us-east-1.console.aws.amazon.com/cloudwatch/home?region=us-east-1#logEventViewer:group=/aws/sagemaker/Endpoints/huggingface-pytorch-inference-2023-09-29-21-02-47-788 in account 295763662460 for more information.

In [152]:
from transformers import pipeline

pipe = pipeline("image-to-text", model="yesidcanoc/image-captioning-swin-tiny-distilgpt2")

Downloading (…)lve/main/config.json: 100%|██████████| 78.4k/78.4k [00:00<00:00, 11.4MB/s]
Downloading pytorch_model.bin: 100%|██████████| 507M/507M [00:10<00:00, 47.3MB/s] 
Downloading (…)rocessor_config.json: 100%|██████████| 241/241 [00:00<00:00, 205kB/s]
Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.


In [154]:
pipe(img)



[{'generated_text': 'A mountain view with trees and mountains in the background.        '}]

In [77]:
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
url_image = Image.open(requests.get(url, stream=True).raw)
local_image = Image.open("data/img2.png")
text = "A picture of"

In [78]:
data = {
  "inputs": {
    "text": text,
    "images": url_image
  }
}

In [79]:
res = predictor.predict(data=local_image)

ValueError: Object of type <class 'PIL.PngImagePlugin.PngImageFile'> is not Data serializable.

In [None]:
print(local_image.size)