## Set Up

In [1]:
!pip install sagemaker ipywidgets --upgrade --quiet

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
aiobotocore 2.0.1 requires botocore<1.22.9,>=1.22.8, but you have botocore 1.26.10 which is incompatible.[0m[31m
You should consider upgrading via the '/home/ec2-user/anaconda3/envs/python3/bin/python -m pip install --upgrade pip' command.[0m[33m
[0m

In [2]:
import sagemaker, boto3, json
from sagemaker import get_execution_role

# SageMaker SDK session object.
sess = sagemaker.Session()

# Region configured for the default boto3 session; used to locate the JumpStart bucket.
aws_region = boto3.Session().region_name

# Execution role handed to the SageMaker model created later in the notebook.
aws_role = get_execution_role()

## Select A Model

In [3]:
from ipywidgets import Dropdown

# Model pre-selected in the dropdown when the manifest lists it.
default_model_id = "tensorflow-icembedding-bit-m-r101x1-ilsvrc2012-featurevector-1"

# Download the JumpStart model manifest for the current region.
boto3.client("s3").download_file(
    f"jumpstart-cache-prod-{aws_region}", "models_manifest.json", "models_manifest.json"
)
with open("models_manifest.json", "rb") as json_file:
    model_list = json.load(json_file)

# Keep only the Image Embedding models. dict.fromkeys de-duplicates in a single pass
# while preserving manifest order, and the generator keeps its loop variable out of
# the notebook namespace (so it cannot clobber `model` / `model_id` used later).
image_embedding_models = list(
    dict.fromkeys(
        entry["model_id"] for entry in model_list if "-icembedding-" in entry["model_id"]
    )
)
if not image_embedding_models:
    raise ValueError(
        f"No image embedding models found in the JumpStart manifest for region {aws_region}"
    )

# Display the model ids in a dropdown to select a model for inference.
# Dropdown rejects a `value` that is not one of `options`, so fall back to the first
# available model when the preferred default is absent from this manifest.
model_dropdown = Dropdown(
    options=image_embedding_models,
    value=(
        default_model_id
        if default_model_id in image_embedding_models
        else image_embedding_models[0]
    ),
    description="Select a model",
    style={"description_width": "initial"},
    layout={"width": "max-content"},
)

In [4]:
# Render the dropdown; the entry selected here is what the following cell reads
# through model_dropdown.value.
display(model_dropdown)

Dropdown(description='Select a model', layout=Layout(width='max-content'), options=('tensorflow-icembedding-bi…

In [5]:
# The dropdown's current selection is the model to deploy.
model_id = model_dropdown.value
# "*" requests the latest available version of that model.
model_version = "*"

print(f'model id: {model_id}, model version: {model_version}=====')

model id: tensorflow-icembedding-efficientnet-b0-featurevector-1, model version: *=====


## Retrieve JumpStart Artifacts & Deploy an Endpoint

In [6]:
from sagemaker import image_uris, model_uris, script_uris, hyperparameters
from sagemaker.model import Model
from sagemaker.predictor import Predictor
from sagemaker.utils import name_from_base
from sagemaker.serverless import serverless_inference_config


# Instance type passed to the image lookup below (and to deploy later on).
inference_instance_type = "ml.p2.xlarge"

# Name derived from the selected model id; also used as the SageMaker model's name.
endpoint_name = name_from_base(f"jumpstart-example-infer-{model_id}")

# Model artifact: the model and its parameters.
model_uri = model_uris.retrieve(
    model_scope="inference",
    model_id=model_id,
    model_version=model_version,
)

# Inference script bundle: dependencies plus the scripts for model loading and
# inference handling.
deploy_source_uri = script_uris.retrieve(
    script_scope="inference",
    model_id=model_id,
    model_version=model_version,
)

# Inference docker container image for the selected model.
deploy_image_uri = image_uris.retrieve(
    region=None,
    framework=None,  # inferred automatically from model_id
    model_id=model_id,
    model_version=model_version,
    image_scope="inference",
    instance_type=inference_instance_type,
)

# Assemble the SageMaker model from the three artifacts retrieved above.
model = Model(
    name=endpoint_name,
    role=aws_role,
    image_uri=deploy_image_uri,
    model_data=model_uri,
    source_dir=deploy_source_uri,
    entry_point="inference.py",  # entry point file located inside deploy_source_uri
    predictor_cls=Predictor,
)

In [7]:
# Serverless endpoint sizing: memory size in MB and the maximum number of
# concurrent invocations.
severless_config = serverless_inference_config.ServerlessInferenceConfig(
    max_concurrency=10,
    memory_size_in_mb=1024,
)

In [8]:
# Deploy the model behind an endpoint and keep the returned predictor so that
# inference can be run through the SageMaker API.
# NOTE(review): both an instance type/count and a serverless config are supplied here;
# confirm which of them the installed SDK version honours.
model_predictor = model.deploy(
    endpoint_name=endpoint_name,
    serverless_inference_config=severless_config,
    predictor_cls=Predictor,
    instance_type=inference_instance_type,
    initial_instance_count=1,
)

---------------!

## Query the Endpoint

In [9]:
def query(model_predictor, image_file_name):
    """Send the raw bytes of an image file to the predictor and return its response.

    Args:
        model_predictor: Object exposing ``predict(data, initial_args)``.
        image_file_name: Path of the image file to read in binary mode.

    Returns:
        Whatever ``model_predictor.predict`` returns for the image bytes.
    """
    # Request headers: the payload is an image, the reply should be JSON.
    request_args = {
        "ContentType": "application/x-image",
        "Accept": "application/json",
    }

    with open(image_file_name, "rb") as image_file:
        image_bytes = image_file.read()

    return model_predictor.predict(image_bytes, request_args)


def parse_response(query_response):
    """Decode the JSON response and return the value stored under "embedding".

    Args:
        query_response: JSON document (str or bytes) returned by the endpoint.

    Returns:
        The value of the "embedding" key of the decoded document.
    """
    return json.loads(query_response)["embedding"]

In [12]:
# Query the endpoint with one sample image and report the size of its embedding.
img_name = "images/0016fde3-0910-4cc1-8ef6-90e15f271073.jpg"

query_response = query(model_predictor, img_name)
embedding = parse_response(query_response)

print(f'length of the embedding vector = {len(embedding)}')

length of the embedding vector = 1280


In [13]:
import glob
import html

# IPython.display is the public import path for the rich-display helpers.
from IPython.display import HTML, display

# Query the endpoint for every file under images/ and show each image next to a
# summary of its embedding. Sorting makes the display order reproducible, since
# glob returns paths in filesystem-dependent order.
for img_name in sorted(glob.glob('images/*')):

    query_response = query(model_predictor, img_name)
    embedding = parse_response(query_response)
    first_5element_embeddings = "{" + ", ".join(str(value) for value in embedding[:5]) + "}"

    # Escape and quote the path so file names containing spaces, quotes or markup
    # characters cannot break the generated <img> tag.
    img_attr = html.escape(img_name, quote=True)
    display(
        HTML(
            f'<img src="{img_attr}" alt="{img_attr}" align="left" style="width: 250px;"/>'
            f"<figcaption>First-5 elements of the feature vector (embedding) are: {first_5element_embeddings}</figcaption>"
            f"<figcaption>Total length of the feature vector (embedding) is: {len(embedding)}</figcaption>"
        )
    )