### Import necessary library

In [1]:
import time
import json
import base64
import requests

import boto3
import sagemaker



sagemaker.config INFO - Not applying SDK defaults from location: C:\ProgramData\sagemaker\sagemaker\config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: C:\Users\tochi\AppData\Local\sagemaker\sagemaker\config.yaml


### Sagemaker setup

In [2]:
sm_client = boto3.client(service_name="sagemaker")
runtime = boto3.client(service_name="sagemaker-runtime")
account = boto3.client("sts").get_caller_identity().get("Account")

sess = sagemaker.Session()
boto_session = boto3.session.Session()
region = boto_session.region_name

role = f"arn:aws:iam::{account}:role/service-role/AmazonSageMakerServiceCatalogProductsUseRole"

### Download CLIP-ViT-B-32 model

In [4]:
from huggingface_hub import snapshot_download

snapshot_download(repo_id="sentence-transformers/clip-ViT-B-32", local_dir="./CLIP-ViT-B-32")

### Package and Upload CLIP-ViT-B-32 model to s3 bucket

In [40]:
%cd  CLIP-ViT-B-32
!tar zcvf model.tar.gz *

In [41]:
bucket = "aws-portfolio-projects"
model_artifact = sagemaker.Session().upload_data("model.tar.gz", bucket=bucket, key_prefix="CLIP-ViT-B-32")
print("S3 Model Path:", model_artifact)

### Create the Model

In [5]:
# Model artifact in S3 
model_data = "s3://aws-portfolio-projects/CLIP-ViT-B-32/model.tar.gz"

# Generate unique name for the model 
current_time = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
model_name = "normalized-b32-model-" + current_time

# Hugging Face DLC container URI (for HF + PyTorch)
container_image_uri = "763104351884.dkr.ecr.us-east-2.amazonaws.com/huggingface-pytorch-inference:1.13.1-transformers4.26.0-cpu-py39-ubuntu20.04"

# Create the model
create_model_response = sm_client.create_model(
    ModelName=model_name,
    ExecutionRoleArn=role,
    PrimaryContainer={
        "Image": container_image_uri,
        "Mode": "SingleModel",
        "ModelDataUrl": model_data,
        "Environment": {
            "HF_TASK": "feature-extraction"
        }
    }
)

print("Created Model:", create_model_response["ModelArn"])

Created Model: arn:aws:sagemaker:us-east-2:930627915954:model/normalized-b32-model-2025-08-27-10-38-25


### Create an Endpoint Configuration with Serverless Inference

In [6]:
# Generate unique name for the endpoint config name
current_time = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
endpoint_config_name = "normalized-b32-model-endpoint-config-" + current_time

create_endpoint_config_response = sm_client.create_endpoint_config(
    EndpointConfigName=endpoint_config_name,
    ProductionVariants=[
        {
            "VariantName": "AllTrafficVariant",
            "ModelName": model_name,
            "ServerlessConfig": {
                "MemorySizeInMB": 5120,        
                "MaxConcurrency": 5,
                "ProvisionedConcurrency": 1
            }
        }
    ]
)

print("Created Endpoint Config:", create_endpoint_config_response["EndpointConfigArn"])

Created Endpoint Config: arn:aws:sagemaker:us-east-2:930627915954:endpoint-config/normalized-b32-model-endpoint-config-2025-08-27-10-40-15


### Create or Update the Endpoint

In [7]:
endpoint_name = "normalized-b32-model"

# Check if endpoint exists
try:
    sm_client.describe_endpoint(EndpointName=endpoint_name)
    # If it exists, update
    response = sm_client.update_endpoint(
        EndpointName=endpoint_name,
        EndpointConfigName=endpoint_config_name
    )
    print("Updated Endpoint:", response["EndpointArn"])
    
except sm_client.exceptions.ClientError as e:
    if "Could not find endpoint" in str(e):
        # Create the endpoint
        response = sm_client.create_endpoint(
            EndpointName=endpoint_name,
            EndpointConfigName=endpoint_config_name
        )
        print("Created Endpoint:", response["EndpointArn"])
    else:
        raise e

Created Endpoint: arn:aws:sagemaker:us-east-2:930627915954:endpoint/normalized-b32-model


### Wait for the endpoint to be in service

In [8]:
print("Waiting for endpoint to be in service...")

while True:
    resp = sm_client.describe_endpoint(EndpointName=endpoint_name)
    status = resp["EndpointStatus"]
    print("Endpoint status:", status)
    if status == "InService":
        print("✅ Endpoint is ready!")
        break
    elif status == "Failed":
        raise Exception("Endpoint creation failed")
    time.sleep(10)

Waiting for endpoint to be in service...
Endpoint status: Creating
Endpoint status: Creating
Endpoint status: Creating
Endpoint status: Creating
Endpoint status: Creating
Endpoint status: Creating
Endpoint status: Creating
Endpoint status: Creating
Endpoint status: Creating
Endpoint status: Creating
Endpoint status: Creating
Endpoint status: Creating
Endpoint status: Creating
Endpoint status: Creating
Endpoint status: Creating
Endpoint status: Creating
Endpoint status: Creating
Endpoint status: Creating
Endpoint status: Creating
Endpoint status: Creating
Endpoint status: Creating
Endpoint status: Creating
Endpoint status: Creating
Endpoint status: Creating
Endpoint status: Creating
Endpoint status: Creating
Endpoint status: Creating
Endpoint status: Creating
Endpoint status: Creating
Endpoint status: InService
✅ Endpoint is ready!


### Test deployed model

In [9]:
pwd

'C:\\Users\\tochi\\OneDrive\\Documents\\AWS_Projects\\sagemaker-cost-optimization'

In [8]:
%cd  ..

In [9]:
%cd  sagemaker-cost-optimization

In [10]:
with open("aws_login.png", "rb") as image_file:
    encoded_image = base64.b64encode(image_file.read()).decode('utf-8')


input_data = {
    "inputs": {
        "image": encoded_image 
    }
}

# Convert to JSON string
payload = json.dumps(input_data)

# Invoke the endpoint
response = runtime.invoke_endpoint(
    EndpointName="normalized-b32-model",
    ContentType="application/json",
    Body=payload
)

# Read the raw response body (bytes -> string)
response = response["Body"].read().decode()

# Print response
print("Raw response from SageMaker endpoint:")
print(response)

Raw response from SageMaker endpoint:
{"object": "list", "data": [{"object": "embedding", "index": 0, "embedding": [0.0066422351736321, 0.004895574351542614, -0.018706036851959033, 0.004662472853023341, 0.04510598618566244, -0.013993906502128825, -9.325842974767581e-05, -0.00012819223574102892, 0.009970531988663314, 0.011693910556755336, 0.022426834809983958, 0.012249128650215647, -0.035945628896813527, 0.00021407623818882568, -0.029337305160487985, 0.04781863083922653, 0.009619789789457416, 0.013744609590783474, 0.025819747195330277, 0.00017121583952888717, 0.0868025640456482, 0.03205825530144295, -0.002890173008613393, -0.009452101193794495, 0.0038173492489438424, 0.022394205287845154, -0.0223957611288003, 0.03160075753651388, -0.0007961750070793659, 0.023058468851576756, -0.003921940729050102, 0.045612086006306805, -0.02794219896841739, 0.03558561707772098, 0.10422789646745112, -0.02490249123850219, -0.009725057096965634, -0.005157083607834415, -0.00835640020113253, -0.2080916398755