This notebook contains code to download the CLIP-ViT-B-32 model from Hugging Face and deploy it to a SageMaker serverless endpoint.

### Download  CLIP-ViT-B-32 model

In [1]:
from huggingface_hub import snapshot_download

snapshot_download(repo_id="sentence-transformers/clip-ViT-B-32", local_dir="./CLIP-ViT-B-32")


  from .autonotebook import tqdm as notebook_tqdm
Fetching 11 files: 100%|██████████████████████████████████████████████████| 11/11 [00:00<00:00, 733.03it/s]


'C:\\Users\\tochi\\OneDrive\\Documents\\Upwork_Job\\clip-embedding-api\\CLIP-ViT-B-32'

### Package and Upload CLIP-ViT-B-32 model to s3 bucket

In [20]:
%cd  CLIP-ViT-B-32
!tar zcvf model.tar.gz *

C:\Users\tochi\OneDrive\Documents\Upwork_Job\clip-embedding-api\CLIP-ViT-B-32


a .cache
a .gitattributes
a 0_CLIPModel
a code
a config_sentence_transformers.json
a model.tar.gz
a modules.json
a README.md
a code/.ipynb_checkpoints
a code/inference.py
a code/requirements.txt
a code/.ipynb_checkpoints/inference-checkpoint.py
a code/.ipynb_checkpoints/requirements-checkpoint.txt
a 0_CLIPModel/config.json
a 0_CLIPModel/merges.txt
a 0_CLIPModel/preprocessor_config.json
a 0_CLIPModel/pytorch_model.bin
a 0_CLIPModel/special_tokens_map.json
a 0_CLIPModel/tokenizer_config.json
a 0_CLIPModel/vocab.json
a .cache/huggingface
a .cache/huggingface/.gitignore
a .cache/huggingface/download
a .cache/huggingface/download/.gitattributes.metadata
a .cache/huggingface/download/0_CLIPModel
a .cache/huggingface/download/config_sentence_transformers.json.metadata
a .cache/huggingface/download/modules.json.metadata
a .cache/huggingface/download/README.md.metadata
a .cache/huggingface/download/0_CLIPModel/config.json.metadata
a .cache/huggingface/download/0_CLIPModel/merges.txt.metadata
a 

In [21]:
import sagemaker
import boto3

sess = sagemaker.Session()
account = boto3.client("sts").get_caller_identity().get("Account")
role = f"arn:aws:iam::{account}:role/service-role/AmazonSageMakerServiceCatalogProductsUseRole"

In [22]:
bucket = "huggingface-clip-models"
model_artifact = sagemaker.Session().upload_data("model.tar.gz", bucket=bucket, key_prefix="CLIP-ViT-B-32")
print("S3 Model Path:", model_artifact)

S3 Model Path: s3://huggingface-clip-models/ CLIP-ViT-B-32/model.tar.gz


### Create a Sagemaker Huggingface Model

In [23]:
from sagemaker.huggingface.model import HuggingFaceModel

# Create Hugging Face Model Class
huggingface_model = HuggingFaceModel(
   model_data = model_artifact,
   role=role,                                          
   transformers_version="4.26",                          
   pytorch_version="1.13",                              
   py_version='py39',
   env={"HF_TASK": "feature-extraction"}
)

In [24]:
from sagemaker.serverless import ServerlessInferenceConfig

# Define serverless configuration
serverless_config = ServerlessInferenceConfig(
    memory_size_in_mb=3072,
    max_concurrency=5       
)

In [25]:
predictor = huggingface_model.deploy(
    serverless_inference_config=serverless_config,
    endpoint_name="normalized-b32-model-new"
)

----!

### Test Deployed Endpoint

In [26]:
%cd  ..

C:\Users\tochi\OneDrive\Documents\Upwork_Job\clip-embedding-api


In [27]:
# Prepare base64 encoded image
import base64

# Load an image (make sure to use an RGB image like PNG or JPG)
with open("aws_login.png", "rb") as image_file:
    encoded_image = base64.b64encode(image_file.read()).decode('utf-8')

# Define payload with base64 image
data = {
  "inputs": {
    "image": encoded_image
  }
}

# Send prediction request to endpoint
response = predictor.predict(data)
print(len(response))
print(response)

512
[0.0066422351736321, 0.004895574351542614, -0.018706036851959033, 0.004662472853023341, 0.04510598618566244, -0.013993906502128825, -9.325842974767581e-05, -0.00012819223574102892, 0.009970531988663314, 0.011693910556755336, 0.022426834809983958, 0.012249128650215647, -0.035945628896813527, 0.00021407623818882568, -0.029337305160487985, 0.04781863083922653, 0.009619789789457416, 0.013744609590783474, 0.025819747195330277, 0.00017121583952888717, 0.0868025640456482, 0.03205825530144295, -0.002890173008613393, -0.009452101193794495, 0.0038173492489438424, 0.022394205287845154, -0.0223957611288003, 0.03160075753651388, -0.0007961750070793659, 0.023058468851576756, -0.003921940729050102, 0.045612086006306805, -0.02794219896841739, 0.03558561707772098, 0.10422789646745112, -0.02490249123850219, -0.009725057096965634, -0.005157083607834415, -0.00835640020113253, -0.2080916398755594, -0.026392351308188622, -0.03848864087248608, 0.024818999233678134, -0.005943139896982759, 0.01905420012255

### Test APIgateway endpoint

In [2]:
import requests
import json

endpoint_url = "https://h97gqolr43.execute-api.us-east-1.amazonaws.com/invokeb32"

# Choose either text or base64-encoded image payload
payload = {
    "text": "This is a test input for embedding generation.",
    "model": "L-14 model"
}

headers = {
    "Content-Type": "application/json"
}

response = requests.post(endpoint_url, headers=headers, data=json.dumps(payload))

# Print response
print("Status Code:", response.status_code)
print("Response JSON:", response.json())

Status Code: 200
Response JSON: [0.011923573989487268, -0.018119171316957568, -0.007777713759076017, -0.024523632179645708, 0.021243225137650906, -0.014535462527528674, -0.0009346780733012712, -0.08583268636966415, 0.056130660709245765, -0.027807284845884184, 0.04026264622737679, 0.040342555281874415, -0.02025530149575023, -0.0012651359929189577, 0.0006966800712683024, 0.01795241555484599, 0.0150648393345375, 0.004731666521387965, -0.03526948178262685, -0.01438115357683699, 0.020475981624088223, -0.009689774416737391, 0.011830157256098517, -0.033388600518867786, -0.005361675404840186, 0.02888838232453725, -0.013431207152269692, 0.004939794145445378, 0.006149110850135625, -0.013676516999534453, 0.020107299979875124, -0.008012475984026251, 0.05609658165468725, 0.023891956500086152, 0.029208176097102696, -0.023943093463291, 0.00610437060267315, 0.004409554727138781, 0.013335901076936506, -0.0450772829573579, -0.03607407360817796, -0.020010174149201438, -0.03919555272452589, 0.018471714120