This notebook contains code to download the CLIP-ViT-L-14 model from Hugging Face and deploy it to a SageMaker serverless endpoint.

### Download  CLIP-ViT-L-14 model

In [1]:
from huggingface_hub import snapshot_download

snapshot_download(repo_id="sentence-transformers/clip-ViT-L-14", local_dir="./CLIP-ViT-L-14")


  from .autonotebook import tqdm as notebook_tqdm
Fetching 12 files: 100%|█████████████████████████████████████████████| 12/12 [00:30<00:00,  2.50s/it]


'C:\\Users\\tochi\\OneDrive\\Documents\\Upwork_Job\\clip-embedding-api\\CLIP-ViT-L-14'

### Package and Upload CLIP-ViT-L-14 model to s3 bucket

In [1]:
%cd  CLIP-ViT-L-14
!tar --exclude='.cache' -zcvf model.tar.gz *

C:\Users\tochi\OneDrive\Documents\Upwork_Job\clip-embedding-api\CLIP-ViT-L-14


a .cache
a .gitattributes
a 0_CLIPModel
a code
a config_sentence_transformers.json
a model.tar.gz
a modules.json
a README.md
a code/.ipynb_checkpoints
a code/inference.py
a code/requirements.txt
a code/.ipynb_checkpoints/inference-checkpoint.py
a code/.ipynb_checkpoints/requirements-checkpoint.txt
a 0_CLIPModel/config.json
a 0_CLIPModel/merges.txt
a 0_CLIPModel/preprocessor_config.json
a 0_CLIPModel/pytorch_model.bin
a 0_CLIPModel/special_tokens_map.json
a 0_CLIPModel/tokenizer.json
a 0_CLIPModel/tokenizer_config.json
a 0_CLIPModel/vocab.json
a .cache/huggingface
a .cache/huggingface/.gitignore
a .cache/huggingface/download
a .cache/huggingface/download/.gitattributes.metadata
a .cache/huggingface/download/0_CLIPModel
a .cache/huggingface/download/config_sentence_transformers.json.metadata
a .cache/huggingface/download/modules.json.metadata
a .cache/huggingface/download/README.md.metadata
a .cache/huggingface/download/0_CLIPModel/config.json.metadata
a .cache/huggingface/download/0_CLI

In [2]:
import sagemaker
import boto3

sess = sagemaker.Session()
account = boto3.client("sts").get_caller_identity().get("Account")
role = f"arn:aws:iam::{account}:role/service-role/AmazonSageMakerServiceCatalogProductsUseRole"



sagemaker.config INFO - Not applying SDK defaults from location: C:\ProgramData\sagemaker\sagemaker\config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: C:\Users\tochi\AppData\Local\sagemaker\sagemaker\config.yaml


In [3]:
bucket = "huggingface-clip-models"
model_artifact = sess.upload_data("model.tar.gz", bucket=bucket, key_prefix="CLIP-ViT-L-14")
print("S3 Model Path:", model_artifact)

S3 Model Path: s3://huggingface-clip-models/CLIP-ViT-L-14/model.tar.gz


### Create a Sagemaker Huggingface Model

In [4]:
from sagemaker.huggingface.model import HuggingFaceModel

# Create Hugging Face Model Class
huggingface_model = HuggingFaceModel(
   model_data = model_artifact,
   role=role,                                          
   transformers_version="4.26",                          
   pytorch_version="1.13",                              
   py_version='py39',
   env={"HF_TASK": "feature-extraction"}
    
)

In [5]:
from sagemaker.serverless import ServerlessInferenceConfig

# Define serverless configuration
serverless_config = ServerlessInferenceConfig(
    memory_size_in_mb=5120,
    max_concurrency=5       
)

In [6]:
predictor = huggingface_model.deploy(
    serverless_inference_config=serverless_config,
    endpoint_name="l14-clip-model-v1"
)

-----!

### Test Deployed Endpoint

In [7]:
%cd  ..

C:\Users\tochi\OneDrive\Documents\Upwork_Job\clip-embedding-api


In [8]:
# Prepare base64 encoded image
import base64

# Load an image (make sure to use an RGB image like PNG or JPG)
with open("aws_login.png", "rb") as image_file:
    encoded_image = base64.b64encode(image_file.read()).decode('utf-8')

# Define payload with base64 image
data = {
  "inputs": {
    "image": encoded_image
  }
}

# Send prediction request to endpoint
response = predictor.predict(data)
print(len(response))
print(response)

768
[0.37861865758895874, -0.2048639953136444, 0.22263170778751373, -0.6245788931846619, -0.055607572197914124, -0.06229861080646515, -0.5927261710166931, 0.009563378989696503, -0.2391338348388672, -0.10730738937854767, 0.057211458683013916, 0.11848915368318558, -0.09497398883104324, 0.2676980495452881, 0.19893492758274078, 0.32281601428985596, -0.2885391414165497, -0.35819971561431885, -0.4916241765022278, 0.026667796075344086, 0.015242338180541992, 0.2755463421344757, 0.4547341763973236, 0.16531586647033691, -0.778900146484375, 0.08384381979703903, -0.4893878698348999, 0.1691056489944458, -0.253076434135437, 0.17060652375221252, -0.6237554550170898, 0.029045745730400085, 0.009264398366212845, -0.3716703951358795, 0.4490283131599426, -0.567813515663147, -0.19414737820625305, 0.3332390785217285, 0.639544665813446, 0.4740416407585144, 0.3455004096031189, -0.005832865834236145, -0.14018262922763824, 0.6296952962875366, -0.31594616174697876, 0.5030624270439148, 0.18537959456443787, 0.2746

### Test APIgateway Endpoint

In [None]:
import requests
import json

endpoint_url = "https://kbke5izmza.execute-api.us-east-1.amazonaws.com/prod/invokel14"

# Choose either text or base64-encoded image payload
payload = {
    "text": "This is a test input for embedding generation.",
    "model": "L-14 model"
}

headers = {
    "Content-Type": "application/json"
}

response = requests.post(endpoint_url, headers=headers, data=json.dumps(payload))

# Print response
print("Status Code:", response.status_code)
print("Response JSON:", response.json())