In [2]:
# Cell 1 — Imports & Load
import io
import json
import numpy as np
from PIL import Image
import torch
from transformers import CLIPProcessor, CLIPModel

# Pull the pretrained CLIP weights and the matching preprocessor from the Hugging Face Hub
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# Sanity check: both embedding entry points must be available on the loaded model
assert all(
    hasattr(model, method_name)
    for method_name in ("get_image_features", "get_text_features")
)
print("✅ Model and processor loaded")


config.json:   0%|          | 0.00/4.19k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/605M [00:00<?, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


model.safetensors:   0%|          | 0.00/605M [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/316 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/592 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/862k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.22M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/389 [00:00<?, ?B/s]

✅ Model and processor loaded


In [3]:
# Cell 2 — model_fn
def model_fn(model_dir=None):
    """
    Hand back the CLIP artifacts that are already loaded in this notebook.

    `model_dir` is accepted so the signature matches a SageMaker handler, but it
    is not read here: the notebook-level `model` and `processor` objects are
    returned as-is under the keys "model" and "processor".
    """
    artifacts = dict(model=model, processor=processor)
    return artifacts

# Unit test: the returned context must expose both artifacts
ctx = model_fn()
assert all(key in ctx for key in ("model", "processor"))
print("✅ model_fn OK")


✅ model_fn OK


In [5]:
# Cell 3 — input_fn
def input_fn(request_body, content_type):
    """
    Deserialize a raw request payload into a model-ready Python object.

    Args:
        request_body: Raw image bytes for image requests, or a JSON document
            (bytes or str) shaped like {"inputs": "..."} or {"inputs": ["...", ...]}.
        content_type: MIME type of the request. Matching is case-insensitive and
            ignores parameters such as "; charset=utf-8". Supported values are
            "application/x-image", any "image/*" type, and "application/json".

    Returns:
        A PIL image converted to RGB for image requests, or the value stored
        under the "inputs" key for JSON requests.

    Raises:
        ValueError: If the content type is not supported.
        KeyError: If a JSON request has no "inputs" key.
    """
    # Media types are case-insensitive and may carry parameters
    # (e.g. "application/json; charset=utf-8"), so compare the bare type only.
    media_type = str(content_type or "").split(";", 1)[0].strip().lower()

    if media_type == "application/x-image" or media_type.startswith("image/"):
        return Image.open(io.BytesIO(request_body)).convert("RGB")
    if media_type == "application/json":
        payload = json.loads(request_body)
        # Expect {"inputs": str or [str]}
        return payload["inputs"]
    # Report the header exactly as received to make client-side debugging easier.
    raise ValueError(f"Unsupported content type: {content_type}")

# Unit tests
# 3a) Image — use the sample file when it exists; otherwise fall back to a small
# synthetic JPEG so this cell also passes on machines without /content
try:
    with open("/content/Untitled.jpg","rb") as f:
        image_bytes = f.read()
except FileNotFoundError:
    buffer = io.BytesIO()
    Image.new("RGB", (64, 64), color=(200, 30, 30)).save(buffer, format="JPEG")
    image_bytes = buffer.getvalue()
img = input_fn(image_bytes, "application/x-image")
assert isinstance(img, Image.Image) and img.mode == "RGB"
# 3b) Text — single string and batch
txt = input_fn(b'{"inputs":"a cat"}', "application/json")
assert txt == "a cat"
assert input_fn(b'{"inputs":["a cat","a dog"]}', "application/json") == ["a cat", "a dog"]
# 3c) Unknown content types must be rejected with ValueError
try:
    input_fn(b"", "text/plain")
except ValueError:
    pass
else:
    raise AssertionError("input_fn accepted an unsupported content type")
print("✅ input_fn OK")


✅ input_fn OK


In [6]:
# Cell 4 — predict_fn
def predict_fn(input_data, context):
    """
    Embed an image or text with CLIP and return L2-normalized features.

    Args:
        input_data: A PIL image, a single string, or a list of strings.
            Anything that is not a PIL image is handled as text.
        context: Dict holding the loaded objects under "model" and "processor".

    Returns:
        NumPy array of features with one row per input; each row is divided by
        its L2 norm.
    """
    model = context["model"]
    proc  = context["processor"]

    # Cheap built-in checks run first; the outcome matches a plain
    # "is it a PIL image?" test because an image is never a str or list.
    treat_as_text = isinstance(input_data, (str, list)) or not isinstance(input_data, Image.Image)

    with torch.no_grad():
        if treat_as_text:
            # either single str or list of str
            texts = input_data if isinstance(input_data, list) else [input_data]
            # truncation=True clips prompts to the tokenizer's maximum length so
            # over-long text cannot overflow the model's position embeddings.
            batch = proc(text=texts, return_tensors="pt", padding=True, truncation=True)
            # Tensors must live on the same device as the model weights.
            batch = batch.to(model.device)
            feats = model.get_text_features(**batch)
        else:
            batch = proc(images=input_data, return_tensors="pt")
            batch = batch.to(model.device)
            feats = model.get_image_features(**batch)

        # Normalize each row to unit L2 length
        feats = feats / feats.norm(p=2, dim=-1, keepdim=True)

    return feats.cpu().numpy()

# Unit tests
ctx = model_fn()
# 4a) Image — use the sample file when it exists; otherwise fall back to a small
# synthetic image so this cell also passes on machines without /content
try:
    with open("/content/Untitled.jpg","rb") as f:
        img = Image.open(io.BytesIO(f.read())).convert("RGB")
except FileNotFoundError:
    img = Image.new("RGB", (64, 64), color=(200, 30, 30))
emb_img = predict_fn(img, ctx)
assert isinstance(emb_img, np.ndarray) and emb_img.shape == (1, model.config.projection_dim)
# 4b) Text — single string and batch
emb_txt = predict_fn("a cat", ctx)
assert isinstance(emb_txt, np.ndarray) and emb_txt.shape == (1, model.config.projection_dim)
emb_batch = predict_fn(["a cat", "a dog"], ctx)
assert emb_batch.shape == (2, model.config.projection_dim)
# 4c) Every returned row must have unit L2 norm
assert all(
    np.allclose(np.linalg.norm(embedding, axis=1), 1.0, atol=1e-5)
    for embedding in (emb_img, emb_txt, emb_batch)
)
print("✅ predict_fn OK")


✅ predict_fn OK


In [11]:
# Cell 5 — output_fn
def output_fn(prediction: np.ndarray, response_content_type="application/json"):
    """
    Turn a NumPy prediction into a JSON response body.

    Returns a `(body, content_type)` pair where `body` is the JSON text of
    `prediction.tolist()`. Only "application/json" is supported; any other
    requested type raises ValueError.
    """
    # Reject unsupported formats up front
    if response_content_type != "application/json":
        raise ValueError(f"Unsupported response content: {response_content_type}")

    # Nested Python lists mirror the array's dimensions
    rows = prediction.tolist()
    body = json.dumps(rows)
    return body, "application/json"

# Unit tests: round-trip a dummy embedding through the serializer
out_body, ctype = output_fn(np.zeros((1, model.config.projection_dim)))
assert ctype == "application/json"
decoded = json.loads(out_body)
assert isinstance(decoded, list)
assert len(decoded[0]) == model.config.projection_dim
print("✅ output_fn OK")


✅ output_fn OK
