# Multimodal Embedding Models

## Setup

In [None]:
import io
import base64
import boto3
import json
import os
from PIL import Image
from typing import List

### Define an AWS Bedrock client for making API calls

In [None]:
# Create a boto3 session with no explicit arguments.
session = boto3.Session()

# No client configuration is needed beyond the service name because the
# required environment variables are already set up in this notebook environment.
client = session.client("bedrock-runtime")

## Define helper methods for calling AWS Bedrock

### adapted from: [Classification with Image Embedding with AWS Bedrock Titan Multimodal and Vector DB](https://medium.com/@chikim79/classification-with-image-embedding-with-aws-bedrock-titan-multimodal-and-vector-db-5966cc456582)

In [None]:
def readFileAsBase64(file_path):
    """Load an image file and return it as a base64-encoded JPEG string.

    The image is converted to RGB mode, resized to 200x200 pixels, saved
    as JPEG into an in-memory buffer, and the buffer contents are
    base64-encoded and decoded to text.
    """
    jpeg_buffer = io.BytesIO()
    with Image.open(file_path) as source:
        # Convert to RGB, then shrink the image so API calls don't fail.
        thumbnail = source.convert("RGB").resize((200, 200))
        thumbnail.save(jpeg_buffer, format="JPEG")
    encoded = base64.b64encode(jpeg_buffer.getvalue())
    return encoded.decode("utf8")

def construct_bedrock_body(
    base64_string: str, *, output_embedding_length: int = 1024
) -> str:
    """Construct the JSON request body for the Bedrock image embedding API.

    Args:
        base64_string: The base64-encoded image, sent as ``inputImage``.
        output_embedding_length: Requested length of the returned embedding
            vector, sent as ``embeddingConfig.outputEmbeddingLength``.
            Defaults to 1024. The value is passed through unvalidated;
            check the model documentation for the lengths it supports.

    Returns:
        The request body serialized as a JSON string.
    """
    return json.dumps(
        {
            "inputImage": base64_string,
            "embeddingConfig": {"outputEmbeddingLength": output_embedding_length},
        }
    )

def get_image_embedding(file_path: str) -> List[float]:
    """Get the embedding for an image file from the Bedrock API.

    The image is read and base64-encoded with ``readFileAsBase64``, wrapped
    in a request body by ``construct_bedrock_body``, and sent to the
    ``amazon.titan-embed-image-v1`` model through the module-level ``client``.

    Args:
        file_path: Path of the image file to embed.

    Returns:
        The value of the ``"embedding"`` key in the parsed response body.

    Raises:
        ValueError: If invoking the model or parsing its response fails. The
            original exception is attached as ``__cause__``. Errors raised
            while reading or encoding the image happen before the ``try``
            block and propagate unchanged.
    """
    base64_string = readFileAsBase64(file_path)
    body = construct_bedrock_body(base64_string)

    try:
        response = client.invoke_model(
            body=body,
            modelId="amazon.titan-embed-image-v1",
            accept="application/json",
            contentType="application/json",
        )

        # The response "body" is a stream-like object; read it fully before parsing.
        response_body = json.loads(response.get("body").read())
        return response_body["embedding"]
    except Exception as e:
        # Chain explicitly so the underlying error stays visible in the traceback.
        raise ValueError(f"Error raised by image embedding endpoint: {e}") from e

## Generate the embedding from a sample image

In [None]:
# Request an embedding for the sample image; the path is relative to the notebook's working directory.
embedding = get_image_embedding("../../images/mteb_leaderboard.png")

In [None]:
# Print the number of values in the returned embedding
# (the request body asks for an outputEmbeddingLength of 1024).
print(len(embedding))

## Exercises

- Take what you've learned from `embeddings/01_comparing_embeddings` and experiment with comparing embeddings of images and/or text inputs.

### Discussion Questions

- Images and text "living" in the same semantic space is powerful! What are some of the implications for adding multimodal capability to an embedding model?
- Search around the Internet for other modalities that people are talking about. Do any other modalities look intriguing for your collections or materials?