# Amazon Nova Multimodal Embeddings with LlamaIndex
Integrate Amazon Nova multimodal embedding models with LlamaIndex for document processing and retrieval.

In [None]:
!pip install llama-index llama-index-vector-stores-faiss boto3 faiss-cpu --upgrade

In [None]:
import base64
import json
import os
from typing import Any, List, Optional

import boto3
import faiss
from IPython.display import Image, display
from llama_index.core import Document, Settings, StorageContext, VectorStoreIndex
from llama_index.core.embeddings import BaseEmbedding
from llama_index.core.schema import ImageDocument, QueryBundle
from llama_index.vector_stores.faiss import FaissVectorStore

# Disable the global LLM to avoid the OpenAI API key requirement;
# the cells below only need the embedding model for indexing and retrieval.
Settings.llm = None

## Custom Nova Embedding Class for LlamaIndex

In [None]:
class NovaMultimodalEmbedding(BaseEmbedding):
    """LlamaIndex embedding adapter that calls an Amazon Nova embedding model on Bedrock.

    Text is embedded through the standard ``BaseEmbedding`` hooks; images are
    embedded through the extra :meth:`embed_image` method. Every call sends a
    ``SINGLE_EMBEDDING`` request via ``bedrock-runtime`` ``invoke_model``.

    Args:
        model_id: Bedrock model identifier to invoke.
        region: AWS region used to create the ``bedrock-runtime`` client.
        embed_dim: Size of the returned vectors, sent as ``embeddingDimension``.
            Must be a dimension the chosen model supports; defaults to 3072.
        **kwargs: Forwarded unchanged to ``BaseEmbedding.__init__``.
    """

    def __init__(self, model_id: str = 'amazon.nova-2-multimodal-embeddings-v1:0', region: str = 'us-east-1', embed_dim: int = 3072, **kwargs):
        super().__init__(**kwargs)
        # NOTE(review): attributes are set with object.__setattr__ instead of plain
        # assignment, presumably because the base model rejects undeclared
        # fields -- confirm against the installed LlamaIndex version.
        object.__setattr__(self, 'model_id', model_id)
        object.__setattr__(self, 'region', region)
        object.__setattr__(self, 'bedrock', boto3.client("bedrock-runtime", region_name=region))
        object.__setattr__(self, '_embed_dim', embed_dim)

    @classmethod
    def class_name(cls) -> str:
        """Return the identifier of this embedding class."""
        return "NovaMultimodalEmbedding"

    def _get_query_embedding(self, query: str) -> List[float]:
        """Embed a search query using the ``GENERIC_RETRIEVAL`` purpose."""
        return self._embed_text(query, "GENERIC_RETRIEVAL")

    def _get_text_embedding(self, text: str) -> List[float]:
        """Embed a document text using the ``GENERIC_INDEX`` purpose."""
        return self._embed_text(text, "GENERIC_INDEX")

    async def _aget_query_embedding(self, query: str) -> List[float]:
        """Async variant; delegates to the synchronous (blocking) implementation."""
        return self._get_query_embedding(query)

    async def _aget_text_embedding(self, text: str) -> List[float]:
        """Async variant; delegates to the synchronous (blocking) implementation."""
        return self._get_text_embedding(text)

    def _invoke(self, content: dict, purpose: str) -> List[float]:
        """Send one SINGLE_EMBEDDING request and return the first embedding vector.

        Args:
            content: Modality-specific part of ``singleEmbeddingParams``
                (a ``{"text": ...}`` or ``{"image": ...}`` mapping).
            purpose: Value sent as ``embeddingPurpose``.
        """
        request_body = {
            "taskType": "SINGLE_EMBEDDING",
            "singleEmbeddingParams": {
                "embeddingPurpose": purpose,
                "embeddingDimension": self._embed_dim,
                **content,
            },
        }
        response = self.bedrock.invoke_model(
            modelId=self.model_id,
            body=json.dumps(request_body)
        )
        result = json.loads(response['body'].read())
        return result["embeddings"][0]["embedding"]

    def _image_format(self, image_path: str) -> str:
        """Map the file extension of ``image_path`` to the request's image format.

        Unrecognised or missing extensions fall back to ``"png"``, which is the
        value that was previously sent for every image.
        """
        known_formats = {
            ".png": "png",
            ".jpg": "jpeg",
            ".jpeg": "jpeg",
            ".gif": "gif",
            ".webp": "webp",
        }
        extension = os.path.splitext(image_path)[1].lower()
        return known_formats.get(extension, "png")

    def _embed_text(self, text: str, purpose: str = "GENERIC_INDEX") -> List[float]:
        """Embed ``text`` for the given purpose; truncation is disabled (``"NONE"``)."""
        return self._invoke({"text": {"truncationMode": "NONE", "value": text}}, purpose)

    def embed_image(self, image_path: str, purpose: str = "GENERIC_INDEX") -> List[float]:
        """Embed the image file at ``image_path``.

        The file is read, base64-encoded and sent with the ``DOCUMENT_IMAGE``
        detail level. The image format is derived from the file extension.

        Args:
            image_path: Path of the image file to embed.
            purpose: Value sent as ``embeddingPurpose``.

        Returns:
            The embedding vector as a list of floats.

        Raises:
            OSError: If the file cannot be opened or read.
        """
        with open(image_path, 'rb') as f:
            image_data = base64.b64encode(f.read()).decode('utf-8')

        image_payload = {
            "image": {
                "format": self._image_format(image_path),
                "detailLevel": "DOCUMENT_IMAGE",
                "source": {"bytes": image_data}
            }
        }
        return self._invoke(image_payload, purpose)

In [None]:
# Initialize Nova embeddings with the class defaults
# (model 'amazon.nova-2-multimodal-embeddings-v1:0', region 'us-east-1').
# Constructing the object creates a boto3 "bedrock-runtime" client.
embeddings = NovaMultimodalEmbedding()

## Multimodal Document Processing with LlamaIndex

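Create separate FAISS-backed indexes for text and image content. Text is embedded by LlamaIndex through the custom embedding class, while image embeddings are computed directly with the Nova model and attached to LlamaIndex `Document` objects.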

In [None]:
class MultimodalLlamaIndex:
    """Pair of FAISS-backed LlamaIndex indexes: one for texts, one for images.

    Texts are embedded by LlamaIndex through ``embedding_model``; images are
    embedded explicitly with ``embedding_model.embed_image`` and stored as
    documents that carry the precomputed embedding.
    """

    def __init__(self, embedding_model: "NovaMultimodalEmbedding"):
        self.embedding_model = embedding_model
        self.text_index = None   # built by add_documents
        self.image_index = None  # built by add_documents when usable images exist
        self.documents = []

    def _metadata_for(self, metadata_list: Optional[List[dict]], position: int, **extra) -> dict:
        """Return a fresh metadata dict for the item at ``position``.

        A copy is made so the caller's dicts are not mutated and so the text and
        image documents at the same position do not share one dict.
        """
        base = metadata_list[position] if metadata_list and position < len(metadata_list) else {}
        return {**base, "doc_id": position, **extra}

    def _build_index(self, documents):
        """Build a VectorStoreIndex over ``documents`` on a new inner-product FAISS index."""
        faiss_index = faiss.IndexFlatIP(self.embedding_model._embed_dim)
        vector_store = FaissVectorStore(faiss_index=faiss_index)
        storage_context = StorageContext.from_defaults(vector_store=vector_store)
        return VectorStoreIndex.from_documents(
            documents, storage_context=storage_context, embed_model=self.embedding_model
        )

    def add_documents(self, text_docs: List[str], image_paths: Optional[List[Optional[str]]] = None, metadata_list: Optional[List[dict]] = None):
        """Add text and image documents to separate indexes.

        Each call replaces ``text_index`` (and ``image_index`` when at least one
        usable image is given) with a newly built index.

        Args:
            text_docs: Texts to index.
            image_paths: Image file paths to index. ``None``/empty entries and
                paths that do not exist are skipped, so the list may be kept
                positionally aligned with ``metadata_list``.
            metadata_list: Optional metadata, matched to texts and to images by
                position. The dicts are copied, never modified.
        """
        documents = [
            Document(text=text, metadata=self._metadata_for(metadata_list, i))
            for i, text in enumerate(text_docs)
        ]
        self.text_index = self._build_index(documents)

        if not image_paths:
            return

        image_documents = []
        for i, img_path in enumerate(image_paths):
            if not img_path or not os.path.exists(img_path):
                continue
            metadata = self._metadata_for(metadata_list, i, image_path=img_path)
            # The embedding is computed here and attached to the document, so the
            # placeholder text is not what represents the image in the index.
            embedding = self.embedding_model.embed_image(img_path)
            image_documents.append(
                Document(text=f"Image: {img_path}", metadata=metadata, embedding=embedding)
            )

        if image_documents:
            self.image_index = self._build_index(image_documents)

    def search_text(self, query: str, k: int = 3):
        """Return up to ``k`` text hits for ``query``; ``[]`` when no text index exists."""
        if not self.text_index:
            return []
        retriever = self.text_index.as_retriever(similarity_top_k=k)
        return retriever.retrieve(query)

    def search_images(self, image_path: str, k: int = 3):
        """Return up to ``k`` indexed images similar to the image at ``image_path``.

        Returns ``[]`` when there is no image index or the query file is missing.
        """
        if not self.image_index or not image_path or not os.path.exists(image_path):
            return []
        query_embedding = self.embedding_model.embed_image(image_path, "GENERIC_RETRIEVAL")
        retriever = self.image_index.as_retriever(similarity_top_k=k)
        # Supply the image embedding explicitly; passing only a string would make
        # the retriever embed that string as text instead of using the image.
        query_bundle = QueryBundle(query_str=f"Image query: {image_path}", embedding=query_embedding)
        return retriever.retrieve(query_bundle)

    def multimodal_search(self, text_query: Optional[str] = None, image_path: Optional[str] = None, k: int = 5):
        """Combined text and image search.

        Returns at most ``k`` ``(node, kind)`` tuples, where ``kind`` is ``"text"``
        or ``"image"``; text hits come first. When both modalities are queried,
        each is asked for ``k // 2 + 1`` hits; when only one is queried it is
        asked for the full ``k``.
        """
        if k <= 0:
            return []

        use_text = bool(text_query and self.text_index)
        use_image = bool(image_path and self.image_index)
        per_modality = k // 2 + 1 if (use_text and use_image) else k

        results = []
        if use_text:
            results.extend((node, "text") for node in self.search_text(text_query, per_modality))
        if use_image:
            results.extend((node, "image") for node in self.search_images(image_path, per_modality))

        return results[:k]

In [None]:
# Sample corpus: one (text, optional image path, metadata) record per entry
sample_records = [
    (
        "We present Amazon Nova Premier, our most capable multimodal foundation model and teacher for model distillation.",
        "./images/nova-premier-pdf-screenshot.png",
        {"source": "nova_premier_model_card", "category": "ai"},
    ),
    (
        "Vector databases enable semantic search capabilities",
        None,  # No image for vector database text
        {"source": "vector_info", "category": "ai"},
    ),
    (
        "Amazon S3 provides scalable object storage for data backup and archiving.",
        None,  # No image for S3 text
        {"source": "aws_s3", "category": "storage"},
    ),
    (
        "Amazon SageMaker AI is a fully managed machine learning (ML) service.",
        "./images/sagemaker-ai-workflow-screenshot.png",
        {"source": "amazon_sagemaker", "category": "ai"},
    ),
    (
        "Introducing Amazon Nova foundation models: Frontier intelligence and industry leading price performance",
        None,  # No image for Nova blog text
        {"source": "nova_blog", "category": "ai"},
    ),
]

# Split the records into the three parallel lists used below
sample_texts = [text for text, _, _ in sample_records]
sample_images = [image for _, image, _ in sample_records]
sample_metadata = [metadata for _, _, metadata in sample_records]

# Keep only the entries that actually have an image
valid_images = [image for image in sample_images if image is not None]

# Build the text and image indexes from the sample data.
# NOTE(review): add_documents matches metadata to images by position in the
# list it receives, so after filtering, the second image is paired with the
# second metadata entry ("vector_info") rather than "amazon_sagemaker" --
# confirm whether that pairing is intended.
multimodal_index = MultimodalLlamaIndex(embeddings)
multimodal_index.add_documents(sample_texts, valid_images, sample_metadata)

print(f"Created LlamaIndex with {len(sample_texts)} text documents and {len(valid_images)} image documents")

In [None]:
# Run a plain text query against the text index and list the top two hits
query = "Amazon Nova"
text_results = multimodal_index.search_text(query, k=2)

print(f"Text search results for: '{query}'\n")
for rank, hit in enumerate(text_results, start=1):
    hit_metadata = hit.metadata
    category = hit_metadata.get('category', 'N/A')
    source = hit_metadata.get('source', 'N/A')
    print(f"{rank}. {hit.text}")
    print(f"   Score: {hit.score:.4f}")
    print(f"   Category: {category}")
    print(f"   Source: {source}\n")

In [None]:
# Demo: run a question through a LlamaIndex query engine built on the text
# index (created with llm=None) and report how many source nodes it returned
if multimodal_index.text_index:
    question = "What is Amazon Nova?"
    query_engine = multimodal_index.text_index.as_query_engine(llm=None)
    response = query_engine.query(question)
    source_count = len(response.source_nodes)
    print(f"Query: {question}")
    print(f"Response: {response}")
    print(f"Source nodes: {source_count}")

In [None]:
class EnhancedLlamaIndexSearch:
    """Notebook helper that runs a multimodal search and prints/displays the hits."""

    def __init__(self, multimodal_index: MultimodalLlamaIndex):
        self.multimodal_index = multimodal_index

    def search_and_display(self, text_query: str = None, image_query_path: str = None, k: int = 3):
        """Search and display results with images.

        Prints a header describing the query (rendering the query image inline
        when the file exists), then one section per hit with its content, score,
        source and category. A hit whose metadata has an ``image_path`` also gets
        that image rendered, or a "not found" line if the file is missing.
        """
        hits = self.multimodal_index.multimodal_search(text_query, image_query_path, k)

        # Header: echo the query that was run
        print("=== LlamaIndex Multimodal Search Results ===")
        if text_query:
            print(f"Text Query: '{text_query}'")
        if image_query_path:
            print(f"Image Query: {image_query_path}")
            if os.path.exists(image_query_path):
                display(Image(filename=image_query_path, width=400))
        print(f"Found {len(hits)} results\n")

        # Body: one section per hit
        for position, (node, search_type) in enumerate(hits, start=1):
            hit_metadata = node.metadata
            print(f"--- Result {position} [{search_type.upper()}] ---")
            print(f"Content: {node.text}")
            print(f"Score: {node.score:.4f}")
            print(f"Source: {hit_metadata.get('source', 'N/A')}")
            print(f"Category: {hit_metadata.get('category', 'N/A')}")

            # Render the image stored with the hit, when there is one
            if 'image_path' in hit_metadata:
                stored_path = hit_metadata['image_path']
                if not os.path.exists(stored_path):
                    print(f"Image not found: {stored_path}")
                else:
                    print(f"Associated image: {stored_path}")
                    display(Image(filename=stored_path, width=400))

            print("\n")

In [None]:
# Wrap the multimodal index in the display helper
enhanced_search = EnhancedLlamaIndexSearch(multimodal_index=multimodal_index)

# Demo: text-only search, showing up to two hits and any images stored with them
enhanced_search.search_and_display(
    text_query="Amazon Nova",
    k=2,
)

In [None]:
# Demo: search by image, but only when the query image is present on disk
query_image = "./images/nova-mlp-pdf-screenshot.png"
if not os.path.exists(query_image):
    print(f"Query image not found: {query_image}")
else:
    enhanced_search.search_and_display(image_query_path=query_image, k=1)