# Amazon Nova Multimodal Embeddings with LangChain
Integrate Amazon Nova multimodal embedding models with LangChain for document processing and retrieval.

In [None]:
!pip install langchain-core langchain-community boto3 faiss-cpu --upgrade

In [None]:
import boto3
import json
import base64
from typing import List
from langchain_core.embeddings import Embeddings
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
from IPython.display import Image, display
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

## Custom Nova Embedding Class for LangChain

In [None]:
class NovaMultimodalEmbeddings(Embeddings):
    """LangChain ``Embeddings`` adapter for Amazon Nova multimodal embeddings.

    Text is embedded through the standard ``embed_documents`` / ``embed_query``
    interface; images are embedded through ``embed_image`` /
    ``embed_image_query``. Index-side calls send the ``GENERIC_INDEX`` purpose
    and query-side calls send ``GENERIC_RETRIEVAL``.
    """

    # File extension (lower-cased, without the dot) -> ``format`` value sent in
    # the image request. NOTE(review): confirm the accepted format names
    # against the current Nova embeddings request schema.
    _IMAGE_FORMATS = {
        "png": "png",
        "jpg": "jpeg",
        "jpeg": "jpeg",
        "gif": "gif",
        "webp": "webp",
    }

    def __init__(
        self,
        model_id: str = 'amazon.nova-2-multimodal-embeddings-v1:0',
        region: str = 'us-east-1',
        dimension: int = 3072,
    ):
        """Create the Bedrock runtime client used for every embedding call.

        Args:
            model_id: Bedrock model identifier passed to ``invoke_model``.
            region: AWS region the ``bedrock-runtime`` client is created in.
            dimension: Value sent as ``embeddingDimension`` in each request.
        """
        self.model_id = model_id
        self.bedrock = boto3.client("bedrock-runtime", region_name=region)
        self.dim = dimension

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed multiple text documents (one request per text, index purpose)."""
        return [self._embed_text(text) for text in texts]

    def embed_query(self, text: str) -> List[float]:
        """Embed a single query text using the retrieval purpose."""
        return self._embed_text(text, "GENERIC_RETRIEVAL")

    def _invoke(self, params: dict) -> List[float]:
        """Send one SINGLE_EMBEDDING request and return the embedding vector.

        Args:
            params: The ``singleEmbeddingParams`` payload (purpose, dimension
                and exactly one modality section).
        """
        request_body = {
            "taskType": "SINGLE_EMBEDDING",
            "singleEmbeddingParams": params,
        }
        response = self.bedrock.invoke_model(
            modelId=self.model_id,
            body=json.dumps(request_body)
        )
        result = json.loads(response['body'].read())
        return result["embeddings"][0]["embedding"]

    def _embed_text(self, text: str, purpose="GENERIC_INDEX") -> List[float]:
        """Generate an embedding for ``text`` with the given embedding purpose."""
        return self._invoke({
            "embeddingPurpose": purpose,
            "embeddingDimension": self.dim,
            "text": {"truncationMode": "NONE", "value": text}
        })

    def embed_image_query(self, image_path: str) -> List[float]:
        """Embed a single query image using the retrieval purpose."""
        return self.embed_image(image_path, "GENERIC_RETRIEVAL")

    def embed_image(self, image_path: str, purpose="GENERIC_INDEX") -> List[float]:
        """Embed an image read from ``image_path``.

        Args:
            image_path: Path of the image file; its bytes are sent base64-encoded.
            purpose: Embedding purpose forwarded to the model.

        Raises:
            OSError: If the file cannot be opened or read.
        """
        with open(image_path, 'rb') as f:
            image_data = base64.b64encode(f.read()).decode('utf-8')

        # Derive the declared format from the extension; anything unrecognised
        # (including a path with no extension) keeps the previous "png" default.
        extension = str(image_path).rpartition(".")[2].lower()
        image_format = self._IMAGE_FORMATS.get(extension, "png")

        return self._invoke({
            # Honour the caller's purpose so query images are embedded with
            # GENERIC_RETRIEVAL rather than always GENERIC_INDEX.
            "embeddingPurpose": purpose,
            "embeddingDimension": self.dim,
            "image": {
                "format": image_format,
                "detailLevel": "DOCUMENT_IMAGE",
                "source": {"bytes": image_data}
            }
        })

In [None]:
# Initialize Nova embeddings with the class defaults
# (model 'amazon.nova-2-multimodal-embeddings-v1:0', region 'us-east-1').
# Constructing the object creates a boto3 "bedrock-runtime" client.
embeddings = NovaMultimodalEmbeddings()

## Multimodal Document Processing

This section demonstrates how to index and search documents that contain both text and images using Nova's multimodal embeddings.

In [None]:
import os
from pathlib import Path

class MultimodalDocument:
    """A text passage paired with an optional image file and free-form metadata."""

    def __init__(self, text: str, image_path: str = None, metadata: dict = None):
        # A falsy ``metadata`` (None or an empty dict) is replaced by a new dict,
        # so instances never share a default mapping.
        if not metadata:
            metadata = {}
        self.metadata = metadata
        self.image_path = image_path
        self.text = text

    def has_image(self) -> bool:
        """Return True when an image path was given and that path exists on disk."""
        path = self.image_path
        if path is None:
            return False
        return os.path.exists(path)

class MultimodalVectorStore:
    """Keep one FAISS index for text and one for images over the same documents.

    ``self.documents`` holds every ``MultimodalDocument`` added so far; the
    ``doc_id`` written into entry metadata is the document's position in that
    list.
    """

    def __init__(self, embeddings: NovaMultimodalEmbeddings):
        self.embeddings = embeddings
        self.text_vectorstore = None   # created on the first text entry
        self.image_vectorstore = None  # created on the first image entry
        self.documents = []

    def add_documents(self, multimodal_docs: List[MultimodalDocument]):
        """Add multimodal documents to the vector store.

        Every document contributes a text entry; documents whose
        ``has_image()`` is true also contribute an image entry. Calling this
        method again extends the existing indexes instead of replacing them.
        """
        text_docs = []
        image_docs = []

        for doc in multimodal_docs:
            # Position in self.documents, so ids stay valid across several
            # add_documents calls (a per-call counter would restart at 0).
            doc_id = len(self.documents)
            self.documents.append(doc)

            # Add text document
            text_metadata = {**doc.metadata, "doc_id": doc_id, "type": "text"}
            text_docs.append(Document(page_content=doc.text, metadata=text_metadata))

            # Add image document if exists
            if doc.has_image():
                image_metadata = {**doc.metadata, "doc_id": doc_id, "type": "image", "image_path": doc.image_path}
                image_docs.append(Document(page_content=f"Image: {doc.image_path}", metadata=image_metadata))

        # Create or extend the text vector store
        if text_docs:
            if self.text_vectorstore is None:
                self.text_vectorstore = FAISS.from_documents(text_docs, self.embeddings)
            else:
                self.text_vectorstore.add_documents(text_docs)

        # Create or extend the image vector store
        if image_docs:
            # NOTE(review): only page_content is stored with each image vector;
            # image_metadata is not passed to the index, so image hits appear
            # to come back without source/category/doc_id. Attaching it
            # (metadatas=...) also requires updating consumers that recover
            # the path from the "Image: " page_content.
            image_pairs = [
                (doc.page_content, self.embeddings.embed_image(doc.metadata["image_path"]))
                for doc in image_docs
            ]
            if self.image_vectorstore is None:
                self.image_vectorstore = FAISS.from_embeddings(image_pairs, self.embeddings)
            else:
                self.image_vectorstore.add_embeddings(image_pairs)

    def search_text(self, query: str, k: int = 3):
        """Return up to ``k`` text entries similar to ``query`` ([] if no text index)."""
        if self.text_vectorstore is None:
            return []
        return self.text_vectorstore.similarity_search(query, k=k)

    def search_images(self, image_path: str, k: int = 3):
        """Return up to ``k`` image entries similar to the image at ``image_path``.

        Returns [] without embedding the query when no image index exists.
        """
        if self.image_vectorstore is None:
            return []
        query_embedding = self.embeddings.embed_image_query(image_path)
        return self.image_vectorstore.similarity_search_by_vector(query_embedding, k=k)

    def multimodal_search(self, text_query: str = None, image_path: str = None, k: int = 5):
        """Combined text and image search.

        Each supplied query is run with ``k // 2 + 1`` results. Text hits are
        listed first, then image hits, and the combined list is cut to ``k``.

        Returns:
            A list of ``(document, "text" | "image")`` tuples.
        """
        results = []
        per_modality = k // 2 + 1

        if text_query:
            text_results = self.search_text(text_query, k=per_modality)
            results.extend([(doc, "text") for doc in text_results])

        if image_path:
            image_results = self.search_images(image_path, k=per_modality)
            results.extend([(doc, "image") for doc in image_results])

        return results[:k]

In [None]:
# Create sample multimodal documents.
# Two of the five documents reference an image under ./images/. A document whose
# image file does not exist is still indexed, but as text only, because
# add_documents only creates an image entry when MultimodalDocument.has_image()
# is true.

sample_multimodal_docs = [
    MultimodalDocument(
        text="We present Amazon Nova Premier, our most capable multimodal foundation model and teacher for model distillation.",
        image_path= "./images/nova-premier-pdf-screenshot.png",
        metadata={"source": "nova_premier_model_card", "category": "ai"}
    ),
    MultimodalDocument(
        text="Vector databases enable semantic search capabilities",
        metadata={"source": "vector_info", "category": "ai"}
    ),
    MultimodalDocument(
        text="Amazon S3 provides scalable object storage for data backup and archiving.",
        metadata={"source": "aws_s3", "category": "storage"}
    ),
    MultimodalDocument(
        text="Amazon SageMaker AI is a fully managed machine learning (ML) service. ",
        image_path="./images/sagemaker-ai-workflow-screenshot.png",
        metadata={"source": "amazon_sagemaker", "category": "ai"}
    ),
    MultimodalDocument(
        text="Introducing Amazon Nova foundation models: Frontier intelligence and industry leading price performance",
        metadata={"source": "nova_blog", "category": "ai"}
    ),
]

# Initialize multimodal vector store.
# add_documents embeds every text (and every existing image) through the
# embeddings object, so this step calls the Bedrock model once per item.
multimodal_store = MultimodalVectorStore(embeddings)
multimodal_store.add_documents(sample_multimodal_docs)

print("Multimodal vector store created with text documents")
print(f"Added {len(sample_multimodal_docs)} multimodal documents")

In [None]:
# Run a text-only query against the text index and list the two closest entries
query = "Amazon Nova"
text_results = multimodal_store.search_text(query, k=2)

print(f"Text search results for: '{query}'\n")
for rank, hit in enumerate(text_results, start=1):
    hit_metadata = hit.metadata
    print(f"{rank}. {hit.page_content}")
    print(f"   Category: {hit_metadata.get('category', 'N/A')}")
    print(f"   Source: {hit_metadata.get('source', 'N/A')}\n")

## Multimodal Search with Image Display

This section adds a search helper that displays retrieved images alongside the text results.

In [None]:
class EnhancedMultimodalSearch:
    """Run a multimodal search and render the hits, with images, in the notebook."""

    # Prefix that precedes the file path in an image entry's page_content
    # (image entries are stored as f"Image: {path}" by the vector store).
    _IMAGE_PREFIX = "Image: "

    def __init__(self, multimodal_store: MultimodalVectorStore):
        self.multimodal_store = multimodal_store

    @staticmethod
    def _show_image(image_path: str) -> None:
        """Display one image; on failure print a notice instead of raising."""
        try:
            display(Image(filename=image_path, width=400))
        except Exception:
            # Best effort: a missing or unreadable file must not abort the
            # listing. Unlike a bare ``except:``, this lets KeyboardInterrupt
            # and SystemExit propagate.
            print(f"Could not display image: {image_path}")

    def search_and_display(self, text_query: str = None, image_query_path: str = None, k: int = 3):
        """Search and display results with images.

        Args:
            text_query: Optional text to search the text index with.
            image_query_path: Optional path of an image to search the image
                index with; the query image itself is displayed as well.
            k: Maximum number of results to show.
        """
        results = self.multimodal_store.multimodal_search(text_query, image_query_path, k)

        print("=== Multimodal Search Results ===")
        if text_query:
            print(f"Text Query: '{text_query}'")
        if image_query_path:
            print(f"Image Query: {image_query_path}")
            display(Image(filename=image_query_path, width=400))
        print(f"Found {len(results)} results\n")

        documents = self.multimodal_store.documents
        for i, (doc, search_type) in enumerate(results, start=1):
            print(f"--- Result {i} [{search_type.upper()}] ---")
            print(f"Content: {doc.page_content}")
            print(f"Source: {doc.metadata.get('source', 'N/A')}")
            print(f"Category: {doc.metadata.get('category', 'N/A')}")

            # Resolve the hit's own image once: prefer the metadata entry and
            # fall back to the path embedded in an image hit's page_content.
            # Requiring an image hit and a leading prefix avoids treating
            # ordinary text that merely contains "Image:" as a file path.
            image_path = doc.metadata.get('image_path')
            if (
                image_path is None
                and search_type == "image"
                and doc.page_content.startswith(self._IMAGE_PREFIX)
            ):
                image_path = doc.page_content[len(self._IMAGE_PREFIX):]
            if image_path is not None:
                self._show_image(image_path)

            # Otherwise show the image attached to the original document, if
            # any. The lower bound stops a negative id from indexing from the
            # end of the list.
            doc_id = doc.metadata.get('doc_id')
            if image_path is None and doc_id is not None and 0 <= doc_id < len(documents):
                original_doc = documents[doc_id]
                if original_doc.has_image():
                    print(f"Associated image: {original_doc.image_path}")
                    self._show_image(original_doc.image_path)

            print("\n")

In [None]:
# Create enhanced search instance (wraps the multimodal_store built above)
enhanced_search = EnhancedMultimodalSearch(multimodal_store)

# Demo: Search with text query and display associated images.
# Only a text query is given, so only the text index is searched; with k=1
# multimodal_search asks for k // 2 + 1 = 1 hit and returns at most one result.
enhanced_search.search_and_display(text_query="Amazon Nova", k=1)

In [None]:
# Image-only query: skip the search with a hint when the sample file is missing
query_image = "./images/nova-mlp-pdf-screenshot.png"
if not os.path.exists(query_image):
    print(f"Query image not found: {query_image}")
    print("Please ensure sample images exist in the ./images/ directory")
else:
    enhanced_search.search_and_display(image_query_path=query_image, k=1)