In [None]:
# ! pip install setuptools==69.5.1 numpy==1.21.3 torch torchvision ftfy faiss-cpu==1.7.4 openai-clip langchain langchain-community langchain-experimental langchain-openai open_clip_torch 'arize-phoenix[evals]'

In [120]:
import glob

paths = glob.glob("./images/*.jpeg", recursive=True)

In [121]:
import os

from dotenv import load_dotenv

load_dotenv()
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]

In [122]:
import phoenix as px

session = px.launch_app()

WARNI [phoenix.session.session] Existing running Phoenix instance detected! Shutting it down and starting a new instance...


🌍 To view the Phoenix app in your browser, visit http://localhost:6006/
📖 For more information on how to use Phoenix, check out https://docs.arize.com/phoenix


In [123]:
from phoenix.trace.langchain import LangChainInstrumentor

LangChainInstrumentor().instrument()

WARNI [opentelemetry.instrumentation.instrumentor] Attempting to instrument while already instrumented


In [124]:
import base64
from io import BytesIO

from PIL import Image


def convert_to_base64(pil_image):
    """
    Convert PIL images to Base64 encoded strings

    :param pil_image: PIL image
    :return: Re-sized Base64 string
    """

    buffered = BytesIO()
    pil_image.save(buffered, format="JPEG")  # You can change the format if needed
    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
    return img_str


from langchain_core.documents import Document


lc_docs = []


for path in paths:
    pil_image = Image.open(path)
    image_b64 = convert_to_base64(pil_image)
    doc = Document(page_content=image_b64, metadata={"path": path})
    lc_docs.append(doc)

In [141]:
import chromadb
from langchain_community.vectorstores import Chroma
from langchain_experimental.open_clip import OpenCLIPEmbeddings

vector_store = Chroma(
    collection_name="mm_rag_clip_photos", embedding_function=OpenCLIPEmbeddings()
)

# Get image URIs with .jpg extension only
image_uris = sorted(
    [
        os.path.join("./images", image_name)
        for image_name in os.listdir("./images")
        if image_name.endswith(".jpeg")
    ]
)

# Add images
vector_store.add_images(uris=image_uris)

['c1f39282-ec75-4635-a392-bc2b47620eae',
 '182a5174-331b-4e92-9563-731ba4f648ed',
 'b841b56c-65a7-4596-b5b9-6752c00111e4',
 '5c2cd026-2914-4e40-8cb4-8b20c1b698d8',
 '052167ae-ca6b-4425-b8af-f89dab28570f',
 'fffceed6-d7f0-4758-bea1-b6afdf4a17cd',
 '1ce38271-c7f4-4050-b66b-782db9e1f6da',
 '3f866b31-8010-450b-8b01-b44164dec2cf',
 '66f6a97f-c0dd-4cac-b6e3-ee36dc810db5',
 'f4407180-7940-4947-ab49-0fe6b717d939']

In [142]:
retriever = vector_store.as_retriever()

In [143]:
import io

from PIL import Image


def is_base64(s):
    """Check if a string is Base64 encoded"""
    try:
        return base64.b64encode(base64.b64decode(s)) == s.encode()
    except Exception:
        return False


def split_image_text_types(docs):
    """Split numpy array images and texts"""
    images = []
    text = []
    for doc in docs:
        doc = doc.page_content  # Extract Document contents
        if is_base64(doc):
            images.append(doc)
        else:
            text.append(doc)
    return {"images": images, "texts": text}

In [144]:
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_community.chat_models import ChatOllama


def prompt_func(data_dict):
    # Joining the context texts into a single string
    formatted_texts = "\n".join(data_dict["context"]["texts"])
    messages = []

    # Adding image(s) to the messages if present
    if data_dict["context"]["images"]:
        image_message = {
            "type": "image_url",
            "image_url": f"data:image/jpeg;base64,{data_dict['context']['images'][0]}",
        }
        messages.append(image_message)

    # Adding the text message for analysis
    text_message = {
        "type": "text",
        "text": (
            "As an animal expert, your task is to analyze and interpret images. "
            "Alongside the images, you will be "
            "provided with related text to offer context. Both will be retrieved from a vectorstore based "
            "on user-input keywords. Please use your extensive knowledge and analytical skills to provide a "
            "comprehensive summary that includes:\n"
            "- A detailed description of the visual elements in the image.\n"
            "- Connections between the image and the related text.\n\n"
            f"User-provided keywords: {data_dict['question']}\n\n"
            "Text and / or tables:\n"
            f"{formatted_texts}"
        ),
    }
    messages.append(text_message)

    return [HumanMessage(content=messages)]


foundation = ChatOllama(model="llava", temperature=0)

# RAG pipeline
chain = (
    {
        "context": retriever | RunnableLambda(split_image_text_types),
        "question": RunnablePassthrough(),
    }
    | RunnableLambda(prompt_func)
    | foundation
    | StrOutputParser()
)

In [146]:
chain.invoke("puppy")

ERROR [openinference.instrumentation.langchain._tracer] Failed to get attribute.
Traceback (most recent call last):
  File "/Users/nicktroast/Workspace/rag_cookbooks/workshop_series/.venv/lib/python3.12/site-packages/openinference/instrumentation/langchain/_tracer.py", line 274, in wrapper
    yield from wrapped(*args, **kwargs)
  File "/Users/nicktroast/Workspace/rag_cookbooks/workshop_series/.venv/lib/python3.12/site-packages/openinference/instrumentation/langchain/_tracer.py", line 426, in _parse_message_data
    assert isinstance(content, str), f"expected str, found {type(content)}"
           ^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError: expected str, found <class 'list'>


' The image shows a young golden retriever puppy standing on what appears to be a stone pathway. The puppy is looking directly at the camera with its tongue out, giving an impression of playfulness or anticipation. It has a vibrant coat of golden fur and is wearing a purple collar with a small tag attached.\n\nThe background features a well-maintained lawn and a building that resembles a traditional institutional structure, possibly a school or university given the architectural style. The setting suggests an outdoor campus environment.\n\nThe related text provided does not seem to have any direct connection to the image of the puppy. It is unclear how this text relates to the image without additional context. '

In [147]:
docs = retriever.invoke("puppy", k=3)

for doc in docs:
    print(doc)

page_content='/9j/4AAQSkZJRgABAQEAYABgAAD//gA7Q1JFQVRPUjogZ2QtanBlZyB2MS4wICh1c2luZyBJSkcgSlBFRyB2ODApLCBxdWFsaXR5ID0gNzAK/9sAQwAKBwcIBwYKCAgICwoKCw4YEA4NDQ4dFRYRGCMfJSQiHyIhJis3LyYpNCkhIjBBMTQ5Oz4+PiUuRElDPEg3PT47/9sAQwEKCwsODQ4cEBAcOygiKDs7Ozs7Ozs7Ozs7Ozs7Ozs7Ozs7Ozs7Ozs7Ozs7Ozs7Ozs7Ozs7Ozs7Ozs7Ozs7Ozs7/8AAEQgEOAeAAwEiAAIRAQMRAf/EAB8AAAEFAQEBAQEBAAAAAAAAAAABAgMEBQYHCAkKC//EALUQAAIBAwMCBAMFBQQEAAABfQECAwAEEQUSITFBBhNRYQcicRQygZGhCCNCscEVUtHwJDNicoIJChYXGBkaJSYnKCkqNDU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6g4SFhoeIiYqSk5SVlpeYmZqio6Slpqeoqaqys7S1tre4ubrCw8TFxsfIycrS09TV1tfY2drh4uPk5ebn6Onq8fLz9PX29/j5+v/EAB8BAAMBAQEBAQEBAQEAAAAAAAABAgMEBQYHCAkKC//EALURAAIBAgQEAwQHBQQEAAECdwABAgMRBAUhMQYSQVEHYXETIjKBCBRCkaGxwQkjM1LwFWJy0QoWJDThJfEXGBkaJicoKSo1Njc4OTpDREVGR0hJSlNUVVZXWFlaY2RlZmdoaWpzdHV2d3h5eoKDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uLj5OXm5+jp6vLz9PX29/j5+v/aAAwDAQACEQMRAD8A4U6IucYIzSf2CCTwa2/+Ep0E5A3gjvjrUQ8TaGG4d8H1FYalmR/YgHC0w6Jz8zE10en6lp+rTu

In [148]:
chain.invoke("cat laying down on white background")

ERROR [openinference.instrumentation.langchain._tracer] Failed to get attribute.
Traceback (most recent call last):
  File "/Users/nicktroast/Workspace/rag_cookbooks/workshop_series/.venv/lib/python3.12/site-packages/openinference/instrumentation/langchain/_tracer.py", line 274, in wrapper
    yield from wrapped(*args, **kwargs)
  File "/Users/nicktroast/Workspace/rag_cookbooks/workshop_series/.venv/lib/python3.12/site-packages/openinference/instrumentation/langchain/_tracer.py", line 426, in _parse_message_data
    assert isinstance(content, str), f"expected str, found {type(content)}"
           ^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError: expected str, found <class 'list'>


' The image shows a cat lying down on what appears to be a white carpeted floor. The cat has striking green eyes and is looking directly at the camera, giving an impression of curiosity or attentiveness. Its fur is a mix of dark and light colors, with the darker areas being concentrated around its face and ears.\n\nThe cat\'s position suggests it might be resting or sleeping, as it is stretched out comfortably on the floor. The white background provides a stark contrast to the cat\'s fur, making the feline the clear focus of the image.\n\nIn terms of connections with the provided keywords:\n- "cat laying down" - This accurately describes the posture and activity of the cat in the image.\n- "white background" - The flooring is indeed white, which aligns with this keyword.\n\nThe image does not provide any additional context or details that would allow for a more comprehensive summary beyond what has been described. '

In [150]:
docs = retriever.invoke("cat laying down on white background", k=3)

for doc in docs:
    print(doc)

page_content='/9j/4QC8RXhpZgAASUkqAAgAAAAGABIBAwABAAAAAQAAABoBBQABAAAAVgAAABsBBQABAAAAXgAAACgBAwABAAAAAgAAABMCAwABAAAAAQAAAGmHBAABAAAAZgAAAAAAAABIAAAAAQAAAEgAAAABAAAABgAAkAcABAAAADAyMTABkQcABAAAAAECAwAAoAcABAAAADAxMDABoAMAAQAAAP//AAACoAQAAQAAALAEAAADoAQAAQAAAKMCAAAAAAAA/+ICKElDQ19QUk9GSUxFAAEBAAACGAAAAAAEMAAAbW50clJHQiBYWVogAAAAAAAAAAAAAAAAYWNzcAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEAAPbWAAEAAAAA0y0AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAJZGVzYwAAAPAAAAB0clhZWgAAAWQAAAAUZ1hZWgAAAXgAAAAUYlhZWgAAAYwAAAAUclRSQwAAAaAAAAAoZ1RSQwAAAaAAAAAoYlRSQwAAAaAAAAAod3RwdAAAAcgAAAAUY3BydAAAAdwAAAA8bWx1YwAAAAAAAAABAAAADGVuVVMAAABYAAAAHABzAFIARwBCAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABYWVogAAAAAAAAb6IAADj1AAADkFhZWiAAAAAAAABimQAAt4UAABjaWFlaIAAAAAAAACSgAAAPhAAAts9wYXJhAAAAAAAEAAAAAmZmAADypwAADVkAABPQAAAKWwAAAAAAAAAAWFlaIAAAAAAAAPbWAAEAAAAA0y1tbHVjAAAAAAAAAAEAAAAMZW5VUwAAACAAAAAcAEcAbwBvAGcAbABlACAASQBuAGMALgAgAD