In [None]:
# ! pip install setuptools==69.5.1 numpy==1.21.3 torch torchvision ftfy faiss-cpu==1.7.4 openai-clip langchain langchain-community langchain-experimental langchain-openai open_clip_torch 'arize-phoenix[evals]'

In [2]:
from langchain_core.documents import Document
import glob
paths = glob.glob('./images/*.jpeg', recursive=True)
from langchain_community.vectorstores import FAISS

In [3]:
from dotenv import load_dotenv
import os

load_dotenv()
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]

In [1]:
import phoenix as px
session = px.launch_app()

  from .autonotebook import tqdm as notebook_tqdm


🌍 To view the Phoenix app in your browser, visit http://localhost:6006/
📖 For more information on how to use Phoenix, check out https://docs.arize.com/phoenix


In [4]:
from phoenix.trace.langchain import LangChainInstrumentor

LangChainInstrumentor().instrument()

In [7]:
from langchain_experimental.open_clip import OpenCLIPEmbeddings
import base64

In [8]:
lc_docs = []
def encode_image(path):
    with open(path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")

for path in paths:
    doc = Document(
        page_content=encode_image(path),
        lookup_str = '',
        metadata ={
            'source': path
        },
        lookup_index=0
    )
    lc_docs.append(doc)

In [9]:
vector_store = FAISS.from_documents(lc_docs, embedding=OpenCLIPEmbeddings())

In [10]:
retriever = vector_store.as_retriever()

In [11]:
import base64
import io
from io import BytesIO

import numpy as np
from PIL import Image

def resize_base64_image(base64_string, size=(128, 128)):
    """
    Resize an image encoded as a Base64 string.

    Args:
    base64_string (str): Base64 string of the original image.
    size (tuple): Desired size of the image as (width, height).

    Returns:
    str: Base64 string of the resized image.
    """
    # Decode the Base64 string
    img_data = base64.b64decode(base64_string)
    img = Image.open(io.BytesIO(img_data))

    # Resize the image
    resized_img = img.resize(size, Image.LANCZOS)

    # Save the resized image to a bytes buffer
    buffered = io.BytesIO()
    resized_img.save(buffered, format=img.format)

    # Encode the resized image to Base64
    return base64.b64encode(buffered.getvalue()).decode("utf-8")


def is_base64(s):
    """Check if a string is Base64 encoded"""
    try:
        return base64.b64encode(base64.b64decode(s)) == s.encode()
    except Exception:
        return False


def split_image_text_types(docs):
    """Split numpy array images and texts"""
    images = []
    text = []
    for doc in docs:
        doc = doc.page_content  # Extract Document contents
        if is_base64(doc):
            # Resize image to avoid OAI server error
            images.append(
                resize_base64_image(doc, size=(250, 250))
            )  # base64 encoded str
        else:
            text.append(doc)
    return {"images": images, "texts": text}

In [12]:
from operator import itemgetter

from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_openai import ChatOpenAI


def prompt_func(data_dict):
    # Joining the context texts into a single string
    formatted_texts = "\n".join(data_dict["context"]["texts"])
    messages = []

    # Adding image(s) to the messages if present
    if data_dict["context"]["images"]:
        image_message = {
            "type": "image_url",
            "image_url": {
                "url": f"data:image/jpeg;base64,{data_dict['context']['images'][0]}"
            },
        }
        messages.append(image_message)

    # Adding the text message for analysis
    text_message = {
        "type": "text",
        "text": (
            "As an animal lover, your task is to analyze and interpret images of cute animals, "
            "Please use your extensive knowledge and analytical skills to provide a "
            "summary that includes:\n"
            "- A detailed description of the visual elements in the image.\n"
            f"User-provided keywords: {data_dict['question']}\n\n"
            "Text and / or tables:\n"
            f"{formatted_texts}"
        ),
    }
    messages.append(text_message)

    return [HumanMessage(content=messages)]


foundation = ChatOpenAI(temperature=0, model="gpt-4-vision-preview", max_tokens=1024)

# RAG pipeline
chain = (
    {
        "context": retriever | RunnableLambda(split_image_text_types),
        "question": RunnablePassthrough(),
    }
    | RunnableLambda(prompt_func)
    | foundation
    | StrOutputParser()
)

In [13]:
chain.invoke("german shepard")

ERROR [openinference.instrumentation.langchain._tracer] Failed to get attribute.
Traceback (most recent call last):
  File "/Users/yujian/Documents/workspace/rag_cookbooks/p310/lib/python3.10/site-packages/openinference/instrumentation/langchain/_tracer.py", line 274, in wrapper
    yield from wrapped(*args, **kwargs)
  File "/Users/yujian/Documents/workspace/rag_cookbooks/p310/lib/python3.10/site-packages/openinference/instrumentation/langchain/_tracer.py", line 426, in _parse_message_data
    assert isinstance(content, str), f"expected str, found {type(content)}"
AssertionError: expected str, found <class 'list'>


"The image shows a German Shepherd dog standing on a grassy surface. The dog is positioned in profile, allowing a clear view of its body structure and markings. The German Shepherd has a well-defined, muscular build, indicative of the breed's strength and agility. Its coat is predominantly tan with a black saddle marking that extends over the back and sides, and the black coloration continues onto the tail. The dog's face has the characteristic black mask, with black coloration around the eyes, ears, and extending down the muzzle.\n\nThe German Shepherd's ears are erect and pointed, which is typical for the breed and suggests attentiveness or interest in its environment. Its eyes are not clearly visible in the image, but the direction of its gaze seems forward, possibly focusing on something or someone outside of the frame. The dog's tongue is out, which could indicate that it is panting, possibly due to exercise or warm weather.\n\nThe background is blurred but appears to be a garden 

In [14]:
docs = retriever.invoke("german shepard", k=3)

for doc in docs:
    print(doc.metadata)

{'source': './images/dog_1.jpeg'}
{'source': './images/cat_5.jpeg'}
{'source': './images/cat_4.jpeg'}
{'source': './images/cat_3.jpeg'}


In [15]:
chain.invoke("cat laying down on white background")

ERROR [openinference.instrumentation.langchain._tracer] Failed to get attribute.
Traceback (most recent call last):
  File "/Users/yujian/Documents/workspace/rag_cookbooks/p310/lib/python3.10/site-packages/openinference/instrumentation/langchain/_tracer.py", line 274, in wrapper
    yield from wrapped(*args, **kwargs)
  File "/Users/yujian/Documents/workspace/rag_cookbooks/p310/lib/python3.10/site-packages/openinference/instrumentation/langchain/_tracer.py", line 426, in _parse_message_data
    assert isinstance(content, str), f"expected str, found {type(content)}"
AssertionError: expected str, found <class 'list'>


"The image shows a cat lying down on a carpeted floor, not a white background as mentioned in the keywords. The cat appears to be a domestic short-haired with a tabby pattern, characterized by its striped and mottled coat of gray and black. Its eyes are a striking green, and they are wide open, giving the cat an alert and curious expression.\n\nThe cat is resting on its side, with its head turned towards the camera, allowing for a clear view of its face. One of its front paws is stretched out, while the other seems to be tucked underneath its body. The cat's ears are in a neutral position, neither flattened nor perked up, which usually indicates a state of relaxation or contentment.\n\nIn the background, there is a small blue ball, suggesting that the cat may have been playing before settling down in this position. The carpet is a neutral color, providing a soft texture that contrasts with the cat's fur. The lighting in the image is soft and diffused, casting gentle shadows and highlig

In [16]:
docs = retriever.invoke("cat laying down on white background", k=3)

for doc in docs:
    print(doc.metadata)

{'source': './images/cat_4.jpeg'}
{'source': './images/dog_1.jpeg'}
{'source': './images/cat_5.jpeg'}
{'source': './images/cat_3.jpeg'}


In [25]:
docs = retriever.invoke("cat showing teeth with open mouth", k=3)

for doc in docs:
    print(doc.metadata)

{'source': './images/cat_4.jpeg'}
{'source': './images/dog_1.jpeg'}
{'source': './images/cat_5.jpeg'}
{'source': './images/dog_4.jpeg'}


In [26]:
chain.invoke("cat showing teeth with open mouth")

ERROR [openinference.instrumentation.langchain._tracer] Failed to get attribute.
Traceback (most recent call last):
  File "/Users/yujian/Documents/workspace/rag_cookbooks/p310/lib/python3.10/site-packages/openinference/instrumentation/langchain/_tracer.py", line 274, in wrapper
    yield from wrapped(*args, **kwargs)
  File "/Users/yujian/Documents/workspace/rag_cookbooks/p310/lib/python3.10/site-packages/openinference/instrumentation/langchain/_tracer.py", line 426, in _parse_message_data
    assert isinstance(content, str), f"expected str, found {type(content)}"
AssertionError: expected str, found <class 'list'>


"The image shows a close-up of a cat lying on its side on a carpeted floor. The cat appears to be a domestic short-haired with a tabby pattern, characterized by its striped fur in shades of gray and black, with a hint of brown. Its eyes are a striking green color, wide open and looking directly at the camera, which adds to its engaging expression.\n\nThe cat's mouth is slightly open, revealing its small white teeth, which could be interpreted as a playful gesture or a mid-yawn moment. The cat's right paw is extended towards the camera, adding to the playful or relaxed demeanor. The soft focus of the image emphasizes the cat's face, particularly its eyes and mouth.\n\nIn the background, there is a small blue ball, which could suggest that the cat was playing before the photo was taken. The overall impression is that of a content and relaxed cat in a comfortable home environment. The cat's expression and body language exude a sense of calm and comfort, making the image quite endearing to