In [1]:
from chromadb.utils.embedding_functions import OpenCLIPEmbeddingFunction
from chromadb.utils.data_loaders import ImageLoader

# Loader object passed to the Chroma collection as its `data_loader`.
image_loader = ImageLoader()

# Embedding function passed to the Chroma collection as its `embedding_function`.
embedding_function = OpenCLIPEmbeddingFunction()

In [2]:
import chromadb

# Open the on-disk Chroma store located in the "chromadb" directory.
client = chromadb.PersistentClient(path="chromadb")

# Use get_or_create_collection rather than create_collection: the client is
# persistent, so on any run after the first the collection already exists on
# disk and create_collection would raise. This keeps the cell re-runnable
# after a kernel restart.
vehicles = client.get_or_create_collection(
    name="vehicle_images",
    embedding_function=embedding_function,
    data_loader=image_loader,
)

In [4]:
import os

# Walk dataset/<folder>/<file>.jpg and build parallel lists of image paths and
# collection ids of the form "<folder>-<file stem>".
image_paths = []
image_ids = []
dataset_folder = "dataset/"

# Sorted so that ids/paths are produced in a deterministic order across runs.
for image_folder in sorted(os.listdir(dataset_folder)):
    folder_path = os.path.join(dataset_folder, image_folder)
    # Skip stray files at the top level (e.g. .DS_Store); calling os.listdir
    # on a non-directory would raise NotADirectoryError.
    if not os.path.isdir(folder_path):
        continue
    for filename in sorted(os.listdir(folder_path)):
        if filename.endswith(".jpg"):
            image_paths.append(os.path.join(folder_path, filename))
            # splitext strips only the final extension, so "a.1.jpg" and
            # "a.2.jpg" keep distinct ids. Computing the stem outside the
            # f-string also avoids nesting double quotes inside a
            # double-quoted f-string, which is a SyntaxError before Python 3.12.
            stem = os.path.splitext(filename)[0]
            image_ids.append(f"{image_folder}-{stem}")

# Only call add() when something was found, so an empty or misplaced dataset
# folder produces a clear message instead of an error from the collection.
if image_ids:
    vehicles.add(ids=image_ids, uris=image_paths)
    print("Images added to collection.")
else:
    print(f"No .jpg images found under {dataset_folder!r}; nothing added.")

Images added to collection.


### Retrieve images based on description

In [7]:
# Text-to-image lookup: ask the collection for the single closest match and
# request only the stored URIs in the response.
retrieved = vehicles.query(query_texts=["old red truck"], n_results=1, include=["uris"])

# "uris" holds one list per query text; there is one query, so index 0.
for uri in retrieved["uris"][0]:
    try:
        print(uri)
    except AttributeError as e:
        # Bind the exception to `e`: the message below formats it, and without
        # the binding this handler would itself fail with a NameError.
        print(f"Could not load image from {uri}: {e}")

dataset/truck/Image_71.jpg


In [8]:
from dotenv import load_dotenv

# Load environment variables via python-dotenv; the call's return value is
# shown as the cell output.
# NOTE(review): presumably this supplies the API key used by ChatOpenAI in a
# later cell — confirm the expected variable names in the .env file.
load_dotenv()

True

In [11]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

# Chat model used for the vision prompt. temperature is passed as a float
# (it was previously the string "0.0", which relied on implicit coercion).
chat_openai = ChatOpenAI(model="gpt-4o", temperature=0.0)

# Output parser placed at the end of the chain.
parser = StrOutputParser()

# Prompt with three template variables that callers must supply:
#   user_query    - what the customer is looking for
#   image_data_1  - first image, inserted into a base64 JPEG data URL
#   image_data_2  - second image, inserted into a base64 JPEG data URL
image_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful car showroom assistant. Answer the user's question  using the given image context with direct references to parts of the images provided."
            " Highlight the pros and cons of each vehicle. Use markdown formatting for highlights, emphasis, and structure.",
        ),
        (
            "user",
            [
                {"type": "text", "text": "I'm interested in buying a {user_query}"},
                {
                    "type": "image_url",
                    "image_url": "data:image/jpeg;base64,{image_data_1}",
                },
                {
                    "type": "image_url",
                    "image_url": "data:image/jpeg;base64,{image_data_2}",
                },
            ],
        ),
    ]
)

# Chain: fill the prompt -> call the chat model -> pass the reply through the parser.
vision_chain = image_prompt | chat_openai | parser

In [12]:
# Define a function to query a database, taking a query string and an optional number of results to return.
def query_db(query, results=1):
    """Query the `vehicles` collection with a single text query.

    Args:
        query: Text to search for.
        results: Number of matches to request (passed as `n_results`).

    Returns:
        Whatever `vehicles.query` returns, with URIs included.
    """
    return vehicles.query(
        include=['uris'],
        n_results=results,
        query_texts=[query],
    )

# Define a function to display the results from a database query.
def print_results(results):
    """Print the path of, and display, each image in a query result.

    Args:
        results: Mapping with a 'uris' key holding a list of lists of image
            paths; only the first inner list is shown.
    """
    # Imported locally because no cell above imports these names: `Image`
    # would otherwise be undefined, and `display` is only available
    # implicitly when running inside an IPython kernel.
    from IPython.display import Image, display

    for uri in results['uris'][0]:
        print(f"Path: {uri}")
        # Fixed 300px width keeps several results readable in one output area.
        display(Image(filename=uri, width=300))
        # Blank separator between consecutive images.
        print("\n")

In [None]:
#
# To start things off, run this block of code
#

import base64

from IPython.display import Image, Markdown, display


def format_prompt_inputs(data, user_query):
    """Build the input dict for the vision prompt from a query result.

    Defined in this cell because no cell above defines it. The keys match the
    prompt's template variables: `user_query`, `image_data_1`, `image_data_2`.

    Args:
        data: Query result with a 'uris' key holding a list of lists of image
            paths; the first two paths of the first list are used.
        user_query: The text the user searched for.

    Returns:
        dict with the user query and the two images as base64-encoded strings.

    Raises:
        ValueError: if fewer than two images were retrieved.
    """
    uris = data["uris"][0]
    if len(uris) < 2:
        raise ValueError(
            f"Expected at least 2 retrieved images, got {len(uris)}."
        )

    inputs = {"user_query": user_query}
    for position, uri in enumerate(uris[:2], start=1):
        with open(uri, "rb") as image_file:
            encoded = base64.b64encode(image_file.read()).decode("utf-8")
        inputs[f"image_data_{position}"] = encoded
    return inputs


query = "cheap blue car"

# Bring back the first two images that match our query, then call our chain.
results = query_db(query, results=2)
prompt_input = format_prompt_inputs(results, query)
# The chain is named `vision_chain` where it is built; `image_chain` does not exist.
response = vision_chain.invoke(prompt_input)

# Show the images used
for position, uri in enumerate(results['uris'][0], start=1):
    display(Markdown(f"Example Picture {position}:"))
    display(Image(filename=uri, width=300))

# Printing LLM Response
display(Markdown(response))