In [1]:
from langchain_core.documents import Document
from langchain_community.vectorstores import FAISS
from langchain_experimental.open_clip import OpenCLIPEmbeddings
import glob
import base64

paths = glob.glob('../images/*.jpeg', recursive=True)

In [2]:
lc_docs = []
def encode_image(path):
    with open(path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")

for path in paths:
    doc = Document(
        page_content=encode_image(path),
        metadata ={
            'source': path
        }
    )

    lc_docs.append(doc)

In [3]:
vector_store = FAISS.from_documents(lc_docs, embedding=OpenCLIPEmbeddings())

In [4]:
retriever = vector_store.as_retriever()

In [5]:
dog_paths = glob.glob('../images/dog*.jpeg', recursive=True)

In [6]:
dog_to_cat = {}
for dog_pic in dog_paths:
    docs = retriever.invoke(encode_image(dog_pic))
    cats_retrieved = 0
    for i, doc in enumerate(docs):
        if "cat" in doc.metadata["source"]:
            cats_retrieved += 4 - i
    # Make sure this is indented inside the loop
    dog_to_cat[dog_pic] = cats_retrieved

In [7]:
dog_to_cat

{'../images/dog_1.jpeg': 5,
 '../images/dog_3.jpeg': 3,
 '../images/dog_2.jpeg': 2,
 '../images/dog_5.jpeg': 1,
 '../images/dog_4.jpeg': 1}

In [9]:
from IPython.display import Markdown, display
import os

# Dictionary to hold results: each dog image maps to a tuple with the score and list of cat image sources
dog_to_cat_details = {}

for dog_pic in dog_paths:
    # Retrieve similar images for the dog image
    docs = retriever.invoke(encode_image(dog_pic))
    
    # Filter out the documents that come from cat images
    cat_docs = [doc for doc in docs if "cat" in doc.metadata["source"]]
    
    # Compute a score using the same logic as before (optional)
    cats_retrieved = 0
    for i, doc in enumerate(docs):
        if "cat" in doc.metadata["source"]:
            cats_retrieved += 4 - i
            
    # Save both the score and the cat_docs list
    dog_to_cat_details[dog_pic] = (cats_retrieved, cat_docs)

# Now, display the results, showing only the file names
for dog_pic, (score, cat_docs) in dog_to_cat_details.items():
    dog_filename = os.path.basename(dog_pic)
    display(Markdown(f"### Dog Image: {dog_filename} (Cat Similarity Score: {score})"))
    
    if cat_docs:
        display(Markdown("**Retrieved Cat Images:**"))
        for doc in cat_docs:
            cat_filename = os.path.basename(doc.metadata["source"])
            display(Markdown(f"*Source:* {cat_filename}"))
    else:
        display(Markdown("No similar cat images found."))
    
    display(Markdown("---"))


### Dog Image: dog_1.jpeg (Cat Similarity Score: 5)

**Retrieved Cat Images:**

*Source:* cat_5.jpeg

*Source:* cat_4.jpeg

---

### Dog Image: dog_3.jpeg (Cat Similarity Score: 3)

**Retrieved Cat Images:**

*Source:* cat_4.jpeg

*Source:* cat_5.jpeg

---

### Dog Image: dog_2.jpeg (Cat Similarity Score: 2)

**Retrieved Cat Images:**

*Source:* cat_3.jpeg

---

### Dog Image: dog_5.jpeg (Cat Similarity Score: 1)

**Retrieved Cat Images:**

*Source:* cat_5.jpeg

---

### Dog Image: dog_4.jpeg (Cat Similarity Score: 1)

**Retrieved Cat Images:**

*Source:* cat_3.jpeg

---