In [30]:
%pip install faiss-cpu

Collecting faiss-cpu
  Using cached faiss_cpu-1.10.0-cp312-cp312-macosx_11_0_arm64.whl.metadata (4.4 kB)
Using cached faiss_cpu-1.10.0-cp312-cp312-macosx_11_0_arm64.whl (3.2 MB)
Installing collected packages: faiss-cpu
Successfully installed faiss-cpu-1.10.0
Note: you may need to restart the kernel to use updated packages.


In [1]:
from langchain_core.documents import Document
from langchain_community.vectorstores import FAISS
from langchain_experimental.open_clip import OpenCLIPEmbeddings
import glob
import base64

# Collect the JPEG files that sit directly in ../images.
# - `recursive=True` was dropped: it only has an effect when the pattern contains `**`.
# - glob returns matches in arbitrary, filesystem-dependent order; sorting keeps the
#   document order (and therefore the index layout) stable across runs and machines.
paths = sorted(glob.glob('../images/*.jpeg'))

In [2]:
# Accumulator for the Document objects built from the image files.
lc_docs = []


def encode_image(path):
    """Read the file at ``path`` in binary mode and return its contents base64-encoded.

    Args:
        path: Filesystem path of the file to read.

    Returns:
        The base64 encoding of the file's bytes, decoded to a ``str`` via UTF-8.
    """
    with open(path, "rb") as handle:
        raw_bytes = handle.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode("utf-8")

# Wrap every image as a Document: the base64 text of the file becomes the
# page content, and the originating path is recorded under the 'source' key.
for image_path in paths:
    lc_docs.append(
        Document(
            page_content=encode_image(image_path),
            metadata={'source': image_path},
        )
    )

In [3]:
# Build the FAISS index over the base64-encoded image documents using an OpenCLIP embedder.
# NOTE(review): from_documents presumably embeds page_content through the embedder's
# text path (embed_documents), so the base64 strings may be tokenized as text rather
# than decoded and encoded as images — confirm this is intended before trusting
# text-to-image retrieval quality.
clip_embeddings = OpenCLIPEmbeddings()
vector_store = FAISS.from_documents(documents=lc_docs, embedding=clip_embeddings)

In [4]:
# Expose the vector store as a retriever and fix the result count on the retriever
# itself. The query cells pass `k=4` per call, but some langchain-core versions may
# not forward per-call keyword arguments to the search — TODO confirm. Setting it
# here keeps the number of hits explicit either way (4 is also the library default).
retriever = vector_store.as_retriever(search_kwargs={"k": 4})

In [5]:
# Text query. Author's note: "dog 5" (presumably the expected best match — TODO confirm).
query = "rottweiler"
docs = retriever.invoke(query, k=4)

# Each hit's metadata carries the originating image path under 'source'.
for hit in docs:
    print(hit.metadata)
    

{'source': '../images/dog_5.jpeg'}
{'source': '../images/dog_3.jpeg'}
{'source': '../images/cat_3.jpeg'}
{'source': '../images/dog_2.jpeg'}


In [6]:
from IPython.display import Image, display  # not used in this cell; kept in case other cells rely on it

# Ask the retriever for the 4 documents closest to the text "rottweiler".
docs = retriever.invoke("rottweiler", k=4)

# Print the metadata dict (containing the 'source' path) of every hit.
for result in docs:
    print(result.metadata)


{'source': '../images/dog_5.jpeg'}
{'source': '../images/dog_3.jpeg'}
{'source': '../images/cat_3.jpeg'}
{'source': '../images/dog_2.jpeg'}


In [12]:
from IPython.display import Image, display  # not used in this cell; kept in case other cells rely on it

# Ask the retriever for the 4 documents closest to the text "rottweiler".
search_text = "rottweiler"
docs = retriever.invoke(search_text, k=4)

for retrieved in docs:
    # Prints the whole metadata dict; the image path is its 'source' entry.
    print(retrieved.metadata)


{'source': '../images/dog_5.jpeg'}
{'source': '../images/dog_3.jpeg'}
{'source': '../images/cat_3.jpeg'}
{'source': '../images/dog_2.jpeg'}


In [8]:
# Query with an image instead of a phrase: the query string is the base64 encoding
# of cat_1.jpeg. Author's note: "cat 1" (presumably the expected best match — TODO confirm).
# NOTE(review): invoke() presumably embeds this string through the same text path used
# for ordinary queries, so it would match on the encoded text rather than on image
# content — confirm this is the intended way to search by image.
query_image_b64 = encode_image("../images/cat_1.jpeg")
docs = retriever.invoke(query_image_b64, k=4)

for hit in docs:
    print(hit.metadata)
    

{'source': '../images/cat_1.jpeg'}
{'source': '../images/cat_2.jpeg'}
{'source': '../images/dog_2.jpeg'}
{'source': '../images/dog_5.jpeg'}


In [9]:
# Descriptive text query. Author's note: "cat 2" (presumably the expected best match — TODO confirm).
description = "gray cat with long hair in a field"
docs = retriever.invoke(description, k=4)

# Show where each retrieved document came from.
for match in docs:
    print(match.metadata)

{'source': '../images/dog_5.jpeg'}
{'source': '../images/dog_3.jpeg'}
{'source': '../images/cat_4.jpeg'}
{'source': '../images/dog_2.jpeg'}


In [10]:
# Descriptive text query. Author's note: "dog 2" (presumably the expected best match — TODO confirm).
description = "golden retriever playing with orange ball"
docs = retriever.invoke(description, k=4)

# Show where each retrieved document came from.
for match in docs:
    print(match.metadata)
    

{'source': '../images/dog_2.jpeg'}
{'source': '../images/dog_3.jpeg'}
{'source': '../images/cat_3.jpeg'}
{'source': '../images/dog_5.jpeg'}


In [11]:
# Descriptive text query. Author's note: "dog 4" (presumably the expected best match — TODO confirm).
description = "golden retriever in field with a sunny blurred background"
docs = retriever.invoke(description, k=4)

# Show where each retrieved document came from.
for match in docs:
    print(match.metadata)
    

{'source': '../images/cat_3.jpeg'}
{'source': '../images/dog_3.jpeg'}
{'source': '../images/dog_2.jpeg'}
{'source': '../images/dog_5.jpeg'}
