## Prerequisites
- pip install langchain-openai
- pip install langchain-chroma
- export OPENAI_API_KEY="..."

In [1]:
# Initialize OpenAI embeddings

import os
from langchain_openai import OpenAIEmbeddings

embeddings_model = OpenAIEmbeddings(api_key=os.environ['OPENAI_API_KEY'])

In [2]:
# TODO: Setup Chroma with LangChain

from langchain_chroma import Chroma

db = Chroma(
    collection_name="speech_collection",
    embedding_function=OpenAIEmbeddings(),
    persist_directory="./chroma_db",
)

In [3]:
# TODO: Add documents to Chroma

from uuid import uuid4
from langchain_core.documents import Document

document_1 = Document(
    page_content="20 tons of cocoa have been deposited at Warehouse AX749",
    collection_name="speech_collection",
    embedding_function=OpenAIEmbeddings(),
    persist_directory="./chroma_db",
    metadata={"source": "messaging_api"},
    id=1,
)

document_2 = Document(
    page_content="The National Geographic Society has discovered a new species of aquatic animal, off the coast of Miami. They have been exploring at 8000 miles deep in the Pacific Ocean. They believe there's a lot more to learn from the oceans.",
    metadata={"source": "news"},
    id=2,
)

document_3 = Document(
    page_content="Martin Luther King's speech, I Have a Dream, remains one of the world's greatest ever. Here's everything he said in 5 minutes.",
    metadata={"source": "website"},
    id=3,
)

document_4 = Document(
    page_content="For the first time in 1200 years, the Kalahari desert receives 200ml of rain.",
    metadata={"source": "tweet"},
    id=4,
)

document_5 = Document(
    page_content="New multi-modal learning content about AI is ready from Kodeco.",
    metadata={"source": "kodeco_rss_feed"},
    id=5,
)

documents = [
    document_1,
    document_2,
    document_3,
    document_4,
    document_5,
]
uuids = [str(uuid4()) for _ in range(len(documents))]

db.add_documents(ids=uuids, documents=documents)

['56020fed-1545-4a02-bade-37913181d812',
 'e7edc823-4544-49aa-84c4-096b5d631472',
 '405f1802-49c3-45c7-92c2-d07fe17b59a8',
 'd96c8e6a-76a1-4612-b383-1b397225ba2e',
 '6c739973-9657-4cf3-83f6-dfb33c58e7ff']

In [4]:
# TODO: Perform a similarity search

results = db.similarity_search(
    "What's the latest on the warehouse?",
    k=2,
    filter={"source": "messaging_api"},
)
for res in results:
    print(f"* {res.page_content}")

* 20 tons of cocoa have been deposited at Warehouse AX749
* 20 tons of cocoa have been deposited at Warehouse AX749


In [5]:
# TODO: Perform a similarity search with score

results = db.similarity_search_with_score(
    "Where can I find tutorials on AI?", 
    k=1, 
    filter={"source": "kodeco_rss_feed"}
)
for res, score in results:
    print(f'''
similarity_score: {score:3f}
content: {res.page_content}
source: {res.metadata['source']}
''')


similarity_score: 0.386230
content: New multi-modal learning content about AI is ready from Kodeco.
source: kodeco_rss_feed

