In [1]:
from langchain.docstore.document import Document
from langchain_chroma import Chroma
from langchain_huggingface.embeddings import HuggingFaceEmbeddings

In [2]:
# Section: Embedding Model
embeddings = HuggingFaceEmbeddings()

  from tqdm.autonotebook import tqdm, trange


In [3]:
# Section: Documents
paragraphs = [
    "Artificial Intelligence (AI) is transforming healthcare by improving diagnostics, personalizing treatment plans, and enhancing patient outcomes. AI models can analyze vast amounts of medical data to predict disease patterns and assist in early detection of conditions like cancer and heart disease. Through natural language processing, AI is also improving administrative tasks by automating medical records management, reducing errors, and streamlining workflows for healthcare professionals.",
    "The shift toward renewable energy sources is critical in addressing climate change and achieving sustainability goals. Solar, wind, and hydropower are leading the renewable energy revolution, reducing reliance on fossil fuels and lowering greenhouse gas emissions. Innovations in energy storage technologies, such as advanced batteries, are enhancing the efficiency of renewable energy systems, making clean energy more accessible and affordable for all.",
    "Blockchain technology is revolutionizing the financial sector by providing a decentralized and secure framework for transactions. It eliminates the need for intermediaries, reduces fraud, and enhances transparency in financial operations. Cryptocurrencies like Bitcoin and Ethereum leverage blockchain to facilitate peer-to-peer payments, while smart contracts enable automated and tamper-proof agreements between parties, reducing the risk of disputes.",
    "Mental health plays a pivotal role in student success and overall well-being. Schools are increasingly incorporating mental health programs to support students dealing with anxiety, depression, and other psychological challenges. By promoting emotional resilience and providing resources for counseling, educational institutions are creating environments where students can thrive both academically and personally.",
    "Autonomous vehicles (AVs) are an integral component of the smart cities of the future. By utilizing AI and machine learning algorithms, AVs can navigate traffic, avoid collisions, and reduce congestion in urban environments. Smart cities are leveraging connected infrastructure, such as traffic sensors and smart grids, to ensure seamless communication between vehicles and the urban landscape, creating safer and more efficient transportation networks.",
    "As the Internet of Things (IoT) expands, cybersecurity challenges are becoming more complex. With billions of devices connected to the internet, from smart home appliances to industrial machines, protecting sensitive data and maintaining secure networks is crucial. Hackers can exploit vulnerabilities in IoT devices, leading to potential breaches in personal privacy and critical infrastructure. Advanced encryption methods and multi-factor authentication are being deployed to safeguard IoT ecosystems.",
    "Human interest in space exploration has reached new heights with initiatives aiming to colonize Mars and beyond. Space agencies like NASA and private companies like SpaceX are developing advanced spacecraft to explore distant planets. Colonizing space offers opportunities for scientific discovery, resource utilization, and potentially securing humanity’s future. However, it also presents challenges in terms of life support systems, long-duration space travel, and sustainable living in extraterrestrial environments.",
    "The rise of digital marketing has significantly influenced consumer behavior. With the increasing use of social media, search engines, and email marketing, brands are utilizing targeted advertising strategies to engage with customers. Data analytics and AI tools provide insights into consumer preferences, enabling businesses to personalize their marketing efforts and improve conversion rates. The shift from traditional to digital media is shaping the future of marketing and customer interactions.",
]

In [4]:
# Section: Metadata
documents = [
    Document(
        page_content=paragraphs[0],
        metadata={"source": "healthcare_ai_article", "topic": "Artificial Intelligence", "subtopic": "Healthcare"}
    ),
    Document(
        page_content=paragraphs[1],
        metadata={"source": "sustainability_journal", "topic": "Renewable Energy", "subtopic": "Sustainability"}
    ),
    Document(
        page_content=paragraphs[2],
        metadata={"source": "blockchain_whitepaper", "topic": "Blockchain", "subtopic": "Financial Security"}
    ),
    Document(
        page_content=paragraphs[3],
        metadata={"source": "education_magazine", "topic": "Mental Health", "subtopic": "Education"}
    ),
    Document(
        page_content=paragraphs[4],
        metadata={"source": "autonomous_cars_report", "topic": "Autonomous Vehicles", "subtopic": "Smart Cities"}
    ),
    Document(
        page_content=paragraphs[5],
        metadata={"source": "cybersecurity_news", "topic": "Cybersecurity", "subtopic": "Internet of Things"}
    ),
    Document(
        page_content=paragraphs[6],
        metadata={"source": "space_exploration_journal", "topic": "Space Exploration", "subtopic": "Colonization"}
    ),
    Document(
        page_content=paragraphs[7],
        metadata={"source": "marketing_analysis", "topic": "Digital Marketing", "subtopic": "Consumer Behavior"}
    ),
]

In [5]:
# Section: Database
chroma_db = Chroma.from_documents(
    documents=documents,
    embedding=HuggingFaceEmbeddings(),
    collection_name="vector_store_with_metadata",
)
print(type(chroma_db))

<class 'langchain_chroma.vectorstores.Chroma'>


In [6]:
# Section: Queries
queries = [
    "How is AI improving healthcare diagnostics and patient outcomes?",
    "What are the benefits of renewable energy sources like solar and wind?",
    "How does blockchain technology enhance financial security and transparency?",
    "What is the importance of mental health programs in schools for student success?",
    "How do autonomous vehicles contribute to smart cities and urban transportation?",
    "What are the cybersecurity risks associated with the Internet of Things (IoT)?",
    "What are the challenges and opportunities of colonizing Mars and other planets?",
    "How is digital marketing influencing consumer behavior in the age of social media?",
]

In [7]:
# Section: Basic Search
results = chroma_db.similarity_search(queries[0], k=3)
print("Chroma Search Results:")
for i, doc in enumerate(results):
    print(f"Result {i}:\n{doc.page_content}")

Chroma Search Results:
Result 0:
Artificial Intelligence (AI) is transforming healthcare by improving diagnostics, personalizing treatment plans, and enhancing patient outcomes. AI models can analyze vast amounts of medical data to predict disease patterns and assist in early detection of conditions like cancer and heart disease. Through natural language processing, AI is also improving administrative tasks by automating medical records management, reducing errors, and streamlining workflows for healthcare professionals.
Result 1:
Autonomous vehicles (AVs) are an integral component of the smart cities of the future. By utilizing AI and machine learning algorithms, AVs can navigate traffic, avoid collisions, and reduce congestion in urban environments. Smart cities are leveraging connected infrastructure, such as traffic sensors and smart grids, to ensure seamless communication between vehicles and the urban landscape, creating safer and more efficient transportation networks.
Result 2:

In [8]:
# Section: Search with Scores
results_with_scores = chroma_db.similarity_search_with_score(queries[2], k=2)
print("\nResults with similarity scores:")
for doc, score in results_with_scores:
    print(f"\nDocument:\n{doc.page_content}")
    print(f"Score: {score}")


Results with similarity scores:

Document:
Blockchain technology is revolutionizing the financial sector by providing a decentralized and secure framework for transactions. It eliminates the need for intermediaries, reduces fraud, and enhances transparency in financial operations. Cryptocurrencies like Bitcoin and Ethereum leverage blockchain to facilitate peer-to-peer payments, while smart contracts enable automated and tamper-proof agreements between parties, reducing the risk of disputes.
Score: 0.5804454684257507

Document:
The shift toward renewable energy sources is critical in addressing climate change and achieving sustainability goals. Solar, wind, and hydropower are leading the renewable energy revolution, reducing reliance on fossil fuels and lowering greenhouse gas emissions. Innovations in energy storage technologies, such as advanced batteries, are enhancing the efficiency of renewable energy systems, making clean energy more accessible and affordable for all.
Score: 1.6

In [9]:
# Section: Metadata Filter
filtered_results = chroma_db.similarity_search(
    query=queries[-1],
    filter={"topic": "Digital Marketing"},
)
print("Filtered Results:", filtered_results)

Filtered Results: [Document(metadata={'source': 'marketing_analysis', 'subtopic': 'Consumer Behavior', 'topic': 'Digital Marketing'}, page_content='The rise of digital marketing has significantly influenced consumer behavior. With the increasing use of social media, search engines, and email marketing, brands are utilizing targeted advertising strategies to engage with customers. Data analytics and AI tools provide insights into consumer preferences, enabling businesses to personalize their marketing efforts and improve conversion rates. The shift from traditional to digital media is shaping the future of marketing and customer interactions.')]


In [10]:
# Section: Save and Load
chroma_db.delete_collection()
persist_directory = "./chroma_db"
chroma_db = Chroma.from_documents(
    documents=documents,
    embedding=HuggingFaceEmbeddings(),
    collection_name="vector_store_with_metadata",
    persist_directory=persist_directory,
)
chroma_db_from_directory = Chroma(
    persist_directory=persist_directory,
    collection_name="vector_store_with_metadata",
    embedding_function=embeddings,
)
print(chroma_db_from_directory._collection.count())

48


In [11]:
# Section: Retriever
retriever = chroma_db_from_directory.as_retriever(search_type="mmr", search_kwargs={"k": 1, "fetch_k": 3})
retriever.invoke(queries[0])

[Document(metadata={'source': 'healthcare_ai_article', 'subtopic': 'Healthcare', 'topic': 'Artificial Intelligence'}, page_content='Artificial Intelligence (AI) is transforming healthcare by improving diagnostics, personalizing treatment plans, and enhancing patient outcomes. AI models can analyze vast amounts of medical data to predict disease patterns and assist in early detection of conditions like cancer and heart disease. Through natural language processing, AI is also improving administrative tasks by automating medical records management, reducing errors, and streamlining workflows for healthcare professionals.')]