In [1]:
from langchain_core.documents import Document

In [None]:
documents = [
    Document(
        page_content="",
        metadata={"source":""}
    ),
    Document(
        page_content="",
        metadata={"source":""}
    ),
]

In [5]:
from langchain_core.documents import Document
import random

# Sample synthetic data for demonstration
sample_contents = [
    "Artificial Intelligence is transforming industries.",
    "Machine Learning allows computers to learn from data.",
    "Natural Language Processing enables communication between humans and machines.",
    "Deep Learning is a subset of Machine Learning.",
    "Generative AI can create new content based on existing data.",
    "Data Science combines statistics and computer science.",
    "AI ethics is crucial for responsible AI deployment.",
    "Reinforcement Learning is used in robotics and gaming.",
    "Computer Vision allows machines to interpret visual information.",
    "Big Data analytics drives insights in various fields."
]

# Generate 10 synthetic Document objects
documents = [
    Document(
        page_content=random.choice(sample_contents),
        metadata={"source": f"source_{i+1}"}
    ) for i in range(10)
]


In [4]:
documents

[Document(metadata={'source': 'source_1'}, page_content='Computer Vision allows machines to interpret visual information.'),
 Document(metadata={'source': 'source_2'}, page_content='Natural Language Processing enables communication between humans and machines.'),
 Document(metadata={'source': 'source_3'}, page_content='Reinforcement Learning is used in robotics and gaming.'),
 Document(metadata={'source': 'source_4'}, page_content='AI ethics is crucial for responsible AI deployment.'),
 Document(metadata={'source': 'source_5'}, page_content='Natural Language Processing enables communication between humans and machines.'),
 Document(metadata={'source': 'source_6'}, page_content='AI ethics is crucial for responsible AI deployment.'),
 Document(metadata={'source': 'source_7'}, page_content='Big Data analytics drives insights in various fields.'),
 Document(metadata={'source': 'source_8'}, page_content='Deep Learning is a subset of Machine Learning.'),
 Document(metadata={'source': 'source

In [7]:
import os
from dotenv import load_dotenv
load_dotenv()

GROQ_API_KEY = os.getenv("GROQ_API_KEY")

In [8]:
os.environ["GROQ_API_KEY"] = GROQ_API_KEY

In [9]:
os.environ["HUGGINGFACE_API_TOKEN"] = os.getenv("HUGGINGFACE_API_TOKEN")

In [10]:
from langchain_groq import ChatGroq

In [11]:
llm = ChatGroq(model="llama3-8b-8192")

In [12]:
# Creating Embeddings

from langchain_huggingface import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

  from tqdm.autonotebook import tqdm, trange


In [14]:
# VectorStores

from langchain_chroma import Chroma

In [26]:
vectorstore = Chroma.from_documents(documents, embedding=embeddings)

In [27]:
vectorstore.similarity_search("Computer Vision")

[Document(metadata={'source': 'source_1'}, page_content='Computer Vision allows machines to interpret visual information.'),
 Document(metadata={'source': 'source_1'}, page_content='Computer Vision allows machines to interpret visual information.'),
 Document(metadata={'source': 'source_1'}, page_content='Computer Vision allows machines to interpret visual information.'),
 Document(metadata={'source': 'source_4'}, page_content='Machine Learning allows computers to learn from data.')]

In [28]:
## Async query

await vectorstore.asimilarity_search("Computer Vision")

[Document(metadata={'source': 'source_1'}, page_content='Computer Vision allows machines to interpret visual information.'),
 Document(metadata={'source': 'source_1'}, page_content='Computer Vision allows machines to interpret visual information.'),
 Document(metadata={'source': 'source_1'}, page_content='Computer Vision allows machines to interpret visual information.'),
 Document(metadata={'source': 'source_4'}, page_content='Machine Learning allows computers to learn from data.')]

In [29]:
vectorstore.similarity_search_with_score("Computer Vision")

[(Document(metadata={'source': 'source_1'}, page_content='Computer Vision allows machines to interpret visual information.'),
  0.5013437271118164),
 (Document(metadata={'source': 'source_1'}, page_content='Computer Vision allows machines to interpret visual information.'),
  0.5013437271118164),
 (Document(metadata={'source': 'source_1'}, page_content='Computer Vision allows machines to interpret visual information.'),
  0.5013437271118164),
 (Document(metadata={'source': 'source_4'}, page_content='Machine Learning allows computers to learn from data.'),
  1.2406715154647827)]

### Retrievers

In [21]:
from typing import List

from langchain_core.documents import Document
from langchain_core.runnables import RunnableLambda

In [30]:
retriever = RunnableLambda(vectorstore.similarity_search).bind(k=1)
retriever.batch(["Computer Vision", "Machien Learning"])

[[Document(metadata={'source': 'source_1'}, page_content='Computer Vision allows machines to interpret visual information.')],
 [Document(metadata={'source': 'source_4'}, page_content='Machine Learning allows computers to learn from data.')]]

In [31]:
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k":1}
)

retriever.batch(["Computer Vision", "Machien Learning"])

[[Document(metadata={'source': 'source_1'}, page_content='Computer Vision allows machines to interpret visual information.')],
 [Document(metadata={'source': 'source_4'}, page_content='Machine Learning allows computers to learn from data.')]]

In [36]:
from IPython.display import display, Markdown

In [37]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

message = """
        Answer this question using the provided context only
        {question}

        Context:
        {context}
"""

prompt = ChatPromptTemplate.from_messages(["human", message])

rag_chain = {"context":retriever,
            "question":RunnablePassthrough()}| prompt | llm

response = rag_chain.invoke("Tell me about Computer Vision")

display(Markdown(response.content))

According to the provided context, Computer Vision allows machines to interpret visual information.