In [9]:
from langchain_core.documents import Document

documents = [
  Document(
    page_content= "The Nile is the longest river in the world.",
    metadata= {"source": "geography", "author": "admin"}
  ),
  Document(
    page_content= "Python is a popular programming language for AI.",
    metadata= {"source": "technology", "author": "kaleb"}
  ),
  Document(
    page_content= "Coffee was first discovered in Ethiopia.",
    metadata= {"source": "history", "author": "researcher"}
  ),
  Document(
    page_content= "Mount Everest is the highest mountain on Earth.",
    metadata= {"source": "geography", "author": "explorer"}
  ),
  Document(
    page_content= "Photosynthesis is how plants make their own food.",
    metadata= {"source": "science", "author": "teacher"}
  ),
  Document(
    page_content= "Soccer is the most popular sport in the world.",
     metadata={"source": "sports", "author": "sports_writer"}
  ),
  Document(
    page_content= "The Great Wall of China is visible from space.",
    metadata= {"source": "history", "author": "guide"}
  ),
  Document(
    page_content= "Gravity was first described by Isaac Newton.",
    metadata= {"source": "science", "author": "student"}
  ),
  Document(
    page_content= "The Sahara is the largest hot desert in the world.",
    metadata={"source": "geography", "author": "scientist"}
  ),
  Document(
    page_content= "Bananas are naturally radioactive.",
    metadata= {"source": "fun_fact", "author": "blogger"}
  )
]

In [1]:
import os
from dotenv import load_dotenv
load_dotenv()
from langchain_groq import ChatGroq

In [2]:
groq_api_key = os.getenv('GROQ_API_KEY')
os.environ['HF_TOKEN']= os.getenv('HF_TOKEN')

In [4]:
llm = ChatGroq(model='Llama3-8b-8192', api_key=groq_api_key)
llm

ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x000002664CE8C800>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x000002664D15FAD0>, model_name='Llama3-8b-8192', model_kwargs={}, groq_api_key=SecretStr('**********'))

In [5]:
from langchain_huggingface.embeddings import HuggingFaceEmbeddings

In [6]:
embedding = HuggingFaceEmbeddings(model_name='all-MiniLM-L6-v2')

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
from langchain_chroma import Chroma

In [10]:
vectorestore = Chroma.from_documents(documents, embedding=embedding)
vectorestore

<langchain_chroma.vectorstores.Chroma at 0x26604203d70>

In [11]:
vectorestore.similarity_search("river")

[Document(id='4d72f6e4-1c2c-41a0-8859-71e961ad3ad1', metadata={'author': 'admin', 'source': 'geography'}, page_content='The Nile is the longest river in the world.'),
 Document(id='e19b5288-3a18-4dfc-8805-500025db93ed', metadata={'author': 'kaleb', 'source': 'technology'}, page_content='Python is a popular programming language for AI.'),
 Document(id='5f26e7da-bcb3-4cb7-ab9f-ebe52ce0f6fd', metadata={'author': 'researcher', 'source': 'history'}, page_content='Coffee was first discovered in Ethiopia.'),
 Document(id='633646e4-dc9a-462b-bcb0-6ca7e0886e1d', metadata={'author': 'guide', 'source': 'history'}, page_content='The Great Wall of China is visible from space.')]

In [12]:
from typing import List

from langchain_core.runnables import RunnableLambda

In [14]:
retriever=RunnableLambda(vectorestore.similarity_search).bind(k=1)
retriever.batch(["river","Kenya"])

[[Document(id='4d72f6e4-1c2c-41a0-8859-71e961ad3ad1', metadata={'author': 'admin', 'source': 'geography'}, page_content='The Nile is the longest river in the world.')],
 [Document(id='5f26e7da-bcb3-4cb7-ab9f-ebe52ce0f6fd', metadata={'author': 'researcher', 'source': 'history'}, page_content='Coffee was first discovered in Ethiopia.')]]

In [24]:
retriever2=vectorestore.as_retriever(
    search_type="similarity",
    search_kwargs={"k":2}
)

retriever2.batch(["brain","Kenya"])

[[Document(id='e19b5288-3a18-4dfc-8805-500025db93ed', metadata={'author': 'kaleb', 'source': 'technology'}, page_content='Python is a popular programming language for AI.'),
  Document(id='633646e4-dc9a-462b-bcb0-6ca7e0886e1d', metadata={'author': 'guide', 'source': 'history'}, page_content='The Great Wall of China is visible from space.')],
 [Document(id='5f26e7da-bcb3-4cb7-ab9f-ebe52ce0f6fd', metadata={'author': 'researcher', 'source': 'history'}, page_content='Coffee was first discovered in Ethiopia.'),
  Document(id='a1f92a88-d6e5-457d-92be-83d4a0af8305', metadata={'author': 'scientist', 'source': 'geography'}, page_content='The Sahara is the largest hot desert in the world.')]]

In [25]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

messages="""
Answer this question using the provided context only.

{question}

Context:
{context}
"""

prompt=ChatPromptTemplate.from_messages([("human", messages)])

rag_chain={"context":retriever2, "question":RunnablePassthrough()}| prompt|llm

In [26]:
rag_chain.invoke("Tell 1 fact geographic fact")

AIMessage(content="Here's a geographic fact:\n\nThe Sahara is the largest hot desert in the world.", additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 18, 'prompt_tokens': 143, 'total_tokens': 161, 'completion_time': 0.015, 'prompt_time': 0.054469593, 'queue_time': 0.137977315, 'total_time': 0.069469593}, 'model_name': 'Llama3-8b-8192', 'system_fingerprint': 'fp_dadc9d6142', 'finish_reason': 'stop', 'logprobs': None}, id='run-ae52f610-f5c0-4011-ad00-a2b3c517194b-0', usage_metadata={'input_tokens': 143, 'output_tokens': 18, 'total_tokens': 161})