# RAG: Introduction

In [4]:
from dotenv import load_dotenv

load_dotenv()

True

In [5]:
import os

os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
os.environ["LANGCHAIN_PROJECT"] = "rag"
os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY")
os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")

In [8]:
with open("./agent.txt", "r") as f:
    content = f.read()

In [12]:
content[:100]

'What are AI Agents?\nAn artificial intelligence (AI) agent is a software program that can interact wi'

In [21]:
from langchain_community.document_loaders import TextLoader

loader = TextLoader("./agent.txt")
docs = loader.load()

In [22]:
# split the content
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
doc_split = text_splitter.split_documents(docs)

In [24]:
doc_split

[Document(metadata={'source': './agent.txt'}, page_content='What are AI Agents?\nAn artificial intelligence (AI) agent is a software program that can interact with its environment, collect data, and use the data to perform self-determined tasks to meet predetermined goals. Humans set goals, but an AI agent independently chooses the best actions it needs to perform to achieve those goals. For example, consider a contact center AI agent that wants to resolves customer queries. The agent will automatically ask the customer different questions, look up information in internal documents, and respond with a solution. Based on the customer responses, it determines if it can resolve the query itself or pass it on to a human.\n\nWhat are the key principles that define AI agents?\nAll software autonomously completes different tasks as determined by the software developer. So, what makes AI or intelligent agents special?'),
 Document(metadata={'source': './agent.txt'}, page_content="AI agents are

In [30]:
# !pip install faiss-cpu

In [31]:
from langchain_community.vectorstores import FAISS

# embedding
model_name = "BAAI/bge-small-en"
model_kwargs = {"device": "cpu"}
encode_kwargs = {"normalize_embeddings": True}

hf_embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
)
vectorstore = FAISS.from_documents(documents=doc_split, embedding=hf_embeddings)
# Dense Retrieval: using embedding (closest doc is retrieved by dense.)
retriever = vectorstore.as_retriever()



In [34]:
from langchain import hub

prompt = hub.pull("rlm/rag-prompt")

In [39]:
prompt_str = """
Answer the question given the context:

Context: {context}
Question: {question}
"""
prompt_str = prompt_str.format(context="Capital of France is Paris", question="What is the capital of France?")

prompt_str

'\nAnswer the question given the context:\n\nContext: Capital of France is Paris\nQuestion: What is the capital of France?\n'

In [44]:
prompt.messages

[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})]

In [45]:
from langchain_groq import ChatGroq

llm = ChatGroq(
    model_name="llama3-8b-8192",
    temperature="0.3"
)
llm.invoke("Hello")

AIMessage(content="Hello! It's nice to meet you. Is there something I can help you with, or would you like to chat?", additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 26, 'prompt_tokens': 11, 'total_tokens': 37, 'completion_time': 0.021666667, 'prompt_time': 0.00101652, 'queue_time': 0.0155811, 'total_time': 0.022683187}, 'model_name': 'llama3-8b-8192', 'system_fingerprint': 'fp_af05557ca2', 'finish_reason': 'stop', 'logprobs': None}, id='run-7b07eaf1-81e3-4f2c-9cf8-d004141baa78-0', usage_metadata={'input_tokens': 11, 'output_tokens': 26, 'total_tokens': 37})

In [46]:
from langchain_core.runnables import RunnablePassthrough

In [48]:
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
)

In [52]:
response = rag_chain.invoke("what is the agent architecture?")

In [53]:
print(response.content)

The agent architecture refers to the base or structure that the agent operates from, which can be a physical structure, a software program, or a combination of both. This architecture provides the foundation for the agent's operations and enables it to interact with its environment.


In [54]:
rag_chain.invoke("What is ethical consideration of agent").content

"I don't know. The provided context does not mention the ethical consideration of an AI agent."

# Indexing

In [57]:
hf_embeddings

HuggingFaceBgeEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': True}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='BAAI/bge-small-en', cache_folder=None, model_kwargs={'device': 'cpu'}, encode_kwargs={'normalize_embeddings': True}, query_instruction='Represent this question for searching relevant passages: ', embed_instruction='', show_progress=False)

In [58]:
from langchain_community.document_loaders import TextLoader

loader = TextLoader("./agent.txt")
docs = loader.load()

In [66]:
query_result = hf_embeddings.embed_query("What is the agent architecture?")
document_result = hf_embeddings.embed_query("The agent architecture is the agent structure")

len(query_result), len(document_result)

(384, 384)

In [61]:
import numpy as np

In [62]:
def cosine_similarity(a, b):
    dot_product = np.dot(a, b)
    norm_vec1 = np.linalg.norm(a)
    norm_vec2 = np.linalg.norm(b)
    return dot_product / (norm_vec1 * norm_vec2)

In [67]:
query_result[:10]

[-0.0337931327521801,
 -0.0018535088747739792,
 -0.01836559735238552,
 -0.018039198592305183,
 -0.005999590270221233,
 -0.003841521218419075,
 0.018952878192067146,
 0.015587064437568188,
 0.017307696864008904,
 -0.0155978137627244]

In [68]:
similarity = cosine_similarity(query_result, document_result)
print(similarity)

0.9638556782968755


In [73]:
cosine_similarity(hf_embeddings.embed_query("I love cat"), hf_embeddings.embed_query("I love cat very much"))

0.9745068812633708

# Retrieval

In [75]:
doc = retriever.get_relevant_documents("Agent structure")
doc

[Document(metadata={'source': './agent.txt'}, page_content='Architecture\nArchitecture is the base the agent operates from. The architecture can be a physical structure, a software program, or a combination. For example, a robotic AI agent consists of actuators, sensors, motors, and robotic arms. Meanwhile, an architecture that hosts an AI software agent may use a text prompt, API, and databases to enable autonomous operations. \n\nAgent function\nThe agent function describes how the data collected is translated into actions that support the agentâ€™s objective. When designing the agent function, developers consider the type of information, AI capabilities, knowledge base, feedback mechanism, and other technologies required.\n\nAgent program\nAn agent program is the implementation of the agent function. It involves developing, training, and deploying the AI agent on the designated architecture. The agent program aligns the agentâ€™s business logic, technical requirements, and performan

In [76]:
retriever.get_relevant_documents("AI")

[Document(metadata={'source': './agent.txt'}, page_content='Improved productivity\nAI agents are autonomous intelligent systems performing specific tasks without human intervention. Organizations use AI agents to achieve specific goals and more efficient business outcomes. Business teams are more productive when they delegate repetitive tasks to AI agents. This way, they can divert their attention to mission-critical or creative activities, adding more value to their organization.\n\nReduced costs\nBusinesses can use intelligent agents to reduce unnecessary costs arising from process inefficiencies, human errors, and manual processes. You can confidently perform complex tasks because autonomous agents follow a consistent model that adapts to changing environments.'),
 Document(metadata={'source': './agent.txt'}, page_content='What are AI Agents?\nAn artificial intelligence (AI) agent is a software program that can interact with its environment, collect data, and use the data to perform

# Generation

In [77]:
from langchain_groq import ChatGroq
from langchain.prompts import ChatPromptTemplate

template = """Answer the question based only on the following context:
{context}

question: {question}"""

prompt = ChatPromptTemplate.from_template(template)
prompt

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template='Answer the question based only on the following context:\n{context}\n\nquestion: {question}'), additional_kwargs={})])

In [78]:
llm

ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x0000017D38559490>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x0000017D38559EE0>, model_name='llama3-8b-8192', temperature=0.3, model_kwargs={}, groq_api_key=SecretStr('**********'))

In [80]:
chain = prompt | llm
chain

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template='Answer the question based only on the following context:\n{context}\n\nquestion: {question}'), additional_kwargs={})])
| ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x0000017D38559490>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x0000017D38559EE0>, model_name='llama3-8b-8192', temperature=0.3, model_kwargs={}, groq_api_key=SecretStr('**********'))

In [81]:
chain.invoke({"context": "The agent architecture is the agent structure", "question": "What is the agent architecture?"})

AIMessage(content='According to the context, the agent architecture is the agent structure.', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 36, 'total_tokens': 50, 'completion_time': 0.011666667, 'prompt_time': 0.004595723, 'queue_time': 0.009757526999999998, 'total_time': 0.01626239}, 'model_name': 'llama3-8b-8192', 'system_fingerprint': 'fp_179b0f92c9', 'finish_reason': 'stop', 'logprobs': None}, id='run-bd35aa3b-98b3-46f6-92eb-b64fd02ec2f5-0', usage_metadata={'input_tokens': 36, 'output_tokens': 14, 'total_tokens': 50})