AI Agent with RAG implementation

In [38]:
# Initialize the API key: load variables from a .env file (if one is found)
# into the process environment, then read the Google key from it.
# GOOGLE_API_KEY is None when the variable is not set.
import os
from dotenv import load_dotenv

load_dotenv()
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")

In [39]:
from pathlib import Path
from langchain_community.document_loaders import PyMuPDFLoader

# Load every PDF found directly inside text_docs/ into `docs`.
# Files are visited in sorted order: a directory listing comes back in
# filesystem order, so sorting keeps the load order (and everything built
# from it in later cells) identical from run to run and across machines.
docs = []
data = Path("text_docs/")
for pdf_path in sorted(data.glob('*.pdf')):
    try:
        loader = PyMuPDFLoader(str(pdf_path))
        docs.extend(loader.load())
        print(f'file loaded sucessifuly: {pdf_path.name}')
    except Exception as e:
        # Best effort: report the file that failed and keep loading the rest.
        print(f'failed to load the file {pdf_path.name}: {e}')

# Counts the Document objects collected by the loaders, which is not
# necessarily the number of files read.
print(f'{len(docs)} docs loaded.')

file loaded sucessifuly: Política de Reembolsos (Viagens e Despesas).pdf
file loaded sucessifuly: Política de Uso de E-mail e Segurança da Informação.pdf
file loaded sucessifuly: Políticas de Home Office.pdf
3 docs loaded.


In [40]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Break the loaded documents into small overlapping pieces for embedding.
# Sizes are measured by the splitter's length function (characters by
# default): at most 300 per chunk, with 30 shared between neighbours.
splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=30,
    chunk_size=300,
)
chunks = splitter.split_documents(docs)

In [41]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI

# Embedding client used to vectorise the chunks; authenticated with the key
# read from the environment in the first cell.
embedings = GoogleGenerativeAIEmbeddings(
    google_api_key=GOOGLE_API_KEY,
    model="models/gemini-embedding-001",
)

In [42]:
from langchain_community.vectorstores import FAISS

# Embed every chunk and index the resulting vectors in a FAISS store.
vectorstore = FAISS.from_documents(chunks, embedings)

# Retriever over that store: asks for up to 4 chunks per query and applies a
# 0.3 score threshold, so a query may come back with no documents at all.
retriever = vectorstore.as_retriever(
    search_kwargs={'k': 4, 'score_threshold': 0.3},
    search_type='similarity_score_threshold',
)

In [43]:
# Chat model that writes the final answer from the retrieved context.
# temperature=0 asks for the least random output the model offers.
llm_rag = ChatGoogleGenerativeAI(
    api_key=GOOGLE_API_KEY,
    model="gemini-2.5-flash",
    temperature=0,
)

In [44]:
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain

# System message: fixes the assistant's role and restricts it to the supplied
# context. The wording of the last sentence matters: askQuestionRag compares
# the model's reply against "cannot find the answer in the company's documents".
system_instructions = (
    "You are an expert HR and IT assistant for the company 'Torres Devs'. "
    "Your main task is to answer employee questions based ONLY on the context provided. "
    "Be polite and professional in your responses. "
    "If the information to answer the question is not in the provided context, "
    "clearly state that you cannot find the answer in the company's documents."
)

# User message: {input} and {context} are template variables filled in when
# the chain is invoked.
user_template = "Based on our company's policies, please answer the following question:\n\nQuestion: {input}\n\nContext:\n{context}"

prompt_rag = ChatPromptTemplate.from_messages([
    ("system", system_instructions),
    ("user", user_template),
])

# Chain that combines the supplied documents with the prompt and sends the
# result to llm_rag.
document_chain = create_stuff_documents_chain(llm_rag, prompt_rag)

In [52]:
from typing import Dict

def _no_answer() -> Dict:
    """Build a fresh fallback payload for questions that cannot be answered."""
    return {"answer": "cannot find the answer in the company's documents",
            "citations": [],
            "context_found": False}


def askQuestionRag(question: str) -> Dict:
    """Answer a question from the indexed company documents.

    Args:
        question: The employee's question in free text.

    Returns:
        A dict with three keys:
          - "answer": the stripped model reply, or the fixed fallback sentence.
          - "citations": the retrieved documents the answer was built from
            (empty list on fallback).
          - "context_found": False on fallback, True otherwise.

    The fallback is returned when the question is empty or blank, when the
    retriever finds no documents, or when the model's reply contains the
    "cannot find the answer" sentence requested by the system prompt.
    """
    # Nothing to search for: skip the retriever and the model call entirely.
    if not question or not question.strip():
        return _no_answer()

    related_docs = retriever.invoke(question)
    if not related_docs:
        return _no_answer()

    answer = document_chain.invoke({'input': question,
                                    'context': related_docs})
    txt = (answer or "").strip()

    # The model is told to be polite, so the refusal usually arrives wrapped in
    # other words ("I'm sorry, but I cannot find ..."). Match the sentence
    # case-insensitively anywhere in the reply instead of requiring the reply
    # to be exactly that sentence.
    if "cannot find the answer in the company's documents" in txt.casefold():
        return _no_answer()

    return {"answer": txt,
            "citations": related_docs,
            "context_found": True}

In [54]:
# Sample questions (in Portuguese) used to exercise askQuestionRag; the last
# one is unrelated to company policy and is meant to hit the fallback path.
tests = ["Posso reembolsar a internet?",
         "Me acidentei gravemente e estou enviando esse atestado",
         "Quero pedir demissão. Como faço?",
         "quantas cestas do logo o Stephen Curry fez na carreira?"]

for qst in tests:
    answer = askQuestionRag(qst)
    print(f'Question: {qst}')
    # Double-quoted keys: reusing the f-string's own quote character inside
    # the braces is a SyntaxError before Python 3.12.
    print(f'Answer: {answer["answer"]}')
    if answer['context_found']:
        print(f'Citations: {answer["citations"]}')
    # Blank separator after every question, not only those with citations.
    print()

Question: Posso reembolsar a internet?
Answer: Olá!

Sim, a internet para home office é reembolsável. A Torres Devs oferece um subsídio mensal de até R$ 100 para internet domiciliar para quem trabalha em regime de home office. Para solicitar o reembolso, é necessário apresentar uma nota fiscal nominal.

Atenciosamente,
Equipe de RH e TI da Torres Devs
Citations: [Document(id='d951e92d-0ed5-4002-be79-b8997e817165', metadata={'producer': 'Skia/PDF m140 Google Docs Renderer', 'creator': '', 'creationdate': '', 'source': 'text_docs\\Política de Reembolsos (Viagens e Despesas).pdf', 'file_path': 'text_docs\\Política de Reembolsos (Viagens e Despesas).pdf', 'total_pages': 1, 'format': 'PDF 1.4', 'title': 'Imersão: Política de Reembolsos (Viagens e Despesas)', 'author': '', 'subject': '', 'keywords': '', 'moddate': '', 'trapped': '', 'modDate': '', 'creationDate': '', 'page': 0}, page_content='são reembolsáveis.\u200b\n \n3.\u200b Transporte: táxi/app são permitidos quando não houver alternat