In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_ollama import ChatOllama, OllamaEmbeddings
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFLoader

In [None]:
# Embedding model (served through Ollama) used to vectorize chunks and queries.
embeddings_model = OllamaEmbeddings(model="nomic-embed-text:v1.5")

# Chat model (served through Ollama) with the sampling temperature fixed at 0.
llm = ChatOllama(model="gpt-oss:20b", temperature=0)

# Chroma vector store backed by the local directory given in persist_directory.
vector_store = Chroma(
    persist_directory="./chroma_langchain_db",
    embedding_function=embeddings_model,
)

In [None]:
# Load the PDF as Documents without any chunking.
# load() is used instead of load_and_split(): the latter already chunks the
# text with a default text splitter, so the explicitly configured splitter
# later in this notebook would only see pre-split text (its overlap setting
# would be mostly ignored and its start indexes would be relative to those
# pre-split pieces rather than to the page).
pdf_link = "../rag/document.pdf"

loader = PyPDFLoader(pdf_link, extract_images=False)
pages = loader.load()

In [None]:
# Split the loaded documents into chunks (pieces of the document).
# Sizes are measured in characters (length_function=len); add_start_index
# records each chunk's starting offset in its metadata.
text_splitter = RecursiveCharacterTextSplitter(
    length_function=len,
    chunk_size=4000,
    chunk_overlap=20,
    add_start_index=True,
)

chunks = text_splitter.split_documents(documents=pages)

In [None]:
# Save the chunks in the Chroma vector DB.
# The store is persisted on disk, so inserting with auto-generated ids would
# add a fresh copy of every chunk each time this cell is re-run. Deterministic
# ids (source path + chunk position) make a re-run overwrite the existing
# entries instead of duplicating them.
chunk_ids = [
  f"{chunk.metadata.get('source', 'document')}:{position}"
  for position, chunk in enumerate(chunks)
]
document_ids = vector_store.add_documents(documents=chunks, ids=chunk_ids)

In [None]:
# Build a retriever on top of the vector store, asking for k=3 results per search.
retriever = vector_store.as_retriever(search_kwargs=dict(k=3))

In [None]:
def ask(question: str):
  """Answer a question using the retrieved document chunks as context.

  The chunks returned by the module-level ``retriever`` are concatenated and
  sent to the module-level ``llm`` as the system message; the question is sent
  as the user message.

  Args:
    question: The question to answer.

  Returns:
    A tuple ``(answer, docs)``: the content of the LLM reply and the list of
    retrieved documents that were used as context.
  """
  docs = retriever.invoke(question)

  # Separate chunks with a blank line; joining them with "" would glue the
  # last word of one chunk to the first word of the next.
  context = "\n\n".join(doc.page_content for doc in docs)

  ai_msg = llm.invoke([
    {"role": "system", "content": context},
    {"role": "user", "content": question},
  ])

  return ai_msg.content, docs

In [None]:
# Example question sent through the RAG pipeline; keeps both the answer and
# the retrieved documents for inspection in the following cells.
user_question = (
  "Quais os principais pontos da Lei que preciso ficar atento na minha empresa?"
)
answer, docs = ask(user_question)

In [None]:
# Show the LLM answer as plain text.
print(answer)

In [None]:
# Inspect the first retrieved document (displayed as the cell's output).
docs[0]