In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_ollama import ChatOllama, OllamaEmbeddings
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from langchain_core.output_parsers import StrOutputParser
from langchain_classic.retrievers import ParentDocumentRetriever
from langchain_core.stores import InMemoryStore

In [None]:
# Ollama model tags used by this notebook, kept together so they are easy to swap.
CHAT_MODEL_NAME = "gpt-oss:20b"
EMBEDDING_MODEL_NAME = "nomic-embed-text:v1.5"

# Chat model that generates the final answer at the end of the chain.
llm = ChatOllama(model=CHAT_MODEL_NAME)

# Embedding model handed to the vector store as its embedding function.
embeddings_model = OllamaEmbeddings(model=EMBEDDING_MODEL_NAME)

In [None]:
# Load the PDF from disk (image extraction disabled).
pdf_link = "../rag/projeto_lei_ia.pdf"
loader = PyPDFLoader(pdf_link, extract_images=False)

# Use load() instead of load_and_split(): the latter is flagged as deprecated in
# langchain_core and would pre-split the text with a default splitter, even
# though the ParentDocumentRetriever already applies its own parent_splitter.
pages = loader.load()

# Sanity check: number of Documents loaded from the PDF.
len(pages)

In [None]:
# Splitters for the parent/child retrieval strategy.

# Child splitter: produces the small chunks handed to the vector store.
# chunk_overlap must be set explicitly here: the library default is 200, which
# would equal chunk_size and make consecutive child chunks almost identical
# (each new chunk shifts by only one small piece), bloating the index with
# near-duplicate embeddings.
child_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=20)

# Parent splitter: produces the larger chunks kept in the docstore.
# add_start_index=True records each chunk's start offset in its metadata.
parent_splitter = RecursiveCharacterTextSplitter(
    chunk_size=4000, chunk_overlap=200, length_function=len, add_start_index=True
)

In [None]:
# Storage backends for the retriever.

# Docstore for the parent chunks. It lives only in memory, so it starts empty
# on every kernel restart.
store = InMemoryStore()

# Vector store for the embedded child chunks, persisted on disk.
vectorstore = Chroma(
    embedding_function=embeddings_model, persist_directory="./childVectorDB"
)

# The vector store survives kernel restarts but the in-memory docstore does not.
# Without a reset, child vectors from earlier runs would remain in the
# collection pointing at parent ids that no longer exist, crowding out valid
# hits at query time. Start from an empty collection so both stores stay in sync.
vectorstore.reset_collection()

In [None]:
# Wire the two splitters and the two storage backends into a single retriever:
# child chunks are indexed in `vectorstore`, parent chunks are kept in `store`.
parent_document_retriever = ParentDocumentRetriever(
    parent_splitter=parent_splitter,
    child_splitter=child_splitter,
    docstore=store,
    vectorstore=vectorstore,
)

# Index the loaded PDF documents; no explicit ids are supplied, so the
# retriever assigns them itself.
parent_document_retriever.add_documents(pages)

In [None]:
# Peek at a handful of stored child chunks instead of dumping the whole
# collection into the cell output.
parent_document_retriever.vectorstore.get(limit=5)

In [None]:
# Prompt text sent to the chat model (written in Portuguese). It instructs the
# model to act as a legislation-and-technology specialist and to answer using
# the supplied context. The {question} and {context} placeholders are filled
# from the "question" and "context" keys produced by the retrieval step.
TEMPLATE = """
  Você é um especialista em legislação e tecnologia. Responda a pergunta abaixo utilizando o contexto informado.
  Query:
  {question}

  Context:
  {context}
"""

# Chat prompt built from the template string above.
rag_prompt = ChatPromptTemplate.from_template(TEMPLATE)

In [None]:
def format_docs(docs):
    """Concatenate the text of the retrieved documents into one context string.

    Args:
        docs: Iterable of retrieved documents, each exposing ``page_content``.

    Returns:
        The documents' ``page_content`` values joined by blank lines; an empty
        string when nothing was retrieved.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# Build the prompt inputs in parallel from the raw question string:
#   - "question": the input passed through unchanged
#   - "context":  the retrieved parent documents rendered as plain text.
# Piping the retriever through format_docs keeps the Document repr (metadata,
# escaped newlines) out of the prompt; only the actual text reaches the model.
setup_retrieval = RunnableParallel(
    {
        "question": RunnablePassthrough(),
        "context": parent_document_retriever | format_docs,
    }
)

# Converts the chat model's message into a plain string.
output_parser = StrOutputParser()

In [None]:
# Full RAG pipeline: retrieval step -> prompt -> chat model -> string output.
parent_chain_retrieval = setup_retrieval.pipe(rag_prompt, llm, output_parser)

In [None]:
# Example query (in Portuguese) asking about the main risks of the AI legal framework.
question = "Quais os principais riscos do marco legal de IA?"

# Run the whole chain; the returned answer string is the cell's output.
parent_chain_retrieval.invoke(question)