In [11]:
import getpass
import os

from langchain.chains.question_answering import load_qa_chain
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [12]:
# Never store the key in the notebook: reuse OPENAI_API_KEY when it is already
# set in the environment, otherwise prompt for it without echoing the input.
# (The previous version overwrote any existing key with an empty string.)
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API key: ')

In [13]:
# Instantiate the models: the embedding model (used when indexing and querying
# the vector store) and the chat model that writes the answers.
embeddings_model = OpenAIEmbeddings()
llm = ChatOpenAI(
    max_tokens=200,  # cap on the length of each generated answer
    model="gpt-4o-mini",
)

In [14]:
# Load the PDF
# NOTE(review): pdf_link is an empty string here — presumably a redacted
# placeholder; set it to the PDF's location before running.
pdf_link = ""
loader = PyPDFLoader(pdf_link, extract_images=False)
# load() returns the PDF's pages as Documents without chunking them; chunking
# is then done exactly once by the splitter configured in the next cell.
# The deprecated load_and_split() pre-splits with a default text splitter, so
# the documents were split twice and `start_index` ended up relative to an
# intermediate chunk instead of the page.
pages = loader.load()

In [15]:
# Split the loaded documents into chunks
text_spliter = RecursiveCharacterTextSplitter(
    add_start_index=True,  # store each chunk's start offset in its metadata
    length_function=len,   # sizes below are measured with len(), i.e. in characters
    chunk_overlap=20,
    chunk_size=4000,
)

chunks = text_spliter.split_documents(documents=pages)

In [16]:
# Embed the chunks and save them in the Chroma vector DB
# NOTE(review): persist_directory is an empty string — presumably a redacted
# placeholder for the on-disk location; confirm before relying on persistence.
db = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings_model,
    persist_directory='',
)

In [17]:
# Open the vector DB, embedding queries with the same model used for indexing
vector_db = Chroma(
    embedding_function=embeddings_model,
    persist_directory='',
)

# Retriever that returns the 3 best-matching chunks for each query
retriever = vector_db.as_retriever(search_kwargs=dict(k=3))

# Build the prompt chain used to call the LLM; the "stuff" chain type places
# all retrieved documents into a single prompt.
chain = load_qa_chain(llm=llm, chain_type="stuff")

In [18]:
def ask(question):
    """Answer a question using documents retrieved from the vector store.

    Args:
        question: The user's question, as a string.

    Returns:
        A tuple ``(answer, context)``: ``answer`` is the ``'output_text'``
        produced by the QA chain and ``context`` is the list of documents the
        retriever returned for this question.
    """
    # .invoke() replaces the deprecated retriever.get_relevant_documents()
    # and the deprecated direct call chain({...}).
    context = retriever.invoke(question)
    result = chain.invoke(
        {"input_documents": context, "question": question},
        return_only_outputs=True,
    )
    return result['output_text'], context

In [19]:
# Read one question from the user, run it through the retrieval + LLM pipeline
# and show the answer. `context` keeps the retrieved documents so they can be
# inspected afterwards if needed.
user_question = input("User: ")
answer, context = ask(question=user_question)
print("Answer: ", answer)

Answer:  Olá! Como posso ajudar você hoje?
