In [30]:
#importacao
import os 
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain import hub
from langchain_community.document_loaders import PyPDFDirectoryLoader
from dotenv import load_dotenv
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough


In [41]:
# load
loader = PyPDFDirectoryLoader("files")
docs = loader.load()

# embedding
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)
vectorstore = Chroma.from_documents(
    documents=splits, embedding=OpenAIEmbeddings(api_key=os.getenv("OPENAI_API_KEY"))
)

# retriever
retriever = vectorstore.as_retriever()

# llm
llm = ChatGroq(model="llama-3.1-8b-instant")

# chains
# system_prompt = (
#     "Você é um assistente para tarefas de resposta a perguntas. "
#     "Use os seguintes pedaços de contexto recuperado para responder "
#     "à pergunta. Se você não sabe a resposta, diga que "
#     "não sabe. Use no máximo três frases e mantenha a "
#     "resposta concisa."
#     "\n\n"
#     "{context}"
# )

template = """Você é um assistente para tarefas de resposta a perguntas. 
    Use os seguintes pedaços de contexto recuperado para responder 
    à pergunta. Se você não sabe a resposta, diga que não sabe. 
    Use no máximo três frases e mantenha a resposta concisa.

    "{context}"
    
    Perguntas: {question}
"""

custom_rag_prompt = PromptTemplate.from_template(template)

rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | custom_rag_prompt
    | llm
    | StrOutputParser()
)



In [43]:

# rag_chain.invoke("Quais campos devo preencher para realizar atendimento?")
rag_chain.invoke("Como acessar o sistema?")

'Para acessar o sistema, abra a página da Defensoria Pública do Estado do Tocantins e clique no menu "ACESSO INTERNO > SOLAR" ou pelo endereço http://sisat.defensoria.to.gov.br. Informe seu usuário e senha e clique em "Autenticar".'

In [45]:
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_chroma import Chroma
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)


### Construct retriever ###
loader = loader = PyPDFDirectoryLoader("files")

# loader = WebBaseLoader(
#     web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
#     bs_kwargs=dict(
#         parse_only=bs4.SoupStrainer(
#             class_=("post-content", "post-title", "post-header")
#         )
#     ),
# )
docs = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)
vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())
retriever = vectorstore.as_retriever()


### Contextualize question ###
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, "
    "formulate a standalone question which can be understood "
    "without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
history_aware_retriever = create_history_aware_retriever(
    llm, retriever, contextualize_q_prompt
)


### Answer question ###
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)

rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)


### Statefully manage chat history ###
store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    if session_id not in store:
        store[session_id] = ChatMessageHistory()
    return store[session_id]


conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [46]:
conversational_rag_chain.invoke(
    {"input": "Como acessar o sistema?"},
    config={
        "configurable": {"session_id": "abc123"}
    },  # constructs a key "abc123" in `store`.
)["answer"]

'Para acessar o sistema, abra a página da Defensoria Pública do Estado do Tocantins e clique no menu "ACESSO INTERNO > SOLAR" ou acesse diretamente pelo endereço http://sisat.defensoria.to.gov.br. Serão solicitados os dados de acesso ao sistema, onde você deve informar seu usuário e senha e clicar em "Autenticar".'

In [48]:
conversational_rag_chain.stream(
    {"input": "Qual o link?"},
    config={
        "configurable": {"session_id": "abc123"}
    },  # constructs a key "abc123" in `store`.
)["answer"]

TypeError: 'generator' object is not subscriptable