# Tudo junto

In [None]:
from langchain_community.document_loaders import DirectoryLoader
from langchain_text_splitters import CharacterTextSplitter
from transformers import AutoTokenizer
from langchain_ollama import OllamaEmbeddings
from langchain_milvus import Milvus

from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser, CommaSeparatedListOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_ollama.llms import OllamaLLM
from langchain.retrievers.multi_query import MultiQueryRetriever


from dotenv import load_dotenv
load_dotenv()

## Index

In [None]:
# Load every PDF matched by "*.pdf" under the "documentos" directory.
pdf_loader = DirectoryLoader("documentos", glob="*.pdf")
pdfs = pdf_loader.load()
len(pdfs)

In [None]:
# Hugging Face id of the embedding model; its tokenizer is handed to the
# splitter so chunk lengths are computed with that tokenizer.
embeddings_model = "BAAI/bge-m3"
embeddings_tokenizer = AutoTokenizer.from_pretrained(embeddings_model)

splitter = CharacterTextSplitter.from_huggingface_tokenizer(
    embeddings_tokenizer,
    chunk_size=1250,
    chunk_overlap=150,
)

# Split the loaded PDFs into chunks ("pedacos") ready for indexing.
pedacos = splitter.split_documents(pdfs)
len(pedacos)

In [None]:
# Location handed to Milvus through connection_args.
URI = "./milvus_example.db"

# NOTE: rebinds `embeddings_model` (previously the model-id string) to the
# Ollama embeddings client used by the vector store.
embeddings_model = OllamaEmbeddings(model="bge-m3:567m")

flat_l2_index = {"index_type": "FLAT", "metric_type": "L2"}
vector_store = Milvus(
    embedding_function=embeddings_model,
    connection_args={"uri": URI},
    index_params=flat_l2_index,
)

In [None]:
# Insert the chunks into the vector store.
vector_store.add_documents(pedacos)

## Generate

In [None]:
# LLM that drafts the hypothetical answer in the HyDE chain below.
query_model = OllamaLLM(model="gemma3:1b")

# multi_query_prompt_template = """Você é um assistente de modelo de linguagem de IA. Sua tarefa é gerar cinco
# versões diferentes da pergunta do usuário para recuperar documentos relevantes de um banco de dados vetorial.
# Ao gerar múltiplas perspectivas sobre a pergunta do usuário, seu objetivo é ajudar
# o usuário a superar algumas das limitações da busca por similaridade baseada em distância.
# Forneça estas perguntas alternativas separadas por quebras de linha.
# Responda apenas com os textos das perguntas, sem introdução ou comentários finais. Não coloque bullets ou numeros nas linhas.
# Pergunta original: {question}
# Perguntas:"""

# multi_query_prompt = PromptTemplate.from_template(multi_query_prompt_template)

# multi_query_chain = multi_query_prompt | query_model | CommaSeparatedListOutputParser()

# multi_query_retriever = MultiQueryRetriever(
#     retriever=vector_store.as_retriever(), llm_chain=multi_query_chain, 
# )

# HyDE prompt: ask the model for a one-sentence hypothetical answer, which is
# then used as the search text instead of the raw question.
hyde_prompt_template = (
    "\n"
    "Escreva uma frase que possa responder à pergunta apresentada. Não adicione mais nada.\n"
    "Pergunta: {query}\n"
    "Frase:\n"
)
hyde_prompt = PromptTemplate.from_template(hyde_prompt_template)

# HyDE chain: fill the prompt, run the query model, return plain text.
hyde_to_text = StrOutputParser()
hyde_chain = hyde_prompt | query_model | hyde_to_text

In [None]:
# LLM that writes the final answer from the retrieved context.
question_model = OllamaLLM(model="granite3.3:8b")

# The system message carries the retrieved context; the human message is the
# user's question.
rag_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "Responda usando exclusivamente os conteúdo fornecido. Seja breve na resposta com no máximo 100 palavras.\n\nContexto:\n{contexto}"),
        ("human", "{query}"),
    ]
)


def _format_docs(docs):
    """Return the text of the retrieved documents joined by blank lines.

    Without this step the prompt would receive the Python repr of the
    document list (metadata and escaped newlines included) instead of the
    document text.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# RAG chain: question -> HyDE sentence -> retriever -> formatted context,
# then prompt -> answer model -> plain string. Invoke it with the question text.
cadeia = (
    {
        "contexto": {"query": RunnablePassthrough()}
        | hyde_chain
        | vector_store.as_retriever()
        | _format_docs,
        "query": RunnablePassthrough(),
    }
    | rag_prompt
    | question_model
    | StrOutputParser()
)

In [None]:
# Try the RAG chain with a sample question.
cadeia.invoke("Como fazer um seguro viagem?")

In [None]:
# Try the RAG chain with a second sample question.
cadeia.invoke("O que fazer se tiver meu cartão Gold roubado?")

In [None]:
from langchain_core.documents import Document
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict

# State passed between the graph steps defined below.
class State(TypedDict):
    question: str  # user question; read by retrieve() and generate()
    context: List[Document]  # documents returned by retrieve(); read by generate()
    answer: str  # model response returned by generate()


# Define application steps
def retrieve(state: State):
    """Fetch documents similar to a HyDE sentence generated for the question.

    Reads ``state["question"]`` and returns a partial state update holding the
    matching documents under ``"context"``.
    """
    hypothetical_answer = hyde_chain.invoke(state["question"])
    matches = vector_store.similarity_search(hypothetical_answer)
    return {"context": matches}


def generate(state: State):
    """Answer the question using the documents stored in ``state["context"]``.

    The documents' text is joined with blank lines, placed into ``rag_prompt``
    together with the question, and sent to ``question_model``. Returns a
    partial state update holding the model response under ``"answer"``.
    """
    passages = [doc.page_content for doc in state["context"]]
    prompt_value = rag_prompt.invoke(
        {"contexto": "\n\n".join(passages), "query": state["question"]}
    )
    return {"answer": question_model.invoke(prompt_value)}


# Build the two-step graph (retrieve, then generate), enter it at "retrieve"
# and compile it.
graph_builder = StateGraph(State)
graph_builder.add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

In [None]:
# Run the graph for one question and show only the answer field of the result.
graph.invoke({"question": "Quais os beneficios de um cartão platinum?" })['answer']

In [None]:
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate
from pydantic import BaseModel, Field

# Schema for one question/answer pair; passed to JsonOutputParser further down.
# Documented with comments only: a class docstring might change the generated
# JSON schema.
class QandA(BaseModel):
    question: str = Field(description="question")
    answer: str = Field(description="answer to the question")

from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser

# OpenAI chat model used for question generation and, later, evaluation.
modelo = ChatOpenAI(
    model="gpt-4.1-nano",
    temperature=0
)

# Sample query left over from a parser example; not referenced in the visible
# part of this file.
joke_query = "Tell me a joke."

# JSON parser for the QandA schema.
# NOTE(review): not wired into the chain below, which returns plain text.
parser = JsonOutputParser(pydantic_object=QandA)

# Prompt asking for two questions per chunk, one per line.
prompt = PromptTemplate(
    template="Com base no conteúdo fornecido, crie duas perguntas, uma em cada linha, sem marcador ou numeração.\n\nConteúdo: {content}\n",
    input_variables=["content"],
)

chain = prompt | modelo | StrOutputParser()

# One raw string of questions per chunk, in the same order as `pedacos`.
# This call was commented out, which left `perguntas` undefined and made the
# next cell fail with NameError on a fresh run.
perguntas = chain.batch([p.page_content for p in pedacos])

In [None]:
# Accumulates {"pergunta", "resposta"} records, one per generated question.
perguntas_respostas = []

# Prompt that answers a single question from the chunk it was generated from.
resposta_prompt = PromptTemplate(
    template="Com base no conteúdo fornecido, responda a pergunda em no máximo duas frases.\n\nConteúdo: {content}\n\nPergunta: {pergunta}\n\nResposta:",
    input_variables=["content", "pergunta"],
)

resposta_chain = resposta_prompt | modelo | StrOutputParser()

# `perguntas` is expected to hold one string per chunk, aligned with `pedacos`.
for pedaco, perguntas_do_pedaco in zip(pedacos, perguntas):
    for linha in perguntas_do_pedaco.splitlines():
        pergunta = linha.strip()
        if not pergunta:
            # Blank lines in the model output are not questions; skipping them
            # avoids a wasted LLM call and an empty record.
            continue
        print(pergunta)
        resposta = resposta_chain.invoke(
            {"content": pedaco.page_content, "pergunta": pergunta}
        )
        perguntas_respostas.append({"pergunta": pergunta, "resposta": resposta})

In [None]:
# Show the collected question/answer records.
perguntas_respostas

In [None]:
import json

# ensure_ascii=False writes accented characters as-is in the UTF-8 file
# instead of escaping them as \uXXXX sequences; json.load reads both forms.
with open("test_qa.json", "w", encoding="utf-8") as file:
    json.dump(perguntas_respostas, file, indent=2, ensure_ascii=False)

In [None]:
from langchain.evaluation.qa import QAGenerateChain

# Chain that generates question/answer pairs, backed by `modelo`.
example_gen_chain = QAGenerateChain.from_llm(modelo)

# apply_and_parse takes a list of dicts, each holding the chunk text under
# the "doc" key.
doc_inputs = [{"doc": pedaco.page_content} for pedaco in pedacos]
new_examples = example_gen_chain.apply_and_parse(doc_inputs)

# Print the generated examples
# for example in new_examples:
#     print(f"Query: {example['query']}")
#     print(f"Answer: {example['answer']}\n")

In [None]:
# ensure_ascii=False writes accented characters as-is in the UTF-8 file
# instead of escaping them as \uXXXX sequences; json.load reads both forms.
with open("qa_pairs.json", "w", encoding="utf-8") as file:
    json.dump(new_examples, file, indent=2, ensure_ascii=False)

In [None]:
import json 
# Reload the saved Q&A pairs from disk.
with open("qa_pairs.json", encoding="utf-8") as qa_file:
    new_examples = json.load(qa_file)

In [None]:
def _join_page_content(docs):
    """Return the text of the retrieved documents joined by blank lines.

    Without this step the prompt would receive the Python repr of the
    document list (metadata and escaped newlines included) instead of the
    document text.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# Same pipeline as `cadeia` (HyDE -> retriever -> rag_prompt), answered by
# `modelo`. Invoke it with the question text.
chain = (
    {
        "contexto": {"query": RunnablePassthrough()}
        | hyde_chain
        | vector_store.as_retriever()
        | _join_page_content,
        "query": RunnablePassthrough(),
    }
    | rag_prompt
    | modelo
    | StrOutputParser()
)

# Flatten each generated pair into a {"query", "answer"} record.
eval_data = [
    {"query": pair["qa_pairs"]["query"], "answer": pair["qa_pairs"]["answer"]}
    for pair in new_examples
]

eval_data

In [None]:
# Send only the question text to the chain. Passing the whole eval record
# would stringify the dict, reference answer included, into both the HyDE
# prompt and the final prompt, leaking the expected answer into the prediction.
predictions = chain.batch([example["query"] for example in eval_data])


In [None]:
# Show the chain's answers for the evaluation questions.
predictions

In [None]:
from langchain.evaluation.qa import QAEvalChain
# Grade each prediction against its reference answer using `modelo`.
eval_chain = QAEvalChain.from_llm(modelo)
prediction_records = [{"result": predicted} for predicted in predictions]
graded_outputs = eval_chain.evaluate(eval_data, prediction_records)
graded_outputs

In [None]:
# The verdict is the text after the last ":" on the last line of each graded
# output; count the examples whose verdict is exactly "CORRECT".
verdicts = (
    graded_outputs[idx]['results'].split("\n")[-1].split(":")[-1].strip()
    for idx in range(len(eval_data))
)
corrects = sum(1 for verdict in verdicts if verdict == "CORRECT")

corrects

In [None]:
# Fraction of evaluation examples graded CORRECT.
corrects / len(eval_data)