# Code Workshop TDC DEMO



## Setup

* Elastic Credentials - Create an [Elastic Cloud deployment](https://www.elastic.co/search-labs/tutorials/install-elasticsearch/elastic-cloud) to get all Elastic credentials (`ELASTIC_CLOUD_ID`, `ELASTIC_API_KEY`).



## Install packages

In [None]:
%pip install --upgrade langchain langchain-community pypdf tiktoken langchain_openai langchain_elasticsearch

## Import packages and credentials

In [None]:
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser


## Get Credentials

In [None]:
# OpenAI API keys are created at https://platform.openai.com/api-keys
from getpass import getpass

# Read the secrets with getpass so they are not echoed into the notebook
# output; strip stray whitespace that tends to come along when a key is pasted.
OPENAI_API_KEY = getpass("Por favor, insira sua OpenAI API Key: ").strip()
ELASTIC_API_KEY = getpass("Por favor, insira sua Elastic API Key: ").strip()
# The Cloud ID identifies the deployment and is not a credential by itself,
# so a regular visible prompt is kept for it.
CLOUD_ID = input("Por favor, insira seu Elastic Cloud ID: ").strip()

In [None]:
import os

# The OpenAI-backed objects created later in the notebook are built without an
# explicit key, so publish it through the environment for them to pick up.
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

In [None]:
from langchain.document_loaders import PyPDFLoader

pdf_url = "https://raw.githubusercontent.com/salgado/2024-10-25-tdc-bsb/main/concurso-bnb-edital.pdf"

# Load the PDF. The loaders live in a list so further sources can be added;
# at the moment it holds a single loader, so nothing is loaded twice.
loaders = [PyPDFLoader(pdf_url)]

# Collect whatever each loader returns into one flat list of documents.
docs = []
for loader in loaders:
    docs += loader.load()

In [None]:
# Split
# Alternative kept for reference: character-based splitting.
# from langchain.text_splitter import RecursiveCharacterTextSplitter
# text_splitter = RecursiveCharacterTextSplitter(
#     chunk_size = 1500,
#     chunk_overlap = 500
# )

# Try splitting by tokens instead
from langchain.text_splitter import TokenTextSplitter

# Chunk size and overlap are the two tuning parameters of the splitter.
text_splitter = TokenTextSplitter(chunk_overlap=30, chunk_size=500)

In [None]:
# Break the loaded documents into chunks using the splitter configured above.
splits = text_splitter.split_documents(docs)

In [None]:
# Number of chunks produced; shown as the cell's output.
len(splits)

In [None]:
from langchain_openai import OpenAIEmbeddings


In [None]:
# Name of the Elasticsearch index that receives the embedded chunks.
ELASTIC_INDEX_NAME = "tdc-workshop-002"

In [None]:
from langchain_elasticsearch import ElasticsearchStore
from langchain_openai import ChatOpenAI


In [None]:
# Embedding model (library defaults) handed to the Elasticsearch store below.
query_embedding = OpenAIEmbeddings()


In [None]:
# Alternative embedding model (Google Generative AI), kept for reference:
# query_embedding = GoogleGenerativeAIEmbeddings(
#     model="models/embedding-001", task_type="retrieval_document"
# )

# Embed the chunks and index them in the Elastic Cloud deployment; the returned
# store is what the retriever is built from later on.
es = ElasticsearchStore.from_documents(
    splits,
    embedding=query_embedding,
    index_name=ELASTIC_INDEX_NAME,
    es_cloud_id=CLOUD_ID,
    es_api_key=ELASTIC_API_KEY,
)

In [None]:
def format_docs(docs):
    """Concatenate the ``page_content`` of each document into one string.

    The pieces are separated by a single space; an empty input yields ``""``.
    """
    contents = [document.page_content for document in docs]
    # A blank-line separator ("\n\n") is the other option that was tried here.
    return " ".join(contents)

In [None]:
retriever = es.as_retriever(search_kwargs={"k": 3})     # another tuning knob: k is how many chunks are retrieved per query


In [None]:

# Alternative model step for the chain below (Google Gemini), kept for reference:
#     | ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.8)

# Prompt: {context} receives the retrieved text and {question} the user's query;
# the model is told to answer in Portuguese using only that context.
template = """Answer the question in portuguese based only on the following context:\n

{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)


# Pipeline: the input question is passed through unchanged and is also sent to
# the retriever, whose hits are joined by format_docs; the filled-in prompt then
# goes to the chat model and the reply is reduced to a plain string.
chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | ChatOpenAI(model="gpt-4o") # chat model that generates the answer
    | StrOutputParser()
)



In [None]:
def print_chat(text):
    """Print a chain answer, or a collection of documents, in readable form.

    The previous body ignored ``text`` and always dumped the notebook-level
    ``docs`` list; the argument is now what gets printed.

    Args:
        text: Either a string, which is printed unchanged, or an iterable of
            objects exposing ``page_content``, which are printed as numbered
            ``Document N`` sections separated by a rule of 100 dashes.
    """
    if isinstance(text, str):
        print(text)
        return

    separator = f"\n{'-' * 100}\n"
    print(
        separator.join(
            f"Document {number}:\n\n{document.page_content}"
            for number, document in enumerate(text, start=1)
        )
    )


In [None]:
# "Which exam (concurso) is available?" - the chain returns the answer as a string.
question1 = "Qual o concurso disponivel"
chain.invoke(question1)


# Chatting with PDF data

In [None]:
# "What is the salary range?"
question2 = "qual a faixa salarial"
chain.invoke(question2)

In [None]:
# "Tell me more about the position Especialista Técnico - Analista de Sistemas - Perfil 1: Desenvolvimento de Sistemas"
question3 = "Fale mais sobre o cargo Especialista Técnico – Analista de Sistemas – Perfil 1: Desenvolvimento de Sistemas"
chain.invoke(question3)


In [None]:
# "What are the prerequisites?"
question4 = "quais os pre-requisitos?"
chain.invoke(question4)

In [None]:
# verificar ajustes de tokens de saida

In [None]:
!pip install openai

# Question Answering

In [None]:
# Question Answering

## Retrieval Chain types

In [None]:

# Question reused by the RetrievalQA examples below: "which positions are open in the exam?"
from langchain.chains import RetrievalQA
question = "quais cargos abertos no concurso?"


In [None]:
# Chat model shared by the RetrievalQA and conversational chains below;
# temperature=0 keeps the answers as repeatable as the model allows.
llm_name = "gpt-3.5-turbo"
llm = ChatOpenAI(model=llm_name, temperature=0)

# Smoke test. `predict` is deprecated in favour of `invoke`; `.content` keeps
# the cell output a plain string, which is what `predict` returned.
llm.invoke("Bom dia!").content

In [None]:
# RetrievalQA using the "map_reduce" strategy for combining retrieved chunks.
qa_chain_mr = RetrievalQA.from_chain_type(
    llm,
    chain_type="map_reduce",
    retriever=retriever,
)

In [None]:
result = qa_chain_mr({"query": question})

In [None]:
result["result"]

In [None]:
# RetrievalQA using the "refine" strategy.
# NOTE: the variable name is reused from the map_reduce cell, so from here on
# `qa_chain_mr` refers to the refine chain.
qa_chain_mr = RetrievalQA.from_chain_type(
    llm,
    retriever=retriever,
    chain_type="refine"
)
# `invoke` replaces the deprecated direct call on the chain object.
result = qa_chain_mr.invoke({"query": question})
result["result"]

In [None]:
# RetrievalQA using the "map_rerank" strategy.
# NOTE: the variable name is reused again, so from here on `qa_chain_mr`
# refers to the map_rerank chain.
qa_chain_mr = RetrievalQA.from_chain_type(
    llm,
    retriever=retriever,
    chain_type="map_rerank"
)
# `invoke` replaces the deprecated direct call on the chain object.
result = qa_chain_mr.invoke({"query": question})
result["result"]

# Memory


In [None]:
# Add conversation memory
from langchain.memory import ConversationBufferMemory

# The history is stored under the "chat_history" key and returned as message
# objects instead of a single string.
memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")

In [None]:
from langchain.chains import ConversationalRetrievalChain

# Retrieval chain wired to the conversation memory, so follow-up questions can
# refer back to earlier turns. The former `retriever=retriever` statement was a
# no-op self-assignment and has been dropped.
qa = ConversationalRetrievalChain.from_llm(
    llm,
    retriever=retriever,
    memory=memory
)

In [None]:
result = qa({"question": question})

In [None]:
result['answer']

In [None]:
question2= "Fale mais sobre o Cargo 1: Especialista Técnico – Analista de Sistemas – Perfil 1: Desenvolvimento de Sistemas"

In [None]:
question = question2
result = qa({"question": question})

In [None]:
result['answer']

In [None]:
question = "Qual o salario para esse cargo"
result = qa({"question": question})

In [None]:
result['answer']