In [None]:
import os
from dotenv import load_dotenv

load_dotenv()

In [131]:
# API keys for the external services, read from the process environment.
# Each name is None when the corresponding variable is not set.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
TAVILY_API_KEY = os.environ.get("TAVILY_API_KEY")
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")

### Escolher as funções disponíveis

In [102]:
from typing import Literal

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_groq import ChatGroq

# Data model for the router's structured output.
# The class is handed to `llm.with_structured_output(...)` below, so its docstring
# and field description are presumably sent to the model as schema text; they are
# therefore kept verbatim in Portuguese (TODO confirm against the library docs).
class RouteQuery(BaseModel):
    """Encaminhe uma consulta de usuário para o datasource mais relevante."""

    # Chosen route; restricted to the three literal values below.
    datasource: Literal["vectorstore", "websearch", "agendamento"] = Field(
        ...,
        description="Dada uma pergunta do usuário, escolha encaminhá-la para websearch, vectorstore ou agendamento.",
    )

# Chat model used for routing (temperature 0). The same `llm` object is reused
# by the grader chains and the RAG chain defined in later cells.
llm = ChatGroq(
    temperature=0,
    model="llama3-70b-8192",
)
# Variant of the model whose output is structured as a RouteQuery.
structured_llm_router = llm.with_structured_output(RouteQuery)

# System prompt (Portuguese, sent to the model as written) describing when to
# choose each of the three routes: vectorstore, web search or scheduling.
system = """Você trabalha na Tech4.ai e é um especialista em encaminhar uma pergunta do usuário para um vectorstore, pesquisa na web ou agendamento de reunião. \n
O vectorstore contém documentos relacionados a perguntas comuns apenas sobre a empresa (Tech4.ai), tais como missão, visão, valores, cultura, programas internos, políticas \n
de trabalho remoto, horários, etc. Use o vectorstore para perguntas sobre esses tópicos. \n
Para perguntas sobre as ferramentas Github, Vscode, Jira e discord use a pesquisa na web. \n
Para perguntas sobre agendamento de reuniões use o agendamento."""

# Two-message chat template: the system instructions plus the user's {question}.
route_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "{question}"),
    ]
)

# Routing chain: fill the prompt, then let the structured model pick a route.
question_router = route_prompt | structured_llm_router

In [103]:
# Smoke-test the router with sample questions covering the three routes;
# each routing decision is printed on its own line.
sample_questions = [
    "Qual a missão da Tech4.ai?",
    "Quero saber os valores da empresa",
    "Quero saber o que esperar de um dia de trabalho na Tech4.ai",
    "gostaria de agendar uma reunião com o time de desenvolvimento",
    "Como posso marcar uma reunião com o time de desenvolvimento?",
    "Preciso de ajuda com o Github",
    "Como posso usar o Vscode?",
    "Como posso usar o Jira?",
]
for sample_question in sample_questions:
    print(question_router.invoke({"question": sample_question}))

datasource='vectorstore'
datasource='vectorstore'
datasource='vectorstore'
datasource='agendamento'
datasource='agendamento'
datasource='websearch'
datasource='websearch'
datasource='websearch'


### Vector Database

In [111]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain_text_splitters import CharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings


# Load the company PDF from the working directory.
pdf_path = "Base.pdf"
loader = PyPDFLoader(pdf_path)
docs = loader.load()

# Split the loaded documents into chunks.
# NOTE(review): RecursiveCharacterTextSplitter is imported above but unused; with
# CharacterTextSplitter and chunk_size=100 the resulting chunk sizes depend on the
# separators present in the PDF text — confirm the chunks look as intended.
text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)
doc_splits = text_splitter.split_documents(docs)

# TODO (revisit if time allows):
# The text splitter does not capture the images at the end of the PDF
# (company values). Draft for extracting text from those images:
#
#   image_paths = ["image1.PNG", "image2.PNG", "image3.PNG", "image4.PNG", "image5.PNG", "image6.PNG"]
#   image_texts = [extract_text_from_image(image_path) for image_path in image_paths]
#   doc_splitsIMAGES = text_splitter.split_documents(text_splitter.create_documents(image_texts))

# Open-source multilingual embedding model from Hugging Face. Uses the
# HuggingFaceEmbeddings class already imported in this cell instead of the
# deprecated SentenceTransformerEmbeddings alias; the model is unchanged.
embedding_function = HuggingFaceEmbeddings(model_name='sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2')

# Index the chunks in the "Empresa" Chroma collection.
vectorstore = Chroma.from_documents(
    documents=doc_splits,
    collection_name="Empresa",
    embedding=embedding_function,
)

# Retriever used by the RAG cells and by the `retrieve` graph node.
retriever = vectorstore.as_retriever()


###### Usar para deletar vector store

In [110]:

# Maintenance only: drops the "Empresa" collection (e.g. before re-running the
# indexing cell). It is guarded by a flag because, in a top-to-bottom run, this
# cell executes right after the collection is built and would otherwise remove
# the data the retriever depends on. The `vectorstore` name is deliberately not
# rebound here, so the instance created by the indexing cell stays in place.
RESET_VECTORSTORE = False

if RESET_VECTORSTORE:
    Chroma(collection_name="Empresa").delete_collection()

###### Avaliador de relevância dos documentos (Retrieval Grader)

In [112]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
 
# Data model for the document-relevance grader's structured output.
# The class is passed to `llm.with_structured_output(...)` below, so the docstring
# and field description are presumably sent to the model as schema text and are
# kept verbatim (TODO confirm against the library docs).
class GradeDocuments(BaseModel):
    """Pontuação binária para verificar a relevância nos documentos utilizados."""

    # Free-form string; callers compare it against "sim" to decide relevance.
    binary_score: str = Field(description="Documentos são relevantes para a pergunta, 'sim' or 'não'")

# Variant of the shared `llm` whose output is structured as GradeDocuments.
structured_llm_grader_docs = llm.with_structured_output(GradeDocuments)

# System prompt (Portuguese, sent to the model as written) asking for a binary
# "sim"/"não" relevance verdict. Note: this rebinds the module-level `system`
# name that earlier cells also use for their own prompts.
system = """Você é um avaliador que avalia a relevância de um documento recuperado para uma pergunta do usuário. \n
 Se o documento contiver palavra(s)-chave ou significado semântico relacionado à pergunta, classifique-o como relevante. \n
 Dê uma pontuação binária "sim" ou "não" para indicar se o documento é relevante para a pergunta."""

# Chat template with two inputs: the retrieved {document} and the user {question}.
grade_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Retrieved document: \n\n {document} \n\n User question: {question}"),
    ]
)

# Grading chain: fill the prompt, then get a GradeDocuments verdict back.
retrieval_grader_relevance = grade_prompt | structured_llm_grader_docs

In [113]:
from langchain_core.output_parsers import StrOutputParser

# Answer-generation prompt (Portuguese instructions, sent to the model as written).
# It takes two inputs: {question} and {context}; the instructions ask the model to
# say it does not know when the context is insufficient and to answer in
# Brazilian Portuguese.
prompt = ChatPromptTemplate.from_template(
    """Você é um assistente para tarefas de resposta a perguntas. Use as seguintes partes do contexto recuperado para responder à pergunta. \n
      Se você não souber a resposta, apenas diga que não sabe. Sempre responda em português do Brasil.
Question: {question}
Context: {context}
Answer:"""
)

# RAG chain: prompt -> shared `llm` -> plain string output.
rag_chain = prompt | llm | StrOutputParser()

In [118]:
# Manual end-to-end check: retrieve documents for a sample question, grade the
# relevance of the second retrieved document, then generate an answer with RAG.
# `question`, `docs` and `generation` are reused by the grader cells further down.
question = "Quais são os programas internos da empresa?"
docs = retriever.invoke(question)
doc_txt = docs[1].page_content
relevance_grade = retrieval_grader_relevance.invoke({"question": question, "document": doc_txt})
print(relevance_grade)
# Run
generation = rag_chain.invoke({"context": docs, "question": question})
print(generation)

binary_score='sim'
Os programas internos da empresa mencionados são:

1. Star Performers: reconhece os funcionários de alto desempenho na empresa.
2. Monthly Highlights: destaca as atividades realizadas por cada departamento no mês passado.
3. Team Talk: é um podcast apresentado por funcionários da empresa, discutindo tópicos relevantes e compartilhando insights.
4. Network Sessions: promove o networking entre os funcionários em torno de temas pré-definidos.
5. Learning Moments: apresenta palestras e workshops com especialistas externos para promover o aprendizado contínuo.
6. Book Club: é um programa de leitura onde os funcionários compartilham conhecimentos e aprendem uns com os outros.
7. Talent Development: visa o desenvolvimento comportamental dos funcionários.
8. Engagement Projects: são projetos criados para aumentar o engajamento dos funcionários, incluindo jogos e atividades de produtividade.
9. Referral Program: oferece uma recompensa aos funcionários que indicam candidatos b

### Hallucination Grader 

In [119]:
# Data model for the hallucination grader's structured output.
# The class is passed to `llm.with_structured_output(...)` below, so the docstring
# and field description are presumably sent to the model as schema text and are
# kept verbatim (TODO confirm against the library docs).
class GradeHallucinations(BaseModel):
    """Pontuação binária para alucinação presente na resposta obtida."""

    # Free-form string; callers compare it against "sim" (answer is grounded).
    binary_score: str = Field(description="Não considere a possibilidade de chamar APIs externas para obter informações adicionais. A resposta é apoiada pelos fatos, 'sim' ou 'não'.")
 
# Variant of the shared `llm` whose output is structured as GradeHallucinations.
structured_llm_grader_hallucination = llm.with_structured_output(GradeHallucinations)

# System prompt (Portuguese, sent to the model as written): asks for a binary
# "sim"/"não" verdict and tells the model to count partially supported answers as "sim".
system = """Você é um avaliador que avalia se uma resposta gerada por LLM é apoiada por um conjunto de fatos recuperados. \n 
     Restrinja-se a dar uma pontuação binária, seja "sim" ou "não". Se a resposta for apoiada ou parcialmente apoiada pelo conjunto de fatos, considere-a um sim. \n
    Não considere a chamada de APIs externas para obter informações adicionais."""

# Chat template with two inputs: the retrieved {documents} and the LLM {generation}.
hallucination_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Set of facts: \n\n {documents} \n\n LLM generation: {generation}"),
]
)

# Grading chain, followed by a quick check on the `docs` and `generation`
# produced by the manual RAG cell above (the result is shown as the cell output).
hallucination_grader = hallucination_prompt | structured_llm_grader_hallucination
hallucination_grader.invoke({"documents": docs, "generation": generation})

GradeHallucinations(binary_score='sim')

### Answer Grader 

In [120]:
# Data model for the answer grader's structured output.
# The class is passed to `llm.with_structured_output(...)` below, so the docstring
# and field description are presumably sent to the model as schema text; they stay
# in Portuguese like the rest of the prompts.
class GradeAnswer(BaseModel):
    """Pontuação binária para avaliar se a resposta responde a pergunta."""

    # Free-form string; callers compare it against "sim" (answer addresses the question).
    # The description previously read "Responde responde a pergunta" (typo).
    binary_score: str = Field(description="A resposta responde à pergunta, 'sim' ou 'não'")

# Variant of the shared `llm` whose output is structured as GradeAnswer.
structured_llm_grader_answer = llm.with_structured_output(GradeAnswer)

# System prompt (Portuguese, sent to the model as written) asking for a binary
# 'sim'/'não' verdict on whether the answer resolves the question.
system = """Você é um avaliador que avalia se uma resposta aborda/resolve uma pergunta \n 
     Dê uma pontuação binária 'sim' ou 'não'. 'Sim' significa que a resposta resolve a pergunta."""

# Chat template with two inputs: the user {question} and the LLM {generation}.
answer_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "User question: \n\n {question} \n\n LLM generation: {generation}"),
    ]
)

# Grading chain, followed by a quick check on the `question` and `generation`
# from the manual RAG cell above (the result is shown as the cell output).
answer_grader = answer_prompt | structured_llm_grader_answer
answer_grader.invoke({"question": question,"generation": generation})

GradeAnswer(binary_score='sim')

### Web-Search

In [None]:
from langchain_community.tools.tavily_search import TavilySearchResults

# Tavily web-search tool limited to 3 results per query.
# NOTE(review): `max_results` is the documented field for the result limit; the
# previous `k=3` keyword does not appear to be a declared field, so the limit was
# likely ignored — confirm against the installed langchain_community version.
web_search_tool = TavilySearchResults(max_results=3)

### Google Calendar

In [143]:
from dotenv import load_dotenv
from langchain.agents import initialize_agent, AgentType
from langchain.callbacks import wandb_tracing_enabled

from langchain_google_calendar_tools.utils import build_resource_service, get_oauth_credentials
from langchain_google_calendar_tools.tools.create_new_event.tool import CreateNewEvent
from langchain_google_calendar_tools.tools.list_events.tool import ListEvents
from langchain_google_calendar_tools.tools.update_exist_event.tool import UpdateExistEvent

from langchain_google_calendar_tools.helper_tools.get_current_datetime import GetCurrentDatetime

In [144]:
import webbrowser

# Register the Firefox binary with the `webbrowser` module under the name
# "firefox", using a launcher instance built from the executable's path.
browser_path = "/usr/bin/firefox"
firefox_launcher = webbrowser.BackgroundBrowser(browser_path)
webbrowser.register("firefox", None, instance=firefox_launcher)

In [145]:
# Obtain OAuth credentials from the local client-secrets file (the captured output
# shows this step asking the user to authorize in a browser), then build the
# calendar API resource that the calendar tools receive.
credentials = get_oauth_credentials(client_secrets_file="credentials.json")
api_resource = build_resource_service(credentials=credentials)

Please visit this URL to authorize this application: https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=326746794011-tk5t81lnatfpl17qfv35n0atc8mrj0gc.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A47117%2F&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fcalendar.events&state=Hu3ujIuHBLNwhoc3OUFZTwsz8SyPCg&access_type=offline


/usr/bin/firefox: 12: xdg-settings: not found




In [146]:
# Tools handed to the scheduling agent: list/create/update calendar events
# (all bound to `api_resource`) plus a helper that returns the current datetime.
calendar_tools = [
    ListEvents(api_resource=api_resource),
    CreateNewEvent(api_resource=api_resource),
    UpdateExistEvent(api_resource=api_resource),
    GetCurrentDatetime(),
]

# Structured-chat agent driven by its own ChatGroq instance (temperature 0).
agent = initialize_agent(
    tools=calendar_tools,
    llm=ChatGroq(temperature=0, model="llama3-70b-8192"),
    agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
)

  warn_deprecated(


In [None]:
# Run the scheduling agent once, inside wandb_tracing_enabled() so the run is traced,
# and print what the agent returns.
event_request = "Create new event on 2023-11-14 at 10:00 with summary 'test', return html link and event summary."

with wandb_tracing_enabled():
    output = agent.run(event_request)

    print(output)

### LangGraph

In [122]:
from typing_extensions import TypedDict
from typing import List

class GraphState(TypedDict):
    """
    State dictionary passed between the nodes of the graph.

    Attributes:
        question: the user's question
        generation: text generated by the LLM
        web_search: "Yes"/"No" flag set while grading documents
        documents: documents gathered for the question
    """

    question: str
    generation: str
    web_search: str
    documents: List[str]

In [128]:
from langchain.schema import Document

### Nodes

def retrieve(state):
    """
    Fetch documents for the current question from the vector store retriever.

    Args:
        state (dict): Current graph state; only its "question" entry is read.

    Returns:
        dict: State update with the retrieved "documents" and the unchanged "question".
    """
    print("---RETRIEVE from Vector Store DB---")
    user_question = state["question"]

    # Delegate the lookup to the module-level retriever
    return {"documents": retriever.invoke(user_question), "question": user_question}

def generate(state):
    """
    Produce an answer with the RAG chain from the documents held in the state.

    Args:
        state (dict): Current graph state; "question" and "documents" are read.

    Returns:
        dict: State update with "documents", "question" and the new "generation".
    """
    print("---GENERATE Answer---")
    user_question, context_docs = state["question"], state["documents"]

    # Run the RAG chain with the documents as context
    answer = rag_chain.invoke({"context": context_docs, "question": user_question})
    return {"documents": context_docs, "question": user_question, "generation": answer}

def grade_documents(state):
    """
    Grade each retrieved document for relevance to the question.

    Documents graded "sim" (case-insensitive) are kept; any other grade drops the
    document and raises the web-search flag.

    Args:
        state (dict): Current graph state; "question" and "documents" are read.

    Returns:
        dict: State update with the kept "documents", the "question" and a
        "web_search" flag ("Yes" if at least one document was dropped, else "No").
    """
    print("---CHECK DOCUMENT RELEVANCE TO QUESTION---")
    user_question = state["question"]
    candidates = state["documents"]

    relevant_docs = []
    needs_web_search = "No"
    for candidate in candidates:
        verdict = retrieval_grader_relevance.invoke(
            {"question": user_question, "document": candidate.page_content}
        )
        if verdict.binary_score.lower() != "sim":
            # Irrelevant: leave it out and request a web search
            print("---GRADE: DOCUMENT NOT RELEVANT---")
            needs_web_search = "Yes"
            continue
        print("---GRADE: DOCUMENT RELEVANT---")
        relevant_docs.append(candidate)

    return {"documents": relevant_docs, "question": user_question, "web_search": needs_web_search}
    
def web_search(state):
    """
    Run a web search for the question and append the results to the documents.

    Args:
        state (dict): Current graph state. "question" is required; "documents" is
            optional because the router can send a question straight to this
            node, in which case no documents have been stored yet.

    Returns:
        dict: State update with "documents" (the previous documents, if any,
        followed by one Document joining all search results) and the "question".
    """

    print("---WEB SEARCH. Append to vector store db---")
    question = state["question"]
    # Use .get(): indexing raised KeyError when "documents" was never set.
    # Copy the list so the list object held by the incoming state is not mutated.
    documents = list(state.get("documents") or [])

    # Web search: join the content of every hit into a single Document
    docs = web_search_tool.invoke({"query": question})
    web_results = "\n".join([d["content"] for d in docs])
    documents.append(Document(page_content=web_results))
    return {"documents": documents, "question": question}

### Edges

def route_question(state):
    """
    Route the question to web search or RAG.

    Args:
        state (dict): Current graph state; only its "question" entry is read.

    Returns:
        str: Next node key, either "websearch" or "vectorstore".

    Raises:
        ValueError: If the router picks a datasource this function has no route
            for (the router schema also allows "agendamento"). Previously the
            function fell through and returned None in that case.
    """

    print("---ROUTE QUESTION---")
    question = state["question"]
    source = question_router.invoke({"question": question})
    if source.datasource == "websearch":
        print("---ROUTE QUESTION TO WEB SEARCH---")
        return "websearch"
    if source.datasource == "vectorstore":
        print("---ROUTE QUESTION TO RAG---")
        return "vectorstore"
    # Fail loudly with an actionable message instead of returning None
    raise ValueError(
        f"No graph route defined for datasource {source.datasource!r}; "
        "expected 'websearch' or 'vectorstore'."
    )

def decide_to_generate(state):
    """
    Decide whether to generate an answer or to add a web search first.

    Args:
        state (dict): Current graph state; only its "web_search" flag is read
            (the unused "question"/"documents" reads were removed so the decision
            no longer depends on those keys being present).

    Returns:
        str: "websearch" when the flag is "Yes", otherwise "generate".
    """

    print("---ASSESS GRADED DOCUMENTS---")

    if state["web_search"] == "Yes":
        # The flag is raised as soon as one document is graded irrelevant, so the
        # message says "not all" rather than the former, inaccurate "all ... not".
        print("---DECISION: NOT ALL DOCUMENTS ARE RELEVANT TO QUESTION, INCLUDE WEB SEARCH---")
        return "websearch"

    # Every graded document was relevant, so generate the answer
    print("---DECISION: GENERATE---")
    return "generate"

def grade_generation_v_documents_and_question(state):
    """
    Check that the generation is grounded in the documents and answers the question.

    Args:
        state (dict): Current graph state; "question", "documents" and
            "generation" are read.

    Returns:
        str: "useful" when the generation is grounded and addresses the question,
        "not useful" when it is grounded but does not address the question,
        "not supported" when it is not grounded in the documents.
    """

    print("---CHECK HALLUCINATIONS---")
    question = state["question"]
    documents = state["documents"]
    generation = state["generation"]

    score = hallucination_grader.invoke({"documents": documents, "generation": generation})
    # Normalise case/whitespace so "Sim" counts as "sim", as grade_documents does
    grade = score.binary_score.strip().lower()

    # Not grounded: ask for a new generation
    if grade != "sim":
        # Uses print like every other message here; pprint was only imported in a
        # later cell, so this branch depended on that cell having run.
        print("---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---")
        return "not supported"

    print("---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---")
    # Check question-answering
    print("---GRADE GENERATION vs QUESTION---")
    score = answer_grader.invoke({"question": question, "generation": generation})
    grade = score.binary_score.strip().lower()
    if grade == "sim":
        print("---DECISION: GENERATION ADDRESSES QUESTION---")
        return "useful"

    print("---DECISION: GENERATION DOES NOT ADDRESS QUESTION---")
    return "not useful"

In [129]:
from langgraph.graph import END, StateGraph

workflow = StateGraph(GraphState)

# Nodes: graph node name -> function executed for that node
graph_nodes = {
    "websearch": web_search,
    "retrieve": retrieve,
    "grade_documents": grade_documents,
    "generate": generate,
}
for node_name, node_fn in graph_nodes.items():
    workflow.add_node(node_name, node_fn)

# Unconditional edges (source node -> target node)
for source_node, target_node in (
    ("websearch", "generate"),
    ("retrieve", "grade_documents"),
):
    workflow.add_edge(source_node, target_node)

# Entry point: route_question's return value selects the first node
entry_routes = {
    "websearch": "websearch",
    "vectorstore": "retrieve",
}
workflow.set_conditional_entry_point(route_question, entry_routes)

# After grading: decide_to_generate's return value selects the next node
after_grading_routes = {
    "websearch": "websearch",
    "generate": "generate",
}
workflow.add_conditional_edges("grade_documents", decide_to_generate, after_grading_routes)

# After generating: regenerate, finish, or fall back to web search
after_generation_routes = {
    "not supported": "generate",
    "useful": END,
    "not useful": "websearch",
}
workflow.add_conditional_edges(
    "generate",
    grade_generation_v_documents_and_question,
    after_generation_routes,
)

# Compile
app = workflow.compile()

In [130]:
from pprint import pprint
# Stream the graph for one question, announcing each node as it finishes.
inputs = {"question": "Quais são os programas internos da empresa?"}
for output in app.stream(inputs):
    for key in output:
        value = output[key]
        pprint(f"Finished running: {key}:")
# After the stream ends, `value` holds the last node's state update;
# print the generation it carries.
pprint(value["generation"])

---ROUTE QUESTION---
---ROUTE QUESTION TO RAG---
---RETRIEVE from Vector Store DB---
'Finished running: retrieve:'
---CHECK DOCUMENT RELEVANCE TO QUESTION---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
---DECISION: GENERATE---
'Finished running: grade_documents:'
---GENERATE Answer---
---CHECK HALLUCINATIONS---
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
'Finished running: generate:'
('Os programas internos da empresa mencionados são:\n'
 '\n'
 '1. Star Performers: reconhece os funcionários de alto desempenho na '
 'empresa.\n'
 '2. Monthly Highlights: destaca as atividades realizadas por cada '
 'departamento no mês passado.\n'
 '3. Team Talk: é um podcast apresentado por funcionários da empresa, '
 'discutindo tópicos relevantes e compartilhando insights.\n'
 '4. Network Sessions: promove