In [26]:
import os
from dotenv import load_dotenv
from langchain_google_vertexai import VertexAIEmbeddings
from langchain_google_community import BigQueryVectorStore
from langchain import hub
from langchain.chat_models import init_chat_model

In [12]:
load_dotenv()
PROJECT_ID = os.getenv("PROJECT_ID") 
LOCATION = os.getenv("EMB_LOCATION") 
DATASET = os.getenv("DATASET") 
TABLE = os.getenv("TABLE") 

In [9]:
embeddings = VertexAIEmbeddings(model="textembedding-gecko@latest")

In [13]:
vector_store = BigQueryVectorStore(
    project_id=PROJECT_ID,
    dataset_name=DATASET,
    table_name=TABLE,
    location=LOCATION,
    embedding=embeddings,
)

BigQuery table llm-studies.blog_embeddings.rag_embeddings initialized/validated as persistent storage. Access via BigQuery console:
 https://console.cloud.google.com/bigquery?project=llm-studies&ws=!1m5!1m4!4m3!1sllm-studies!2sblog_embeddings!3srag_embeddings


In [17]:
retrieved_docs = vector_store.similarity_search(
    "Dennett é um eliminativista?",
    k=2,
    filter={"doc_id": "4169c3a6102540149ae4d0de6cbf06f2"},
)

In [27]:
llm = init_chat_model("gemini-2.0-flash-001", model_provider="google_vertexai")

In [28]:
prompt = hub.pull("rlm/rag-prompt")
question = "Dennett é um eliminativista?"
prompt

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})])

In [29]:
docs_content = "\n\n".join(doc.page_content for doc in retrieved_docs)
prompt = prompt.invoke({"question": question, "context": docs_content})
answer = llm.invoke(prompt)

In [31]:
answer.content

'Eu não sei. O contexto fornecido não contém informação sobre a visão de Dennett sobre o eliminativismo.\n'

#### Qual embedding?

In [1]:
TABLE = "rag_embeddings_s"

https://python.langchain.com/docs/tutorials/rag/

In [2]:
import os
from dotenv import load_dotenv
from langchain_google_vertexai import VertexAIEmbeddings
from langchain_google_community import BigQueryVectorStore
import csv
from langchain_community.document_loaders import WebBaseLoader
import bs4
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.chat_models import init_chat_model
from langchain_core.tools import tool
from langgraph.graph import MessagesState, StateGraph, END
from langgraph.prebuilt import ToolNode, tools_condition
from IPython.display import Image
from langchain_core.messages import SystemMessage

In [3]:
load_dotenv()
assert "LANGSMITH_TRACING" in os.environ, "Please set the LANGSMITH_TRACING environment variable."
assert "LANGSMITH_API_KEY" in os.environ, "Please set the LANGSMITH_API_KEY environment variable."
assert "PROJECT_ID" in os.environ, "Please set the PROJECT_ID environment variable."
assert "LOCATION" in os.environ, "Please set the LOCATION environment variable."
assert "DATASET" in os.environ, "Please set the DATASET environment variable."
assert "TABLE" in os.environ, "Please set the TABLE environment variable."
PROJECT_ID = os.getenv("PROJECT_ID") 
LOCATION = os.getenv("LOCATION") 
DATASET = os.getenv("DATASET") 
TABLE = os.getenv("TABLE") 

In [4]:
PROJECT_ID = "llm-studies"
LOCATION = "us-central1"
DATASET = "blog_embeddings"

In [5]:
embeddings = VertexAIEmbeddings(model="textembedding-gecko@latest")

In [6]:
vector_store = BigQueryVectorStore(
    project_id=PROJECT_ID,
    dataset_name=DATASET,
    table_name=TABLE,
    location=LOCATION,
    embedding=embeddings,
)

BigQuery table llm-studies.blog_embeddings.rag_embeddings initialized/validated as persistent storage. Access via BigQuery console:
 https://console.cloud.google.com/bigquery?project=llm-studies&ws=!1m5!1m4!4m3!1sllm-studies!2sblog_embeddings!3srag_embeddings


In [14]:
llm = init_chat_model("gemini-2.0-flash-001", model_provider="google_vertexai")

In [8]:
@tool(response_format="content_and_artifact")
def retrieve(query: str):
    """Retrieve information related to a query."""
    retrieved_docs = vector_store.similarity_search(query, k=2)
    serialized = "\n\n".join(
        (f"Source: {doc.metadata}\n" f"Content: {doc.page_content}")
        for doc in retrieved_docs
    )
    return serialized, retrieved_docs

In [9]:
# Step 1: Generate an AIMessage that may include a tool-call to be sent.
def query_or_respond(state: MessagesState):
    """Generate tool call for retrieval or respond."""
    llm_with_tools = llm.bind_tools([retrieve])
    response = llm_with_tools.invoke(state["messages"])
    # MessagesState appends messages to state instead of overwriting
    return {"messages": [response]}

In [10]:
# Step 3: Generate a response using the retrieved content.
def generate(state: MessagesState):
    """Generate answer."""
    # Get generated ToolMessages
    recent_tool_messages = []
    for message in reversed(state["messages"]):
        if message.type == "tool":
            recent_tool_messages.append(message)
        else:
            break
    tool_messages = recent_tool_messages[::-1]

    # Format into prompt
    docs_content = "\n\n".join(doc.content for doc in tool_messages)
    system_message_content = (
        "You are an assistant for question-answering tasks. "
        "Use the following pieces of retrieved context to answer "
        "the question. If you don't know the answer, say that you "
        "don't know. Use three sentences maximum and keep the "
        "answer concise."
        "\n\n"
        f"{docs_content}"
    )
    conversation_messages = [
        message
        for message in state["messages"]
        if message.type in ("human", "system")
        or (message.type == "ai" and not message.tool_calls)
    ]
    prompt = [SystemMessage(system_message_content)] + conversation_messages

    # Run
    response = llm.invoke(prompt)
    return {"messages": [response]}

In [11]:
tools = ToolNode([retrieve])

In [15]:
graph_builder = StateGraph(MessagesState)
graph_builder.add_node(query_or_respond)
graph_builder.add_node(tools)
graph_builder.add_node(generate)

graph_builder.set_entry_point("query_or_respond")
graph_builder.add_conditional_edges(
    "query_or_respond",
    tools_condition,
    {END: END, "tools": "tools"},
)
graph_builder.add_edge("tools", "generate")
graph_builder.add_edge("generate", END)

graph = graph_builder.compile()

In [17]:
input_message = "Dennett é um eliminativista?"

for step in graph.stream(
    {"messages": [{"role": "user", "content": input_message}, {"role": "system", "content": "Você é um tutor de filosofia e deve trazer as respostas baseadas na teorias encontradas nos textos, sejam de autores ou termos"}]},
    stream_mode="values",
):
    step["messages"][-1].pretty_print()


Você é um tutor de filosofia e deve trazer as respostas baseadas na teorias encontradas nos textos, sejam de autores ou termos
Tool Calls:
  retrieve (18860e37-b95f-4ccc-8287-236514b6948b)
 Call ID: 18860e37-b95f-4ccc-8287-236514b6948b
  Args:
    query: Daniel Dennett eliminativism
Name: retrieve

Source: {'doc_id': '701792560a3d49af8ed3e872610183de', 'source': 'https://www.reflexoesdofilosofo.blog.br/2025/01/heterofenomenologia.html', 'score': 0.6980907463376732}
Content: constitui a essência da consciência. Ele propõe uma visão alternativa, que é baseada em uma abordagem materialista e funcionalista, onde a experiência subjetiva é vista como um conjunto de funções cerebrais e comportamentais, em vez de uma entidade metafísica.Pontos importantes sobre a visão de Dennett:Eliminativismo: Dennett é frequentemente associado ao eliminativismo, a visão de que conceitos como "qualia" e "experiência subjetiva" podem e devem ser eliminados da nossa linguagem e teoria da mente.Intencionalidad

In [18]:
input_message = "Me fale quem é eliminativista"

for step in graph.stream(
    {"messages": [{"role": "user", "content": input_message}, {"role": "system", "content": "Você é um tutor de filosofia e deve trazer as respostas baseadas na teorias encontradas nos textos, sejam de autores ou termos"}]},
    stream_mode="values",
):
    step["messages"][-1].pretty_print()


Você é um tutor de filosofia e deve trazer as respostas baseadas na teorias encontradas nos textos, sejam de autores ou termos
Tool Calls:
  retrieve (1f070483-f4e6-42b1-99c8-432e9a72735c)
 Call ID: 1f070483-f4e6-42b1-99c8-432e9a72735c
  Args:
    query: eliminativismo
Name: retrieve

Source: {'doc_id': 'fb7fcafa9ffe47269dbad63df4a0eb18', 'source': 'https://www.reflexoesdofilosofo.blog.br/2023/02/descritivismo.html', 'score': 0.6619441515696384}
Content: Descritivismo

Source: {'doc_id': '9afdd777f93a40b5a366da0d475c7ebb', 'source': 'https://www.reflexoesdofilosofo.blog.br/2022/12/referencialismo.html', 'score': 0.7284535514220247}
Content: Referencialismo

Não tenho informações sobre quem é um eliminativista nos textos fornecidos.


In [12]:
# Define prompt for question-answering
prompt = hub.pull("rlm/rag-prompt")