Based on the LangChain RAG tutorial: https://python.langchain.com/docs/tutorials/rag/

In [11]:
%pip install --quiet --upgrade langchain-text-splitters langchain-community langgraph langchain-google-vertexai langchain-community faiss-cpu

Note: you may need to restart the kernel to use updated packages.




In [2]:
import os
from dotenv import load_dotenv
from langchain.chat_models import init_chat_model
from langchain_google_vertexai import VertexAIEmbeddings
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS
import bs4
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict

In [3]:
# Load variables from a local .env file (if any) into the process environment.
load_dotenv()

# LangSmith settings are the only variables this notebook insists on.
for required_var in ("LANGSMITH_TRACING", "LANGSMITH_API_KEY"):
    assert required_var in os.environ, f"Please set the {required_var} environment variable."

# The MongoDB Atlas settings are not used by any cell visible in this
# notebook (the vector store is an in-memory FAISS index), so they are read
# as optional values instead of aborting the run when they are missing.
# Both names stay defined (None when unset) in case other code reads them.
MONGODB_COLLECTION = os.getenv("MONGODB_COLLECTION")
ATLAS_VECTOR_SEARCH_INDEX_NAME = os.getenv("ATLAS_VECTOR_SEARCH_INDEX_NAME")

In [5]:
# Chat model that writes the final answer in the `generate` step.
llm = init_chat_model(
    "gemini-2.0-flash-001",
    model_provider="google_vertexai",
)

# Embedding model handed to the vector store and used to embed queries.
embeddings = VertexAIEmbeddings(
    model="text-embedding-004",
)

In [6]:
# Embed a throwaway string purely to learn the embedding length, which the
# flat L2 index needs as its dimensionality.
probe_vector = embeddings.embed_query("hello world")
index = faiss.IndexFlatL2(len(probe_vector))

# The store starts empty: no documents and no index-to-id mapping yet.
vector_store = FAISS(
    index=index,
    embedding_function=embeddings,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
)

In [8]:
# Web pages to load and index; passed to WebBaseLoader as `web_paths`.
# One Document per URL is expected from the loader (three in total).
urls = [
    "https://www.reflexoesdofilosofo.blog.br/2024/08/a-terceira-margem-do-rio.html",
    "https://www.reflexoesdofilosofo.blog.br/2024/09/uma-teoria-da-mente.html",
    "https://www.reflexoesdofilosofo.blog.br/2025/02/o-problema-de-parmenides.html",
]

In [9]:
# Restrict HTML parsing to elements carrying one of these class values so
# that only the selected parts of each page end up in the documents.
post_only = bs4.SoupStrainer(
    class_=("post hentry uncustomized-post-template", "post-title entry-title", "post-header")
)

loader = WebBaseLoader(web_paths=urls, bs_kwargs={"parse_only": post_only})
docs = loader.load()

# Display how many documents were loaded.
len(docs)

3

In [10]:
# Break the loaded documents into overlapping chunks before indexing them.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
all_splits = text_splitter.split_documents(docs)

# Display how many chunks were produced.
len(all_splits)

31

In [11]:
# Index every chunk in the vector store. The return value is bound to `_`
# only to keep it from being displayed as the cell output.
# NOTE(review): re-running this cell presumably adds the same chunks again
# (duplicates in the store) — confirm, and rebuild `vector_store` first if so.
_ = vector_store.add_documents(documents=all_splits)

In [12]:
# Define prompt for question-answering: pull the "rlm/rag-prompt" template
# from the hub. The `generate` step invokes it with "question" and "context".
prompt = hub.pull("rlm/rag-prompt")

In [13]:
# Define state for application
class State(TypedDict):
    """Shared state of the question-answering graph.

    The graph is invoked with ``question``; the ``retrieve`` step returns
    ``context`` and the ``generate`` step returns ``answer``.
    """

    # The user's question, supplied when the graph is invoked.
    question: str
    # Documents returned by the similarity search in `retrieve`.
    context: List[Document]
    # Content of the model response produced in `generate`.
    answer: str


# Define application steps
def retrieve(state: State):
    """Search the vector store for documents similar to the question.

    Args:
        state: Current graph state; only ``state["question"]`` is read.

    Returns:
        A partial state update ``{"context": ...}`` holding the search results.
    """
    question = state["question"]
    matches = vector_store.similarity_search(question)
    return {"context": matches}


def generate(state: State):
    """Answer the question using the retrieved documents as context.

    Args:
        state: Current graph state; ``state["question"]`` and
            ``state["context"]`` are read.

    Returns:
        A partial state update ``{"answer": ...}`` with the model response
        content.
    """
    # Concatenate the text of every retrieved document, separated by blank lines.
    page_texts = [doc.page_content for doc in state["context"]]
    context_text = "\n\n".join(page_texts)

    filled_prompt = prompt.invoke({"question": state["question"], "context": context_text})
    model_reply = llm.invoke(filled_prompt)
    return {"answer": model_reply.content}

In [14]:
# Build the two-step pipeline (retrieve, then generate), mark `retrieve` as
# the entry point, and compile it into a runnable graph.
graph_builder = StateGraph(State)
graph_builder.add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")

graph = graph_builder.compile()

In [16]:
# Run the graph on a sample question and show only the generated answer.
initial_state = {"question": "me fale sobre a mente"}
response = graph.invoke(initial_state)
print(response["answer"])

A teoria da mente está relacionada à filosofia da mente, que questiona a natureza da consciência e do mental. Alguns materialistas tratam o mental como algo puramente cerebral, mas a consciência é vista por outros como um fenômeno irredutível. A neurociência busca elucidar a consciência através da investigação científica do cérebro.

