In [None]:
import getpass
import os

# Ask for the OpenAI API key interactively only when the environment does not
# already provide a non-empty one; the key is never written into the notebook.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API key: ")


In [36]:
import certifi

# Set the REQUESTS_CA_BUNDLE environment variable to the CA bundle path
# reported by certifi (`os` is imported in the first cell).
# NOTE(review): presumably a workaround for TLS certificate verification
# failures — confirm it has the intended effect in this environment.
os.environ["REQUESTS_CA_BUNDLE"] = certifi.where()

In [2]:
import pandas as pd

In [3]:
# Load the articles table from `articles.csv` (path is relative to the
# working directory). Later cells read its `article_name` column.
df = pd.read_csv('articles.csv')

In [4]:
# Expose the row position as an explicit `article_id` column.
#
# The step is guarded and non-mutating so that re-running this cell is a
# no-op. The previous in-place version inserted a fresh `index` column on every
# run and renamed it, leaving duplicate `article_id` columns after a second run
# (which makes `row['article_id']` return a Series instead of a scalar).
if 'article_id' not in df.columns:
    df = df.reset_index().rename(columns={'index': 'article_id'})

In [5]:
# Count the distinct values in the `article_name` column.
df['article_name'].nunique()

477

In [6]:
# Identifier of the embedding model (matches the model given to
# `OpenAIEmbeddings` in the next cell).
embeddings_model = "text-embedding-3-large"

In [7]:
from langchain_openai import OpenAIEmbeddings

# Embeddings client used to vectorize the article documents.
# The model name comes from the `embeddings_model` constant defined in the
# previous cell, so it is configured in exactly one place instead of being
# repeated here as a second string literal.
embeddings = OpenAIEmbeddings(
    model=embeddings_model,
)

In [8]:
from langchain.docstore.document import Document

In [9]:
def _row_to_document(row):
    """Build a Document from one row: the title is the content, the id is metadata."""
    return Document(
        page_content=row['article_name'],
        metadata={'article_id': row['article_id']},
    )


# One Document per article row, stored alongside the source data.
df["docs"] = df.apply(_row_to_document, axis=1)

In [10]:
from langchain.text_splitter import CharacterTextSplitter

In [11]:
# Splitter created via the tiktoken-encoder constructor with chunk_size=500
# and chunk_overlap=20.
text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_overlap=20,
    chunk_size=500,
)

# Collect the per-row Documents into a plain list and split them.
lst_documents = df["docs"].to_list()
split_docs = text_splitter.split_documents(lst_documents)

In [31]:
from langchain_community.document_loaders import WebBaseLoader

# Blog posts to fetch.
urls = [
    "https://lilianweng.github.io/posts/2024-11-28-reward-hacking/",
    "https://lilianweng.github.io/posts/2024-07-07-hallucination/",
    "https://lilianweng.github.io/posts/2024-04-12-diffusion-video/",
]

# One loader per URL; `docs` ends up as a list with one `.load()` result per URL.
# NOTE(review): the name `docs` is reassigned by the similarity-search cell
# further down — confirm this cell is still needed.
loaders = [WebBaseLoader(url) for url in urls]
docs = [loader.load() for loader in loaders]

SSLError: HTTPSConnectionPool(host='lilianweng.github.io', port=443): Max retries exceeded with url: /posts/2024-11-28-reward-hacking/ (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self-signed certificate in certificate chain (_ssl.c:1000)')))

In [12]:
# TODO: decide whether a normalization step is needed before building the vector store.

In [13]:
from langchain.vectorstores import FAISS
from langchain_core.vectorstores import InMemoryVectorStore

In [38]:
# Embed the split documents with `embeddings` and index them in a FAISS store.
vector_store = FAISS.from_documents(split_docs, embeddings)

APIConnectionError: Connection error.

In [21]:
# Sanity-check retrieval: fetch the 10 closest documents for a sample
# Portuguese question.
docs = vector_store.similarity_search(
    query="como acho um produto?",
    k=10,
)

NameError: name 'vector_store' is not defined

In [None]:
# Display the retrieved documents (the `docs` assigned by the similarity search).
docs

[Document(id='8f975360-0028-44ba-bb9f-09c3a7bc7d30', metadata={'article_id': 380}, page_content='Como pesquisar produtos no Mercado da Hotmart?'),
 Document(id='cc95782f-8993-40d8-b232-cafe3c73b6a8', metadata={'article_id': 186}, page_content='Como cadastrar meu produto?'),
 Document(id='06290407-dde8-4369-85c5-dd5811cd1d8b', metadata={'article_id': 398}, page_content='Como acessar o produto que comprei?'),
 Document(id='2bccd68e-c26f-4322-95fc-c4a9d3ad65a5', metadata={'article_id': 419}, page_content='Busca de produto Hotmart'),
 Document(id='f45eda6b-509b-4f0f-90b6-09944946d8ef', metadata={'article_id': 470}, page_content='Como cadastrar um produto de evento online?'),
 Document(id='aaf7424d-a3c4-414a-976a-9acd141b115e', metadata={'article_id': 59}, page_content='Como encontrar as mídias de divulgação do produto?'),
 Document(id='25d1b4e4-c82a-4661-a452-c1d11d272c17', metadata={'article_id': 308}, page_content='Como comprar um produto pela Hotmart?'),
 Document(id='5514def2-da1e-4dc4

In [23]:
from typing import Literal, TypedDict
import uuid

from langgraph.constants import START, END
from langgraph.graph import StateGraph
from langgraph.types import interrupt, Command
from langgraph.checkpoint.memory import MemorySaver

# Define the shared graph state
class State(TypedDict):
    """State dictionary passed between the nodes of the approval graph."""
    # Text produced by the `generate_llm_output` node.
    llm_output: str
    # Outcome recorded by `human_approval`: "approved" or "rejected".
    decision: str

def generate_llm_output(state: State) -> State:
    """Stand in for an LLM call by returning a fixed piece of text.

    Args:
        state: Current graph state (not read by this node).

    Returns:
        A partial state update containing only the ``llm_output`` key.
    """
    simulated_text = "This is the generated output."
    return {"llm_output": simulated_text}

def human_approval(state: State) -> Command[Literal["approved_path", "rejected_path"]]:
    """Ask a human to approve the generated output and route accordingly.

    Calls ``interrupt`` with the question and the current ``llm_output``. The
    value returned by ``interrupt`` is compared to ``"approve"``; any other
    value is treated as a rejection.

    Args:
        state: Current graph state; ``llm_output`` is read from it.

    Returns:
        A ``Command`` that goes to ``"approved_path"`` or ``"rejected_path"``
        and records ``"approved"`` / ``"rejected"`` under ``decision``.
    """
    review_request = {
        "question": "Do you approve the following output?",
        "llm_output": state["llm_output"],
    }
    decision = interrupt(review_request)

    is_approved = decision == "approve"
    return Command(
        goto="approved_path" if is_approved else "rejected_path",
        update={"decision": "approved" if is_approved else "rejected"},
    )

def approved_node(state: State) -> State:
    """Handle the approval branch: print a marker and return the state unchanged."""
    message = "✅ Approved path taken."
    print(message)
    return state

def rejected_node(state: State) -> State:
    """Handle the rejection branch: print a marker and return the state unchanged."""
    message = "❌ Rejected path taken."
    print(message)
    return state

# Assemble the approval workflow:
# generate_llm_output -> human_approval -> approved_path | rejected_path -> END
builder = StateGraph(State)

# Register every node under the name used by the edges and by Command(goto=...).
for node_name, node_fn in [
    ("generate_llm_output", generate_llm_output),
    ("human_approval", human_approval),
    ("approved_path", approved_node),
    ("rejected_path", rejected_node),
]:
    builder.add_node(node_name, node_fn)

# Entry edge from START, followed by the fixed edges. No static edge leaves
# "human_approval": that node picks its successor through the Command it returns.
for source, target in [
    (START, "generate_llm_output"),
    ("generate_llm_output", "human_approval"),
    ("approved_path", END),
    ("rejected_path", END),
]:
    builder.add_edge(source, target)

# Compile with a MemorySaver checkpointer; the cells below pause at the
# interrupt and later resume using the same thread_id.
checkpointer = MemorySaver()
graph = builder.compile(checkpointer=checkpointer)

# Start a run on a fresh thread; execution stops at the interrupt raised in
# `human_approval`, and the pending interrupt is available under "__interrupt__".
thread_id = uuid.uuid4()
config = {"configurable": {"thread_id": thread_id}}

result = graph.invoke({}, config=config)
print(result["__interrupt__"])
# Expected output:
# Interrupt(value={'question': 'Do you approve the following output?', 'llm_output': 'This is the generated output.'}, ...)

# The run is resumed in the next cell with Command(resume="approve") and the
# same `config`. To exercise the rejection branch, resume with "reject" instead.

[Interrupt(value={'question': 'Do you approve the following output?', 'llm_output': 'This is the generated output.'}, resumable=True, ns=['human_approval:80f0461a-484d-52f3-60db-b96980d9b9d8'])]


In [24]:
# Resume the interrupted run on the same thread, answering the approval
# question with "approve", then show the final state.
resume_command = Command(resume="approve")
final_result = graph.invoke(resume_command, config=config)
print(final_result)

✅ Approved path taken.
{'llm_output': 'This is the generated output.', 'decision': 'approved'}


In [25]:
# Wrap the vector store in a retriever (default settings); it is passed to
# `create_retriever_tool` in the next cell.
retriever = vector_store.as_retriever()

NameError: name 'vector_store' is not defined

In [23]:
from langchain.tools.retriever import create_retriever_tool


# Create a tool from the retriever, with an explicit tool name and description.
retriever_tool = create_retriever_tool(
    retriever,
    name="retrieve_faq_articles",
    description="Search and return FAQ articles about Hotmart.",
)

NameError: name 'retriever' is not defined