In [12]:
# ===============================
# INSTALL (run once)
# ===============================
# !pip install -U langgraph langchain langchain-openai

from langgraph.checkpoint.memory import MemorySaver
import json
from typing import TypedDict, Optional
from langgraph.graph import StateGraph, END
from langgraph.types import interrupt, Command
from langchain_openai import ChatOpenAI

# ===============================
# STATE
# ===============================
class RouterState(TypedDict):
    """State dictionary passed between the nodes of the routing graph."""
    # Raw text to classify; interpolated into the classifier prompt.
    userInput: str
    # Label written by classify_with_confidence(); the sample run seeds it with None.
    label: Optional[str]
    # Confidence reported by the model, converted with float(); compared against
    # the escalation threshold in confidence_router().
    confidence: float
    # Label supplied by a human reviewer; finalize() prefers it over `label`
    # whenever it is truthy.
    humanOverride: Optional[str]
    # Human-readable summary written by finalize().
    result: Optional[str]

# ===============================
# LLM
# ===============================
# Module-level chat model used by the classifier node; temperature is fixed at 0.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# ===============================
# NODES
# ===============================

# Labels the prompt allows; any other label is treated as an unusable reply.
_ALLOWED_LABELS = ("billing_issue", "technical_issue", "general_query")


def _parse_classification(raw):
    """Return (label, confidence) parsed from the model reply, or (None, 0.0) if unusable."""
    text = raw.strip() if isinstance(raw, str) else ""
    # Chat models frequently wrap JSON in ```json ... ``` fences even when told
    # to return strict JSON, so peel the fence off before parsing.
    if text.startswith("```"):
        text = text.strip("`").strip()
        if text[:4].lower() == "json":
            text = text[4:]
    try:
        data = json.loads(text)
        label = data["label"]
        confidence = float(data["confidence"])
    except (ValueError, KeyError, TypeError, OverflowError):
        # Covers invalid JSON (JSONDecodeError is a ValueError), a non-object
        # payload, missing keys and non-numeric confidence values.
        return None, 0.0
    if label not in _ALLOWED_LABELS:
        return None, 0.0
    # Clamp into [0, 1]; with this argument order a NaN collapses to 0.0.
    return label, min(1.0, max(0.0, confidence))


def classify_with_confidence(state: RouterState) -> RouterState:
    """
    Ask the LLM for BOTH a label and a confidence (0–1) for ``state["userInput"]``.

    The reply is parsed defensively: if it is not valid JSON, lacks a field, or
    names a label outside the allowed set, the label is set to None and the
    confidence to 0.0 so that the downstream confidence check escalates to a
    human instead of the graph crashing.

    Args:
        state: Current graph state; only ``userInput`` is read.

    Returns:
        The same ``state`` dict, mutated in place with ``label`` and
        ``confidence`` filled in.
    """
    # json.dumps quotes and escapes the user text, so embedded quotes or
    # newlines cannot break out of the quoted span in the prompt.
    quoted_input = json.dumps(state["userInput"], ensure_ascii=False)
    prompt = f"""
You are a classification system.

Classify the user input into one of:
- billing_issue
- technical_issue
- general_query

Return STRICT JSON:
{{
  "label": "<label>",
  "confidence": <number between 0 and 1>
}}

User input:
{quoted_input}
"""

    response = llm.invoke([{"role": "user", "content": prompt}])
    label, confidence = _parse_classification(response.content)
    if label is None:
        print("Classifier reply was not usable; deferring to human review.")

    state["label"] = label
    state["confidence"] = confidence

    print(f"🧠 LLM classified as {state['label']} ")
    print(f"   with confidence {state['confidence']}")
    return state


# Predictions with confidence at or below this value are escalated to a human.
CONFIDENCE_THRESHOLD = 0.7


def confidence_router(state: RouterState) -> RouterState:
    """
    Pause the graph for human review when the classifier's confidence is low.

    When ``state["confidence"]`` is at or below ``CONFIDENCE_THRESHOLD`` the
    node calls ``interrupt()`` with a summary of the prediction. On resume the
    node runs again from its first line (the recorded run shows the
    "Checking confidence" line printed twice) and ``interrupt()`` returns the
    value passed to ``Command(resume=...)``.

    Args:
        state: Current graph state; reads ``confidence``, ``label`` and
            ``userInput``.

    Returns:
        ``state`` unchanged when confidence is above the threshold. Otherwise
        the reviewer's update: a dict is returned as-is, and a bare string is
        treated as the corrected label and returned as
        ``{"humanOverride": <string>}``.

    Raises:
        TypeError: If the resume value is neither a dict nor a string.
    """
    print("🔍 Checking confidence...")
    if state["confidence"] <= CONFIDENCE_THRESHOLD:
        print("⚠️ Confidence below threshold, pausing for human review.")
        updated_state = interrupt({
            "type": "low_confidence",
            "message": "LLM confidence below threshold",
            "predictedLabel": state["label"],
            "confidence": state["confidence"],
            "input": state["userInput"]
        })
        print("▶️ Resuming after human review with updated state", updated_state)
        if isinstance(updated_state, str):
            # Convenience form: the reviewer supplied only the corrected label.
            return {"humanOverride": updated_state}
        if isinstance(updated_state, dict):
            return updated_state
        # Fail here with a clear message rather than deep inside the graph
        # runtime when it tries to apply a non-dict update.
        raise TypeError(
            "resume value must be a dict of state updates or a label string, "
            f"got {type(updated_state).__name__}"
        )
    return state


def finalize(state: RouterState) -> RouterState:
    """
    Write the final classification summary into ``state["result"]``.

    A truthy ``humanOverride`` wins over the model's ``label``. The override
    is read with ``.get`` because no node in this file writes it: it is only
    present when the caller seeded it or a reviewer supplied it on resume.

    Args:
        state: Current graph state; reads ``humanOverride`` (optional),
            ``label`` and ``confidence``.

    Returns:
        The same ``state`` dict, mutated in place with ``result`` set.
    """
    print("✅ Finalizing classification...")
    final_label = state.get("humanOverride") or state["label"]
    print(f"📄 Final label: {final_label}")
    state["result"] = (
        f"✅ Final classification: {final_label} "
        f"(confidence: {state['confidence']})"
    )
    print(state["result"])
    return state

# ===============================
# GRAPH
# ===============================
# Linear pipeline: classify -> confidence_router -> finalize -> END.
builder = StateGraph(RouterState)

builder.add_node("classify", classify_with_confidence)
builder.add_node("confidence_router", confidence_router)
builder.add_node("finalize", finalize)

builder.set_entry_point("classify")
builder.add_edge("classify", "confidence_router")
builder.add_edge("confidence_router", "finalize")
builder.add_edge("finalize", END)

# Compile once, with a checkpointer: confidence_router() calls interrupt(),
# and the paused run is later resumed through the same thread_id, so the
# graph state has to be saved between the two invoke() calls.
checkpointer = MemorySaver()
graph = builder.compile(checkpointer=checkpointer)

# ===============================
# RUN (MAY PAUSE)
# ===============================
initial_state = {
    "userInput": "Something",
    "label": None,
    "confidence": 0.0,
    "humanOverride": None,
    "result": None
}

# The thread_id identifies this run in the checkpointer; the resume call below
# must reuse the same config to continue the paused run.
config = {"configurable": {"thread_id": "approval-123"}}
state = graph.invoke(initial_state, config=config)
# NOTE: confidence_router() pauses when confidence <= 0.7, so a confidence of
# exactly 0.7 also pauses even though the message below says "<".
print("⏸️ If confidence < 0.7, graph paused")
print("paused state", state)
# ===============================
# HUMAN ESCALATION (ONLY IF PAUSED)
# ===============================
# Human reviews and corrects label

# A paused run still has nodes scheduled; a finished run has none. Resume only
# in the former case, so a high-confidence run is not resumed spuriously.
if graph.get_state(config).next:
    final_state = graph.invoke(Command(resume={
        "userInput": state.get("userInput"),
        "label": state.get("label"),
        "confidence": state.get("confidence"),
        "humanOverride": "billing_issue",
        "result": "Overrided"
    }), config=config)
else:
    # The graph ran to completion on the first invoke; nothing to resume.
    final_state = state
print(final_state)


🧠 LLM classified as general_query 
   with confidence 0.5
🔍 Checking confidence...
⚠️ Confidence below threshold, pausing for human review.
⏸️ If confidence < 0.7, graph paused
paused state {'userInput': 'Something', 'label': 'general_query', 'confidence': 0.5, 'humanOverride': None, 'result': None, '__interrupt__': [Interrupt(value={'type': 'low_confidence', 'message': 'LLM confidence below threshold', 'predictedLabel': 'general_query', 'confidence': 0.5, 'input': 'Something'}, id='4b7d93063ae1fa1de052c5080ee1f0dd')]}
🔍 Checking confidence...
⚠️ Confidence below threshold, pausing for human review.
▶️ Resuming after human review with updated state {'userInput': 'Something', 'label': 'general_query', 'confidence': 0.5, 'humanOverride': 'billing_issue', 'result': 'Overrided'}
✅ Finalizing classification...
📄 Final label: billing_issue
✅ Final classification: billing_issue (confidence: 0.5)
{'userInput': 'Something', 'label': 'general_query', 'confidence':