## Environment

In [31]:
# Install the notebook's dependencies with the %pip magic.
%pip install python-dotenv
# Disabled alternative install, kept for reference:
#!pip install -U langchain langchain-openai
%pip install --quiet --upgrade langchain-text-splitters langchain-community langgraph

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.3.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.3.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.3.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


LangSmith setup

In [33]:
# LangSmith setup -- allows us to track what is going on inside our chain or agent
import os
from dotenv import load_dotenv

# Load variables from the local .env file into the process environment.
load_dotenv(".env")

# Enable tracing only when an API key is actually available. The key is not
# copied back into os.environ: assigning None (missing key) to os.environ
# raises a TypeError, and a present key is already in the environment.
if os.getenv("LANGSMITH_API_KEY"):
    os.environ["LANGSMITH_TRACING"] = "true"
else:
    os.environ["LANGSMITH_TRACING"] = "false"
    print("LANGSMITH_API_KEY is not set; LangSmith tracing is disabled.")

MistralAI LLM and embeddings model setup

In [34]:
pip install -qU "langchain[mistralai]"

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.3.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [82]:
from langchain.chat_models import init_chat_model
from langchain_mistralai import MistralAIEmbeddings

# Load MISTRAL_API_KEY from the local .env file into the process environment.
load_dotenv(".env")

# Fail early with an actionable message. Copying os.getenv(...) back into
# os.environ would raise an opaque TypeError when the key is missing.
if not os.getenv("MISTRAL_API_KEY"):
    raise RuntimeError(
        "MISTRAL_API_KEY is not set. Add it to the .env file or export it "
        "before running this cell."
    )

# Chat model used by the graph nodes.
llm = init_chat_model("mistral-large-latest", model_provider="mistralai")

# Embedding model handed to the vector store.
embeddings = MistralAIEmbeddings(model="mistral-embed")

  validated_self = self.__pydantic_validator__.validate_python(data, self_instance=self)


Vector store setup

In [37]:
pip install -qU langchain-core

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.3.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [83]:
from langchain_core.vectorstores import InMemoryVectorStore

# In-memory vector store backed by the `embeddings` model configured earlier.
vector_store = InMemoryVectorStore(embedding=embeddings)

## Loading data

In [84]:
import json
from langchain_community.document_loaders import JSONLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.documents import Document

# Step 1: Read the raw JSON records from disk
file_path = "singapore_hdb_data.json"

with open(file_path, "r", encoding="utf-8") as json_file:
    data = json.load(json_file)

# Step 2: Turn each record into one retrievable Document.
# The text lists every field of the record; the area name is also stored in
# the metadata so that a chunk can be traced back to its area.
documents = [
    Document(
        page_content=(
            f"Area: {entry['area']}\n"
            f"HDB Available: {entry['has_hdb']}\n"
            f"HDB Price range: {entry['hdb_price_range']}\n"
            f"Pros: {entry['pros']}\n"
            f"Cons: {entry['cons']}"
        ),
        metadata={"Area": entry["area"]},
    )
    for entry in data
]

# Step 3: Break the documents into chunks of at most 300 characters
# (50 characters of overlap) before indexing
text_splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)
json_splits = text_splitter.split_documents(documents)

In [87]:
# Add the split chunks to the vector store. The return value is bound to `_`
# only to keep it out of the cell output.
# NOTE(review): re-running this cell presumably adds the same chunks again — confirm before re-executing.
_ = vector_store.add_documents(documents=json_splits)

In [91]:
# Smoke test: similarity lookup for a single area name.
vector_store.search(query="Yishun", search_type="similarity")

[Document(id='efc1fc14-59d7-4e17-a7fb-627b0770c0d0', metadata={'Area': 'Yishun'}, page_content="Area: Yishun\nHDB Available: True\nHDB Price range: SGD 420K - 780K\nPros: ['Affordable compared to other areas', 'Plenty of neighbourhood amenities', 'Close to nature parks like Lower Seletar Reservoir']\nCons: ['Far from city centre', 'Reputation for being less desirable in the past']"),
 Document(id='3e0b1681-233b-42fe-8f94-5a2ac756cbb5', metadata={'Area': 'Woodlands'}, page_content="Area: Woodlands\nHDB Available: True\nHDB Price range: SGD 450K - 800K\nPros: ['Affordable HDB options', 'Family-friendly environment', 'Close to Malaysia via Causeway']\nCons: ['Far from city centre', 'Traffic jams near Causeway']"),
 Document(id='29f3fc8f-e5f2-45ad-9069-f1c0dd4c4dde', metadata={'Area': 'Punggol'}, page_content="Area: Punggol\nHDB Available: True\nHDB Price range: SGD 480K - 850K\nPros: ['Modern new town', 'Scenic waterfront lifestyle', 'Family-oriented with parks and schools']\nCons: ['Stil

## LangGraph implementation

In [108]:
# MessagesState -- keeps track of the messages in the conversation, e.g.: { messages: [
#   HumanMessage (user input), AIMessage (vector store query with tool calls),
#   ToolMessage (retrieved docs), AIMessage (final answer)
# ]}

from langgraph.graph import MessagesState, StateGraph

# Graph builder using MessagesState as its state; nodes and edges are added in later cells.
graph_builder = StateGraph(MessagesState)

In [110]:
# retrieve from documents stored in vector store
from langchain_core.tools import tool

@tool(response_format="content_and_artifact")
def retrieve(query: str):
    """Retrieve information related to a query."""
    # NOTE(review): the docstring above is presumably used by @tool as the tool
    # description shown to the model, so it is kept verbatim.

    # Two closest chunks for the query.
    hits = vector_store.similarity_search(query, k=2)

    # One "Source / Content" section per hit, separated by blank lines.
    sections = []
    for hit in hits:
        sections.append(f"Source: {hit.metadata}\nContent: {hit.page_content}")
    serialized = "\n\n".join(sections)

    # First element is the text content, second the raw documents (artifact).
    return serialized, hits

In [111]:
from langchain_core.messages import SystemMessage
from langgraph.prebuilt import ToolNode


# Step 1: Let the model either request a retrieval (tool call) or answer directly
def query_or_respond(state: MessagesState):
    """Invoke the tool-enabled model on the conversation and return its message.

    The returned message may carry a call to ``retrieve``; without one it is
    the direct reply.
    """
    ai_message = llm.bind_tools([retrieve]).invoke(state["messages"])
    # Returned as a one-item list: MessagesState appends to the existing
    # messages instead of overwriting them
    return {"messages": [ai_message]}


# Step 2: Execute the retrieval.
# Node wrapping the `retrieve` tool; it is registered on the graph in a later
# cell and referred to there by the name "tools".
tools = ToolNode([retrieve])


# Step 3: Generate a response using the retrieved content.
def generate(state: MessagesState):
    """Answer the user's question from the most recently retrieved context.

    Collects the run of tool messages at the end of the conversation, embeds
    their content in a system prompt, and invokes the model with that prompt
    followed by the human/system messages and the AI messages that carry no
    tool calls.

    Args:
        state: Graph state; ``state["messages"]`` holds the conversation so far.

    Returns:
        A dict whose ``"messages"`` list contains the model's reply.
    """
    messages = state["messages"]

    # Walk backwards to collect the trailing run of tool messages, then
    # restore their original order.
    recent_tool_messages = []
    for message in reversed(messages):
        if message.type != "tool":
            break
        recent_tool_messages.append(message)
    recent_tool_messages.reverse()

    # Format into prompt
    docs_content = "\n\n".join(
        tool_message.content for tool_message in recent_tool_messages
    )
    # Adjacent literals are concatenated as-is, so each sentence needs its own
    # trailing space (the first one used to run into the next: "Singapore.Use").
    system_message_content = (
        "You are an assistant for question-answering tasks on the housing market in Singapore. "
        "Use the following pieces of retrieved context to answer "
        "the question. If you don't know the answer, say that you "
        "don't know. Use three sentences maximum and keep the "
        "answer concise."
        "\n\n"
        f"{docs_content}"
    )

    # Keep the human/system turns and the AI replies without tool calls; tool
    # requests and tool results are left out because the retrieved text is
    # already part of the system prompt.
    conversation_messages = [
        message
        for message in messages
        if message.type in ("human", "system")
        or (message.type == "ai" and not message.tool_calls)
    ]
    prompt = [SystemMessage(system_message_content)] + conversation_messages

    # Run
    response = llm.invoke(prompt)
    return {"messages": [response]}

In [112]:
# Build the graph

from langgraph.graph import END, MessagesState, StateGraph
from langgraph.prebuilt import ToolNode, tools_condition

# Start from a fresh builder so this cell can be re-run on its own: adding a
# node to a builder that already holds a node of the same name raises an error.
graph_builder = StateGraph(MessagesState)

graph_builder.add_node(query_or_respond)
graph_builder.add_node(tools)
graph_builder.add_node(generate)

# The model decides first; tools_condition then routes either to the tool
# node or straight to the end of the run.
graph_builder.set_entry_point("query_or_respond")
graph_builder.add_conditional_edges(
    "query_or_respond",
    tools_condition,
    {END: END, "tools": "tools"},
)
# After the tool node, the final answer is generated and the run ends.
graph_builder.add_edge("tools", "generate")
graph_builder.add_edge("generate", END)

graph = graph_builder.compile()

## Test RAG pipeline

In [106]:
# No tool call required
input_message = "Hello"
initial_state = {"messages": [{"role": "user", "content": input_message}]}

# Print the newest message of every streamed state snapshot.
for snapshot in graph.stream(initial_state, stream_mode="values"):
    latest_message = snapshot["messages"][-1]
    latest_message.pretty_print()


Hello

Hi there! How can I assist you today? 😊


In [113]:
# Tool call required
input_message = "what is there to do in Yishun?"
initial_state = {"messages": [{"role": "user", "content": input_message}]}

# Print the newest message of every streamed state snapshot.
for snapshot in graph.stream(initial_state, stream_mode="values"):
    latest_message = snapshot["messages"][-1]
    latest_message.pretty_print()


what is there to do in Yishun?
Tool Calls:
  retrieve (kASVXfzep)
 Call ID: kASVXfzep
  Args:
    query: things to do in Yishun, Singapore
Name: retrieve

Source: {'Area': 'Yishun'}
Content: Area: Yishun
HDB Available: True
HDB Price range: SGD 420K - 780K
Pros: ['Affordable compared to other areas', 'Plenty of neighbourhood amenities', 'Close to nature parks like Lower Seletar Reservoir']
Cons: ['Far from city centre', 'Reputation for being less desirable in the past']

Source: {'Area': 'Woodlands'}
Content: Area: Woodlands
HDB Available: True
HDB Price range: SGD 450K - 800K
Pros: ['Affordable HDB options', 'Family-friendly environment', 'Close to Malaysia via Causeway']
Cons: ['Far from city centre', 'Traffic jams near Causeway']

In Yishun, you can enjoy **plenty of neighbourhood amenities** like shopping malls (Northpoint City), hawker centres, and sports facilities. It’s also **close to nature parks**, including Lower Seletar Reservoir, for outdoor activities. However, it’s know

## Test Ongoing Conversation

In [114]:
# Test multi-turn conversation with memory
from langchain_core.messages import HumanMessage

# Start with an empty conversation state
conversation_state = {"messages": []}

def continue_conversation(user_input, state):
    """Send one user turn through the graph and return the updated state.

    Args:
        user_input: Text of the user's message.
        state: Conversation state so far; ``state["messages"]`` is the message
            history. It is not modified.

    Returns:
        The state returned by ``graph.invoke`` for the history plus the new
        user message.
    """
    new_message = HumanMessage(content=user_input)

    # Build a new state instead of appending to the caller's list, so a failed
    # graph call does not leave an unanswered message in the caller's history.
    next_state = {**state, "messages": [*state["messages"], new_message]}

    return graph.invoke(next_state)

print("=== CONVERSATION TEST ===\n")

# Scripted user turns, asked in order within the same running conversation:
#   1. opening question about Yishun
#   2. follow-up question (should remember context)
#   3. another follow-up
scripted_turns = (
    "Tell me about Yishun",
    "What about the housing prices there?",
    "Are there any downsides to living there?",
)
turn_separator = "\n" + "=" * 50 + "\n"

for user_text in scripted_turns:
    print(f"🗣️ User: {user_text}")
    conversation_state = continue_conversation(user_text, conversation_state)
    print("🤖 Assistant:", conversation_state["messages"][-1].content)
    print(turn_separator)

# Show conversation history length
history = conversation_state["messages"]
print(f"📈 Total messages in conversation: {len(history)}")
print("💬 Message types:", [msg.type for msg in history])

=== CONVERSATION TEST ===

🗣️ User: Tell me about Yishun
🤖 Assistant: Yishun offers **affordable HDB flats (SGD 420K–780K)** with **plenty of amenities** and **proximity to nature** (e.g., Lower Seletar Reservoir). However, it’s **far from the city centre** and has a past reputation for being less desirable. It remains a practical choice for budget-conscious buyers seeking a well-equipped neighbourhood.


🗣️ User: What about the housing prices there?
🤖 Assistant: In **Yishun**, the **HDB (public housing) flats** are priced between **SGD 420,000 and SGD 780,000**, depending on factors like:

- **Flat type** (e.g., 3-room, 4-room, 5-room, or executive flats).
- **Location within Yishun** (proximity to amenities, MRT stations, or nature spots).
- **Age of the flat** (newer developments tend to be pricier).
- **Renovation and condition** of the unit.

Yishun is considered **relatively affordable** compared to other areas in Singapore, especially those closer to the city centre. Would you l

In [None]:
# Interactive conversation simulator
def chat_session():
    """Run an interactive chat loop against the compiled graph.

    Reads user input until the user types 'quit', 'exit' or 'bye' (or the
    input stream ends / the cell is interrupted). Each turn sends the history
    plus the new message to ``graph`` and prints the content of the last
    message of the result. A short summary is printed on exit.
    """
    # Local import so the cell does not depend on an earlier cell's import.
    from langchain_core.messages import HumanMessage

    conversation = {"messages": []}

    print("🤖 RAG Chatbot: Hello! Ask me about Singapore housing areas. Type 'quit' to exit.\n")

    while True:
        try:
            user_input = input("🗣️ You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # A closed input stream or an interrupt ends the session cleanly.
            print("\n🤖 RAG Chatbot: Goodbye!")
            break

        # Nothing to send for a blank line; ask again.
        if not user_input:
            continue

        if user_input.lower() in ("quit", "exit", "bye"):
            print("🤖 RAG Chatbot: Goodbye!")
            break

        # Build the candidate state without touching the stored history, so a
        # failed turn does not leave an unanswered user message behind.
        pending = {
            **conversation,
            "messages": [*conversation["messages"], HumanMessage(content=user_input)],
        }

        # Get bot response; errors are reported and the session keeps going.
        try:
            result = graph.invoke(pending)
            bot_response = result["messages"][-1].content
        except Exception as e:
            print(f"❌ Error: {e}\n")
            continue

        # Commit the turn only after it succeeded.
        conversation = result
        print(f"🤖 RAG Chatbot: {bot_response}\n")

    print("\n📊 Conversation Summary:")
    print(f"Total messages: {len(conversation['messages'])}")
    print(f"Message types: {[msg.type for msg in conversation['messages']]}")

# Uncomment the line below to start interactive chat
# (left disabled so that running all cells does not block waiting for input)
# chat_session()