# NUS DSA Module Planning Assistant — API Tooling

This notebook wires up the NUS Data Science & Analytics planning chatbot with the API tooling described in the project specification.

It focuses on the LangGraph agent shell and the NUSMods REST API tools. Context retrieval tools will be integrated later.

Use the annotated sections below to understand how each component contributes to the end-to-end conversation loop—starting from HTTP requests, through LangChain tool wrappers, and finally into the LangGraph state machine that powers the chat experience.


In [None]:
# Core language features and typing helpers
from __future__ import annotations

# Logging keeps the API interactions transparent when debugging
import logging
from typing import Any, Dict, Iterable, List, Optional

# Third-party dependencies for HTTP access and the LangChain/LangGraph stack
import requests
from langchain_core.tools import tool
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolMessage
from langchain_ollama.chat_models import ChatOllama
from langgraph.graph import START, MessagesState, StateGraph
from langgraph.prebuilt import ToolNode, tools_condition


In [None]:
# Grab the dedicated "nusmods" logger first, then switch on INFO-level output
# so API activity can be surfaced in the notebook while debugging.
logger = logging.getLogger("nusmods")
logging.basicConfig(level=logging.INFO)


class NusModsClient:
    """Thin wrapper around the NUSMods v2 API with simple response caching.

    Every payload fetched over HTTP is memoised in a per-instance dictionary,
    so repeated lookups of the same module or catalogue within one session do
    not hit the network again.

    Args:
        default_acad_year: Academic year (e.g. "2025-2026") used whenever a
            call does not specify one.
        timeout: Seconds to wait for each HTTP request before `requests`
            raises a timeout error. Without a timeout a stalled connection
            would block the calling tool indefinitely.
    """

    # Same named logger the notebook configures; resolved here so the class
    # does not rely on a module-level variable being defined first.
    _log = logging.getLogger("nusmods")

    def __init__(self, default_acad_year: str = "2025-2026", timeout: float = 10.0):
        # Default AY so every tool can omit it unless the user specifies otherwise.
        self.default_acad_year = default_acad_year
        self.timeout = timeout
        # Single `Session` reuses TCP connections for faster repeated calls.
        self._session = requests.Session()
        # In-memory caches prevent duplicate requests when the same module is referenced.
        self._module_cache: Dict[str, Dict[str, Any]] = {}
        self._module_list_cache: Dict[str, List[Dict[str, Any]]] = {}

    def _year_base(self, acad_year: Optional[str]) -> str:
        """Return the API base URL for `acad_year` (or the default year)."""
        year = acad_year or self.default_acad_year
        return f"https://api.nusmods.com/v2/{year}"

    def _normalise_code(self, module_code: str) -> str:
        """Strip and upper-case a module code; raise ValueError when empty."""
        code = (module_code or "").strip().upper()
        if not code:
            raise ValueError("module_code is required")
        return code

    @staticmethod
    def _module_level(module_code: str) -> Optional[int]:
        """Return the first digit of a module code, or None if it has none.

        Module codes have letter prefixes of varying length ("CS3244",
        "DSA3101"), so the level digit cannot be read from a fixed index.
        """
        for char in module_code:
            if "0" <= char <= "9":
                return int(char)
        return None

    def _get_json(self, url: str) -> Any:
        """GET `url`, raise on HTTP error status, and return the decoded JSON."""
        self._log.info("GET %s", url)
        response = self._session.get(url, timeout=self.timeout)
        response.raise_for_status()
        return response.json()

    def module(self, module_code: str, acad_year: Optional[str] = None) -> Dict[str, Any]:
        """Fetch the module JSON payload for a code, memoised per (year, code).

        Raises:
            ValueError: if `module_code` is empty.
            requests.HTTPError: if the API responds with an error status.
        """
        code = self._normalise_code(module_code)
        year = acad_year or self.default_acad_year
        cache_key = f"{year}:{code}"
        if cache_key not in self._module_cache:
            url = f"{self._year_base(year)}/modules/{code}.json"
            self._module_cache[cache_key] = self._get_json(url)
        return self._module_cache[cache_key]

    def module_list(self, acad_year: Optional[str] = None) -> List[Dict[str, Any]]:
        """Fetch the full catalogue listing for a year (cached); powers keyword search."""
        year = acad_year or self.default_acad_year
        if year not in self._module_list_cache:
            url = f"{self._year_base(year)}/moduleList.json"
            self._module_list_cache[year] = self._get_json(url)
        return self._module_list_cache[year]

    def search_modules(
        self,
        query: str,
        acad_year: Optional[str] = None,
        level: Optional[int] = None,
        limit: int = 10,
    ) -> List[Dict[str, Any]]:
        """Case-insensitive substring search over module codes and titles.

        Args:
            query: Keyword to look for. Required.
            acad_year: Academic year; defaults to the client's configured year.
            level: Optional module level. Accepts the single digit (3) or the
                thousands form (3000); both select codes whose first digit is 3.
            limit: Maximum number of matches to return. Values <= 0 yield an
                empty list.

        Returns:
            Catalogue entries in catalogue order, at most `limit` of them.

        Raises:
            ValueError: if `query` is empty or whitespace.
        """
        needle = (query or "").strip().lower()
        if not needle:
            raise ValueError("query must be a non-empty string")
        if limit <= 0:
            return []

        wanted_level: Optional[int] = None
        if level is not None:
            wanted_level = int(level)
            if wanted_level >= 1000:
                # "level 3000" and "level 3" mean the same thing to students.
                wanted_level //= 1000

        matches: List[Dict[str, Any]] = []
        for mod in self.module_list(acad_year):
            code = mod.get("moduleCode") or ""
            title = mod.get("title") or ""
            if wanted_level is not None and self._module_level(code) != wanted_level:
                continue
            if needle in code.lower() or needle in title.lower():
                matches.append(mod)
                if len(matches) >= limit:
                    break
        return matches

    def module_timetable(
        self,
        module_code: str,
        acad_year: Optional[str] = None,
        semester: Optional[int] = None,
    ) -> List[Dict[str, Any]]:
        """Return the module's `semesterData` entries, optionally for one semester.

        When `semester` is None every entry is returned; otherwise only the
        entries whose "semester" field equals it.
        """
        data = self.module(module_code, acad_year)
        semester_data = data.get("semesterData", [])
        if semester is None:
            return semester_data
        return [sem for sem in semester_data if sem.get("semester") == semester]


# Shared client instance reused across all LangChain tools, so they also share
# its HTTP session and in-memory caches. Uses the default academic year.
client = NusModsClient()


## LangChain tool wrappers

Each decorated function below exposes a focused slice of the NUSMods API.
They are intentionally lightweight so the language model can call them with the
exact parameters needed to answer a student query.


In [None]:
@tool
def nusmods_module_overview(module_code: str, acad_year: Optional[str] = None) -> Dict[str, Any]:
    """Retrieve the canonical module payload for course planning questions.

    Use this tool whenever you need authoritative facts about a module, such as its
    title, MC value, summary description, faculty ownership, or the semesters when
    it typically runs. The response is trimmed for conversational use so the model
    does not need to wade through the full API schema.

    Inputs:
    - module_code (str): NUS module code like "CS3244". Required.
    - acad_year (str, optional): Academic year in "YYYY-YYYY" format. Defaults
      to the client's configured year when omitted.

    Output (dict):
    - moduleCode: Normalised module code.
    - title: Official module title.
    - description: Module synopsis text.
    - moduleCredit: MC value as a string.
    - faculty / department: Owning faculty metadata.
    - semesterData: List of semesters with exam date and lesson types offered.
    - prerequisite / preclusion / fulfillRequirements: Relationship metadata that
      helps answer follow-up eligibility questions.
    """
    # The docstring above is left untouched on purpose: it is the text the
    # model reads when deciding whether to call this tool.
    payload = client.module(module_code, acad_year)

    def _lesson_types(semester_entry: Dict[str, Any]) -> List[str]:
        # Distinct, non-empty lesson types for one semester, alphabetically.
        kinds = set()
        for lesson in semester_entry.get("timetable", []):
            kind = lesson.get("lessonType")
            if kind:
                kinds.add(kind)
        return sorted(kinds)

    semesters = []
    for entry in payload.get("semesterData", []):
        semesters.append(
            {
                "semester": entry.get("semester"),
                "examDate": entry.get("examDate"),
                "lessonTypes": _lesson_types(entry),
            }
        )

    # Copy the headline fields first, then the per-semester summary, then the
    # relationship fields, so the key order of the result stays the same.
    headline_keys = ("moduleCode", "title", "description", "moduleCredit", "faculty", "department")
    relationship_keys = ("prerequisite", "preclusion", "fulfillRequirements")

    overview: Dict[str, Any] = {key: payload.get(key) for key in headline_keys}
    overview["semesterData"] = semesters
    for key in relationship_keys:
        overview[key] = payload.get(key)
    return overview


@tool
def nusmods_module_prerequisites(module_code: str, acad_year: Optional[str] = None) -> Dict[str, Any]:
    """Surface prerequisite, preclusion, and fulfilment data for a module.

    Use this tool when the student is checking eligibility, dependency chains,
    or which later modules list the target as a prerequisite. The schema narrows
    the NUSMods payload down to the relationship fields that matter for advising.

    Inputs:
    - module_code (str): NUS module code being evaluated. Required.
    - acad_year (str, optional): Academic year in "YYYY-YYYY" format. Defaults to
      the client's configured year when omitted.

    Output (dict):
    - moduleCode: Normalised module code.
    - title: Module title for context in the response.
    - prerequisite: Human-readable prerequisite description.
    - prerequisiteTree: Structured prerequisite tree usable for reasoning.
    - fulfillRequirements: List of modules that accept this module as fulfilment.
    - preclusion / corequisite: Additional relationship metadata to mention if relevant.
    """
    # Keep the focus on dependency-related keys so the agent can reason about eligibility.
    data = client.module(module_code, acad_year)

    # The NUSMods v2 module payload names the structured tree "prereqTree".
    # Reading "prerequisiteTree" alone always produced None, so look up the
    # real field first and keep the old name only as a fallback. The output
    # key stays "prerequisiteTree" so the documented schema is unchanged.
    prerequisite_tree = data.get("prereqTree")
    if prerequisite_tree is None:
        prerequisite_tree = data.get("prerequisiteTree")

    return {
        "moduleCode": data.get("moduleCode"),
        "title": data.get("title"),
        "prerequisite": data.get("prerequisite"),
        "prerequisiteTree": prerequisite_tree,
        "fulfillRequirements": data.get("fulfillRequirements"),
        "preclusion": data.get("preclusion"),
        "corequisite": data.get("corequisite"),
    }


@tool
def nusmods_module_timetable(
    module_code: str,
    acad_year: Optional[str] = None,
    semester: Optional[int] = None,
    limit_lessons: Optional[int] = 20,
) -> Dict[str, Any]:
    """Summarise the module timetable across semesters and lesson groupings.

    Call this tool when the question involves class availability, lesson timings,
    or exam dates for a specific semester. It can optionally filter to one semester
    and clamps the number of raw lesson rows so the response stays manageable.

    Inputs:
    - module_code (str): NUS module code to inspect. Required.
    - acad_year (str, optional): Academic year in "YYYY-YYYY" format. Defaults to
      the client's configured year when omitted.
    - semester (int, optional): Semester number (1 or 2). When omitted, returns all
      semesters available in the academic year.
    - limit_lessons (int, optional): Maximum timetable rows to include per semester.
      Use a smaller value if the schedule is extremely long.

    Output (dict):
    - moduleCode: Normalised module code.
    - acadYear: Academic year used for the lookup.
    - semesterData: List with one entry per semester that contains:
        * semester: Semester number.
        * examDate: Exam date for the semester, or null when none is listed.
        * lessons: Timetable entries with class number, activity type, day, and time.
        * totalLessons: Number of timetable rows before limit_lessons was applied;
          larger than len(lessons) when the list was truncated.
    """
    # Validates the code (raises ValueError when empty) before any lookup.
    normalised_code = client._normalise_code(module_code)
    semester_data = client.module_timetable(module_code, acad_year, semester)

    # Normalise the nested structure into a concise dictionary for the LLM.
    shaped: List[Dict[str, Any]] = []
    for sem in semester_data:
        all_lessons = sem.get("timetable") or []
        lessons = all_lessons
        if limit_lessons is not None:
            # Clamp at zero: a negative slice bound would drop rows from the
            # end of the list instead of limiting how many are returned.
            lessons = all_lessons[: max(0, limit_lessons)]
        shaped.append({
            "semester": sem.get("semester"),
            # The description above promises exam dates, so pass them through.
            "examDate": sem.get("examDate"),
            "lessons": lessons,
            "totalLessons": len(all_lessons),
        })
    return {
        "moduleCode": normalised_code,
        "acadYear": acad_year or client.default_acad_year,
        "semesterData": shaped,
    }


@tool
def nusmods_module_search(
    query: str,
    acad_year: Optional[str] = None,
    level: Optional[int] = None,
    limit: int = 10,
) -> Dict[str, Any]:
    """Locate modules by keyword, optionally filtered by level, for discovery tasks.

    Trigger this tool when the student is exploring potential electives, looking
    for modules that match a theme, or asking for options at a specific level.
    The search is performed client-side against the cached module list so repeat
    queries remain responsive within a notebook session.

    Inputs:
    - query (str): Keyword to match against module codes and titles. Required.
    - acad_year (str, optional): Academic year in "YYYY-YYYY" format. Defaults to
      the client's configured year when omitted.
    - level (int, optional): Restrict results to a numeric module level (e.g. 1, 2, 3).
    - limit (int, optional): Maximum number of results to return. Defaults to 10.

    Output (dict):
    - query: Echo of the original search term.
    - acadYear: Academic year used for the search.
    - count: Number of modules returned.
    - results: List of module summaries, each containing moduleCode, title,
      semesters (semester numbers in which the module is offered), and
      moduleCredit. moduleCredit is null when the catalogue listing does not
      carry it; call nusmods_module_overview for the MC value of a module.
    """
    # Search reuses the cached module list for responsive autocomplete-style experiences.
    matches = client.search_modules(query, acad_year, level=level, limit=limit)
    return {
        "query": query,
        "acadYear": acad_year or client.default_acad_year,
        "count": len(matches),
        "results": [
            {
                "moduleCode": mod.get("moduleCode"),
                "title": mod.get("title"),
                # The catalogue listing exposes offering semesters; surface
                # them so discovery answers can say when a module runs.
                "semesters": mod.get("semesters"),
                # Kept for schema compatibility; the listing endpoint is not
                # expected to include credits, so this is usually None.
                "moduleCredit": mod.get("moduleCredit"),
            }
            for mod in matches
        ],
    }


# Registry of every NUSMods tool defined above. The same list is bound to the
# chat model and handed to the LangGraph ToolNode, so a tool added here is
# picked up by both.
API_TOOLS = [
    nusmods_module_overview,
    nusmods_module_prerequisites,
    nusmods_module_timetable,
    nusmods_module_search,
]


## LLM configuration

The assistant is powered by an Ollama-served Qwen 3 14B model with reasoning enabled.
We bind the previously defined tools so the model can call them via function-calling.


In [None]:
# Instantiate the local Ollama model and attach the tool schema.
# NOTE(review): with reasoning=True the model's thinking output presumably
# counts against num_predict; confirm that 256 tokens leaves enough room for
# a tool call or a final answer after the reasoning trace.
llm = ChatOllama(
    model="qwen3:14b",
    temperature=0.2,
    num_predict=256,
    reasoning=True,
    validate_model_on_init=True,
)

# Tool binding enables structured tool-calling responses.
llm_with_tools = llm.bind_tools(API_TOOLS)

# System prompt keeps the agent grounded in planning responsibilities.
# The adjacent string literals are concatenated into one prompt, so each
# sentence ends with a trailing space to keep the sentences separated.
system_prompt = SystemMessage(
    content=(
        "You are an academic planning assistant for the NUS Data Science & Analytics major. "
        "Always review the full chat history so follow-up questions stay consistent. "
        "Use a private chain-of-thought to break complex requests into sub-questions, plan the tool-call sequence, and call multiple tools when needed before answering. "
        "If a student's question is ambiguous or missing critical details, ask for clarification before committing to a tool plan. "
        "Ground every module fact in the provided NUSMods API tools and cross-check conflicting data. "
        "If a question falls outside academic planning, politely steer the student back to relevant topics. "
        "If a module cannot be located, apologise and suggest verifying the code or academic year, "
        "and if the tools cannot answer, explain the limitation instead of guessing."
    )
)



## LangGraph conversation loop

This section wires the language model and the toolset into a two-node graph:
the assistant decides whether to answer directly or call a tool, and the tool
node executes the request before control returns to the model.


In [None]:
# LangGraph node: run the tool-aware model over the system prompt plus history.
def assistant(state: MessagesState):
    """Invoke the LLM on the conversation so far and return its reply.

    The system prompt is prepended on every call rather than stored in the
    state. The reply is returned as a one-element "messages" list.
    """
    conversation = [system_prompt] + state["messages"]
    reply = llm_with_tools.invoke(conversation)
    return {"messages": [reply]}


# Assemble the graph over the message-list state: one node for the model and
# one node that executes whichever NUSMods tools the model requested.
builder = StateGraph(MessagesState)
builder.add_node("assistant", assistant)
builder.add_node("tools", ToolNode(API_TOOLS))

# assistant -> tool (conditional) -> assistant forms a loop until no more tool calls are required.
# Every run starts at the assistant node; `tools_condition` decides after each
# model reply whether to visit the tools node, and tool output always flows
# back to the assistant.
builder.add_edge(START, "assistant")
builder.add_conditional_edges("assistant", tools_condition)
builder.add_edge("tools", "assistant")


# Compile once; the resulting `graph` is what the chat helpers stream from.
graph = builder.compile()


## Chat helpers for notebook exploration

The utilities below maintain a bounded conversation state, stream intermediate
tool traces, and expose helper functions so you can quickly test prompts.


In [None]:
# Keep a rolling chat history so the agent has short-term memory.
MAX_HISTORY = 20
chat_state: Dict[str, Any] = {"messages": []}


def _trim(messages: Iterable[Any]) -> List[Any]:
    """Limit the stored history so notebook experiments stay lightweight.

    Keeps the most recent MAX_HISTORY messages, then drops any tool-result
    messages left at the very start of that window. Cutting by count alone can
    separate a tool result from the assistant message that requested it, and
    an orphaned tool result at the head of the history gives the model an
    answer with no matching call.

    Args:
        messages: Conversation messages, oldest first.

    Returns:
        A new list with at most MAX_HISTORY items that does not begin with a
        message whose `type` attribute is "tool".
    """
    recent = list(messages)[-MAX_HISTORY:]
    start = 0
    # Messages are identified by their `type` attribute, the same attribute
    # the pretty-printing helpers rely on.
    while start < len(recent) and getattr(recent[start], "type", None) == "tool":
        start += 1
    return recent[start:]


def _msg_type(message: Any) -> str:
    """Return an upper-case label for a message, for trace printing.

    Uses the message's `type` attribute when it has one and falls back to the
    class name otherwise.
    """
    try:
        label = message.type
    except AttributeError:
        label = type(message).__name__
    return label.upper()


def _msg_text(message: Any) -> str:
    """Return the printable body of a message.

    Tool messages have their content coerced with `str`; any other object
    yields its `content` attribute as-is, or its string form when it has no
    such attribute.
    """
    if not isinstance(message, ToolMessage):
        return getattr(message, "content", str(message))
    return str(message.content)


def _msg_metadata(message: Any) -> Dict[str, Any]:
    """Gather the non-empty diagnostic attributes of a message.

    Looks at `additional_kwargs`, `response_metadata` and `tool_calls`, in
    that order, and includes only those that exist and are truthy.
    """
    attribute_names = ("additional_kwargs", "response_metadata", "tool_calls")
    found = ((name, getattr(message, name, None)) for name in attribute_names)
    return {name: value for name, value in found if value}


def reset_chat() -> None:
    """Start a new conversation by replacing the global state with an empty history."""
    global chat_state
    chat_state = dict(messages=[])
    print("Chat state reset.")


def ask(prompt: str, show_trace: bool = True, developer_view: bool = False) -> None:
    """Send one user turn through the graph and print the outcome.

    Args:
        prompt: The student's message, appended to the stored history.
        show_trace: Print each new message as the graph streams state updates.
        developer_view: Also print the exact model input, per-message
            metadata, and the stored chat state afterwards. Implies
            `show_trace`.

    Side effects:
        Replaces the global `chat_state` with the final streamed state (with
        its history trimmed) and prints the most recent AI message content.
    """
    global chat_state

    def _print_metadata(message: Any) -> None:
        # Metadata lines are only emitted when there is something to show.
        extras = _msg_metadata(message)
        if extras:
            print(f"    metadata: {extras}")

    def _print_numbered(title: str, messages: Iterable[Any]) -> None:
        # Shared layout for the two developer-view dumps.
        print(title)
        for position, message in enumerate(messages, start=1):
            print(f"{position:02d}. [{_msg_type(message)}] {_msg_text(message)}")
            _print_metadata(message)
        print("=" * 40)

    tracing = show_trace or developer_view
    history = _trim(chat_state["messages"] + [HumanMessage(content=prompt)])

    if developer_view:
        _print_numbered("=== Developer View: Model Input ===", [system_prompt] + history)

    if tracing:
        print("=== Stream trace ===")

    # `seen` tracks how many messages have already been displayed so each
    # streamed snapshot only prints what was appended since the previous one.
    seen = len(history)
    latest = None
    for latest in graph.stream({"messages": history}, stream_mode="values"):
        snapshot = latest["messages"]
        if tracing:
            for message in snapshot[seen:]:
                print(f"[{_msg_type(message)}] {_msg_text(message)}")
                if developer_view:
                    _print_metadata(message)
                print("-" * 40)
        seen = len(snapshot)

    if latest is not None:
        chat_state = latest
        chat_state["messages"] = _trim(chat_state["messages"])

    if developer_view:
        _print_numbered("=== Developer View: Stored Chat State ===", chat_state["messages"])

    # Finish by echoing the newest AI message, if the history holds one.
    answer = next(
        (message for message in reversed(chat_state["messages"]) if isinstance(message, AIMessage)),
        None,
    )
    if answer is not None:
        print(answer.content)



## Example runs

Use these sample prompts to sanity-check the tool orchestration.


In [None]:
# Start from a clean slate so repeated execution produces the full trace.
# The question names a single module code and asks for its key details.
reset_chat()
ask("What are the key details for CS3244?")


In [None]:
# Explore timetable availability by semester.
# Continues the conversation started above; there is no reset in this cell.
ask("Show me the timetable options for ST3131 in semester 1.")


In [None]:
# Run a catalogue search filtered to level-3000 modules.
# The prompt says "level 3"; the model is expected to turn that into the
# search tool's `level` argument.
ask("Find level 3 modules about data for the DSA major.")
