In [1]:
import os
import json
import openai
import asyncio
from langchain_groq import ChatGroq
from langchain_core.messages import HumanMessage
from dotenv import load_dotenv
import pageindex.utils as utils 

# Load environment variables via python-dotenv before the keys are read below.
load_dotenv()

# API keys for the two LLM backends; os.getenv yields None when a variable is unset.
# NOTE(review): call_llm reads both variables again through os.getenv, so these
# constants are not referenced by the cells visible in this notebook.
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Parse the JSON document stored in RLM_tree.json into `tree_data`.
with open("RLM_tree.json", mode="r", encoding="utf-8") as tree_file:
    tree_data = json.loads(tree_file.read())

In [3]:
async def call_llm(prompt: str, model: str, reasoning: bool, temperature: float = 0.0):
    """Send a single-turn prompt to an LLM backend and return the reply text.

    Args:
        prompt: Text sent as the only user message.
        model: Model identifier forwarded unchanged to the selected backend.
        reasoning: When true, the request goes to OpenRouter through the
            OpenAI-compatible async client; otherwise it goes to Groq through
            ``ChatGroq``.
        temperature: Sampling temperature forwarded to the backend.

    Returns:
        The reply text with surrounding whitespace removed, or an empty string
        when the backend returns no choices or no text content.
    """
    if reasoning:
        # The context manager closes the client's HTTP connections on exit
        # instead of leaving one un-closed client behind per call.
        async with openai.AsyncOpenAI(
            api_key=os.getenv("OPENROUTER_API_KEY"),
            base_url="https://openrouter.ai/api/v1",
        ) as client:
            response = await client.chat.completions.create(
                model=model,
                messages=[{"role": "user", "content": prompt}],
                temperature=temperature,
            )

        # `choices` can be empty and `message.content` can be None; calling
        # .strip() on either would raise instead of giving a usable reply.
        if not response.choices:
            return ""
        content = response.choices[0].message.content
        return (content or "").strip()

    groq_llm = ChatGroq(
        groq_api_key=os.getenv("GROQ_API_KEY"),
        model_name=model,   # e.g. "openai/gpt-oss-120b" for generating the answer
        temperature=temperature,
    )

    response = await groq_llm.ainvoke([HumanMessage(content=prompt)])

    # Same None/empty guard as the OpenRouter branch.
    return (response.content or "").strip()

In [4]:
# Lookup table consumed by ask(), which fetches entries by node id and reads
# their "text"/"title" fields.
# NOTE(review): presumably a dict of node_id -> node dict; confirm against
# pageindex.utils.create_node_mapping.
node_map = utils.create_node_mapping(tree_data)

In [5]:
def _parse_tree_search_reply(raw_reply):
    """Return the JSON object contained in an LLM reply, or None if there is none."""
    text = (raw_reply or "").strip()
    candidates = [text]
    # Models often wrap the JSON in ```json fences or add prose around it, so
    # also try the span between the outermost braces.
    first_brace, last_brace = text.find("{"), text.rfind("}")
    if 0 <= first_brace < last_brace:
        candidates.append(text[first_brace:last_brace + 1])

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        # A list/number/string is valid JSON but not the object the prompt asks for.
        if isinstance(parsed, dict):
            return parsed
    return None


async def ask(
    query: str,
    search_model: str = "stepfun/step-3.5-flash:free",
    answer_model: str = "llama-3.3-70b-versatile",
) -> str:
    """Answer a question about the document using tree search plus generation.

    Step 1 asks ``search_model`` which nodes of the module-level ``node_map``
    (with the ``text`` field removed) are relevant. Step 2 joins the ``text``
    of those nodes and asks ``answer_model`` to answer from that context only.
    The reasoning text and the retrieved nodes are printed along the way.

    Args:
        query: The user's question.
        search_model: Model used for the node-selection call
            (``call_llm`` with ``reasoning=True``).
        answer_model: Model used for the final answer
            (``call_llm`` with ``reasoning=False``).

    Returns:
        The generated answer, or a fixed fallback message when the search reply
        contains no JSON object or no selected node provides text.
    """
    tree_without_text = utils.remove_fields(node_map, fields=['text'])

    search_prompt = f"""
You are given a question and a tree structure of a document.
Each node contains a node id, node title, and a corresponding summary.
Your task is to find all nodes that are likely to contain the answer to the question.

Question: {query}

Document tree structure:
{json.dumps(tree_without_text, indent=2)}

Please reply in the following JSON format:
{{
    "thinking": "<Your thinking process on which nodes are relevant to the question>",
    "node_list": ["node_id_1", "node_id_2", ..., "node_id_n"]
}}
Directly return the final JSON structure. Do not output anything else.
"""
    tree_search_result = await call_llm(search_prompt, reasoning=True, model=search_model)

    tree_search_result_json = _parse_tree_search_reply(tree_search_result)
    if tree_search_result_json is None:
        return "Sorry, I couldn't process that query. Please try again."

    print("\nReasoning Process:")
    utils.print_wrapped(str(tree_search_result_json.get("thinking") or ""))

    # Keep only usable ids: the model may return a non-list, unhashable
    # entries, or repeated ids (which would duplicate context).
    raw_node_list = tree_search_result_json.get("node_list")
    if not isinstance(raw_node_list, list):
        raw_node_list = []
    node_list = list(dict.fromkeys(
        nid for nid in raw_node_list if isinstance(nid, (str, int))
    ))

    # Print retrieved nodes and collect their text in one pass.
    print("\nRetrieved Nodes:")
    context_parts = []
    for node_id in node_list:
        node = node_map.get(node_id)
        if not node:
            continue
        page = node.get("page_index", f"{node.get('start_index', '?')}-{node.get('end_index', '?')}")
        print(f"  Node ID: {node.get('node_id', node_id)}\t Page: {page}\t Title: {node.get('title', '?')}")
        # Skip nodes with missing or empty text so the join below cannot fail
        # on None or pad the context with blank sections.
        if node.get("text"):
            context_parts.append(node["text"])

    relevant_content = "\n\n".join(context_parts)

    if not relevant_content:
        return "No relevant sections found in the document."

    # NOTE(review): only the heading is printed; the preview body (first 1000
    # characters of the context) was disabled in the original cell.
    print("\nRetrieved Context (preview):")

    answer_prompt = f"""
Answer the question based on the context:

Question: {query}
Context: {relevant_content}

Provide a clear, concise answer based only on the context provided.
"""
    answer = await call_llm(answer_prompt, model=answer_model, reasoning=False)
    return answer


In [6]:
async def main():
    """Run the interactive question/answer loop on stdin/stdout.

    Prints a banner, then repeatedly reads a line after the ``You: `` prompt.
    Blank input is ignored, ``exit`` (any letter case) ends the session, and
    any other input is passed to ``ask`` with the reply printed after ``Bot:``.
    """
    rule = "=" * 60
    banner = (
        "\n" + rule,
        "  PageIndex Document Chatbot",
        "  Ask any question about the document.",
        '  Type "exit" to quit.',
        rule,
    )
    for banner_line in banner:
        print(banner_line)

    while True:
        print()
        user_text = input("You: ").strip()

        if user_text.lower() == "exit":
            print("Goodbye!")
            return

        # Blank input falls through to the next prompt.
        if user_text:
            reply = await ask(user_text)
            print(f"\nBot: {reply}")


In [7]:
# Top-level `await` relies on the notebook kernel's already-running event loop
# (IPython autoawait); in a plain script use asyncio.run(main()) instead.
await main()


  PageIndex Document Chatbot
  Ask any question about the document.
  Type "exit" to quit.


Reasoning Process:
The question asks about the problem Recursive Language Models (RLMs) aim to solve and how they
differ from context compaction approaches. Based on the document summaries: Node 0000 (Abstract)
introduces RLMs as a paradigm for processing arbitrarily long prompts, highlighting their ability to
exceed standard context windows and outperform existing methods. Node 0001 (Introduction) explicitly
states the problem: limitations of LLMs with small context windows and context rot, and contrasts
RLMs with context condensation and prior agent architectures. Node 0002 (Recursive Language Models)
explains RLMs' approach of treating prompts as an external environment and contrasts them with
scaffolds that copy prompts into context or generate output directly, both limited by context size.
Node 0007 (Related Works) categorizes long-context approaches, mentions lossy context management
(li