In [1]:
import os

from langchain_groq import ChatGroq
from langchain_core.messages import HumanMessage

from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Chat model used for answer generation. The API key is looked up in the
# GROQ_API_KEY environment variable (a KeyError is raised when it is unset),
# so no secret is stored in the notebook. Temperature is pinned to 0.
llm = ChatGroq(
    model_name="llama-3.3-70b-versatile",
    temperature=0,
    groq_api_key=os.environ["GROQ_API_KEY"],
)

In [3]:
# Location of the PDF to index. Set the RLM_PDF_PATH environment variable to
# point at your own copy of the document; when the variable is unset the
# original author's local path is used, so existing setups behave exactly as
# before. This keeps the notebook runnable on machines other than the author's.
pdf_path = os.environ.get(
    "RLM_PDF_PATH",
    r"C:\Users\amanm\Desktop\learning\pageindex-exp\RLM.pdf",
)

# Parse the PDF into LangChain documents.
# NOTE(review): PyPDFLoader presumably yields one document per page, with the
# page index stored under metadata["page"] — confirm against the loader docs.
loader = PyPDFLoader(pdf_path)
documents = loader.load()

In [4]:
# Split the loaded documents into smaller chunks for embedding.
# chunk_size=500 with chunk_overlap=50 — presumably measured in characters
# under the splitter's default length function (TODO confirm).
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=50, chunk_size=500)

split_docs = text_splitter.split_documents(documents)

In [5]:
# Embedding model used to vectorise the chunks, selected by its model name.
# NOTE(review): the captured cell output shows a warning that points at this
# call — likely LangChain's deprecation notice for the community
# HuggingFaceEmbeddings class. Consider migrating to the `langchain_huggingface`
# package once it is available in this environment (confirm before switching).
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

  embeddings = HuggingFaceEmbeddings(
Loading weights: 100%|██████████| 103/103 [00:00<00:00, 707.64it/s, Materializing param=pooler.dense.weight]                             
[1mBertModel LOAD REPORT[0m from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.[0m


In [6]:
# Build a FAISS vector store from the chunks, using `embeddings` to vectorise them.
vectorstore = FAISS.from_documents(split_docs, embeddings)

# Retriever view over the store, configured with k=5 results per query.
retriever = vectorstore.as_retriever(search_kwargs=dict(k=5))

In [7]:
def classical_rag(question):
    """Answer ``question`` with a single retrieve-then-generate pass.

    The module-level ``retriever`` is invoked with the question, the text of
    the returned documents is joined (blank line between chunks) into one
    context string, and the module-level ``llm`` is invoked once with a single
    ``HumanMessage`` containing the question and that context.

    Args:
        question: The question text; passed to the retriever and interpolated
            into the prompt.

    Returns:
        dict with three keys:
            "answer": the ``content`` attribute of the LLM response.
            "num_chunks_used": how many documents the retriever returned.
            "source_pages": the ``"page"`` metadata value of each retrieved
                document, in retrieval order (``None`` when the key is absent).
    """
    hits = retriever.invoke(question)

    # One blank line between chunks so they stay visually separate in the prompt.
    context = "\n\n".join(hit.page_content for hit in hits)

    prompt = f"""
Answer the question based on the context:

Question: {question}
Context: {context}

Provide a clear, concise answer based only on the context provided.
"""

    reply = llm.invoke([HumanMessage(content=prompt)])

    # Page metadata is optional on a document; a missing key is reported as None.
    pages = []
    for hit in hits:
        pages.append(hit.metadata.get("page"))

    return dict(
        answer=reply.content,
        num_chunks_used=len(hits),
        source_pages=pages,
    )

In [12]:
# Example query run through the classical RAG pipeline defined above in this notebook.
question = "How does fine-tuning Qwen3-8B improve its performance as an RLM, and what does this suggest about training models specifically for recursive reasoning?"

result = classical_rag(question)

# Heading and answer are emitted by one call; sep="\n" reproduces the newline
# that separate print() calls would have placed between them.
print("\nANSWER:\n", result["answer"], sep="\n")

# Retrieval diagnostics: number of chunks fed to the model and their page metadata.
print(f"\nChunks Used: {result['num_chunks_used']}")
print(f"Source Pages: {result['source_pages']}")


ANSWER:

Fine-tuning Qwen3-8B as an RLM improves its performance by 28.3% on average, suggesting that training models specifically for recursive reasoning can significantly enhance their ability to manipulate the REPL and launch recursive calls, even when applied to unrelated tasks. This implies that targeted fine-tuning can make models more effective in recursive reasoning tasks, leading to better decision-making and reduced inference costs.

Chunks Used: 5
Source Pages: [4, 13, 23, 0, 6]
