In [None]:
!pip install langchain llama_index openai
!pip install -qU langchain-openai

Collecting llama_index
  Downloading llama_index-0.12.32-py3-none-any.whl.metadata (12 kB)
Collecting llama-index-agent-openai<0.5.0,>=0.4.0 (from llama_index)
  Downloading llama_index_agent_openai-0.4.6-py3-none-any.whl.metadata (727 bytes)
Collecting llama-index-cli<0.5.0,>=0.4.1 (from llama_index)
  Downloading llama_index_cli-0.4.1-py3-none-any.whl.metadata (1.5 kB)
Collecting llama-index-core<0.13.0,>=0.12.32 (from llama_index)
  Downloading llama_index_core-0.12.32-py3-none-any.whl.metadata (2.6 kB)
Collecting llama-index-embeddings-openai<0.4.0,>=0.3.0 (from llama_index)
  Downloading llama_index_embeddings_openai-0.3.1-py3-none-any.whl.metadata (684 bytes)
Collecting llama-index-indices-managed-llama-cloud>=0.4.0 (from llama_index)
  Downloading llama_index_indices_managed_llama_cloud-0.6.11-py3-none-any.whl.metadata (3.6 kB)
Collecting llama-index-llms-openai<0.4.0,>=0.3.0 (from llama_index)
  Downloading llama_index_llms_openai-0.3.38-py3-none-any.whl.metadata (3.3 kB)
Colle

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the paths used later in this notebook
# all live under that mount point.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# text_extractor.py

# Install dependencies if needed:
!pip install python-docx PyPDF2

import os
from docx import Document
from PyPDF2 import PdfReader

# Folder scanned by main() for source documents (.docx, .pdf and .txt are handled).
INPUT_DIR = "/content/drive/MyDrive/airaChallenge/wireless_documents"
# Folder that receives one extracted "<base>.txt" file per handled input document.
OUTPUT_DIR = "/content/drive/MyDrive/airaChallenge/docs"

# Create the output folder up front; exist_ok=True makes re-running the cell safe.
os.makedirs(OUTPUT_DIR, exist_ok=True)

def extract_docx(path: str) -> str:
    """Return the paragraph text of a .docx file, one paragraph per line.

    Only ``doc.paragraphs`` is read; the paragraph texts are joined with
    newlines (empty paragraphs therefore produce empty lines).

    The python-docx constructor is imported here under an alias on purpose:
    a later cell of this notebook rebinds the module-level name ``Document``
    to ``llama_index.core.schema.Document``. Resolving the global at call
    time would then pick up the wrong class, so this function must not
    depend on it.

    Args:
        path: Location of the .docx file to read.

    Returns:
        The document's paragraph texts joined by ``"\\n"``.
    """
    from docx import Document as DocxDocument

    doc = DocxDocument(path)
    return "\n".join(para.text for para in doc.paragraphs)

def extract_pdf(path: str) -> str:
    """Concatenate the extractable text of every page of a PDF.

    Each page's ``extract_text()`` is called once, in page order. Pages for
    which it returns a falsy value (``None`` or an empty string) are left
    out; the remaining page texts are joined with newlines.

    Args:
        path: Location of the PDF file to read.

    Returns:
        The non-empty page texts joined by ``"\\n"`` (an empty string when
        no page yields text).
    """
    page_texts = (page.extract_text() for page in PdfReader(path).pages)
    return "\n".join(chunk for chunk in page_texts if chunk)

def main():
    """Extract plain text from every supported file in INPUT_DIR.

    For each regular file in INPUT_DIR (processed in sorted order so the log
    is reproducible), the text is obtained according to the lower-cased
    extension:

    * ``.docx`` -> ``extract_docx``
    * ``.pdf``  -> ``extract_pdf``
    * ``.txt``  -> read as UTF-8

    and written as UTF-8 to ``OUTPUT_DIR/<base>.txt``. Files with any other
    extension are reported and skipped. Directory entries are skipped as
    well; previously a sub-folder whose name ended in a supported extension
    (e.g. ``notes.txt/``) crashed the whole run when it was opened as a file.

    Note: the output name is derived from the base name only, so two inputs
    such as ``a.docx`` and ``a.pdf`` map to the same ``a.txt`` and the one
    processed later overwrites the earlier one.
    """
    for filename in sorted(os.listdir(INPUT_DIR)):
        input_path = os.path.join(INPUT_DIR, filename)

        # os.listdir also returns sub-directories; only regular files can be extracted.
        if not os.path.isfile(input_path):
            print(f"Skipping non-file entry: {filename}")
            continue

        base, ext = os.path.splitext(filename)
        ext = ext.lower()  # treat ".PDF" like ".pdf"
        output_path = os.path.join(OUTPUT_DIR, f"{base}.txt")

        if ext == ".docx":
            text = extract_docx(input_path)
        elif ext == ".pdf":
            text = extract_pdf(input_path)
        elif ext == ".txt":
            with open(input_path, "r", encoding="utf-8") as f:
                text = f.read()
        else:
            print(f"Skipping unsupported file: {filename}")
            continue

        with open(output_path, "w", encoding="utf-8") as f:
            f.write(text)
        print(f"Extracted {filename} → {base}.txt")

# Run the extraction when this code is executed directly (as a script or as
# a notebook cell), but not when it is imported as a module.
if __name__ == "__main__":
    main()


Skipping unsupported file: 38201-000.doc
Skipping unsupported file: 38201-001.doc
Skipping unsupported file: 38.201_v0.0.2.doc
Extracted R1-1713894.docx → R1-1713894.txt
Skipping unsupported file: 38201-010.doc
Skipping unsupported file: 38.201_v0.1.1.doc
Skipping unsupported file: 38201-100.doc
Skipping unsupported file: R1-1719242 38.201_v1.0.1.doc
Skipping unsupported file: 38201-110.doc
Skipping unsupported file: 38201-120.doc
Skipping unsupported file: 38201-120cm.doc
Skipping unsupported file: 38201-f00.doc
Skipping unsupported file: 38201-g00.doc
Skipping unsupported file: 38201-h00.doc
Skipping unsupported file: 38201-i00.doc


In [None]:
# toy_agent.py

import os
import json
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from langchain_openai import ChatOpenAI
from langchain.agents import initialize_agent, Tool
from llama_index.core.schema import Document
# 1. Provide the OpenAI API key without hardcoding it in the notebook.
#    A key that is already present in the environment is kept as-is (the
#    previous unconditional assignment overwrote it with a placeholder);
#    otherwise the user is prompted and the input is not echoed.
if not os.environ.get("OPENAI_API_KEY"):
    from getpass import getpass

    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

# 2. Load documents (place your .txt specs in a folder named 'docs/')
docs = SimpleDirectoryReader("/content/drive/MyDrive/airaChallenge/docs").load_data()
index = VectorStoreIndex.from_documents(docs)

# Build the query engine once and keep it under its own name. The names
# `docs` and `index` are reassigned further down in this notebook, so a
# function that looked up `index` at call time would silently start
# querying a different corpus; it would also rebuild the engine per call.
_spec_query_engine = index.as_query_engine()

# 3. Define a tool that queries the index
def query_docs(query: str) -> str:
    """Query the spec-document index and return the response as text.

    Args:
        query: Natural-language question or search phrase.

    Returns:
        ``str()`` of the response object produced by the query engine.
    """
    response = _spec_query_engine.query(query)
    return str(response)

# Expose query_docs to the agent as its single tool. The description is the
# text the agent sees when deciding whether to call it.
tools = [
    Tool(
        description="Query telecom spec documents for relevant snippets",
        func=query_docs,
        name="doc_query",
    )
]

# 4. Build the LangChain agent around the tool list.
#    OPENAI_API_KEY must be set in the environment before this runs.
#    temperature=0 is passed to the chat model.
llm = ChatOpenAI(temperature=0)
agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent="zero-shot-react-description",
    verbose=True,  # print the agent's intermediate steps
)

# 5. Simple CLI loop that prints JSON results
def main():
    """Interactive loop: read a question, run the agent, print JSON.

    Each non-empty question is passed to the module-level ``agent`` and the
    result is printed as ``{"question": ..., "answer": ...}``. The loop ends
    when the user types ``exit`` or ``quit`` (case-insensitive, surrounding
    whitespace ignored), or when input is closed or interrupted.

    Note: this definition reuses the name ``main`` that the text-extraction
    cell also defines; whichever cell ran last wins.
    """
    print("🔍 Toy RAG Agent CLI (type 'exit' to quit)")
    while True:
        try:
            q = input("\nAsk a telecom question: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin / interrupted prompt: leave cleanly instead of a traceback.
            print("Goodbye!")
            break
        if q.lower() in ("exit", "quit"):
            print("Goodbye!")
            break
        if not q:
            # Nothing typed: re-prompt rather than sending an empty question to the agent.
            continue
        answer = agent.run(q)
        output = {"question": q, "answer": answer}
        # ensure_ascii=False keeps non-ASCII characters readable instead of \uXXXX escapes.
        print(json.dumps(output, indent=2, ensure_ascii=False))

# Start the interactive loop when this code is executed directly (as a script
# or as a notebook cell); nothing runs when it is imported as a module.
if __name__ == "__main__":
    main()


  agent = initialize_agent(


🔍 Toy RAG Agent CLI (type 'exit' to quit)

Ask a telecom question: exit
Goodbye!


In [None]:
# Read the training Q&A file. The encoding is given explicitly (as for the
# other file reads in this notebook) so decoding does not depend on the
# platform's default locale.
with open("/content/drive/MyDrive/airaChallenge/data/train_data.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# The loop below expects an iterable of entry dicts. If the file holds a
# JSON object instead of a list, iterate over its values rather than its
# keys (assumes such an object maps ids to entries - TODO confirm).
qa_entries = data.values() if isinstance(data, dict) else data

# 3. Convert each QA pair into a Document for LlamaIndex
docs = []
for entry in qa_entries:
    # One "key: value" line per field whose key starts with "option",
    # in the order the keys appear in the entry.
    option_lines = "\n".join(
        f"{k}: {v}" for k, v in entry.items() if k.startswith("option")
    )
    # Combine question, options, and explanation into one text blob
    content = (
        entry["question"] + "\n"
        + option_lines + "\n\n"
        + "Explanation: " + entry["explanation"]
    )
    # The answer and category are attached as metadata rather than put in the text.
    meta = {"answer": entry["answer"], "category": entry["category"]}
    docs.append(Document(text=content, metadata=meta))

# 4. Build a small vector index
index = VectorStoreIndex.from_documents(docs)

# Create the query engine once, under a dedicated name, instead of rebuilding
# it from the shared global `index` on every call.
_teleqna_query_engine = index.as_query_engine()

# 5. Turn it into a LangChain "Tool"
def query_teleqna(q: str) -> str:
    """Query the Q&A index and return the engine's response as text.

    Args:
        q: Natural-language question or search phrase.

    Returns:
        ``str()`` of the response object produced by the query engine.
    """
    resp = _teleqna_query_engine.query(q)
    return str(resp)

# Wrap query_teleqna as a tool; the description is the text the agent sees
# when deciding whether to call it.
qa_tool = Tool(
    description="Looks up Telecom Q&A from the TeleQnA dataset",
    func=query_teleqna,
    name="teleqna_lookup",
)

# 6. Build the agent around this QnA tool; further tools (e.g., a 3GPP RAG
#    tool) can be appended to the list. OPENAI_API_KEY must be set.
#    Note that this reassigns `llm` and `agent`.
llm = ChatOpenAI(temperature=0)
agent = initialize_agent(
    tools=[qa_tool],
    llm=llm,
    agent="zero-shot-react-description",
    verbose=True,  # print the agent's intermediate steps
)

# 7. Single‑loop CLI to test
# Interactive test loop: runs when this code is executed directly (script or
# notebook cell). Ends on 'exit' / 'quit' (case-insensitive, surrounding
# whitespace ignored) or when input is closed or interrupted.
if __name__ == "__main__":
    while True:
        try:
            q = input("Ask me a telecom question (or 'exit'): ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin / interrupted prompt: stop without a traceback.
            break
        if q.lower() in ("exit", "quit"):
            break
        if not q:
            # Nothing typed: re-prompt rather than sending an empty question to the agent.
            continue
        print(agent.run(q))

Ask me a telecom question (or 'exit'): What is included in the UE CONTEXT SETUP REQUEST message in the Intra-CU topology adaptation procedure in SA? [3GPP Release 17]


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI should use the teleqna_lookup tool to find the answer to this specific question.
Action: teleqna_lookup
Action Input: UE CONTEXT SETUP REQUEST message in the Intra-CU topology adaptation procedure in SA 3GPP Release 17[0m
Observation: [36;1m[1;3mThe UE CONTEXT SETUP REQUEST message in the Intra-CU topology adaptation procedure in SA 3GPP Release 17 includes a HandoverPreparationInformation.[0m
Thought:[32;1m[1;3mI need to continue looking up more details to fully understand the contents of the UE CONTEXT SETUP REQUEST message.
Action: teleqna_lookup
Action Input: HandoverPreparationInformation in UE CONTEXT SETUP REQUEST message[0m
Observation: [36;1m[1;3mThe HandoverPreparationInformation message in the UE CONTEXT SETUP REQUEST message includes the N