# Basic RAG Demonstration


## 1. Loading Data


In [1]:
import sys
import time
from pathlib import Path

# Locate the project root: the kernel's working directory, or its parent when
# the notebook was launched from inside the `notebooks` folder.
PROJECT_ROOT = Path.cwd().resolve()
PROJECT_ROOT = PROJECT_ROOT.parent if PROJECT_ROOT.name == 'notebooks' else PROJECT_ROOT

# Register the root on sys.path (once) so the `src.*` imports below resolve.
if not any(entry == str(PROJECT_ROOT) for entry in sys.path):
    sys.path.append(str(PROJECT_ROOT))

In [None]:
from src.load_data import load_markdown_files
from src.chunking import chunk_documents
from src.embedder import generate_embeddings, generate_query_embedding
from src.retriever import retrieve
from src.llm_orchestrator import generate_answer

# Load every markdown document found under <project root>/data/raw.
DATA_DIR = PROJECT_ROOT.joinpath('data', 'raw')
docs = load_markdown_files(str(DATA_DIR))
print(f'Loaded {len(docs)} documents from {DATA_DIR}.')

# Peek at the first document, if any: every field except the body text is
# reported as metadata, followed by the first 300 characters of the text.
if docs:
    preview_doc = docs[0]
    meta = dict(preview_doc)
    preview_text = meta.pop('text', '')
    print('Sample metadata:', meta)
    print('Preview snippet:', preview_text[:300], '...')

Loaded 25 documents from C:\Users\tomasz.makowski.2\Desktop\SemesterII\ComputationalIntelligence\Project\agentic-rag-architectures\data\raw.
Sample metadata: {'id': 'a3f35897-f7f7-4c67-83cb-97e4c09053a4', 'filename': 'D01.md', 'path': 'C:\\Users\\tomasz.makowski.2\\Desktop\\SemesterII\\ComputationalIntelligence\\Project\\agentic-rag-architectures\\data\\raw\\D01.md'}
Preview snippet: # The Foundations of Transformation, 2025–2040

As a historian writing from the late encore of the twenty-first century, I am tasked with tracing the undercurrents that made the Polish century of transformation legible to later generations. The period between 2025 and 2040, though often treated as a ...


## 2. Chunking


In [3]:
# Split the loaded documents into overlapping chunks and show the first three.
chunks = chunk_documents(docs, overlap=50, chunk_size=400)
print(f'Generated {len(chunks)} chunks.')
for chunk in chunks[:3]:
    # Flatten newlines so each 150-character preview stays on a single line.
    snippet = ' '.join(chunk.get('text', '')[:150].split('\n'))
    print(dict(
        chunk_id=chunk.get('chunk_id'),
        metadata=chunk.get('metadata', {}),
        snippet=snippet,
    ))


[32m[2025-12-09 12:39:56][INFO][src.chunking] Saved 162 chunks to ..\data\processed\chunks.json[0m


Generated 162 chunks.
{'chunk_id': '327b374b-0509-45a0-8c7e-3165fe75435b', 'metadata': {}, 'snippet': '# The Foundations of Transformation, 2025–2040 As a historian writing from the late encore of the twenty-first century, I am tasked with tracing the u'}
{'chunk_id': '8f0812a2-1891-41bc-867e-8f4c072287e0', 'metadata': {}, 'snippet': 'insight of the time was simple and counterintuitive: digital systems do not merely increase efficiency; they reframe political contingency. When data '}
{'chunk_id': '4c8c7b5a-c4e8-4891-a78c-48f856e1ac50', 'metadata': {}, 'snippet': 'success of the modern administrative state rested on a tacit social contract: the state would govern with greater precision in exchange for a more edu'}


## 3. Embedding


In [4]:
# Embed all chunks with the OpenAI provider. The call returns a pair: the
# embedding matrix and a mapping whose first items are printed as a sample.
(embeddings, index_map) = generate_embeddings(chunks, provider='openai')
print('Embeddings shape:', embeddings.shape)
print('Index map sample:', [pair for pair in index_map.items()][:3])


[32m[2025-12-09 12:39:58][INFO][src.embedder] EMBED | start | chunks=162[0m
[32m[2025-12-09 12:40:00][INFO][httpx] HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"[0m
[32m[2025-12-09 12:40:00][INFO][src.embedder] EMBED | processed batch 1/11 (16 chunks)[0m
[32m[2025-12-09 12:40:01][INFO][httpx] HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"[0m
[32m[2025-12-09 12:40:01][INFO][src.embedder] EMBED | processed batch 2/11 (16 chunks)[0m
[32m[2025-12-09 12:40:02][INFO][httpx] HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"[0m
[32m[2025-12-09 12:40:03][INFO][src.embedder] EMBED | processed batch 3/11 (16 chunks)[0m
[32m[2025-12-09 12:40:03][INFO][httpx] HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"[0m
[32m[2025-12-09 12:40:03][INFO][src.embedder] EMBED | processed batch 4/11 (16 chunks)[0m
[32m[2025-12-09 12:40:04][INFO][httpx] HTTP Request: POST https://api.openai.com/

Embeddings shape: (162, 1536)
Index map sample: [('327b374b-0509-45a0-8c7e-3165fe75435b', 0), ('8f0812a2-1891-41bc-867e-8f4c072287e0', 1), ('4c8c7b5a-c4e8-4891-a78c-48f856e1ac50', 2)]


## 4. Retrieval


In [5]:
# Embed a sample question and fetch up to five chunks for it.
query = 'How does acceleration in sprinting work?'
query_embedding = generate_query_embedding(query, provider='openai')
retrieved = retrieve(
    query_embedding,
    embeddings,
    index_map,
    chunks,
    k=5,
    threshold=0.5,
)

print('Retrieved chunks:')
for row in retrieved:
    # Single-line, 150-character preview of the chunk text.
    preview = ' '.join(row['text'][:150].split('\n'))
    print(dict(chunk_id=row['chunk_id'], score=round(row['score'], 4), preview=preview))


[32m[2025-12-09 12:40:35][INFO][src.embedder] EMBED | query embedding | provider=openai model=text-embedding-3-small[0m
[32m[2025-12-09 12:40:35][INFO][httpx] HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"[0m
[32m[2025-12-09 12:40:35][INFO][src.retriever] RETRIEVE | start | vectors=162 k=5 threshold=0.50[0m
[32m[2025-12-09 12:40:35][INFO][src.retriever] RETRIEVE | hybrid enabled=False weight=0.00[0m
[32m[2025-12-09 12:40:35][INFO][src.retriever] RETRIEVE | threshold filtering | threshold=0.50 passed=162[0m
[32m[2025-12-09 12:40:35][INFO][src.retriever] RETRIEVE | top_k selected | [{'chunk_id': 'e2a0147c-b352-4ed9-aab7-79d3ce3623c9', 'score': 0.8458, 'dense': 0.6915, 'lexical': None}, {'chunk_id': '8a193d2e-b746-4cb9-b969-315dc7d63ca7', 'score': 0.8071, 'dense': 0.6142, 'lexical': None}, {'chunk_id': 'ea90acfd-0440-4a8a-bd14-6bc7bfbfc458', 'score': 0.7151, 'dense': 0.4302, 'lexical': None}, {'chunk_id': '01e4e888-49d0-4767-bc2f-d22e08960863', 'score

Retrieved chunks:
{'chunk_id': 'e2a0147c-b352-4ed9-aab7-79d3ce3623c9', 'score': 0.8458, 'preview': '# Layered Dynamics of Sprinting Mechanics ## Acceleration Foundations Acceleration out of the blocks decides whether the entire race unfolds on schedu'}
{'chunk_id': '8a193d2e-b746-4cb9-b969-315dc7d63ca7', 'score': 0.8071, 'preview': 'and indirectly keeps the pelvis neutral. Meanwhile, the shank angle should stay inside 50° through steps seven to nine. If one leg rotates to 55° whil'}
{'chunk_id': 'ea90acfd-0440-4a8a-bd14-6bc7bfbfc458', 'score': 0.7151, 'preview': 'first arm strike. A deviation—perhaps imagining an external stimulus instead of the internal feel—can lengthen reaction time by 0.01 s. Later in the s'}
{'chunk_id': '01e4e888-49d0-4767-bc2f-d22e08960863', 'score': 0.6612, 'preview': 'recommended block lengths; some coaches prefer 2–3 week blocks, others 4–6 weeks, primarily based on athlete experience. --- ## Conjugate and Concurre'}
{'chunk_id': 'c9e3bd71-bd34-4c87-ac0e-7fd3

## 5. LLM Generation


In [6]:
# Time a single answer-generation call for the query and chunks from section 4.
start = time.perf_counter()
answer = generate_answer(query, retrieved, provider='openai')
elapsed_ms = 1000 * (time.perf_counter() - start)

print('Answer:', answer, sep='\n')
print(f'Time elapsed: {elapsed_ms:.2f} ms')
# The "token estimate" is a plain whitespace-separated word count of the answer.
print(f'Token estimate: {len(answer.split())}')


[32m[2025-12-09 12:40:41][INFO][src.llm_orchestrator] LLM | provider=openai model=gpt-5-nano context_chunks=5 context_chars=12643[0m
[32m[2025-12-09 12:40:41][INFO][src.llm_orchestrator] LLM | prompt_len=12831 approx_tokens=1911[0m
[32m[2025-12-09 12:40:41][INFO][src.llm_orchestrator] LLM | Sending request to OpenAI question: How does acceleration in sprinting work?...[0m
[32m[2025-12-09 12:41:05][INFO][httpx] HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"[0m
[32m[2025-12-09 12:41:05][INFO][src.llm_orchestrator] LLM | OpenAI request completed in 24105.40 ms[0m
[32m[2025-12-09 12:41:05][INFO][src.llm_orchestrator] LLM | answer size approx_tokens=391[0m


Answer:
Acceleration in sprinting works through a coordinated combination of posture, timing, and force generation in the early strides, then transitions into high-speed running. Key elements from the provided material:

- Initiation and posture: From the blocks, cues include a torso angle around 45° (some use 42°) and a shin line aligned with the torso to maximize horizontal force. Block spacing is about 45 cm front-to-front and 96 cm front-to-back.
- Early impulse and contact: Ground contacts are about 0.19 seconds, supplying the impulse to push the sprinter forward before cadence increases.
- Power source and force transmission: Acceleration relies on a blend of concentric drive and elastic return, with the posterior chain doing the heavy lifting. Ankle stiffness matters for how much force reaches the ground, and keeping the ankle dorsiflexed during stance can reduce energy loss (about 6%), with some practitioners favoring a small heel drop for elastic rebound.
- Step length, cadenc

## 6. End-to-End Test


In [7]:
def run_demo(question: str, top_k: int = 5, threshold: float = 0.5) -> None:
    """Run the whole pipeline for one question and print a short report.

    The question is embedded with the OpenAI provider, chunks are fetched with
    ``retrieve`` and the result is passed to ``generate_answer``.

    Note:
        Relies on the notebook-level globals ``embeddings``, ``index_map`` and
        ``chunks`` created in the embedding and chunking sections; run those
        cells first.

    Args:
        question: Question text to embed and answer.
        top_k: Forwarded to ``retrieve`` as ``k``.
        threshold: Forwarded to ``retrieve`` as ``threshold``. Defaults to
            0.5, the value that was previously hard-coded here.

    Returns:
        None. The question, the answer, the retrieved chunk IDs and a
        word-count based token figure are printed to stdout.
    """
    q_emb = generate_query_embedding(question, provider='openai')
    retrieved_chunks = retrieve(
        q_emb, embeddings, index_map, chunks, k=top_k, threshold=threshold
    )
    response = generate_answer(question, retrieved_chunks, provider='openai')
    print('Question:', question)
    print('Answer:', response)
    print('Retrieved chunk IDs:', [item['chunk_id'] for item in retrieved_chunks])
    # "Tokens" is approximated by counting whitespace-separated words.
    print('Tokens:', len(response.split()))

# End-to-end smoke test with a second sprinting question (default top_k).
run_demo(question='What strategies influence sprint acceleration phases?')


[32m[2025-12-09 12:41:23][INFO][src.embedder] EMBED | query embedding | provider=openai model=text-embedding-3-small[0m
[32m[2025-12-09 12:41:23][INFO][httpx] HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"[0m
[32m[2025-12-09 12:41:23][INFO][src.retriever] RETRIEVE | start | vectors=162 k=5 threshold=0.50[0m
[32m[2025-12-09 12:41:23][INFO][src.retriever] RETRIEVE | hybrid enabled=False weight=0.00[0m
[32m[2025-12-09 12:41:23][INFO][src.retriever] RETRIEVE | threshold filtering | threshold=0.50 passed=162[0m
[32m[2025-12-09 12:41:23][INFO][src.retriever] RETRIEVE | top_k selected | [{'chunk_id': 'e2a0147c-b352-4ed9-aab7-79d3ce3623c9', 'score': 0.8652, 'dense': 0.7304, 'lexical': None}, {'chunk_id': '8a193d2e-b746-4cb9-b969-315dc7d63ca7', 'score': 0.8458, 'dense': 0.6916, 'lexical': None}, {'chunk_id': 'ea90acfd-0440-4a8a-bd14-6bc7bfbfc458', 'score': 0.7624, 'dense': 0.5249, 'lexical': None}, {'chunk_id': '01e4e888-49d0-4767-bc2f-d22e08960863', 'score

Question: What strategies influence sprint acceleration phases?
Answer: Strategies that influence sprint acceleration phases include:

- Technique cues and posture
  - Cue a torso angle around 45° (some use 42°) and ensure the initial shin line mirrors the torso to preserve horizontal force.
  - Use block setup: about 45 cm front-to-front, 96 cm front-to-back; drive the rear knee past the hip without the heel flaring outward.
  - From steps 3–8, aim for a gradual rise of the center of mass; prevent the pelvis from drifting behind the shoulders to avoid braking.

- Kinematics and limb mechanics
  - Keep knee lift within a 60° envelope relative to the hip; monitor for trunk rotation and address with core work.
  - Maintain a balance of force transmission: about 0.8 m per step in the first 10 strides; control ankle stiffness and ankle dorsiflexion to reduce energy loss (small heel drop can aid elastic rebound).
  - Arm mechanics: keep elbow angle under 105° and cue the thumb to brush the 