# Basic RAG Demonstration


## 1. Loading Data


In [7]:
import os
import sys
import time
from pathlib import Path

# Anchor the project at the repository root: when the kernel was started from
# inside a `notebooks` folder, step up one level so the `src` imports resolve.
PROJECT_ROOT = Path.cwd().resolve()
PROJECT_ROOT = PROJECT_ROOT.parent if PROJECT_ROOT.name == 'notebooks' else PROJECT_ROOT

# Register the root on the import path only once, so re-running this cell
# does not pile up duplicate entries.
if sys.path.count(str(PROJECT_ROOT)) == 0:
    sys.path.append(str(PROJECT_ROOT))

from src.load_data import load_markdown_files
from src.chunking import chunk_documents
from src.embedder import generate_embeddings, generate_query_embedding
from src.retriever import retrieve
from src.llm_orchestrator import generate_answer

# Raw markdown sources live under <project root>/data/raw.
DATA_DIR = PROJECT_ROOT.joinpath('data', 'raw')
docs = load_markdown_files(str(DATA_DIR))
print(f'Loaded {len(docs)} documents from {DATA_DIR}.')

# Sanity check on the first document: show every field except the body,
# followed by the first 300 characters of the body itself.
if docs:
    preview_doc = docs[0]
    preview_text = preview_doc.get('text', '')
    meta = {field: value for field, value in preview_doc.items() if field != 'text'}
    print('Sample metadata:', meta)
    print('Preview snippet:', preview_text[:300], '...')


Loaded 5 documents from C:\Users\tomasz.makowski.2\Desktop\SemesterII\ComputationalIntelligence\Project\agentic-rag-architectures\data\raw.
Sample metadata: {'id': '2742b4bf-d9d0-450c-b6fe-67349cd974ca', 'filename': 'football_rules.md', 'path': 'C:\\Users\\tomasz.makowski.2\\Desktop\\SemesterII\\ComputationalIntelligence\\Project\\agentic-rag-architectures\\data\\raw\\football_rules.md'}
Preview snippet: # Comprehensive Overview of Football (Soccer) Rules: Structure, Gameplay, and Interpretations

## Introduction: The Logic and Governance of Football Laws
Football (known as soccer in some countries) is governed globally by the **Laws of the Game**, maintained by **IFAB (International Football Associ ...


## 2. Chunking


In [8]:
# Split the loaded documents into chunks of size 400 with an overlap of 50
# (the unit is whatever `chunk_documents` measures -- TODO confirm).
chunks = chunk_documents(docs, chunk_size=400, overlap=50)
print(f'Generated {len(chunks)} chunks.')

# Peek at the first three chunks; newlines are flattened so each summary
# prints on a single line.
for sample in chunks[:3]:
    flat_text = sample.get('text', '')[:150].replace('\n', ' ')
    summary = {
        'chunk_id': sample.get('chunk_id'),
        'metadata': sample.get('metadata', {}),
        'snippet': flat_text,
    }
    print(summary)


Generated 22 chunks.
{'chunk_id': '5d92da19-bee6-4ae5-bacb-c73c90293a17', 'metadata': {}, 'snippet': '# Comprehensive Overview of Football (Soccer) Rules: Structure, Gameplay, and Interpretations ## Introduction: The Logic and Governance of Football La'}
{'chunk_id': 'd36546c8-2975-4c61-acdc-6cd3531440e9', 'metadata': {}, 'snippet': 'substitutes** (varies by competition) At least seven players are required to start a match. If a team drops below seven due to injuries or dismissals,'}
{'chunk_id': '5acfd75d-db01-4754-b67f-d6d4cd9859b8', 'metadata': {}, 'snippet': 'Handball (deliberate) A direct free kick may lead to a penalty if inside the defending penalty area. ### **Indirect Free Kick Offenses** Include: - Da'}


## 3. Embedding


In [9]:
# Embed every chunk in one call. Judging by the printed sample, `index_map`
# pairs each chunk_id with a row position in `embeddings` -- verify in src.embedder.
embeddings, index_map = generate_embeddings(chunks, provider='openai')
print('Embeddings shape:', embeddings.shape)

# Only the first three (key, value) pairs are shown to keep the output short.
index_preview = list(index_map.items())[:3]
print('Index map sample:', index_preview)


Embeddings shape: (22, 1536)
Index map sample: [('5d92da19-bee6-4ae5-bacb-c73c90293a17', 0), ('d36546c8-2975-4c61-acdc-6cd3531440e9', 1), ('5acfd75d-db01-4754-b67f-d6d4cd9859b8', 2)]


## 4. Retrieval


In [10]:
# Embed the question with the same provider used for the chunks, then ask the
# retriever for up to k=5 candidates using a 0.5 threshold.
query = 'How does acceleration in sprinting work?'
query_embedding = generate_query_embedding(query, provider='openai')
retrieved = retrieve(query_embedding, embeddings, index_map, chunks, k=5, threshold=0.5)

print('Retrieved chunks:')
for hit in retrieved:
    # Flatten newlines so every hit prints as one compact line.
    flat_text = hit['text'][:150].replace('\n', ' ')
    summary = {
        'chunk_id': hit['chunk_id'],
        'score': round(hit['score'], 4),
        'preview': flat_text,
    }
    print(summary)


Retrieved chunks:
{'chunk_id': 'f5dcd8bd-bd60-4ca7-972f-37e982cb32a3', 'score': 0.6916, 'preview': '# Layered Dynamics of Sprinting Mechanics ## Acceleration Foundations Acceleration out of the blocks decides whether the entire race unfolds on schedu'}
{'chunk_id': 'cba1976b-e193-4a5a-ac04-7454c53dff73', 'score': 0.6143, 'preview': 'and indirectly keeps the pelvis neutral. Meanwhile, the shank angle should stay inside 50° through steps seven to nine. If one leg rotates to 55° whil'}


## 5. LLM Generation


In [11]:
# Time only the generation call, using the monotonic high-resolution clock.
start = time.perf_counter()
answer = generate_answer(query, retrieved, provider='openai')
elapsed_ms = 1000 * (time.perf_counter() - start)

print('Answer:', answer, sep='\n')
print(f'Time elapsed: {elapsed_ms:.2f} ms')

# The "token estimate" is a whitespace-separated word count, i.e. only a
# rough stand-in for a real tokenizer count.
word_count = len(answer.split())
print(f'Token estimate: {word_count}')


Answer:
Acceleration in sprinting is the staged process of converting block-driven impulse into forward velocity, controlled by body position, force application, and cadence.

Key elements from the context:

- Start and early drive
  - Block setup and posture: aim for a torso angle around 45° (some debate for 42°); the initial shin line should mirror the torso to avoid diluting horizontal force.
  - Ground contact: about 0.19 seconds in early contacts; these contacts supply the impulse to push the sprinter forward.
  - Spacing: block front-to-front ~45 cm and front-to-back ~96 cm.
  - COM and pelvis: from steps 3–8, the center of mass should rise gradually; the pelvis should stay under the shoulders—if the pelvis drifts behind, braking forces develop.

- Force production and mechanics
  - Step length and cadence: you push through roughly 0.8 m per step in the first ten strides; cadence rises toward about 4.5 strides per second.
  - Muscles and joints: the posterior chain does much of t

## 6. End-to-End Test


In [12]:
def run_demo(
    question: str,
    top_k: int = 5,
    *,
    threshold: float = 0.5,
    provider: str = 'openai',
) -> None:
    """Run one question through embed -> retrieve -> generate and print the result.

    The function reads the notebook-level ``embeddings``, ``index_map`` and
    ``chunks`` variables, so the cells that create them must have been
    executed before it is called.

    Args:
        question: Natural-language question to answer.
        top_k: Forwarded to ``retrieve`` as ``k`` (presumably the maximum
            number of chunks to return -- confirm in ``src.retriever``).
        threshold: Forwarded to ``retrieve`` as ``threshold``. Defaults to
            0.5, the value used by the standalone retrieval cell.
        provider: Backend name forwarded to both ``generate_query_embedding``
            and ``generate_answer``, so the query is embedded and answered by
            the same provider. Defaults to ``'openai'``.

    Returns:
        None. The question, the answer, the IDs of the retrieved chunks and a
        word count of the answer are printed to stdout.
    """
    q_emb = generate_query_embedding(question, provider=provider)
    retrieved_chunks = retrieve(
        q_emb, embeddings, index_map, chunks, k=top_k, threshold=threshold
    )
    response = generate_answer(question, retrieved_chunks, provider=provider)

    print('Question:', question)
    print('Answer:', response)
    print('Retrieved chunk IDs:', [item['chunk_id'] for item in retrieved_chunks])
    # Whitespace-separated word count: a rough proxy, not a tokenizer count.
    print('Tokens:', len(response.split()))

# End-to-end smoke test: a second sprint-related question through the full pipeline.
run_demo(question='What strategies influence sprint acceleration phases?')


Question: What strategies influence sprint acceleration phases?
Answer: Strategies influencing sprint acceleration phases include:

- Technical setup and posture
  - Cue a torso angle around 45° for the first two strides (some use 42°).
  - Align the initial shin line with torso projection to preserve horizontal force.
  - Block spacing typically about 45 cm front-to-front and 96 cm front-to-back.
  - Maintain pelvis position to avoid braking forces (pelvis should not drift behind the shoulders).

- Early stride mechanics (steps 1–8)
  - Ensure the COM rises gradually from steps three to eight.
  - Keep knee lift within a 60° envelope relative to the hip.
  - Use video overlays to monitor mechanics and correct minor lateral or trunk-rotation deviations.

- Force transmission and biomechanics
  - Emphasize a blend of concentric drive and elastic return; aim for about 0.8 m per step in the first ten strides.
  - Balance ankle stiffness and dorsiflexion to optimize ground impulse; conside