## Ingestion

In [126]:
import pandas as pd

In [127]:
df = pd.read_csv('../data/fitness_exercises_500.csv')

In [128]:
df.head()

Unnamed: 0,id,exercise_name,type_of_activity,type_of_equipment,body_part,type,muscle_groups_activated,instruction
0,0,Push-Up Hold,mobility,dip belt,lower,stretch,"glutes, quads, hamstrings",Setup: Prepare equipment: dip belt. Ensure sta...
1,1,Explosive Lateral Raise Pulse,cardio,barbells,full body,hold,"back, chest, legs",Setup: Set barbell with collars secured; grip ...
2,2,Rotational Jumping Jack Iso,warm-up,barbells,upper,push,"biceps, forearms, chest",Setup: Set barbell with collars secured; grip ...
3,3,Wide-Grip Running,strength,kettlebells,upper,pull,"deltoids, triceps, forearms",Setup: Place kettlebell close to midline; hing...
4,4,Decline Dips,cardio,barbells,core,stretch,"abs, lower back, obliques",Setup: Set barbell with collars secured; grip ...


In [129]:
import minsearch

In [130]:
df.columns

Index(['id', 'exercise_name', 'type_of_activity', 'type_of_equipment',
       'body_part', 'type', 'muscle_groups_activated', 'instruction'],
      dtype='object')

In [131]:
# Convert the DataFrame into a list of per-row dicts (one dict per exercise),
# which is the form passed to `index.fit` below.
documents = df.to_dict(orient='records')

In [132]:
# `id` is registered as a keyword field; every descriptive column is
# registered as a free-text field.
index = minsearch.Index(
    keyword_fields=['id'],
    text_fields=[
        'exercise_name',
        'type_of_activity',
        'type_of_equipment',
        'body_part',
        'type',
        'muscle_groups_activated',
        'instruction',
    ],
)

In [133]:
index.fit(documents)

<minsearch.minsearch.Index at 0x16716e030>

In [134]:
query = 'give me leg exercises for hamstrings'

In [135]:
# NOTE(review): `search` is defined in a later cell (under "Implementing RAG Flow").
# On a fresh kernel run top-to-bottom this call would raise NameError; the
# definition needs to be executed before this cell.
search(query)

[{'id': 113,
  'exercise_name': 'Single-Leg Leg Press Variation',
  'type_of_activity': 'mobility',
  'type_of_equipment': 'dumbbells',
  'body_part': 'lower',
  'type': 'stretch',
  'muscle_groups_activated': 'quads, calves, hamstrings',
  'instruction': 'Setup: Hold dumbbells with a neutral, secure grip; wrists straight. Brace core, keep neutral spine, drive through whole foot. Primary focus: quads, calves, hamstrings. Movement: Gently enter the stretch until a mild pull is felt; hold without bouncing and keep breathing. Tempo: 2‑2‑2‑0. Dose: 1–2 sets × 30–60 sec each side. Rest 45–90s between sets. Breathing: Slow nasal breaths; exhale to deepen slightly, no pain. Coaching cues: keep neck long, ribs down, and knees tracking over toes. Common mistakes: avoid bouncing; stay within mild discomfort. Do not bounce or hyperextend joints. Safety: Never force end range; joints should feel stable.'},
 {'id': 408,
  'exercise_name': 'Leg Press Rotation',
  'type_of_activity': 'strength',
  't

## Implementing RAG Flow

In [136]:
from google import genai

In [137]:
client = genai.Client()

In [138]:
def search(query):
    """Query the module-level `index` with no boosting and no filters.

    Args:
        query: Free-text search string.

    Returns:
        Whatever `index.search` returns for up to 10 requested results.
    """
    return index.search(
        query=query,
        boost_dict={},
        filter_dict={},
        num_results=10,
    )

In [139]:
documents[0]

{'id': 0,
 'exercise_name': 'Push-Up Hold',
 'type_of_activity': 'mobility',
 'type_of_equipment': 'dip belt',
 'body_part': 'lower',
 'type': 'stretch',
 'muscle_groups_activated': 'glutes, quads, hamstrings',
 'instruction': 'Setup: Prepare equipment: dip belt. Ensure stable setup and safe load. Brace core, keep neutral spine, drive through whole foot. Primary focus: glutes, quads, hamstrings. Movement: Gently enter the stretch until a mild pull is felt; hold without bouncing and keep breathing. Tempo: 2‑2‑2‑0. Dose: 1–2 sets × 30–60 sec each side. Rest 45–90s between sets. Breathing: Slow nasal breaths; exhale to deepen slightly, no pain. Coaching cues: keep neck long, ribs down, and knees tracking over toes. Common mistakes: avoid bouncing; stay within mild discomfort. Do not bounce or hyperextend joints. Safety: Never force end range; joints should feel stable.'}

In [140]:
# The LLM must answer from the retrieved documents only, so the prompt embeds
# the question followed by every retrieved exercise rendered as a text entry.
prompt_template = """
    You're a professional fitness assistant. Answer the QUESTION based only on the CONTEXT provided from the exercise & fitness database.  
    
    - Use only the facts from the CONTEXT when answering the QUESTION.  
    - If the CONTEXT does not contain the answer, respond with: NONE.  
    - Keep your answer clear, concise, and detail with instruction for fitness use.  
    
    QUESTION: {question}  
    
    CONTEXT:  
    {context}  
""".strip()

# One context entry per retrieved document. Keys must match the document dict
# keys; extra keys in the document (e.g. `id`) are ignored by `str.format`.
entry_template = """
exercise_name: {exercise_name},
type_of_activity: {type_of_activity},
type_of_equipment: {type_of_equipment},
body_part: {body_part},
type: {type},
muscle_groups_activated: {muscle_groups_activated},
instruction: {instruction}
""".strip()


def build_prompt(query, search_results):
    """Render the final LLM prompt for a question and its retrieved documents.

    Args:
        query: The user's question; substituted into the QUESTION slot.
        search_results: Iterable of document dicts. Each must contain every
            key referenced by `entry_template`.

    Returns:
        The prompt string with surrounding whitespace stripped. Entries are
        separated by a blank line; with no results the CONTEXT section is empty.

    Raises:
        KeyError: If a document lacks a key used by `entry_template`.
    """
    # Join once instead of growing a string inside the loop.
    context = "\n\n".join(entry_template.format(**doc) for doc in search_results)

    return prompt_template.format(question=query, context=context).strip()

In [141]:
def llm(prompt, model="gemini-2.5-flash"):
    """Send a prompt to the Gemini API through the module-level `client`.

    Args:
        prompt: Text passed as the `contents` of the generation request.
        model: Model name to use. Defaults to "gemini-2.5-flash", the value
            that was previously hard-coded, so existing callers are unaffected.

    Returns:
        The `text` attribute of the response object.
    """
    response = client.models.generate_content(
        model=model,
        contents=prompt,
    )

    return response.text

In [142]:
def rag(query):
    """Answer a question by chaining `search`, `build_prompt` and `llm`.

    Args:
        query: The user's question.

    Returns:
        The value returned by `llm` for the prompt built from the search hits.
    """
    retrieved = search(query)
    return llm(build_prompt(query, retrieved))

In [143]:
# Example end-to-end run: `rag` searches the index, builds the prompt and
# calls the model; the generated answer is printed below.
query = 'I want some core exercises that also help my back'

answer = rag(query)
print(answer)

Here are some core exercises that also help your back:

**1. Wide-Grip Face Pull Twist**
*   **Muscles Activated:** Lower back, abs, obliques.
*   **Equipment:** Kettlebells.
*   **Instruction:** Place kettlebell close to midline; hinge to grip; pack shoulders. Exhale gently to brace; avoid flaring ribs or arching low back. Pull elbows back toward hips, squeeze shoulder blades, then return slowly without shrugging.
*   **Tempo:** 3‑1‑1‑0.
*   **Dose:** 3–4 sets × 6–10 reps.
*   **Rest:** 45–90s between sets.

**2. Feet-Elevated Deadlift Combo**
*   **Muscles Activated:** Abs, obliques, lower back.
*   **Equipment:** Dip belt.
*   **Instruction:** Prepare equipment: dip belt. Ensure stable setup and safe load. Exhale gently to brace; avoid flaring ribs or arching low back. Hinge at hips with a flat back, push hips back, then extend to stand tall; keep bar/bell close to shins.
*   **Tempo:** 3‑1‑1‑0.
*   **Dose:** 3–4 sets × 6–10 reps.
*   **Rest:** 45–90s between sets.

**3. Tempo Face 

## Retrieval Evaluation

In [144]:
df_question = pd.read_csv('../data/ground-truth-retrieval.csv')

In [145]:
df_question.head()

Unnamed: 0,id,question
0,0,What equipment do I need to perform the Push-U...
1,0,How long should I hold each side during the Pu...
2,0,What should I focus on to ensure proper form w...
3,0,What common mistakes should I avoid when doing...
4,0,How should I breathe during the Push-Up Hold t...


In [146]:
ground_truth = df_question.to_dict(orient='records')

In [147]:
ground_truth[0]

{'id': 0,
 'question': 'What equipment do I need to perform the Push-Up Hold exercise?'}

In [148]:
def hit_rate(relevance_total):
    """Fraction of queries whose relevance list contains `True`.

    Args:
        relevance_total: Sequence of per-query relevance lists.

    Returns:
        Number of lists containing `True`, divided by the number of lists.

    Raises:
        ZeroDivisionError: If `relevance_total` is empty.
    """
    hits = sum(1 for flags in relevance_total if True in flags)
    return hits / len(relevance_total)

def mrr(relevance_total):
    """Mean reciprocal rank over a set of queries.

    For each query, the score is 1 / (1-based position of the FIRST relevant
    result), or 0 when no result is relevant. The mean of those scores is
    returned.

    Args:
        relevance_total: Sequence of per-query relevance lists, ordered by rank.

    Returns:
        The mean reciprocal rank as a float in [0, 1].

    Raises:
        ZeroDivisionError: If `relevance_total` is empty.
    """
    total_score = 0.0

    for line in relevance_total:
        for position, is_relevant in enumerate(line, start=1):
            if is_relevant:
                total_score += 1 / position
                # Only the first relevant hit counts; without this break a
                # query with several relevant hits would score above 1.
                break

    return total_score / len(relevance_total)

In [155]:
documents[0]

{'id': 0,
 'exercise_name': 'Push-Up Hold',
 'type_of_activity': 'mobility',
 'type_of_equipment': 'dip belt',
 'body_part': 'lower',
 'type': 'stretch',
 'muscle_groups_activated': 'glutes, quads, hamstrings',
 'instruction': 'Setup: Prepare equipment: dip belt. Ensure stable setup and safe load. Brace core, keep neutral spine, drive through whole foot. Primary focus: glutes, quads, hamstrings. Movement: Gently enter the stretch until a mild pull is felt; hold without bouncing and keep breathing. Tempo: 2‑2‑2‑0. Dose: 1–2 sets × 30–60 sec each side. Rest 45–90s between sets. Breathing: Slow nasal breaths; exhale to deepen slightly, no pain. Coaching cues: keep neck long, ribs down, and knees tracking over toes. Common mistakes: avoid bouncing; stay within mild discomfort. Do not bounce or hyperextend joints. Safety: Never force end range; joints should feel stable.'}

In [161]:
def minsearch_search(query):
    """Query the module-level `index` with field boosts and no filters.

    Args:
        query: Free-text search string.

    Returns:
        Whatever `index.search` returns for up to 10 requested results.
    """
    # Boost values passed to the index: 3 for exercise_name,
    # 1 for muscle_groups_activated.
    field_weights = {'exercise_name': 3, 'muscle_groups_activated': 1}

    return index.search(
        query=query,
        boost_dict=field_weights,
        filter_dict={},
        num_results=10,
    )

In [150]:
def evaluate(ground_truth, search_function):
    """Score a search function against ground-truth records.

    Args:
        ground_truth: Iterable of records; each must have an 'id' key holding
            the expected document id.
        search_function: Callable that receives a whole ground-truth record
            and returns an iterable of result dicts, each with an 'id' key.

    Returns:
        Dict with 'hit_rate' and 'mrr' computed over all records.
    """
    def relevance_for(record):
        # One boolean per returned result: does it match the expected document?
        expected_id = record['id']
        return [hit['id'] == expected_id for hit in search_function(record)]

    relevance_total = [relevance_for(record) for record in tqdm(ground_truth)]

    return {
        'hit_rate': hit_rate(relevance_total),
        'mrr': mrr(relevance_total),
    }

In [151]:
from tqdm.auto import tqdm

In [160]:
# `evaluate` hands each whole ground-truth record to the search function, so
# the lambda pulls out the 'question' text before calling `minsearch_search`.
evaluate(ground_truth, lambda q: minsearch_search(q['question']))

  0%|          | 0/2500 [00:00<?, ?it/s]

{'hit_rate': 0.7632, 'mrr': 0.6914885714285712}