In [27]:
import os
import pandas as pd
from openai import OpenAI
from dotenv import load_dotenv
from groq import Groq
import openai

In [28]:
import minsearch

## Ingestion

In [29]:
# Load the exercises dataset from CSV into a DataFrame.
df = pd.read_csv('../data/data.csv')

In [30]:
# Convert the DataFrame into a list of per-row dicts (column name -> value).
documents = df.to_dict(orient='records')

In [31]:
# Configure the search index: the listed text fields are the ones searched as
# free text, while `id` is declared as a keyword field
# (presumably for exact-match filtering -- confirm against the minsearch docs).
index = minsearch.Index(
    text_fields=['exercise_name', 'type_of_activity', 'type_of_equipment', 'body_part',
       'type', 'muscle_groups_activated', 'instructions'],
    keyword_fields=['id']
)

In [32]:
# Index all documents. The call's return value is what the cell displays below.
index.fit(documents)

<minsearch.Index at 0x793258f9b4a0>

## RAG Flow

In [33]:
# Load variables from a .env file into the process environment so that
# credentials (e.g. API keys) do not have to be hard-coded in the notebook.
load_dotenv()

True

In [34]:
# Chat client used by `llm`. No API key is passed explicitly; presumably it is
# read from the environment (OPENAI_API_KEY) -- confirm it is set before running.
client = OpenAI()

In [35]:
# NOTE(review): disabled alternative to the OpenAI client above. Uncommenting it
# rebinds `client` to Groq; the model name used in `llm` would presumably need
# to change to a Groq-hosted model as well.
# client = Groq(
#     api_key=os.environ.get("GROQ_API_KEY"),
# )

In [36]:
def search(query, num_results=10):
    """Retrieve the best-matching exercise records for a free-text query.

    The query is run against the module-level ``index`` with no keyword
    filters and no per-field boosts.

    Args:
        query: Free-text question or search phrase.
        num_results: Maximum number of records to request from the index.
            Defaults to 10, the value that used to be hard-coded, so
            existing ``search(query)`` calls behave exactly as before.

    Returns:
        Whatever ``index.search`` returns for the query; callers in this
        notebook iterate over it and treat each item as a mapping of fields.
    """
    return index.search(
        query=query,
        filter_dict={},   # no keyword filtering
        boost_dict={},    # every text field weighted equally
        num_results=num_results,
    )

In [37]:
# Top-level prompt sent to the LLM. `{question}` and `{context}` are filled in
# by `build_prompt`; surrounding whitespace is stripped once, at definition time.
prompt_template = """
You're a fitness instructor. Answer the QUESTION based on the CONTEXT from our exercises database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: {question}

CONTEXT: 
{context}
""".strip()

# Layout of a single retrieved record inside the CONTEXT section. Each
# placeholder must be present as a key in the record passed to `.format(**doc)`.
entry_template = """ 
exercise_name : {exercise_name}
type_of_activity : {type_of_activity}
type_of_equipment : {type_of_equipment}
body_part : {body_part}
type : {type}
muscle_groups_activated : {muscle_groups_activated}
instructions : {instructions}
""".strip()
    
def build_prompt(query, search_results):
    """Render the LLM prompt for a question and its retrieved records.

    Every record is formatted with ``entry_template``; the entries are
    separated by a blank line and placed in the CONTEXT slot of
    ``prompt_template``.

    Args:
        query: The user's question, inserted verbatim into the prompt.
        search_results: Iterable of mappings. Each must contain every field
            referenced by ``entry_template``; extra keys are ignored.

    Returns:
        The full prompt string with leading/trailing whitespace stripped.

    Raises:
        KeyError: If a record is missing a field that ``entry_template`` uses.
    """
    # Join once instead of growing a string inside the loop. The old loop left
    # a trailing blank-line separator, but the final strip() always removed it,
    # so the resulting prompt is unchanged.
    context = "\n\n".join(entry_template.format(**doc) for doc in search_results)
    return prompt_template.format(question=query, context=context).strip()


In [38]:
# Sample user question used to try out retrieval and prompt construction.
query = "Can you explain how to do a Glute Bridge, I'm not sure about the movement."

In [39]:
# Run the retrieval and prompt-building steps on the sample query.
search_results = search(query)
prompt = build_prompt(query, search_results)

In [43]:
def llm(prompt, model="gpt-4o-mini"):
    """Send a single-turn prompt to the chat model and return its reply text.

    Uses the module-level ``client`` and sends ``prompt`` as one user message.

    Args:
        prompt: Full prompt text to send.
        model: Name of the chat model to call. Defaults to ``"gpt-4o-mini"``,
            the value that used to be hard-coded, so existing ``llm(prompt)``
            calls behave exactly as before.

    Returns:
        The ``content`` of the first choice in the response.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )
    return response.choices[0].message.content

In [44]:
def rag(query):
    """Answer a question end to end: retrieve, build the prompt, ask the LLM.

    Args:
        query: The user's question.

    Returns:
        The reply produced by ``llm`` for the prompt built from the records
        that ``search`` returned for ``query``.
    """
    retrieved = search(query)
    return llm(build_prompt(query, retrieved))

In [47]:
# End-to-end check: send one question through the full RAG pipeline and show the reply.
answer = rag("What specific muscle groups are predominantly activated during the Cable Face Pull exercise?")
print(answer)

The specific muscle groups predominantly activated during the Cable Face Pull exercise are the Rear Deltoids (Rear Delts), Trapezius (Traps), and Upper Back.


### Retrieval Evaluation

In [48]:
# Load the ground-truth question set used for the retrieval evaluation.
df_questions = pd.read_csv('../data/ground_truth_retrieval.csv')