In [1]:
import os
import pandas as pd
from dotenv import load_dotenv
from openai import OpenAI
import json
from tqdm.auto import tqdm

import minsearch

In [2]:
load_dotenv()

True

In [4]:
#api_key=os.environ.get("OPENAI_API_KEY"),
client = OpenAI()

## Ingestion

In [5]:
df = pd.read_csv('../data/data.csv')
documents = df.to_dict(orient='records')

In [6]:
prompt_template = """
You emulate a user of our fitness assistant applications.
Formulate 5 questions this user might ask based on a provided exercise.
Make the questions specific
The record should contain the answer to the questions, and the questions should 
be complete and not too short. Use fewer words as possible from the record. 

The record:
exercise_name : {exercise_name}
type_of_activity : {type_of_activity}
type_of_equipment : {type_of_equipment}
body_part : {body_part}
type : {type}
muscle_groups_activated : {muscle_groups_activated}
instructions : {instructions}

Provide the output in parsable JSON without using code blocks:

{{ questions: ["question1", "question2", ..., "question5"]}}
""".strip()

In [7]:
print(prompt_template.format(**documents[0]))

You emulate a user of our fitness assistant applications.
Formulate 5 questions this user might ask based on a provided exercise.
Make the questions specific
The record should contain the answer to the questions, and the questions should 
be complete and not too short. Use fewer words as possible from the record. 

The record:
exercise_name : Push-Up
type_of_activity : Strength
type_of_equipment : Bodyweight
body_part : Upper Body
type : Push
muscle_groups_activated : Chest, Triceps, Shoulders, Core
instructions : Start in a plank position, lower your body until your chest nearly touches the floor, then push back up to the starting position.

Provide the output in parsable JSON without using code blocks:

{ questions: ["question1", "question2", ..., "question5"]}


In [8]:
documents[2]

{'id': 2,
 'exercise_name': 'Deadlift',
 'type_of_activity': 'Strength',
 'type_of_equipment': 'Barbell',
 'body_part': 'Lower Body',
 'type': 'Pull',
 'muscle_groups_activated': 'Hamstrings, Glutes, Back, Core',
 'instructions': 'Stand with feet hip-width apart, grip the barbell, lift it by straightening your legs and hips, then lower it back to the ground.'}

In [9]:
prompt = prompt_template.format(**documents[5])

In [10]:
def llm(prompt):
    response = client.chat.completions.create(
        model ="gpt-4-turbo",
        messages=[{"role": "user", "content": prompt}]
    )
    
    return response.choices[0].message.content

In [11]:
questions = llm(prompt)

In [12]:
json.loads(questions)

{'questions': ['What type of exercise is the Resistance Band Row?',
  'What equipment do I need for the Resistance Band Row?',
  'Which muscle groups does the Resistance Band Row target?',
  'How do I properly perform a Resistance Band Row?',
  'Is the Resistance Band Row considered a pull-type exercise?']}

In [13]:
def generate_questions(doc):
    prompt = prompt_template.format(**doc)

    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}]
    )

    json_response = response.choices[0].message.content
    return json_response

In [18]:
results = {}

In [19]:
for doc in tqdm(documents): 
    doc_id = doc['id']
    if doc_id in results:
        continue

    questions_raw = generate_questions(doc)
    #print(questions_raw)
    questions = json.loads(questions_raw)
    results[doc_id] = questions["questions"]

  0%|          | 0/260 [00:00<?, ?it/s]

In [20]:
results

{0: ['What is the starting position for a push-up?',
  'Which muscle groups are activated during push-ups?',
  'What type of exercise is a push-up classified as?',
  'Do I need any equipment to perform push-ups?',
  'What is the correct form for lowering my body during a push-up?'],
 1: ['What is the primary muscle group worked during the Goblet Squat?',
  'What equipment do I need for the Goblet Squat exercise?',
  'Can you explain the proper form for performing the Goblet Squat?',
  'Is the Goblet Squat categorized as a strength or cardio exercise?',
  "How do I ensure I'm executing the Goblet Squat effectively for my lower body?"],
 2: ['What muscles are activated when performing the deadlift exercise?',
  'What type of equipment is necessary for deadlifting?',
  'Can you describe the stance and grip for the deadlift?',
  'Is the deadlift primarily a pulling or pushing exercise?',
  'What body part does the deadlift mainly target?'],
 3: ['What equipment do I need to perform the pla

In [21]:
final_results = []

for doc_id, questions in results.items():
    for q in questions:
        final_results.append((doc_id, q))

In [22]:
final_results

[(0, 'What is the starting position for a push-up?'),
 (0, 'Which muscle groups are activated during push-ups?'),
 (0, 'What type of exercise is a push-up classified as?'),
 (0, 'Do I need any equipment to perform push-ups?'),
 (0, 'What is the correct form for lowering my body during a push-up?'),
 (1, 'What is the primary muscle group worked during the Goblet Squat?'),
 (1, 'What equipment do I need for the Goblet Squat exercise?'),
 (1, 'Can you explain the proper form for performing the Goblet Squat?'),
 (1, 'Is the Goblet Squat categorized as a strength or cardio exercise?'),
 (1,
  "How do I ensure I'm executing the Goblet Squat effectively for my lower body?"),
 (2, 'What muscles are activated when performing the deadlift exercise?'),
 (2, 'What type of equipment is necessary for deadlifting?'),
 (2, 'Can you describe the stance and grip for the deadlift?'),
 (2, 'Is the deadlift primarily a pulling or pushing exercise?'),
 (2, 'What body part does the deadlift mainly target?'),

In [25]:
df_results = pd.DataFrame(final_results, columns = ['id', 'question'])

In [27]:
df_results.to_csv('../data/ground_truth_retrieval.csv', index = False)

In [28]:
!head '../data/ground_truth_retrieval.csv'

id,question
0,What is the starting position for a push-up?
0,Which muscle groups are activated during push-ups?
0,What type of exercise is a push-up classified as?
0,Do I need any equipment to perform push-ups?
0,What is the correct form for lowering my body during a push-up?
1,What is the primary muscle group worked during the Goblet Squat?
1,What equipment do I need for the Goblet Squat exercise?
1,Can you explain the proper form for performing the Goblet Squat?
1,Is the Goblet Squat categorized as a strength or cardio exercise?
