In [1]:
import json
import time
import pandas as pd

In [77]:
import os
from groq import Groq

In [78]:
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

In [79]:
df = pd.read_csv('../data/data.csv')
documents = df.to_dict(orient='records')

In [80]:
documents[0]

{'id': 0,
 'exercise_name': 'Push-Ups',
 'type_of_activity': 'Strength',
 'type_of_equipment': 'Bodyweight',
 'body_part': 'Upper Body',
 'type': 'Push',
 'muscle_groups_activated': 'Pectorals, Triceps, Deltoids',
 'instructions': 'Start in a high plank position with your hands under your shoulders. Lower your body until your chest nearly touches the floor. Push back up to the starting position.'}

In [81]:
prompt_template = """
You emulate a user of our fitness assistant application.
Formulate 5 questions this user might ask based on a provided exercise.
Make the questions specific to this exercise.
The record should contain the answer to the questions, and the questions should
be complete and not too short. Use as fewer words as possible from the record. 

The record:

exercise_name: {exercise_name}
type_of_activity: {type_of_activity}
type_of_equipment: {type_of_equipment}
body_part: {body_part}
type: {type}
muscle_groups_activated: {muscle_groups_activated}
instructions: {instructions}

Provide the output in parsable JSON without using code blocks and any other texts like Here are 5 questions a user of our fitness assistant application might ask based on the provided exercise:

{{"questions": ["question1", "question2", ..., "question5"]}}
""".strip()

In [82]:
prompt = prompt_template.format(**documents[0])

In [83]:
prompt

'You emulate a user of our fitness assistant application.\nFormulate 5 questions this user might ask based on a provided exercise.\nMake the questions specific to this exercise.\nThe record should contain the answer to the questions, and the questions should\nbe complete and not too short. Use as fewer words as possible from the record. \n\nThe record:\n\nexercise_name: Push-Ups\ntype_of_activity: Strength\ntype_of_equipment: Bodyweight\nbody_part: Upper Body\ntype: Push\nmuscle_groups_activated: Pectorals, Triceps, Deltoids\ninstructions: Start in a high plank position with your hands under your shoulders. Lower your body until your chest nearly touches the floor. Push back up to the starting position.\n\nProvide the output in parsable JSON without using code blocks and any other texts like Here are 5 questions a user of our fitness assistant application might ask based on the provided exercise:\n\n{"questions": ["question1", "question2", ..., "question5"]}'

In [84]:
def llm(prompt):
    response = client.chat.completions.create(
        model='llama3-8b-8192',
        messages=[{"role": "user", "content": prompt}]
    )
    
    return response.choices[0].message.content

In [85]:
questions = llm(prompt)

In [86]:
questions

'{"questions": [\n"How do I adjust my plank position to engage my core and avoid straining my back during push-ups?",\n"What are the common mistakes I should watch out for while performing push-ups, especially when it comes to lowering my body to the floor?",\n"Can I modify push-ups to make them easier to do, and if so, how do I do it?",\n"Are there any additional exercises I can do after completing a set of push-ups to target other muscle groups in my upper body?",\n"How long should I hold the starting position of the push-up to get the most effective workout for my pectorals and triceps?"]}'

In [87]:
def generate_questions(doc):
    prompt = prompt_template.format(**doc)

    response = client.chat.completions.create(
        model='llama3-8b-8192',
        messages=[{"role": "user", "content": prompt}]
    )

    json_response = response.choices[0].message.content
    return json_response

In [88]:
from tqdm.auto import tqdm 

In [89]:
results = {}

In [90]:
for doc in tqdm(documents): 
    doc_id = doc['id']
    if doc_id in results:
        continue

    while True:
        try:
            questions_raw = generate_questions(doc)
            questions = json.loads(questions_raw)
            results[doc_id] = questions['questions']
            break  # Exit the loop if successful
        except Exception as e:
            print(f"Error processing document {doc_id}: {e}")
            print("Retrying in 10 seconds...")
            time.sleep(10)

  0%|          | 0/207 [00:00<?, ?it/s]

Error processing document 4: Expecting ',' delimiter: line 6 column 110 (char 510)
Retrying in 10 seconds...
Error processing document 5: Expecting ',' delimiter: line 5 column 130 (char 596)
Retrying in 10 seconds...
Error processing document 23: Extra data: line 3 column 1 (char 362)
Retrying in 10 seconds...
Error processing document 24: Expecting ',' delimiter: line 6 column 93 (char 480)
Retrying in 10 seconds...
Error processing document 37: Extra data: line 8 column 1 (char 564)
Retrying in 10 seconds...
Error processing document 45: Expecting ',' delimiter: line 6 column 157 (char 695)
Retrying in 10 seconds...
Error processing document 54: Expecting ',' delimiter: line 1 column 601 (char 600)
Retrying in 10 seconds...
Error processing document 57: Expecting ',' delimiter: line 6 column 141 (char 694)
Retrying in 10 seconds...
Error processing document 57: Expecting ',' delimiter: line 6 column 133 (char 672)
Retrying in 10 seconds...
Error processing document 64: Extra data: l

In [91]:
final_results = []

for doc_id, questions in results.items():
    for q in questions:
        final_results.append((doc_id, q))

In [92]:
final_results[0]

(0,
 "How do I make sure I'm positioning my hands correctly in a high plank for a proper push-up?")

In [93]:
df_results = pd.DataFrame(final_results, columns=['id', 'question'])

In [94]:
df_results.to_csv('../data/ground-truth-retrieval-groq.csv', index=False)

In [2]:
!head ../data/ground-truth-retrieval-groq.csv

id,question
0,How do I make sure I'm positioning my hands correctly in a high plank for a proper push-up?
0,What muscles will I be targeting if I do push-ups on a daily basis?
0,Is it okay to do push-ups with my hands closer together or spread wide apart?
0,Can I adjust the angle of my body during the push-up to target different muscle groups?
0,What are some common mistakes I should avoid when performing push-ups for the first time?
1,What is the correct stance for performing Squats?
1,How low should I go when doing Squats?
1,"Can I use any type of equipment for Squats, or do I need to rely on my own body weight?"
1,What muscle groups am I targeting with this exercise?
