In [1]:
# import libraries
import pandas as pd
from openai import OpenAI
import json
from tqdm.auto import tqdm

In [2]:
# Create the OpenAI client shared by the completion calls below.
# No credentials are passed here, so the client presumably reads them from the
# environment (e.g. OPENAI_API_KEY) — TODO confirm for your setup.
client = OpenAI()

### Ingestion

In [3]:
# Load the yoga pose records. The path is relative to the notebook's working
# directory, like the '../data/...' path used for the output file further down,
# so the notebook is not tied to one machine's absolute workspace location.
data = pd.read_csv('../data/yoga_poses.csv')

# Convert the data to a list of dictionaries (one dict per CSV row)
documents = data.to_dict(orient='records')

In [6]:
# Prompt sent to the model for one record. The {field} placeholders are filled
# via str.format(**record), so each placeholder name must be a key of the
# record dict; the doubled braces {{ }} keep the JSON example literal.
prompt_template = """
You are a yoga guru with access to a comprehensive yoga poses database.
Formulate 5 questions that a user might ask based on a provided yoga pose.
Make the questions specific to the pose, its variations, and its context.
The questions should be informative, utilizing details from the record but
not directly copying large portions. Use synonyms to enhance the variety of the questions.

The record:

pose_name: {pose_name}
type_of_practice: {type_of_practice}
variation: {variation}
position: {position}
difficulty: {difficulty}
props_required: {props_required}
body_focus: {body_focus}
benefits: {benefits}
synonyms: {synonyms}
instructions: {instructions}
context: {context}

Provide the output in parsable JSON without using code blocks:

{{"questions": ["question1", "question2", ..., "question5"]}}
""".strip()

In [7]:
# Render the prompt for the first record as a single-example trial.
prompt = prompt_template.format(**documents[0])

In [8]:
def llm(prompt, model='gpt-4o-mini'):
    """Send `prompt` as a single user message and return the reply text.

    Args:
        prompt: Text placed in the one user message of the chat request.
        model: Name of the chat model to query.

    Returns:
        The `content` of the first choice in the completion response.
    """
    user_message = {"role": "user", "content": prompt}
    completion = client.chat.completions.create(
        model=model,
        messages=[user_message],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [9]:
# Ask the model for questions about the sample record; the reply is raw text.
questions = llm(prompt)

In [10]:
# Parse the reply to check that it is the JSON object the prompt asked for.
json.loads(questions)

{'questions': ['What are the specific steps to perform the Twisted variation of Downward Dog?',
  'How does the use of a chair enhance the practice of the Downward Dog pose?',
  'In what scenarios is the Twisted Downward Dog best integrated within a yoga routine?',
  'Can you explain the benefits of practicing the Downward Dog Alternative concerning flexibility and strength?',
  'What makes this pose classified as intermediate in terms of difficulty, and how can beginners approach it safely?']}

In [11]:
def generate_questions(doc):
    """Ask the model for questions about one record.

    Args:
        doc: Record dict; must contain every field named in `prompt_template`.

    Returns:
        The model's reply as unparsed text (expected to be JSON, per the
        prompt). Parsing is left to the caller.
    """
    prompt = prompt_template.format(**doc)
    # Reuse llm() rather than repeating the chat-completion call, so the model
    # name and message format are defined in one place.
    return llm(prompt)

In [12]:
# Maps record id -> list of generated questions. Kept in its own cell,
# presumably so that re-running the generation loop does not discard the
# results already collected.
results = {}

In [13]:
# Generate questions for every record. Records already present in `results`
# are skipped, so re-running this cell after an interruption resumes the job.
for doc in tqdm(documents):
    doc_id = doc['id']
    if doc_id in results:
        continue

    questions_raw = generate_questions(doc)
    try:
        # `parsed` (not `questions`) so the sample reply from the earlier
        # trial cell is not overwritten in the notebook namespace.
        parsed = json.loads(questions_raw)
        results[doc_id] = parsed['questions']
    except (json.JSONDecodeError, KeyError, TypeError) as exc:
        # One malformed or unexpected reply should not abort the remaining
        # API calls. The id stays out of `results`, so a re-run retries it.
        tqdm.write(f'Skipping {doc_id}: unusable model response ({exc!r})')

  0%|          | 0/144 [00:00<?, ?it/s]

In [14]:
# Flatten {doc_id: [question, ...]} into (doc_id, question) pairs, one per
# question, following the iteration order of `results`.
final_results = [
    (doc_id, question)
    for doc_id, doc_questions in results.items()
    for question in doc_questions
]

In [15]:
# Peek at the first (doc_id, question) pair.
final_results[0]

('7ce8c60e',
 'What modifications can I make to the Downward Dog pose to adapt it for my level as a beginner?')

In [16]:
# One row per generated question, with the id of its source record.
data_results = pd.DataFrame(final_results, columns=['id', 'question'])

In [17]:
# Persist the (id, question) pairs; index=False leaves out the row-index column.
data_results.to_csv('../data/ground-truth-retrieval.csv', index=False)

In [18]:
# Show the first lines of the written file (shell escape; needs a `head` command).
!head ../data/ground-truth-retrieval.csv

id,question
7ce8c60e,What is the primary focus of the Downward Dog pose?
7ce8c60e,Can you describe the steps to perform the Twisted variation of Downward Dog?
7ce8c60e,What props are needed for practicing Downward Dog?
7ce8c60e,How does Downward Dog contribute to strength building?
7ce8c60e,What level of difficulty is associated with Downward Dog?
81650e9f,What are the basic instructions for performing the Eagle Pose?
81650e9f,Is there a reclining variation of the Eagle Pose?
81650e9f,What level of difficulty is the Eagle Pose considered to be?
81650e9f,What is the primary body focus when practicing Eagle Pose?
