In [1]:
import pandas as pd
import os
import minsearch
from openai import OpenAI
from tqdm.auto import tqdm
import random
import json

### Ingestion

In [2]:
# download minsearch
if not os.path.exists('minsearch.py'):
    # Download only if minsearch.py does not exist
    !wget https://raw.githubusercontent.com/alexeygrigorev/minsearch/main/minsearch.py
else:
    print("File already exists.")

File already exists.


In [3]:
# Load the data and display the first few rows
data = pd.read_csv('/workspaces/fitness-assistant-rag/data/yoga_poses.csv')
data.head()

Unnamed: 0,id,pose_name,type_of_practice,variation,position,difficulty,props_required,body_focus,benefits,synonyms,instructions,context
0,7ce8c60e,Downward Dog,Yin,Twisted,Mixed,Intermediate,Chair,Flexibility,Builds strength,"Downward Dog Alternative, Downward Dog Variation","Stand with your feet wide apart, and turn one ...",Best used in Downward Dog sequences or routine...
1,81650e9f,Eagle Pose,Yin,Reclining,Standing,Beginner,Not Required,Balance,Improves balance,"Eagle Pose Alternative, Eagle Pose Variation","Lie on your back, bend your knees, and lift yo...",Best used in Eagle Pose sequences or routines ...
2,2217a7ac,Upward Dog,Restorative,No Variation,Mixed,Intermediate,Blanket,Core,Improves posture,"Upward Dog Alternative, Upward Dog Variation",Lie flat on your back with arms at your sides....,Best used in Upward Dog sequences or routines ...
3,988e61a6,Triangle Pose,Vinyasa,Seated,Lying Down,Intermediate,Block,Core,Increases energy,"Triangle Pose Alternative, Triangle Pose Varia...","Kneel on the mat, then sit back on your heels....",Best used in Triangle Pose sequences or routin...
4,bd927dc7,Downward Dog,Ashtanga,With Block,Lying Down,Beginner,Chair,Back,Builds strength,"Downward Dog Alternative, Downward Dog Variation",Lie flat on your back with arms at your sides....,Best used in Downward Dog sequences or routine...


In [4]:
# Convert the data to a list of dictionaries
documents = data.to_dict(orient='records')

In [5]:
# Create an index for the search engine
index = minsearch.Index(
    text_fields=['pose_name', 'type_of_practice', 'variation', 'position',
       'difficulty', 'props_required', 'body_focus', 'benefits',
       'instructions'],
    keyword_fields=['id']
)

In [6]:
# Add the documents to the index
index.fit(documents)

<minsearch.Index at 0x7cd68b352bd0>

### RAG Flow

In [7]:
# Connect to OpenAI
client = OpenAI()

In [8]:
def search(query):
    boost = {}

    results = index.search(
        query=query,
        filter_dict={},
        boost_dict=boost,
        num_results=10
    )

    return results

In [9]:
prompt_template = """
You are a yoga guru with access to a comprehensive yoga poses database. Answer the QUESTION using only the information provided in the CONTEXT.
Treat similar words as synonyms and recognize variations of terms as equivalent.
Be concise, avoid repetition, and strictly adhere to the CONTEXT. 
If there are variations of the pose in the CONTEXT, tell the user about it.

QUESTION: {question}

CONTEXT:
{context}
""".strip()

entry_template = """
Pose: {pose_name}
Practice: {type_of_practice}
Variation: {variation}
Position: {position}
Difficulty: {difficulty}
Props: {props_required}
Focus: {body_focus}
Benefits: {benefits}
Instructions: {instructions}
""".strip()

def build_prompt(query, search_results):
    context = "\n\n".join([entry_template.format(**doc) for doc in search_results])
    return prompt_template.format(question=query, context=context)

In [10]:
def llm(prompt, model='gpt-4o-mini'):
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}]
    )
    
    return response.choices[0].message.content

In [11]:
def rag(query, model='gpt-4o-mini'):
    search_results = search(query)
    prompt = build_prompt(query, search_results)
    #print(prompt)
    answer = llm(prompt, model=model)
    return answer

In [12]:
question = "How do I perform the variation of Warrior I with a block?"
answer = rag(question)
print(answer)

To perform the variation of Warrior I with a block, follow these instructions:

1. **Position**: Begin by kneeling on the mat and sitting back on your heels.
2. **Set Up**: Place a block in front of you for support.
3. **Movement**: Stretch your arms forward, lowering your chest towards the mat while keeping your core engaged.
4. **Breathing**: Breathe deeply and relax into the pose.

This Warrior I variation focuses on the back and is practiced in restorative sessions.


### Retrieval Evaluation

In [13]:
data_question = pd.read_csv('../data/ground-truth-retrieval.csv')

In [14]:
data_question.head()

Unnamed: 0,id,question
0,7ce8c60e,What modifications can I make to the Downward ...
1,7ce8c60e,How does the Twisted variation of Downward Dog...
2,7ce8c60e,What are some common mistakes practitioners ma...
3,7ce8c60e,Can you explain how to safely integrate a chai...
4,7ce8c60e,In what sequences or routines is the Twisted D...


In [15]:
ground_truth = data_question.to_dict(orient='records')

In [16]:
ground_truth[0]

{'id': '7ce8c60e',
 'question': 'What modifications can I make to the Downward Dog pose to adapt it for my level as a beginner?'}

In [17]:
def hit_rate(relevance_total):
    cnt = 0

    for line in relevance_total:
        if True in line:
            cnt = cnt + 1

    return cnt / len(relevance_total)

def mrr(relevance_total):
    total_score = 0.0

    for line in relevance_total:
        for rank in range(len(line)):
            if line[rank] == True:
                total_score = total_score + 1 / (rank + 1)

    return total_score / len(relevance_total)

In [18]:
def minsearch_search(query):
    boost = {}

    results = index.search(
        query=query,
        filter_dict={},
        boost_dict=boost,
        num_results=10
    )

    return results

In [19]:
def evaluate(ground_truth, search_function):
    relevance_total = []

    for q in tqdm(ground_truth):
        doc_id = q['id']
        results = search_function(q)
        relevance = [d['id'] == doc_id for d in results]
        relevance_total.append(relevance)

    return {
        'hit_rate': hit_rate(relevance_total),
        'mrr': mrr(relevance_total),
    }

In [20]:
evaluate(ground_truth, lambda q: minsearch_search(q['question']))

  0%|          | 0/720 [00:00<?, ?it/s]

{'hit_rate': 0.8694444444444445, 'mrr': 0.5250424382716046}

### Finding the best paramenters

In [31]:
data_validation = data_question[:360]
data_test = data_question[360:]

In [32]:
def simple_optimize(param_ranges, objective_function, n_iterations=10):
    best_params = None
    best_score = float('-inf')  # Assuming we're minimizing. Use float('-inf') if maximizing.

    for _ in range(n_iterations):
        # Generate random parameters
        current_params = {}
        for param, (min_val, max_val) in param_ranges.items():
            if isinstance(min_val, int) and isinstance(max_val, int):
                current_params[param] = random.randint(min_val, max_val)
            else:
                current_params[param] = random.uniform(min_val, max_val)
        
        # Evaluate the objective function
        current_score = objective_function(current_params)
        
        # Update best if current is better
        if current_score > best_score:  # Change to > if maximizing
            best_score = current_score
            best_params = current_params
    
    return best_params, best_score


In [33]:
gt_val = data_validation.to_dict(orient='records')

In [34]:
def minsearch_search(query, boost=None):
    if boost is None:
        boost = {}

    results = index.search(
        query=query,
        filter_dict={},
        boost_dict=boost,
        num_results=10
    )

    return results

In [35]:
param_ranges = {
    'pose_name': (0.0, 3.0),
    'type_of_practice': (0.0, 3.0),
    'variation': (0.0, 3.0),
    'position': (0.0, 3.0),
    'difficulty': (0.0, 3.0),
    'props_required': (0.0, 3.0),
    'body_focus': (0.0, 3.0),
    'benefits': (0.0, 3.0),
    'synonyms': (0.0, 3.0),
    'instructions': (0.0, 3.0),
    'context': (0.0, 3.0)
}

def objective(boost_params):
    def search_function(q):
        return minsearch_search(q['question'], boost_params)

    results = evaluate(gt_val, search_function)
    return results['mrr']

In [36]:
simple_optimize(param_ranges, objective, n_iterations=25)

  0%|          | 0/360 [00:00<?, ?it/s]

  0%|          | 0/360 [00:00<?, ?it/s]

  0%|          | 0/360 [00:00<?, ?it/s]

  0%|          | 0/360 [00:00<?, ?it/s]

  0%|          | 0/360 [00:00<?, ?it/s]

  0%|          | 0/360 [00:00<?, ?it/s]

  0%|          | 0/360 [00:00<?, ?it/s]

  0%|          | 0/360 [00:00<?, ?it/s]

  0%|          | 0/360 [00:00<?, ?it/s]

  0%|          | 0/360 [00:00<?, ?it/s]

  0%|          | 0/360 [00:00<?, ?it/s]

  0%|          | 0/360 [00:00<?, ?it/s]

  0%|          | 0/360 [00:00<?, ?it/s]

  0%|          | 0/360 [00:00<?, ?it/s]

  0%|          | 0/360 [00:00<?, ?it/s]

  0%|          | 0/360 [00:00<?, ?it/s]

  0%|          | 0/360 [00:00<?, ?it/s]

  0%|          | 0/360 [00:00<?, ?it/s]

  0%|          | 0/360 [00:00<?, ?it/s]

  0%|          | 0/360 [00:00<?, ?it/s]

  0%|          | 0/360 [00:00<?, ?it/s]

  0%|          | 0/360 [00:00<?, ?it/s]

  0%|          | 0/360 [00:00<?, ?it/s]

  0%|          | 0/360 [00:00<?, ?it/s]

  0%|          | 0/360 [00:00<?, ?it/s]

({'pose_name': 1.77295549488741,
  'type_of_practice': 1.7646875012919119,
  'variation': 0.39512555991207565,
  'position': 2.0631444783206327,
  'difficulty': 1.4963276491573105,
  'props_required': 0.2392338995716874,
  'body_focus': 1.0491245848640036,
  'benefits': 1.7364406525582377,
  'synonyms': 2.5022067788712308,
  'instructions': 0.49163944386874336,
  'context': 2.1715194651138052},
 0.6116754850088183)

In [37]:
def minsearch_improved(query):
    boost = {
        'pose_name': 1.77295549488741,
        'type_of_practice': 1.7646875012919119,
        'variation': 0.39512555991207565,
        'position': 2.0631444783206327,
        'difficulty': 1.4963276491573105,
        'props_required': 0.2392338995716874,
        'body_focus': 1.0491245848640036,
        'benefits': 1.7364406525582377,
        'synonyms': 2.5022067788712308,
        'instructions': 0.49163944386874336,
        'context': 2.1715194651138052
    }

    results = index.search(
        query=query,
        filter_dict={},
        boost_dict=boost,
        num_results=10
    )

    return results

evaluate(ground_truth, lambda q: minsearch_improved(q['question']))


  0%|          | 0/720 [00:00<?, ?it/s]

{'hit_rate': 0.9597222222222223, 'mrr': 0.5865272266313926}

In [38]:
prompt2_template = """
You are an expert evaluator for a RAG system.
Your task is to analyze the relevance of the generated answer to the given question.
Based on the relevance of the generated answer, you will classify it
as "NON_RELEVANT", "PARTLY_RELEVANT", or "RELEVANT".

Here is the data for evaluation:

Question: {question}
Generated Answer: {answer_llm}

Please analyze the content and context of the generated answer in relation to the question
and provide your evaluation in parsable JSON without using code blocks:

{{
  "Relevance": "NON_RELEVANT" | "PARTLY_RELEVANT" | "RELEVANT",
  "Explanation": "[Provide a brief explanation for your evaluation]"
}}
""".strip()

In [39]:
len(ground_truth)

720

In [40]:
record = ground_truth[0]
question = record['question']
answer_llm = rag(question)

In [41]:
prompt = prompt2_template.format(question=question, answer_llm=answer_llm)
print(prompt)

You are an expert evaluator for a RAG system.
Your task is to analyze the relevance of the generated answer to the given question.
Based on the relevance of the generated answer, you will classify it
as "NON_RELEVANT", "PARTLY_RELEVANT", or "RELEVANT".

Here is the data for evaluation:

Question: What modifications can I make to the Downward Dog pose to adapt it for my level as a beginner?
Generated Answer: To adapt the Downward Dog pose for your level as a beginner, consider the following modifications:

1. **Use a Chair**: Practice the Downward Dog variation with a chair for added support while maintaining the pose's benefits for flexibility and balance. 

2. **Lying Down**: To further simplify, you can practice a lying down position, where you lie on your back, bend your knees, and lift your hips toward the ceiling. Keep your feet planted on the mat for stability.

These modifications will help you ease into the pose while ensuring comfort and safety.

Please analyze the content and

In [42]:
data_sample = data_question.sample(n=200, random_state=1)

In [43]:
sample = data_sample.to_dict(orient='records')

In [45]:
evaluations = []

for record in tqdm(sample):
    question = record['question']
    answer_llm = rag(question) 

    prompt = prompt2_template.format(
        question=question,
        answer_llm=answer_llm
    )

    evaluation = llm(prompt)
    evaluation = json.loads(evaluation)

    evaluations.append((record, answer_llm, evaluation))

  0%|          | 0/200 [00:00<?, ?it/s]

In [46]:
data_eval = pd.DataFrame(evaluations, columns=['record', 'answer', 'evaluation'])

data_eval['id'] = data_eval.record.apply(lambda d: d['id'])
data_eval['question'] = data_eval.record.apply(lambda d: d['question'])

data_eval['relevance'] = data_eval.evaluation.apply(lambda d: d['Relevance'])
data_eval['explanation'] = data_eval.evaluation.apply(lambda d: d['Explanation'])

del data_eval['record']
del data_eval['evaluation']

In [48]:
data_eval.relevance.value_counts()

relevance
RELEVANT           132
PARTLY_RELEVANT     63
NON_RELEVANT         5
Name: count, dtype: int64

In [47]:
data_eval.relevance.value_counts(normalize=True)

relevance
RELEVANT           0.660
PARTLY_RELEVANT    0.315
NON_RELEVANT       0.025
Name: proportion, dtype: float64

In [49]:
data_eval.to_csv('../data/rag-eval-gpt-4o-mini.csv', index=False)

In [50]:
data_eval[data_eval.relevance == 'NON_RELEVANT']

Unnamed: 0,answer,id,question,relevance,explanation
53,To achieve the correct form in Chair Pose and ...,c6a20421,What are the key instructions for achieving th...,NON_RELEVANT,The generated answer does not correctly addres...
83,The key instructions for performing the Seated...,d1d86d31,What are the key instructions for performing t...,NON_RELEVANT,The generated answer provides instructions tha...
93,The essential steps to correctly perform the T...,0fc8bd91,What are the essential steps to correctly perf...,NON_RELEVANT,The generated answer does not correctly addres...
120,"To perform the Bridge Pose effectively, the st...",88c36c85,Can you explain the proper standing position r...,NON_RELEVANT,The answer discusses a standing position that ...
143,To perform Half Pigeon Pose correctly:\n\n1. *...,bbfe10fc,Can you describe how to perform Half Pigeon Po...,NON_RELEVANT,The generated answer does not address the spec...


In [51]:
evaluations_gpt4o = []

for record in tqdm(sample):
    question = record['question']
    answer_llm = rag(question, model='gpt-4o') 

    prompt = prompt2_template.format(
        question=question,
        answer_llm=answer_llm
    )

    evaluation = llm(prompt)
    evaluation = json.loads(evaluation)
    
    evaluations_gpt4o.append((record, answer_llm, evaluation))

  0%|          | 0/200 [00:00<?, ?it/s]

In [52]:
data_eval = pd.DataFrame(evaluations_gpt4o, columns=['record', 'answer', 'evaluation'])

data_eval['id'] = data_eval.record.apply(lambda d: d['id'])
data_eval['question'] = data_eval.record.apply(lambda d: d['question'])

data_eval['relevance'] = data_eval.evaluation.apply(lambda d: d['Relevance'])
data_eval['explanation'] = data_eval.evaluation.apply(lambda d: d['Explanation'])

del data_eval['record']
del data_eval['evaluation']

In [53]:
data_eval.relevance.value_counts()

relevance
RELEVANT           98
PARTLY_RELEVANT    80
NON_RELEVANT       22
Name: count, dtype: int64

In [54]:
data_eval.relevance.value_counts(normalize=True)

relevance
RELEVANT           0.49
PARTLY_RELEVANT    0.40
NON_RELEVANT       0.11
Name: proportion, dtype: float64

In [55]:
data_eval.to_csv('../data/rag-eval-gpt-4o.csv', index=False)