In [38]:
import pandas as pd

In [39]:
# Load the recipe table, then turn every row into a dict keyed by column name.
df = pd.read_csv(filepath_or_buffer='data/data.csv')
documents = df.to_dict('records')

In [41]:
# Prompt used to synthesise ground-truth questions from one record.
# Single-brace fields are filled in by str.format(); the doubled braces around
# the JSON example come out as literal braces after formatting.
prompt_template = """
You emulate a user of our Nutrition assistant application.
Formulate 5 questions this user might ask based on the provided dietary_tags.
Make the questions specific to this diet.
The record should contain the answer to the questions, and the questions should
be complete and not too short. Use as few words as possible from the record.

The record:
recipe_name:{recipe_name}
ingredients:{ingredients}
nutritional_information:{nutritional_information}
dietary_tags:{dietary_tags}
meal_type:{meal_type}
cuisine_type:{cuisine_type}
instructions:{instructions}

Provide the output in parsable JSON without using code blocks:

{{"questions": ["question1", "question2", ..., "question5"]}}
""".strip()

In [42]:
# API Configuration
import os

import openai

# Read the key from the environment so the secret is never stored in the
# notebook; a missing variable raises KeyError here instead of failing later
# on the first request.
openai.api_key = os.environ["OPENAI_API_KEY"]


In [43]:
# Fill the template with the first record's fields to get a sample prompt
first_record = documents[0]
prompt = prompt_template.format(**first_record)

In [44]:
# Create the LLM Request Function
def llm(prompt, model='gpt-4o-mini'):
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: Text placed in the ``content`` of the one user message.
        model: Model name passed to the chat completion call; defaults to
            'gpt-4o-mini'.

    Returns:
        ``message.content`` of the first choice in the response.
    """
    response = openai.ChatCompletion.create(
        model=model,
        messages=[{"role": "user", "content": prompt}]
    )

    return response.choices[0].message.content



In [45]:
# Send the sample prompt to the model and keep the raw reply text
import json
import requests
questions = llm(prompt)

In [46]:
# Decode the raw reply to check that it is valid JSON; as the cell's last
# expression, the decoded value is what gets displayed.
json.loads(questions)

{'questions': ['What are the main ingredients in the Vegetarian Chili recipe?',
  'How many calories does the Vegetarian Chili contain?',
  'Is the Vegetarian Chili suitable for a vegan diet?',
  'What is the preparation method for making the Vegetarian Chili?',
  'What type of cuisine does the Vegetarian Chili belong to?']}

In [47]:
def generate_questions(doc):
    """Ask the LLM for questions about one record and return the raw reply.

    Args:
        doc: Mapping whose keys supply every placeholder in ``prompt_template``.

    Returns:
        The model's reply as an unparsed string. The prompt asks for JSON, but
        nothing here decodes or validates it; that is left to the caller.
    """
    prompt = prompt_template.format(**doc)
    # Delegate to llm() so the request parameters live in one place.
    return llm(prompt)

In [48]:
from tqdm.auto import tqdm

In [49]:
# Maps a record id to the list of questions generated for it.
results = dict()

In [50]:

# Generate questions for every record. Ids already present in `results` are
# skipped, so re-running this cell only requests what is still missing.
failed_ids = []

for doc in tqdm(documents):
    doc_id = doc['id']
    if doc_id in results:
        continue

    questions_raw = generate_questions(doc)
    try:
        results[doc_id] = json.loads(questions_raw)['questions']
    except (json.JSONDecodeError, KeyError, TypeError) as err:
        # One malformed reply must not abort the whole run. The id stays out of
        # `results`, so the next run of this cell retries it.
        failed_ids.append(doc_id)
        print(f"Skipping record {doc_id}: could not parse LLM reply ({err!r})")

  0%|          | 0/170 [00:00<?, ?it/s]

In [51]:
# Flatten {id: [question, ...]} into one (id, question) tuple per question.
final_results = [
    (record_id, question)
    for record_id, record_questions in results.items()
    for question in record_questions
]

In [52]:
# Peek at the first (id, question) tuple.
final_results[0]

(1, 'Is the Vegetarian Chili recipe suitable for a vegan diet?')

In [53]:

# One row per (id, question) tuple, with named columns.
df_results = pd.DataFrame(data=final_results, columns=["id", "question"])

In [54]:
# Write the ground-truth table to CSV, leaving out the DataFrame index
df_results.to_csv(path_or_buf='data/ground-truth-retrieval.csv', index=False)

In [55]:
# Show the beginning of the CSV that was just written.
!head data/ground-truth-retrieval.csv

id,question
1,Is the Vegetarian Chili recipe suitable for a vegan diet?
1,What are the main ingredients used in the Vegetarian Chili?
1,How many calories does the Vegetarian Chili contain per serving?
1,Can I add anything else to the Vegetarian Chili for more flavor?
1,What is the recommended meal type for the Vegetarian Chili?
2,Is the Grilled Chicken Salad suitable for someone with a gluten intolerance?
2,What is the calorie content of the Grilled Chicken Salad per serving?
2,Can you list the main ingredients used in this salad recipe?
2,How much protein does the Grilled Chicken Salad provide?
