In [1]:
import os
from dotenv import load_dotenv
import pandas as pd
import json
import openai
from openai import OpenAI
import hashlib
from tqdm.auto import tqdm

In [2]:
# Load the raw recipe collection from JSON into a DataFrame.
recipes_json_path = '../data/recipes.json'
df = pd.read_json(recipes_json_path)

In [3]:
# Derive a short, content-based id for every recipe: a prefix of the MD5 hex
# digest of the row's string representation.
#
# A fixed 4-character prefix only has 16**4 = 65,536 possible values, so with a
# few hundred recipes two different rows can easily end up with the same id.
# Code that keys on the id would then silently skip or merge one of them.
# Start with 4 characters (ids are unchanged when nothing collides) and lengthen
# the prefix for all rows until the ids are unique or the full 32-character
# digest is in use (rows whose string representation is identical stay equal).
row_digests = df.apply(
    lambda row: hashlib.md5(str(row).encode('utf-8')).hexdigest(),
    axis=1,
)
id_length = 4
while id_length < 32 and row_digests.str[:id_length].duplicated().any():
    id_length += 1
df['id'] = row_digests.str[:id_length]

In [4]:
# Drop the 'output' and 'date' columns from the working frame.
unused_columns = ['output', 'date']
df = df.drop(columns=unused_columns)

In [5]:
# Preview the first five rows of the frame, including the generated 'id' column.
df.head()

Unnamed: 0,title,tags,introduction,ingredients,direction,id
0,Creamy Mashed Potatoes,"[potato, side, cheesefare]",![Creamy Mashed Potatoes](/pix/creamy-mashed-p...,The quantities here are for about four adult p...,1. Peel and cut the potatoes into medium sized...,83f4
1,Red Sauce (Ragu all'Italiana),"[italian, sauce]",My great-grandma's red sauce. All purpose: goo...,- 1/3 lb salt pork - 2 lb chuck roast - 3 Clov...,"1. Mince the garlic, peel the carrot, peel and...",f114
2,Turmeric Flatbread,"[bread, turkish]",A great companion to Turkish Red Lentil Soup. ...,- 1 cup Wheat Flour (white or whole) - 1/2 Tbs...,"1. Combine flour, turmeric, salt and baking po...",5a90
3,Zurich-Style Meat Saute,"[beef, cream, swiss, quick]","Originally called ""Züri Gschnätzlets"" (Zurich ...","- 600g\tBeef, finely sliced - 250g\tMushrooms,...",1. Fry the meat in butter at high heat until i...,0b3f
4,Tuscan Style Pork Roast,"[italian, pork, roast]",![tuscan-style-pork-roast](/pix/tuscan-style-p...,- 1 pork Roast - 2-3 Tbsp fresh rosemary - 8 c...,1.\tPreheat oven to 275°F (135°C) 2.\tButterfl...,bacf


In [6]:
# Write the cleaned recipes (with ids) to CSV, leaving out the DataFrame index.
clean_recipes_csv_path = '../data/cleandata_recipes.csv'
df.to_csv(clean_recipes_csv_path, index=False)

In [6]:
# One dict per recipe row, keyed by column name.
documents = df.to_dict('records')

In [7]:
# Spot-check a single record (index 57) to see what one document looks like.
documents[57]

{'title': 'Soleier (pickled eggs)',
 'tags': ['snack', 'german', 'eggs', 'pub'],
 'introduction': '![Soleier](/pix/soleier.webp) Soleier are a german pub dish. They are typically enjoyed between two beers. - ⏲️ Preparation time: 10min - 🍳 Cook time: 10min - 🍽️ Servings: 10',
 'ingredients': '- peels of five onions - 10 eggs - 2 loorber leaves - 3 cloves - 4 allspice - 1Tbsp peppercorns - 1Tbsp caraway seeds - 1Tbsp mustard seeds - 1tsp of Szechuan pepper (optional) - 1tsp lovage (optional) - 1tsp nutmeg (optional) - 100mL vinegar - 3 cloves of garlic',
 'direction': '1. Put 750mL of water and the peels of five onions in a pot and let them cook for five minutes. Remove the peels about twenty minutes later. 2. Put the eggs, one teaspoon of sugar, two tablespoons of salt and all spices (but vinegar and garlic) in the pot, bring it to a boil and cook them for seven minutes. 3. Use a spoon to remove the eggs from the pot and crack them all around. Do not remove the eggshell! 4. Place the eg

In [14]:
# Load variables from a local .env file into the process environment.
load_dotenv()

# Expose the key on the openai module, then build the client used by the
# request helpers defined below.
api_key = os.getenv("OPENAI_API_KEY")
openai.api_key = api_key
client = OpenAI()

In [15]:
# Prompt template asking the model for five user-style questions about one recipe.
# The {title}, {tags}, {introduction}, {ingredients} and {direction} placeholders
# are filled by str.format; the doubled braces on the last line of the template
# come out as literal JSON braces after formatting.
prompt_template="""
    you emulate a user of our recipe instrictions application.
    formulate 5 questions this user might ask based on the dish.
    make the questions specific to this dish,
    recipe_docs should contain answer to this dish,and the questions should be complete  based on each user preferences.
    recipe_docs:
    title:{title}
    tags:{tags}
    introduction:{introduction}
    ingredients:{ingredients}
    direction:{direction}
    provide the output in parsable JSON without using code blocks:
    {{"questions":["question1","question2", ...,"question5"]}}
    """.strip()
# Render the template for the first recipe as a manual check; record keys that the
# template does not reference are ignored by str.format.
prompt=prompt_template.format(**documents[0])
    
    

In [16]:
def llm(prompt):
    """Send `prompt` as a single user message to gpt-4o-mini and return the reply text."""
    user_message = {"role": "user", "content": prompt}
    completion = client.chat.completions.create(
        model='gpt-4o-mini',
        messages=[user_message],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [17]:
# Raw model reply for the sample prompt; the next cell parses it as JSON.
questions=llm(prompt)

In [18]:
# Check that the sample reply parses as JSON and inspect the generated questions.
json.loads(questions)

{'questions': ['Can I substitute the bacon with a vegetarian option in the creamy mashed potatoes recipe?',
  'What type of cheese works best for adding flavor to the creamy mashed potatoes?',
  'How can I make the mashed potatoes gluten-free while still keeping them creamy?',
  'What can I use instead of mayonnaise if I want a lighter version of the creamy mashed potatoes?',
  'How do I adjust the recipe if I want to make a smaller portion for just two people?']}

In [19]:
def generate_questions(doc):
    """Ask gpt-4o-mini for questions about one recipe and return the raw JSON text.

    Parameters
    ----------
    doc : dict
        One recipe record; its keys fill the placeholders of `prompt_template`
        (keys the template does not use are ignored by `str.format`).

    Returns
    -------
    str
        The content of the first completion choice, to be parsed with `json.loads`
        by the caller.
    """
    prompt = prompt_template.format(**doc)
    response = client.chat.completions.create(
        model='gpt-4o-mini',
        messages=[{"role": "user", "content": prompt}],
        # JSON mode: without it the model occasionally returns malformed JSON
        # (e.g. unescaped quotes inside a question), which the caller cannot parse.
        # JSON mode requires the word "JSON" in the messages; the prompt has it.
        response_format={"type": "json_object"},
    )
    return response.choices[0].message.content
    

In [20]:
from json.decoder import JSONDecodeError 

In [21]:
# Generate questions for every recipe and collect them by recipe id.
#
# results    : recipe id -> list of generated questions
# failed_ids : ids for which no parsable reply was obtained, so they can be
#              inspected or retried instead of silently going missing
results = {}
failed_ids = []
max_attempts = 2  # model output varies between calls, so one retry often succeeds

for doc in tqdm(documents):
    question_id = doc['id']
    if question_id in results:
        # This id has already been processed earlier in this run.
        continue

    for attempt in range(1, max_attempts + 1):
        try:
            llm_questions = generate_questions(doc)
            parsed_response = json.loads(llm_questions)
            results[question_id] = parsed_response['questions']
            break
        except (JSONDecodeError, KeyError, TypeError) as e:
            # JSONDecodeError: reply is not valid JSON.
            # KeyError/TypeError: valid JSON but not the expected {"questions": [...]} shape,
            # or an empty reply. Report which recipe failed and keep the loop running.
            print(f"Error parsing JSON for id {question_id} (attempt {attempt}/{max_attempts}): {e}")
    else:
        failed_ids.append(question_id)

  0%|          | 0/360 [00:00<?, ?it/s]

Error parsing JSON: Expecting ',' delimiter: line 1 column 397 (char 396)
Error parsing JSON: Expecting ',' delimiter: line 1 column 499 (char 498)
Error parsing JSON: Unterminated string starting at: line 1 column 441 (char 440)


In [22]:
# Flatten {id: [question, ...]} into a list of (id, question) pairs.
final_questions = [
    (recipe_id, question)
    for recipe_id, question_list in results.items()
    for question in question_list
]
            

In [23]:
# Tabulate the (id, question) pairs with explicit column names.
question_columns = ['id', 'questions']
df_questions = pd.DataFrame(final_questions, columns=question_columns)

In [24]:
# Preview the first five (id, question) rows.
df_questions.head()

Unnamed: 0,id,questions
0,83f4,What type of potatoes are best for making crea...
1,83f4,Can I use a substitute for mayonnaise in the r...
2,83f4,How can I adjust the recipe if I want to make ...
3,83f4,What can I do to make the mashed potatoes extr...
4,83f4,Is it possible to prepare the creamy mashed po...


In [25]:
# Write the generated questions to CSV, leaving out the DataFrame index.
ground_truth_csv_path = '../data/ground_truth_data.csv'
df_questions.to_csv(ground_truth_csv_path, index=False)

In [26]:
# Shell escape: show the first lines of the written CSV to confirm its layout.
!head ../data/ground_truth_data.csv

id,questions
83f4,"What type of potatoes are best for making creamy mashed potatoes, and do you recommend any specific variety?"
83f4,"Can I use a substitute for mayonnaise in the recipe, and if so, what would you suggest for a similar creamy texture?"
83f4,"How can I adjust the recipe if I want to make it vegan or dairy-free, especially considering the cheese and milk ingredients?"
83f4,"What can I do to make the mashed potatoes extra cheesy without overwhelming the flavor, and how much more cheese should I add?"
83f4,"Is it possible to prepare the creamy mashed potatoes in advance, and if so, how should I store and reheat them to maintain their creaminess?"
f114,What can I substitute for salt pork if I can't find it in my local store?
f114,How should I adjust the cooking time if I want to make a smaller batch of the red sauce?
f114,"Can I use fresh tomatoes instead of canned tomatoes, and if so, how would that affect the recipe?"
f114,What are some ideal dishes to pair with this red 