In [1]:
import pandas as pd
import os, json
from tqdm.auto import tqdm
from openai import OpenAI

In [2]:
#!wget https://raw.githubusercontent.com/alexeygrigorev/minsearch/main/minsearch.py
#!pip install minsearch
import minsearch as minsearch

### Ingestion

In [3]:
# Read the knowledge base, put the row index in front as an explicit 'ID'
# column, and export every row as a plain dict for the steps below.
df_ck = pd.read_csv('../data/cooking_knowledge.csv')
df_ck.insert(loc=0, column='ID', value=df_ck.index)
documents = df_ck.to_dict(orient='records')

In [17]:
# Prompt used to generate evaluation questions for one knowledge-base record.
# The placeholders {type}, {question} and {response} are filled with str.format;
# the braces of the JSON example are doubled so format() emits them literally.
# The example uses double-quoted strings and no ellipsis so that it is itself
# valid JSON — a single-quoted example invites output that json.loads rejects.
prompt_template = """
You emulate the role of a chef trainee who uses the system to answer questions about meals you are trying to cook.
Formulate 5 questions that this user might ask based on the provided record.
Make the generated questions specific to this record.
The questions should be complete and not too short. Use as few words as possible from the record.
Replace non-standard characters with alphanumeric characters. Make sure the output is valid JSON.

The record:
type: {type}
question: {question}
answer: {response}

Provide the output as parsable JSON without code blocks, using double-quoted strings, in exactly this format:

{{"questions": ["question1", "question2", "question3", "question4", "question5"]}}
""".strip()

In [18]:
# os.environ['OPENAI_API_KEY'] = 'OPENAI_API_KEY' # For testing purposes

In [19]:
# API client used by generate_questions. No key is passed explicitly; presumably
# it is picked up from the OPENAI_API_KEY environment variable, as the
# commented-out cell above suggests — confirm for your environment.
client = OpenAI()

In [20]:
def generate_questions(doc):
    """Ask the chat model for evaluation questions about one record.

    Args:
        doc: Mapping unpacked into ``prompt_template.format``; it must provide
            every placeholder the template uses (``type``, ``question`` and
            ``response``). Extra keys are ignored by ``str.format``.

    Returns:
        The message content of the first completion choice: a string that is
        expected to hold a JSON object. Parsing is left to the caller.
    """
    prompt = prompt_template.format(**doc)

    response = client.chat.completions.create(
        model='gpt-4o-mini',
        messages=[{"role": "user", "content": prompt}],
        # JSON mode keeps the reply a bare JSON object (no code fences or
        # surrounding prose), which is what the caller's json.loads needs.
        # The API requires the word "JSON" to appear in the messages when this
        # is set; the prompt template mentions it.
        response_format={"type": "json_object"},
    )

    return response.choices[0].message.content

In [21]:
# Maps a document's 'ID' to its list of generated questions. Kept in its own
# cell so that re-running the generation loop reuses the entries collected so
# far and skips those IDs instead of starting over.
results = {}

In [36]:
# Generate questions for the first 1000 documents. IDs already present in
# `results` are skipped, so the cell can be re-run to resume an interrupted run.
# A reply that is not parsable JSON, or lacks the 'questions' key, no longer
# aborts the whole loop: the ID is recorded in `failed_ids` and left out of
# `results`, so the next run of this cell retries it.
failed_ids = []

for doc in tqdm(documents[:1000]):
    doc_id = doc['ID']
    if doc_id in results:
        continue

    questions_raw = generate_questions(doc)
    try:
        questions = json.loads(questions_raw)['questions']
    except (json.JSONDecodeError, KeyError, TypeError):
        # JSONDecodeError: malformed JSON; KeyError: no 'questions' key;
        # TypeError: empty (None) content or a non-object JSON value.
        failed_ids.append(doc_id)
        continue

    results[doc_id] = questions

if failed_ids:
    print(f'{len(failed_ids)} document(s) gave unusable output; re-run this cell to retry: {failed_ids}')

  0%|          | 0/1000 [00:00<?, ?it/s]

In [37]:
# Preview the first few entries only; `results` holds one entry per processed
# document, so displaying it whole floods the notebook output.
dict(list(results.items())[:5])

{0: ['Can you explain the purpose of HACCP in food safety?',
  'How does HACCP identify hazards in food production?',
  'What are the key principles of the HACCP system?',
  'Why is it important to control hazards in cooking?',
  'How can I implement HACCP in my kitchen?'],
 1: ['Can you explain the different types of frying methods?',
  'What foods are best suited for frying?',
  'How does frying enhance the flavor of a dish?',
  'What are the safety precautions for frying with hot oil?',
  'What are the health implications of frying foods?'],
 2: ['What ingredients are typically used in Navarin stew?',
  'How long should I cook the lamb for Navarin?',
  'Can I use other meats instead of lamb for Navarin?',
  'What type of vegetables work best in a Navarin stew?',
  'Is there a specific type of stock recommended for Navarin?'],
 3: ['Can you explain the role of betalin in vegetables?',
  'Which vegetables contain betalin?',
  'How does betalin affect the color of beets?',
  'Are there

In [38]:
# Flatten {doc_id: [question, ...]} into one (doc_id, question) tuple per
# question, following the dictionary's iteration order.
final_results = [
    (doc_id, question)
    for doc_id, doc_questions in results.items()
    for question in doc_questions
]

In [39]:
# Spot-check a single (doc_id, question) pair from the flattened list.
final_results[10]

(2, 'What ingredients are typically used in Navarin stew?')

In [45]:
# Ground-truth table: one row per generated question, alongside the id of the
# document it was generated from.
df_results = pd.DataFrame(
    data=final_results,
    columns=['id', 'question'],
)
df_results.head(n=20)

Unnamed: 0,id,question
0,0,Can you explain the purpose of HACCP in food s...
1,0,How does HACCP identify hazards in food produc...
2,0,What are the key principles of the HACCP system?
3,0,Why is it important to control hazards in cook...
4,0,How can I implement HACCP in my kitchen?
5,1,Can you explain the different types of frying ...
6,1,What foods are best suited for frying?
7,1,How does frying enhance the flavor of a dish?
8,1,What are the safety precautions for frying wit...
9,1,What are the health implications of frying foods?


In [46]:
# Sanity check: 1000 documents with 5 questions each should give (5000, 2).
df_results.shape

(5000, 2)

In [47]:
# Persist the ground-truth pairs; index=False keeps the row index out of the file.
df_results.to_csv('../data/ground_truth_2.csv', index=False)

In [48]:
# Peek at the first lines of the file just written (shell escape; needs a `head` command).
!head ../data/ground_truth_2.csv

id,question
0,Can you explain the purpose of HACCP in food safety?
0,How does HACCP identify hazards in food production?
0,What are the key principles of the HACCP system?
0,Why is it important to control hazards in cooking?
0,How can I implement HACCP in my kitchen?
1,Can you explain the different types of frying methods?
1,What foods are best suited for frying?
1,How does frying enhance the flavor of a dish?
1,What are the safety precautions for frying with hot oil?
