In [1]:
import pandas as pd
import os, json
from tqdm.auto import tqdm
from openai import OpenAI

In [2]:
#!wget https://raw.githubusercontent.com/alexeygrigorev/minsearch/main/minsearch.py
#!pip install minsearch
import minsearch



### Ingestion

In [3]:
# Load the knowledge base and expose each row's index as an explicit leading 'ID' column.
df_ck = (
    pd.read_csv('cooking_knowledge.csv')
    .rename_axis('ID')
    .reset_index()
)
df_ck.columns

Index(['ID', 'type', 'question', 'response'], dtype='object')

In [4]:
# One plain dict per row, keyed by column name.
documents = df_ck.to_dict('records')

In [38]:
# Prompt used to generate ground-truth questions for one knowledge-base record.
# Placeholders {type}, {question} and {response} are filled from the record via str.format;
# the doubled braces around the example render as literal braces.
# The example output is written as strictly valid JSON (double quotes, no "..." placeholder)
# because the model tends to mirror the example and the reply is parsed with json.loads.
prompt_template = """
    You emulate the role of a chef trainee who uses this system to answer questions about meals you are trying to cook.
    Formulate 5 questions that this user might ask based on the provided record.
    Make the generated questions specific to this record.
    The questions should be complete and not too short.  Use as few words as possible from the record.
    Replace non-standard characters with alphanumeric characters.

    The record:
    type: {type}
    question: {question}
    answer: {response}

    Provide the output as parsable JSON without code blocks, in exactly this format:

    {{"questions": ["question1", "question2", "question3", "question4", "question5"]}}
    """.strip()

In [None]:
# Never commit a real key in this cell.
# setdefault keeps a key that is already exported in the environment instead of overwriting
# it with the placeholder; the placeholder is only a fallback and must be replaced
# (e.g. `export OPENAI_API_KEY=...` before starting Jupyter) for API calls to succeed.
os.environ.setdefault('OPENAI_API_KEY', 'OPENAI_API_KEY')

In [39]:
# API client built with no explicit arguments.
# NOTE(review): presumably picks up OPENAI_API_KEY from the environment — confirm.
client = OpenAI()

In [40]:
def generate_questions(doc, model='gpt-4o-mini'):
    """Ask the chat model for questions a user might ask about one record.

    Args:
        doc: Mapping holding at least the keys referenced by ``prompt_template``
            (``type``, ``question``, ``response``); extra keys are ignored by
            ``str.format``.
        model: Chat model name passed to the API.

    Returns:
        The raw message content of the first choice, as a string. It is expected
        to be a JSON object and is left unparsed for the caller.
    """
    prompt = prompt_template.format(**doc)

    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        # JSON mode: the API constrains the reply to a syntactically valid JSON object.
        # It requires the prompt itself to mention JSON, which the template does.
        response_format={"type": "json_object"},
    )

    return response.choices[0].message.content

In [41]:
# Maps document ID -> list of generated questions. The generation loop skips IDs
# already present here, so re-run this cell only to start over from scratch.
results = dict()

In [46]:
# Generate questions for the first 100 documents. IDs already in `results` are skipped,
# so the cell can be re-run to fill in whatever is still missing.
failed_docs = {}  # doc ID -> raw reply that could not be used

for doc in tqdm(documents[:100]):
    doc_id = doc['ID']
    if doc_id in results:
        continue

    questions_raw = generate_questions(doc)
    try:
        questions = json.loads(questions_raw)['questions']
    except (json.JSONDecodeError, KeyError, TypeError):
        # A single malformed reply must not abort the whole batch: record it and move on.
        # The ID stays out of `results`, so a re-run retries it.
        failed_docs[doc_id] = questions_raw
        continue

    results[doc_id] = questions

if failed_docs:
    print(f'{len(failed_docs)} document(s) returned unusable output: {sorted(failed_docs)}')

  0%|          | 0/100 [00:00<?, ?it/s]

In [47]:
# Display the generated questions, keyed by document ID.
results

{0: ['Can you explain the HACCP system in detail?',
  'What steps are involved in implementing HACCP?',
  'How does HACCP contribute to food safety?',
  'What are some common hazards identified by HACCP?',
  'Why is HACCP important for food production?'],
 1: ['What methods are involved in frying food?',
  'How does frying enhance the flavor of ingredients?',
  'What types of oil are best for frying dishes?',
  'What is the difference between shallow frying and deep frying?',
  'What precautions should be taken while frying food?'],
 2: ['What ingredients are typically used in Navarin?',
  'How long should I simmer Navarin for best flavor?',
  'Can I use different meat for Navarin instead of lamb?',
  'What vegetables pair well with Navarin stew?',
  'Are there any specific cooking techniques required for making Navarin?'],
 3: ['What type of vegetables contain betalin?',
  'How does betalin affect the color of beets?',
  'Is betalin found in any other food besides beets?',
  'What are

In [48]:
# Flatten {doc_id: [questions]} into (doc_id, question) pairs, one per generated question.
final_results = [
    (doc_id, question)
    for doc_id, doc_questions in results.items()
    for question in doc_questions
]

In [49]:
# Spot-check one flattened (doc_id, question) pair.
final_results[10]

(2, 'What ingredients are typically used in Navarin?')

In [51]:
# One row per (document id, generated question) pair.
df_results = pd.DataFrame(
    data=final_results,
    columns=['id', 'question'],
)
df_results.head(n=20)

Unnamed: 0,id,question
0,0,Can you explain the HACCP system in detail?
1,0,What steps are involved in implementing HACCP?
2,0,How does HACCP contribute to food safety?
3,0,What are some common hazards identified by HACCP?
4,0,Why is HACCP important for food production?
5,1,What methods are involved in frying food?
6,1,How does frying enhance the flavor of ingredie...
7,1,What types of oil are best for frying dishes?
8,1,What is the difference between shallow frying ...
9,1,What precautions should be taken while frying ...


In [52]:
# Persist the ground-truth pairs; index=False keeps the row index out of the file.
df_results.to_csv(path_or_buf='ground_truth.csv', index=False)

In [53]:
# Show the first lines of the file written by the previous cell to confirm the CSV layout.
!head ground_truth.csv

id,question
0,Can you explain the HACCP system in detail?
0,What steps are involved in implementing HACCP?
0,How does HACCP contribute to food safety?
0,What are some common hazards identified by HACCP?
0,Why is HACCP important for food production?
1,What methods are involved in frying food?
1,How does frying enhance the flavor of ingredients?
1,What types of oil are best for frying dishes?
1,What is the difference between shallow frying and deep frying?
