In [1]:
import pandas as pd
import numpy as np
import os
from openai import AzureOpenAI
from dotenv import load_dotenv
from sklearn.metrics.pairwise import cosine_similarity
import ast
from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility, MilvusClient, SearchResult

load_dotenv()


True

# Raw Recipe Processing

In [5]:
recipe_data = pd.read_csv("../data/RAW_recipes.csv")

In [10]:
recipe_data.head()

Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients
0,arriba baked winter squash mexican style,137739,55,47892,2005-09-16,"['60-minutes-or-less', 'time-to-make', 'course...","[51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0]",11,"['make a choice and proceed with recipe', 'dep...",autumn is my favorite time of year to cook! th...,"['winter squash', 'mexican seasoning', 'mixed ...",7
1,a bit different breakfast pizza,31490,30,26278,2002-06-17,"['30-minutes-or-less', 'time-to-make', 'course...","[173.4, 18.0, 0.0, 17.0, 22.0, 35.0, 1.0]",9,"['preheat oven to 425 degrees f', 'press dough...",this recipe calls for the crust to be prebaked...,"['prepared pizza crust', 'sausage patty', 'egg...",6
2,all in the kitchen chili,112140,130,196586,2005-02-25,"['time-to-make', 'course', 'preparation', 'mai...","[269.8, 22.0, 32.0, 48.0, 39.0, 27.0, 5.0]",6,"['brown ground beef in large pot', 'add choppe...",this modified version of 'mom's' chili was a h...,"['ground beef', 'yellow onions', 'diced tomato...",13
3,alouette potatoes,59389,45,68585,2003-04-14,"['60-minutes-or-less', 'time-to-make', 'course...","[368.1, 17.0, 10.0, 2.0, 14.0, 8.0, 20.0]",11,['place potatoes in a large pot of lightly sal...,"this is a super easy, great tasting, make ahea...","['spreadable cheese with garlic and herbs', 'n...",11
4,amish tomato ketchup for canning,44061,190,41706,2002-10-25,"['weeknight', 'time-to-make', 'course', 'main-...","[352.9, 1.0, 337.0, 23.0, 3.0, 0.0, 28.0]",5,['mix all ingredients& boil for 2 1 / 2 hours ...,my dh's amish mother raised him on this recipe...,"['tomato juice', 'apple cider vinegar', 'sugar...",8


In [None]:
print(recipe_data['ingredients'][100])


def parse_recipe_ingredients(ingredients:str)->str:
    ingredient_list= ast.literal_eval(ingredients)
    result = ""
    for i, ingredient in enumerate(ingredient_list):
        if(i < len(ingredient_list)-1):
            result+= f"{ingredient}, "
        else:
            result+= f"{ingredient}"
    return result

parsed_ingredients = parse_recipe_ingredients(recipe_data['ingredients'][100])
print(parsed_ingredients)

In [None]:
recipe_data['steps'][0]

## Testing Model

In [None]:
client = AzureOpenAI(
  api_key = os.getenv("AZURE_OPENAI_EMBEDDING_MODEL_KEY"),  
  api_version = os.getenv("AZURE_OPENAI_EMBEDDING_MODEL_API_VERSION"),
  azure_endpoint =os.getenv("AZURE_OPENAI_ENDPOINT")
)

response = client.embeddings.create(
            input = [parse_recipe_ingredients(recipe_data['ingredients'][100]),"cucumber, lamb, chicken"],
            model= os.getenv("AZURE_OPENAI_EMBEDDING_MODEL_DEPLOYMENT")
        )
json_response = response.model_dump()


In [None]:
embedding_1=np.array(json_response['data'][0]['embedding'])
embedding_2=np.array(json_response['data'][1]['embedding'])
# similarity_score = np.dot(embedding_1, embedding_2) / (np.linalg.norm(embedding_1) * np.linalg.norm(embedding_2))
similarity_score = cosine_similarity(embedding_1.reshape(1,-1),embedding_2.reshape(1,-1)) 
print(similarity_score)

In [None]:
client_gpt = AzureOpenAI(
              api_key=os.getenv("AZURE_OPENAI_KEY_GPT_35T"),  
              api_version=os.getenv("AZURE_OPENAI_API_VERSION_GPT_35T"),
              azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
            )

response = client_gpt.chat.completions.create(
        model=os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT_GPT_35T"),
        messages=[
            { "role": "system", "content": "You are a helpful assistant." },
            { "role": "user", "content": [  
                { 
                    "type": "text", 
                    "text": f"I have this ingredients: {parse_recipe_ingredients(recipe_data.iloc[100]['ingredients'])}" 
                },
                { 
                    "type": "text", 
                    "text": f"And this is the step to create a food from my ingredients: {recipe_data.iloc[100]['steps']}" 
                },
                { 
                    "type": "text", 
                    "text": "Please generate a maximum 3 sentences of food review based on the information above" 
                },
            ] } 
        ],
        max_tokens=2000 
    )
 


In [None]:
print(response.choices[0].message.content)

## Continue pre-processing data

In [None]:
recipe_data = recipe_data[["name", "id","steps","description","ingredients"]]


In [None]:
# Function to check if a value is a string
def is_string(value):
    return isinstance(value, str)

In [None]:
# Apply the function to the DataFrame and replace non-string values with NaN
recipe_data['ingredients'] = recipe_data['ingredients'].apply(lambda x: x if is_string(x) else np.nan)
recipe_data['name'] = recipe_data['name'].apply(lambda x: x if is_string(x) else np.nan)
# Drop rows with any NaN values
recipe_data = recipe_data.dropna()

In [None]:
first_1000_recipe_data = recipe_data.head(1000)

In [None]:
import re
def clean_text(text):
    # Remove \r\n and other special characters
    text = re.sub(r'\s+', ' ', text)  # Replace multiple whitespace with a single space
    text = re.sub(r'\\r\\n', ' ', text)  # Remove \r\n
    text = re.sub(r'[^A-Za-z0-9\s.,!?]', '', text)  # Remove special characters except punctuation
    text = text.strip()  # Remove leading and trailing whitespace
    return text


In [None]:
first_1000_recipe_data['name'] = first_1000_recipe_data['name'].apply(clean_text)
first_1000_recipe_data['ingredients'] = first_1000_recipe_data['ingredients'].apply(clean_text)

In [None]:
first_1000_recipe_data

In [None]:
first_1000_recipe_data.to_csv("../data/filtered_raw_recipe.csv",index=False)

# Start Embedding

In [None]:
first_1000_recipe_data = pd.read_csv("../data/filtered_raw_recipe.csv")

In [None]:
print(first_1000_recipe_data['name'][100])
print(first_1000_recipe_data['ingredients'][100])

In [None]:
client = AzureOpenAI(
    api_key = os.getenv("AZURE_OPENAI_EMBEDDING_MODEL_KEY"),  
    api_version = os.getenv("AZURE_OPENAI_EMBEDDING_MODEL_API_VERSION"),
    azure_endpoint =os.getenv("AZURE_OPENAI_ENDPOINT") 
)


def convert_column_to_embedding(column_name:str,increment:int=10,max_row:int=1000) -> list[list[int]]:
    list_of_embedding = []
    counter = 0
    while counter < max_row:
        # Get a response from Embedding API
        input_api = [row_value for row_value in first_1000_recipe_data[column_name][counter:counter+increment]]
        print("counter",counter)
        response = client.embeddings.create(
            input = input_api,
            model= os.getenv("AZURE_OPENAI_EMBEDDING_MODEL_DEPLOYMENT")
        )
        json_response = response.model_dump()
        for data_response in json_response['data']:
            list_of_embedding.append(data_response['embedding'])
        counter += increment
    return list_of_embedding


In [None]:
name_embedding = convert_column_to_embedding('name',20,len(first_1000_recipe_data))
ingredients_embedding = convert_column_to_embedding('ingredients',20,len(first_1000_recipe_data))

In [None]:
for embed in ingredients_embedding:
    print(len(embed))


In [None]:
first_1000_recipe_data['name_embedding']=[str(name) for name in name_embedding]
first_1000_recipe_data['ingredients_embedding']=[str(desc) for desc in ingredients_embedding]

In [None]:
first_1000_recipe_data

In [None]:
first_1000_recipe_data.to_csv("../data/embedded_raw_recipe.csv",index=False)

# Add syntethic food review

In [None]:
embedding_recipe = pd.read_csv("../data/embedded_raw_recipe.csv")

In [None]:
embedding_recipe.head()

In [None]:
embedding_recipe= embedding_recipe.head(500)

In [None]:
def generate_syntethic_review()->list[str]:
    client_gpt = AzureOpenAI(
              api_key=os.getenv("AZURE_OPENAI_KEY_GPT_35T"),  
              api_version=os.getenv("AZURE_OPENAI_API_VERSION_GPT_35T"),
              azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
            )
    generated_reviews = []
    for i in range(len(embedding_recipe)):
        response = client_gpt.chat.completions.create(
            model=os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT_GPT_35T"),
            messages=[
                { "role": "system", "content": "You are a helpful assistant." },
                { "role": "user", "content": [  
                    { 
                        "type": "text", 
                        "text": f"I have this ingredients: {embedding_recipe.iloc[i]['ingredients']}" 
                    },
                    { 
                        "type": "text", 
                        "text": f"And this is the step to create a food from my ingredients: {embedding_recipe.iloc[i]['steps']}" 
                    },
                    { 
                        "type": "text", 
                        "text": "Please generate a maximum 2 sentences of food review based on the information above" 
                    },
                ] } 
            ],
            max_tokens=2000 
        )
        print("iter: ",i)
        generated_reviews.append(response.choices[0].message.content)
    return generated_reviews

In [None]:
generated_reviews = generate_syntethic_review()

In [None]:
embedding_recipe['synthetic_review'] = generated_reviews

In [None]:
embedding_recipe.to_csv("../data/recipe_with_review.csv")

# Add embedding for synthetic review

In [None]:
recipe_with_review = pd.read_csv("../data/recipe_with_review.csv")

In [None]:
recipe_with_review.head()

In [None]:
client = AzureOpenAI(
    api_key = os.getenv("AZURE_OPENAI_EMBEDDING_MODEL_KEY"),  
    api_version = os.getenv("AZURE_OPENAI_EMBEDDING_MODEL_API_VERSION"),
    azure_endpoint =os.getenv("AZURE_OPENAI_ENDPOINT") 
)


def convert_column_to_embedding(column_name:str,increment:int=10,max_row:int=1000) -> list[list[int]]:
    list_of_embedding = []
    counter = 0
    while counter < max_row:
        # Get a response from Embedding API
        input_api = [row_value for row_value in recipe_with_review[column_name][counter:counter+increment]]
        print("counter",counter)
        response = client.embeddings.create(
            input = input_api,
            model= os.getenv("AZURE_OPENAI_EMBEDDING_MODEL_DEPLOYMENT")
        )
        json_response = response.model_dump()
        for data_response in json_response['data']:
            list_of_embedding.append(data_response['embedding'])
        counter += increment
    return list_of_embedding

In [None]:
review_embedding = convert_column_to_embedding('synthetic_review',20,len(recipe_with_review))

In [None]:
recipe_with_review['synthetic_review_embedding'] = review_embedding

In [None]:
recipe_with_review

In [None]:
recipe_with_review.to_csv("../data/embedded_recipe_with_review.csv",index=False)

# Insert Embedding Data into DB

In [3]:
connections.connect(host='0.0.0.0', port='19530')

def create_milvus_collection(collection_name:str, dim:int, drop_existing_collection:bool=True):
    connections.connect(host='0.0.0.0', port='19530')
    if drop_existing_collection and utility.has_collection(collection_name):
        utility.drop_collection(collection_name)
    
    fields = [
            FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=False),
            FieldSchema(name="name", dtype=DataType.VARCHAR, max_length=500),   
            FieldSchema(name="name_embedding", dtype=DataType.FLOAT_VECTOR, dim=dim),
            FieldSchema(name="steps", dtype=DataType.VARCHAR, max_length=5000),
            FieldSchema(name="description", dtype=DataType.VARCHAR, max_length=3000),
            FieldSchema(name="ingredients", dtype=DataType.VARCHAR, max_length=3000),
            FieldSchema(name="ingredients_embedding", dtype=DataType.FLOAT_VECTOR, dim=dim),
            FieldSchema(name="synthetic_review", dtype=DataType.VARCHAR, max_length=3000),
            FieldSchema(name="synthetic_review_embedding", dtype=DataType.FLOAT_VECTOR, dim=dim),
    ]
    schema = CollectionSchema(fields=fields, description='Food recipe data')
    collection = Collection(name=collection_name, schema=schema,enable_dynamic_field=False)
    
    vector_index_params = {
        'metric_type': "COSINE",
        'index_type': "IVF_FLAT",
        'params': {"nlist": 240}
    }
    scalar_index_params = {
        'index_name': "name_index",
        "index_type":"INVERTED"
    }
    collection.create_index(field_name='name_embedding', index_params=vector_index_params)
    collection.create_index(field_name='ingredients_embedding', index_params=vector_index_params)
    collection.create_index(field_name='synthetic_review_embedding', index_params=vector_index_params)
    collection.create_index(field_name='name', index_params=scalar_index_params)

    return collection

In [4]:
collection = create_milvus_collection("food_recipe_collection",1536)

In [5]:
collection.num_entities

0

In [2]:
recipe_with_review_embedded = pd.read_csv("../data/embedded_recipe_with_review.csv",converters={'name_embedding': lambda x: ast.literal_eval(x), 
                                                                                                'ingredients_embedding': lambda x: ast.literal_eval(x),
                                                                                                'synthetic_review_embedding': lambda x: ast.literal_eval(x)})

In [3]:
recipe_with_review_embedded

Unnamed: 0.1,Unnamed: 0,name,id,steps,description,ingredients,name_embedding,ingredients_embedding,synthetic_review,synthetic_review_embedding
0,0,arriba baked winter squash mexican style,137739,"['make a choice and proceed with recipe', 'dep...",autumn is my favorite time of year to cook! th...,"winter squash, mexican seasoning, mixed spice,...","[0.0053487359546124935, 0.008235931396484375, ...","[0.0007305422332137823, -0.0072620403952896595...",I tried the winter squash with Mexican seasoni...,"[-0.007140638772398233, 0.0011853587348014116,..."
1,1,a bit different breakfast pizza,31490,"['preheat oven to 425 degrees f', 'press dough...",this recipe calls for the crust to be prebaked...,"prepared pizza crust, sausage patty, eggs, mil...","[0.01900552771985531, 0.0007024979568086565, 0...","[0.030630841851234436, -0.00014163280138745904...",This sausage and egg breakfast pizza is a deli...,"[0.03296177089214325, -0.0009218796622008085, ..."
2,2,all in the kitchen chili,112140,"['brown ground beef in large pot', 'add choppe...",this modified version of 'mom's' chili was a h...,"ground beef, yellow onions, diced tomatoes, to...","[0.006139178294688463, 0.0013280771672725677, ...","[-0.0016437169397249818, 0.0006793703651055694...",The chili made from the combination of ground ...,"[0.0051086535677313805, 0.013087042607367039, ..."
3,3,alouette potatoes,59389,['place potatoes in a large pot of lightly sal...,"this is a super easy, great tasting, make ahea...","spreadable cheese with garlic and herbs, new p...","[-0.0026074324268847704, -0.01385664101690054,...","[0.0013258986873552203, -0.012965815141797066,...",This dish is a flavorful and satisfying combin...,"[0.012579272501170635, -0.007429368793964386, ..."
4,4,amish tomato ketchup for canning,44061,['mix all ingredients& boil for 2 1 / 2 hours ...,my dh's amish mother raised him on this recipe...,"tomato juice, apple cider vinegar, sugar, salt...","[-0.0009257544879801571, -0.009939315728843212...","[0.00800400972366333, -0.009005345404148102, -...","The homemade ketchup made with tomato juice, a...","[0.022948384284973145, 0.0011680859606713057, ..."
...,...,...,...,...,...,...,...,...,...,...
495,495,philly roll sushi,285568,['place one full sheet of sushi nori on a bamb...,makes 2 delicious & easy sushi rolls!,"cream cheese, smoked salmon, english cucumber,...","[-0.01217602752149105, -0.0192753616720438, 0....","[-0.006269816774874926, 0.008479120209813118, ...","The sushi rolls made with cream cheese, smoked...","[-0.0024783106055110693, -0.003023313125595450..."
496,496,pink stuff,432027,['mix cool whip and cottage cheese in large bo...,this is my friends moms recipe. i vary it from...,"cool whip, lowfat smallcurd cottage cheese, st...","[-0.004030769690871239, 0.0007469958509318531,...","[-0.014467619359493256, 0.007093412335962057, ...","The combination of cool whip, cottage cheese, ...","[-0.016909033060073853, 0.007528887130320072, ..."
497,497,pita the great tuna salad,207743,"['combine tuna , red pepper , green onion and ...","i found this recipe in the ""looney spoons"" coo...","tuna in water, sweet red pepper, green onions,...","[-0.006472115870565176, 0.0011167735792696476,...","[-0.012082647532224655, -0.014462264254689217,...",The tuna pita pockets were a delightful combin...,"[-0.004645794164389372, 0.009324008598923683, ..."
498,498,pizza beans,299968,"['soak the beans overnight', 'drain beans , pl...","i make this often for my son, who gave the dis...","dried beans, bay leaves, onion, green peppers,...","[-0.007129927631467581, -0.03355567157268524, ...","[0.00811612419784069, -0.0014494240749627352, ...","The bean dish is flavorful and satisfying, wit...","[0.0009466591291129589, -0.004376595374196768,..."


In [4]:
recipe_with_review_embedded.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 10 columns):
 #   Column                      Non-Null Count  Dtype 
---  ------                      --------------  ----- 
 0   Unnamed: 0                  500 non-null    int64 
 1   name                        500 non-null    object
 2   id                          500 non-null    int64 
 3   steps                       500 non-null    object
 4   description                 500 non-null    object
 5   ingredients                 500 non-null    object
 6   name_embedding              500 non-null    object
 7   ingredients_embedding       500 non-null    object
 8   synthetic_review            500 non-null    object
 9   synthetic_review_embedding  500 non-null    object
dtypes: int64(2), object(8)
memory usage: 39.2+ KB


In [9]:
entities = []

for i in range(len(recipe_with_review_embedded)):
    # generate random values for each field in the schema
    name_field = recipe_with_review_embedded['name'].iloc[i]
    id_field = recipe_with_review_embedded['id'].iloc[i]
    steps_field = recipe_with_review_embedded['steps'].iloc[i]
    description_field = recipe_with_review_embedded['description'].iloc[i]
    ingredients_field = recipe_with_review_embedded['ingredients'].iloc[i]
    name_embedding_field = recipe_with_review_embedded['name_embedding'].iloc[i]
    ingredients_embedding_field = recipe_with_review_embedded['ingredients_embedding'].iloc[i]
    synthetic_review_field = recipe_with_review_embedded['synthetic_review'].iloc[i]
    synthetic_review_embedding_field = recipe_with_review_embedded['synthetic_review_embedding'].iloc[i]

    # create a dictionary for each entity
    entity = {
        "id":id_field,
        "name": name_field,
        "name_embedding": name_embedding_field,
        "steps": steps_field,
        "description": description_field,
        "ingredients": ingredients_field,
        "ingredients_embedding": ingredients_embedding_field,
        "synthetic_review": synthetic_review_field,
        "synthetic_review_embedding": synthetic_review_embedding_field
    }

    # add the entity to the list
    entities.append(entity)
    
collection.insert(entities)

(insert count: 500, delete count: 0, upsert count: 0, timestamp: 451559022804598786, success count: 500, err count: 0, cost: 0)

In [10]:
collection.load()
collection.num_entities

0

# Query Embedding

In [None]:
# Connect to Milvus
connections.connect(host='0.0.0.0', port='19530')

In [None]:
collection = create_milvus_collection("food_recipe_collection",1536,drop_existing_collection=False)

In [None]:
from pymilvus import AnnSearchRequest
import json

with open('../data/user_data.json') as f:
    user_data = json.load(f)

In [None]:
user_food_preference = user_data[0]['food_preference']

In [None]:
def parse_food_preference_for_ingredient_query(food_preference:list[str])->str:
    result = ""
    for i, food in enumerate(food_preference):
        if i < len(food_preference)-1:
            result+=f"{food}, "
        else:
            result+=food
    return result

def parse_food_preference_for_review_query(food_preference:list[str])->str:
    result = "Food with "
    for i, food in enumerate(food_preference):
        if i < len(food_preference)-1:
            result+= f"{food} or "
        else:
            result+=food
    return result

In [None]:
ingredient_query = parse_food_preference_for_ingredient_query(user_food_preference)
review_query = parse_food_preference_for_review_query(user_food_preference)
print(ingredient_query)
print(review_query)

In [None]:
client = AzureOpenAI(
  api_key = os.getenv("AZURE_OPENAI_EMBEDDING_MODEL_KEY"),  
  api_version = os.getenv("AZURE_OPENAI_EMBEDDING_MODEL_API_VERSION"),
  azure_endpoint =os.getenv("AZURE_OPENAI_ENDPOINT") 
)

response = client.embeddings.create(
            input = [ingredient_query,review_query],
            model= os.getenv("AZURE_OPENAI_EMBEDDING_MODEL_DEPLOYMENT")
        )
json_response = response.model_dump()


In [None]:
ingredient_query_embedding=json_response['data'][0]['embedding']
review_query_embedding=json_response['data'][1]['embedding']

In [None]:
type(ingredient_query_embedding[0])

In [None]:
search_param_ingredients = {
    "data": [ingredient_query_embedding], # Query vector
    "anns_field": "ingredients_embedding", # Vector field name
    "param": {
        "metric_type": "COSINE", # This parameter value must be identical to the one used in the collection schema
        "params": {"nprobe": 10}
    },
    "limit": 10 # Number of search results to return in this AnnSearchRequest
}
search_request_ingredients = AnnSearchRequest(**search_param_ingredients)

search_param_food_name = {
    "data": [review_query_embedding], # Query vector
    "anns_field": "name_embedding", # Vector field name
    "param": {
        "metric_type": "COSINE", # This parameter value must be identical to the one used in the collection schema
        "params": {"nprobe": 10}
    },
    "limit": 10 # Number of search results to return in this AnnSearchRequest
}
search_request_food_name = AnnSearchRequest(**search_param_food_name)

search_param_review = {
    "data": [review_query_embedding], # Query vector
    "anns_field": "synthetic_review_embedding", # Vector field name
    "param": {
        "metric_type": "COSINE", # This parameter value must be identical to the one used in the collection schema
        "params": {"nprobe": 10}
    },
    "limit": 10 # Number of search results to return in this AnnSearchRequest
}
search_request_review = AnnSearchRequest(**search_param_review)

# Store these two requests as a list in `reqs`
search_reqs = [search_request_ingredients, search_request_food_name, search_request_review]

In [None]:
from pymilvus import WeightedRanker
# Use WeightedRanker to combine results with specified weights
# Assign weights of 0.7 to ingredients search and 0.3 to name search
rerank = WeightedRanker(0.4, 0.2,0.4)

In [None]:
# Before conducting hybrid search, load the collection into memory.
collection.load()

In [None]:
res = collection.hybrid_search(
    search_reqs, # List of AnnSearchRequests created in step 1
    rerank, # Reranking strategy specified in step 2
    limit=10, # Number of final search results to return,
    output_fields=["name","ingredients","synthetic_review"]
)

In [None]:
print(res)

In [None]:
def parse_db_search_result(res:SearchResult)->list[dict[str,str]]:
    result = {}
    for hits in res:
        for i,hit in enumerate(hits):
            result[i+1] = {
                "recipe_name":hit.get('name'),
                "ingredients":hit.get('ingredients'),
                "review":hit.get('synthetic_review')
            }
    return result

In [None]:
search_result = parse_db_search_result(res)

In [None]:
search_result

In [None]:
str(search_result)

In [None]:
response.choices[0].message.content

# Scalar search DB

In [11]:
from pymilvus import Collection

collection = create_milvus_collection("food_recipe_collection",1536,drop_existing_collection=False)

In [16]:
res=collection.query(
    expr='name == "21 apple pie"',
    limit=5,
    output_fields=["ingredients","steps","synthetic_review"]
)

In [22]:
res[0]

{'id': 146,
 'ingredients': 'allpurpose flour, salt, unsalted butter, ice water, apples, lemons, dark brown sugar, light brown sugar, southern comfort, cinnamon, egg',
 'steps': "['add the cold butter cubes and , with a pastry blender , blend in the flour until mixture becomes mealy ans butter forms peas-size nuggets plainly visible in the flour', 'the water should be added to the flour slowly to ensure that only the minimum amount of water is used', 'do not over-mix the dough', 'when the dough has been mixed just sufficiently to combine the ingredients , form it into a ball and wrap in a piece of plastic wrap and refrigerate for several hours', 'to prepare the filling , melt the butter in a large saute pan until it begins to brown', 'add the sliced apples and cook over medium heat until they begin to soften , about 2 to 3 minutes', 'in a bowl , combine the lemon juice , sugars , southern comfort , and cinnamon', 'add this mixture to the apples and cook 2 to 3 minutes over medium-high 