In [1]:
from dotenv import load_dotenv
load_dotenv('../.vscode/.env')

True

In [2]:
import pandas as pd

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [3]:
df = pd.read_csv('../data/diabetic_recipe.csv')

In [4]:
row = df.iloc[0]

In [5]:
ASSISTANT_INSTRUCTION = f'''
You are helpful diabetic-friendly recipe assistant who give different recipe each time when user ask. Recipe can be for vegetarian, vegan, and non-vegeterian diets.You generate diabetic friendly recipe in below format:
<recipe>
    <recipe_name> {row['recipe_name']} </recipe_name>
    <ingredients> {row['ingredients']} </ingredients>
    <directions> {row['directions']} </directions>
    <nutrition> {row['nutrition']} </nutrition> 
</recipe>
Always use above format to give recipe.
'''

In [44]:
# Reusing assistant created earlier for recipe genration using GPT4
ASSISTANT_NAME = 'Diabetic-Friendly Recipe Assistant'


In [6]:
from openai import OpenAI
import os

In [7]:
import json

def show_json(obj):
    display(json.loads(obj.model_dump_json()))

In [8]:
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", ""))


In [48]:
assistant = client.beta.assistants.create(
    name=ASSISTANT_NAME,
    instructions=ASSISTANT_INSTRUCTION,
    model="gpt-4-1106-preview",
)
show_json(assistant)

{'id': 'asst_i014pbQ36JXw3VpqtybiYGrt',
 'created_at': 1708591408,
 'description': None,
 'file_ids': [],
 'instructions': '\nYou are helpful diabetic-friendly recipe assistant who give different recipe each time when user ask. Recipe can be for vegetarian, vegan, and non-vegeterian diets.You generate diabetic friendly recipe in below format:\n<recipe>\n    <recipe_name> Shrimp scampi </recipe_name>\n    <ingredients> 8 ounces uncooked spaghetti, 3 tablespoons olive oil, divided, 2 pounds large shrimp, peeled and deveined, 1 tablespoon minced garlic, 1/4 cup chopped shallots or green onions, 2 tablespoons lemon juice, 1/4 cup chopped parsley, 1/4 teaspoon salt, Ground black pepper, to taste, 2 tablespoons brandy or sherry, optional , 2 tablespoons butter </ingredients>\n    <directions> Fill a large pot 3/4 full with water and bring to a boil. Add the pasta and cook according to package directions. Drain the pasta thoroughly, While the pasta is cooking, heat 1 tablespoon olive oil in a

In [9]:
ASSISTANT_ID='asst_i014pbQ36JXw3VpqtybiYGrt'

In [10]:
from bs4 import BeautifulSoup
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", ""))

def submit_message(assistant_id, thread, user_message):
    client.beta.threads.messages.create(
        thread_id=thread.id, role="user", content=user_message
    )
    return client.beta.threads.runs.create(
        thread_id=thread.id,
        assistant_id=assistant_id,
    )


def get_response(thread):
    messages = client.beta.threads.messages.list(thread_id=thread.id)
    for m in messages:
        if m.role == 'assistant':
            return m.content[0].text.value
# Write recipe in xml format using Beautiful soup
def write_recipe(id: str, recipe_content:str):
    soup = BeautifulSoup(recipe_content, "xml")
    with open(f'../data/random_diabetic_recipe/{id}.xml', 'w') as f:
        f.write(soup.prettify())

def create_thread_and_run(assistant_id: str, user_input:str):
    thread = client.beta.threads.create()
    run = submit_message(assistant_id, thread, user_input)
    return thread, run

In [11]:
import time

def wait_on_run(run, thread):
    while run.status == "queued" or run.status == "in_progress":
        run = client.beta.threads.runs.retrieve(
            thread_id=thread.id,
            run_id=run.id,
        )
        time.sleep(0.5)
    return run

In [16]:
for i in range(5,6):
    thread, run = create_thread_and_run(assistant_id= ASSISTANT_ID, user_input=f"Give me diabetic-friendly lunch recipe.")
    run = wait_on_run(thread=thread, run = run)
    recipe_content = get_response(thread=thread)
    write_recipe(id=str(i), recipe_content=recipe_content)
    recipe_id = str(i)
    print(f'Generated recipe for id:{recipe_id}')

Generated recipe for id:5


In [17]:
import re

# Function to extract nutritional values
def extract_nutrition_values(text):
    # Dictionary to hold the extracted values
    nutrition_values = {}

    # Regular expressions for extracting fat, carbohydrate, and protein
    patterns = {
        'fat': r"Total Fat (\d+\.?\d*g)",
        'carbohydrate': r"Total Carbohydrate (\d+\.?\d*g)",
        'protein': r"Protein (\d+\.?\d*g)",
    }

    # Loop through the patterns and search in the text
    for key, pattern in patterns.items():
        match = re.search(pattern, text, re.IGNORECASE)
        if match:
            # If a match is found, add it to the dictionary
            nutrition_values[key] = match.group(1)
        else:
            # If no match is found, set the value to None
            nutrition_values[key] = None

    return nutrition_values

In [18]:
import glob
from bs4 import  BeautifulSoup
file_paths = glob.glob(pathname='../data/random_diabetic_recipe/*.xml')

In [19]:
with open(file_paths[0], 'r') as f:
    file = f.read() 

# 'xml' is the parser used. For html files, which BeautifulSoup is typically used for, it would be 'html.parser'.
soup = BeautifulSoup(file, 'xml')

In [20]:
from pydantic import BaseModel
from typing import  List, Optional
class Nutrition(BaseModel):
    fat: Optional[str] = None
    protein: Optional[str] = None
    carbohydrate: Optional[str] = None

class Recipe(BaseModel):
    id: str
    recipe_name:str
    ingredients: str
    directions: str
    nutrition: Nutrition 

class Recipes(BaseModel):
    recipes: List[Recipe] = []

In [21]:
def read_xml_file(file_path: str):
    with open(file_path, 'r') as f:
        file = f.read() 
        return file

def clean_text(content: str):
    clean_content = content.strip()
    return clean_content

In [22]:
from pathlib import Path
def parse_llm_recipes(file_paths:list):
    recipes = Recipes()
    for file_path in file_paths:
        print(f'Parsing file:{file_path}')
        id = Path(file_path).stem
        file = read_xml_file(file_path)
        soup = BeautifulSoup(file, 'xml')
        recipe_name = soup.find('recipe_name')
        recipe_name = clean_text(recipe_name.text)
        ingredients = soup.find('ingredients')
        ingredients = clean_text(ingredients.text)
        directions = soup.find('directions')
        directions = clean_text(directions.text)
        nutrition = soup.find('nutrition')
        nutrition = clean_text(nutrition.text)
        parsed_nutrition = extract_nutrition_values(text=nutrition)
        nutrition = Nutrition(fat=parsed_nutrition['fat'], protein=parsed_nutrition['protein'],carbohydrate=parsed_nutrition['carbohydrate'])
        recipe = Recipe(id = id, recipe_name = recipe_name, ingredients = ingredients,directions=directions, nutrition=nutrition)
        recipes.recipes.append(recipe) 
    return recipes 

In [23]:
llm_recipes = parse_llm_recipes(file_paths)


Parsing file:../data/random_diabetic_recipe/8.xml
Parsing file:../data/random_diabetic_recipe/9.xml
Parsing file:../data/random_diabetic_recipe/10.xml
Parsing file:../data/random_diabetic_recipe/4.xml
Parsing file:../data/random_diabetic_recipe/5.xml
Parsing file:../data/random_diabetic_recipe/7.xml
Parsing file:../data/random_diabetic_recipe/6.xml
Parsing file:../data/random_diabetic_recipe/2.xml
Parsing file:../data/random_diabetic_recipe/3.xml
Parsing file:../data/random_diabetic_recipe/1.xml


In [24]:
import json
with open('../data/random_diabetic_recipe/gpt4_1106_recipes.json', 'w') as f:
    json.dump(llm_recipes.model_dump(mode='json'), f, indent=4, ensure_ascii=False)

In [25]:
import json
with open('../data/random_diabetic_recipe/gpt4_1106_recipes.json') as f:
    gpt4_recipe = json.load(f)

In [26]:
df_gpt4_recipe = pd.DataFrame.from_records(gpt4_recipe['recipes'])

In [27]:
df_gpt4_recipe

Unnamed: 0,id,recipe_name,ingredients,directions,nutrition
0,8,Grilled Chicken Salad with Strawberry Vinaigrette,"2 boneless, skinless chicken breasts, 6 cups m...",Preheat grill to medium-high heat. Season chic...,"{'fat': '15g', 'protein': '29g', 'carbohydrate..."
1,9,Grilled Lemon-Herb Chicken Salad,"4 boneless, skinless chicken breasts, 1 tables...",Preheat grill to medium-high heat. In a small ...,"{'fat': '15g', 'protein': '30g', 'carbohydrate..."
2,10,Mediterranean Quinoa Salad,"1 cup quinoa, 2 cups water, 1 cup cherry tomat...","In a medium saucepan, combine quinoa and water...","{'fat': '10g', 'protein': '6g', 'carbohydrate'..."
3,4,Grilled Chicken Salad with Orange Vinaigrette,"2 boneless, skinless chicken breasts, 6 cups m...",Preheat grill to medium-high heat. Season the ...,"{'fat': '14g', 'protein': '28g', 'carbohydrate..."
4,5,Grilled Chicken Salad with Avocado Dressing,"2 boneless, skinless chicken breasts, 1 teaspo...",Preheat grill to medium-high heat. Brush chick...,"{'fat': '14g', 'protein': '35g', 'carbohydrate..."
5,7,Grilled Mediterranean Vegetable Salad,"1 medium eggplant, sliced into 1/2-inch rounds...",Preheat grill to medium-high heat. In a small ...,"{'fat': '12g', 'protein': '4g', 'carbohydrate'..."
6,6,Grilled Chicken with Spinach and Pine Nut Pesto,"4 boneless, skinless chicken breasts, 1 tables...",Preheat grill to medium-high heat. Brush chick...,"{'fat': '15g', 'protein': '34g', 'carbohydrate..."
7,2,Spinach and Feta Egg Muffins,"8 large eggs, 1 cup fresh spinach, chopped, 1/...",Preheat your oven to 350°F (175°C). Spray a mu...,"{'fat': '7g', 'protein': '9g', 'carbohydrate':..."
8,3,Spinach and Feta Omelette,"2 large eggs, 1/4 cup chopped spinach, 1 table...","In a small bowl, beat the eggs with the dried ...","{'fat': '15g', 'protein': '19g', 'carbohydrate..."
9,1,Spinach and Feta Breakfast Scramble,"4 large eggs, 2 large egg whites, 2 tablespoon...","In a bowl, whisk together the eggs, egg whites...","{'fat': '15g', 'protein': '19g', 'carbohydrate..."


In [29]:
recipe = pd.read_csv('../data/recipes.csv')

In [30]:
recipe = recipe.rename(columns={'Unnamed: 0':'id'})

In [31]:
recipe_filter = recipe.drop_duplicates(subset=['recipe_name'], keep='first')

In [33]:
recipe_filter = recipe_filter.reset_index(drop=True)

In [34]:
recipe_filter

Unnamed: 0,id,recipe_name,prep_time,cook_time,total_time,servings,yield,ingredients,directions,rating,url,cuisine_path,nutrition,timing,img_src
0,0,Apple-Cranberry Crostada,,,,8,6 to 8 - servings,"3 tablespoons butter, 2 pounds Granny Smith ap...",Heat butter in a large skillet over medium-hig...,4.4,https://www.allrecipes.com/recipe/76931/apple-...,/Desserts/Fruit Desserts/Apple Dessert Recipes/,"Total Fat 18g 23%, Saturated Fat 7g 34%, Chole...","Servings: 8, Yield: 6 to 8 - servings",https://www.allrecipes.com/thmb/Tf1wH73bfH6Oql...
1,1,Apple Pie by Grandma Ople,30 mins,1 hrs,1 hrs 30 mins,8,1 9-inch pie,"8 small Granny Smith apples, or as needed, ½ c...","Peel and core apples, then thinly slice. Set a...",4.8,https://www.allrecipes.com/recipe/12682/apple-...,/Desserts/Pies/Apple Pie Recipes/,"Total Fat 19g 24%, Saturated Fat 9g 46%, Chole...","Prep Time: 30 mins, Cook Time: 1 hrs, Total Ti...",https://www.allrecipes.com/thmb/1I95oiTGz6aEpu...
2,2,Sarah's Homemade Applesauce,10 mins,15 mins,25 mins,4,,"4 apples - peeled, cored and chopped, ¾ cup w...","Combine apples, water, sugar, and cinnamon in ...",4.8,https://www.allrecipes.com/recipe/51301/sarahs...,/Side Dish/Applesauce Recipes/,"Total Fat 0g 0%, Sodium 3mg 0%, Total Carbohyd...","Prep Time: 10 mins, Cook Time: 15 mins, Total ...",https://www.allrecipes.com/thmb/VY5d0tZHB8xz6y...
3,3,Apple Crisp,30 mins,45 mins,1 hrs 15 mins,12,1 9x13-inch pan,"10 cups all-purpose apples, peeled, cored and ...",Preheat the oven to 350 degrees F (175 degrees...,4.7,https://www.allrecipes.com/recipe/12409/apple-...,/Desserts/Crisps and Crumbles Recipes/Apple Cr...,"Total Fat 8g 11%, Saturated Fat 5g 25%, Choles...","Prep Time: 30 mins, Cook Time: 45 mins, Total ...",https://www.allrecipes.com/thmb/uAzhPOh86PfR-N...
4,4,Apple Pie Filling,20 mins,20 mins,2 hrs 40 mins,40,5 9-inch pies,"18 cups thinly sliced apples, 3 tablespoons le...",Toss apples with lemon juice in a large bowl a...,4.7,https://www.allrecipes.com/recipe/12681/apple-...,/Desserts/Pies/Apple Pie Recipes/,"Total Fat 0g 0%, Sodium 61mg 3%, Total Carbohy...","Prep Time: 20 mins, Cook Time: 20 mins, Additi...",https://www.allrecipes.com/thmb/c0bbYaS1V_mTt_...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
956,1085,Date-Nut Balls,15 mins,10 mins,30 mins,48,4 dozen balls,"14 tablespoons butter, 1 cup pitted chopped da...",Melt butter in a saucepan over medium heat; co...,3.8,https://www.allrecipes.com/recipe/240653/date-...,/Desserts/Cookies/Fruit Cookie Recipes/Date/,"Total Fat 5g 6%, Saturated Fat 2g 12%, Cholest...","Prep Time: 15 mins, Cook Time: 10 mins, Additi...",https://www.allrecipes.com/thmb/LXm_15zLymhZ7c...
957,1086,Jamaica Cake,,,,14,1 - 13x9 inch pan,"2 cups white sugar, 1 ½ cups vegetable oil, 1 ...","Mix together sugar, vegetable oil, pecans, flo...",4.6,https://www.allrecipes.com/recipe/7509/jamaica...,/Desserts/Fruit Desserts/Banana Dessert Recipes/,"Total Fat 33g 43%, Saturated Fat 4g 21%, Chole...","Servings: 14, Yield: 1 - 13x9 inch pan",https://www.allrecipes.com/thmb/tI7g5xY5-qPV5v...
958,1087,Best Hot Sauce,10 mins,20 mins,30 mins,100,2 cups,"10 fresh hot chile peppers, stems removed, 1 ...","Place peppers, onion, dates, basil, parsley, t...",4.9,https://www.allrecipes.com/recipe/242153/best-...,/Side Dish/Sauces and Condiments/,"Total Fat 1g 1%, Saturated Fat 0g 1%, Sodium 2...","Prep Time: 10 mins, Cook Time: 20 mins, Total ...",https://www.allrecipes.com/thmb/80wUYzFvbycfxx...
959,1088,Moist Date Nut Bread,15 mins,50 mins,1 hrs 35 mins,12,1 9x5-inch loaf,"2 ½ cups chopped dates, ¼ cup butter, 1 cup bo...",Preheat the oven to 350 degrees F (175 degrees...,4.5,https://www.allrecipes.com/recipe/51446/moist-...,/Breakfast and Brunch/Breakfast Bread Recipes/,"Total Fat 10g 13%, Saturated Fat 3g 16%, Chole...","Prep Time: 15 mins, Cook Time: 50 mins, Additi...",https://www.allrecipes.com/thmb/bYsnompJZ6UAF9...


In [46]:
dataset_recipes = recipe_filter[['id','recipe_name']]

In [47]:
dataset_recipes['recipe_name'] = dataset_recipes['recipe_name'].map(lambda x : x.lower())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataset_recipes['recipe_name'] = dataset_recipes['recipe_name'].map(lambda x : x.lower())


In [48]:
dataset_recipes

Unnamed: 0,id,recipe_name
0,0,apple-cranberry crostada
1,1,apple pie by grandma ople
2,2,sarah's homemade applesauce
3,3,apple crisp
4,4,apple pie filling
...,...,...
956,1085,date-nut balls
957,1086,jamaica cake
958,1087,best hot sauce
959,1088,moist date nut bread


In [53]:
gpt4_recipes_sub = df_gpt4_recipe[['id','recipe_name']]

In [54]:
gpt4_recipes_sub['recipe_name'] = gpt4_recipes_sub['recipe_name'].map(lambda x : x.lower())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gpt4_recipes_sub['recipe_name'] = gpt4_recipes_sub['recipe_name'].map(lambda x : x.lower())


In [55]:
gpt4_recipes_sub

Unnamed: 0,id,recipe_name
0,8,grilled chicken salad with strawberry vinaigrette
1,9,grilled lemon-herb chicken salad
2,10,mediterranean quinoa salad
3,4,grilled chicken salad with orange vinaigrette
4,5,grilled chicken salad with avocado dressing
5,7,grilled mediterranean vegetable salad
6,6,grilled chicken with spinach and pine nut pesto
7,2,spinach and feta egg muffins
8,3,spinach and feta omelette
9,1,spinach and feta breakfast scramble


In [76]:
# Get chicken salad recipe
chicken_salad_recipe = []
for index, row in gpt4_recipes_sub.iterrows():
    if 'salad' in row['recipe_name'] and 'chicken' in row['recipe_name']:
        chicken_salad_recipe.append({'id':row['id'],'recipe_name': row['recipe_name']})


In [77]:
chicken_salad_recipe

[{'id': '8',
  'recipe_name': 'grilled chicken salad with strawberry vinaigrette'},
 {'id': '9', 'recipe_name': 'grilled lemon-herb chicken salad'},
 {'id': '4', 'recipe_name': 'grilled chicken salad with orange vinaigrette'},
 {'id': '5', 'recipe_name': 'grilled chicken salad with avocado dressing'}]

In [74]:
dataset_chicken_salad_recipe = []
for index, row in dataset_recipes.iterrows():
    if 'salad' in row['recipe_name'] and 'chicken' in row['recipe_name']:
        dataset_chicken_salad_recipe.append({'id':row['id'],'recipe_name': row['recipe_name']})

In [75]:
dataset_chicken_salad_recipe

[{'id': 177, 'recipe_name': 'cherry chicken salad'},
 {'id': 721, 'recipe_name': 'chicken, avocado and mango salad'},
 {'id': 817,
  'recipe_name': 'spinach salad with chicken, avocado, and goat cheese'}]

Named Entity Recognition Ingredients

In [78]:
import json
import logging
import os

import openai
import wikipedia

from typing import Optional
from IPython.display import display, Markdown
from tenacity import retry, wait_random_exponential, stop_after_attempt

logging.basicConfig(level=logging.INFO, format=' %(asctime)s - %(levelname)s - %(message)s')

OPENAI_MODEL = 'gpt-3.5-turbo-0613'


In [79]:
labels = [
    "person",      # people, including fictional characters
    "fac",         # buildings, airports, highways, bridges
    "org",         # organizations, companies, agencies, institutions
    "gpe",         # geopolitical entities like countries, cities, states
    "loc",         # non-gpe locations
    "product",     # vehicles, foods, appareal, appliances, software, toys 
    "event",       # named sports, scientific milestones, historical events
    "work_of_art", # titles of books, songs, movies
    "law",         # named laws, acts, or legislations
    "language",    # any named language
    "date",        # absolute or relative dates or periods
    "time",        # time units smaller than a day
    "percent",     # percentage (e.g., "twenty percent", "18%")
    "money",       # monetary values, including unit
    "quantity",    # measurements, e.g., weight or distance
]

In [80]:
def system_message(labels):
    return f"""
You are an expert in Natural Language Processing. Your task is to identify common Named Entities (NER) in a given text.
The possible common Named Entities (NER) types are exclusively: ({", ".join(labels)})."""

In [81]:
def assisstant_message():
    return f"""
EXAMPLE:
    Text: '1 pie crust (store-bought or homemade), 6 cups thinly sliced apples (such as Granny Smith or a combination of tart and sweet apples), 3/4 cup granulated sugar, 2 tbsp all-purpose flour, 1 tsp ground cinnamon, 1/4 tsp ground nutmeg, 1/4 tsp salt, 1 tsp vanilla extract, 1/2 cup unsalted butter (cold and cut into small pieces), 3/4 cup all-purpose flour (for crumb topping), 1/2 cup brown sugar (packed, for crumb topping), 1/4 tsp baking powder (for crumb topping), 1/4 tsp salt (for crumb topping), Optional: Vanilla ice cream or whipped cream for serving'
    {{
        "product": ["pie crust","apples", "granulated sugar", "all-purpose flour", "ground cinnamon", "ground nutmeg", "salt", "vanilla extract", "unsalted water","brown sugar","baking powder", "vanilla ice cream", "whipped cream"],
    }}
--"""

In [82]:
def user_message(text):
    return f"""
TASK:
    Text: {text}
"""

In [85]:
def recipe_ingredients(text: str, label_entities: dict) -> list:
    """
    Recipe ingredients
    """
    return label_entities['product']

In [86]:
def generate_functions(labels: dict) -> list:
    return [
        {   
            "type": "function",
            "function": {
                "name": "recipe_ingredients",
                "description": "Enrich Text with recipe ingredients",
                "parameters": {
                    "type": "object",
                        "properties": {
                            "r'^(?:' + '|'.join({labels}) + ')$'": 
                            {
                                "type": "array",
                                "items": {
                                    "type": "string"
                                }
                            }
                        },
                        "additionalProperties": False
                },
            }
        }
    ]

In [89]:
@retry(wait=wait_random_exponential(min=1, max=10), stop=stop_after_attempt(5))
def run_openai_task(labels, text):
    messages = [
          {"role": "system", "content": system_message(labels=labels)},
          {"role": "assistant", "content": assisstant_message()},
          {"role": "user", "content": user_message(text=text)}
      ]

    # TODO: functions and function_call are deprecated, need to be updated
    # See: https://platform.openai.com/docs/api-reference/chat/create#chat-create-tools
    response = openai.chat.completions.create(
        model="gpt-3.5-turbo-0613",
        messages=messages,
        tools=generate_functions(labels),
        tool_choice={"type": "function", "function" : {"name": "recipe_ingredients"}}, 
        temperature=0,
        frequency_penalty=0,
        presence_penalty=0,
    )

    response_message = response.choices[0].message
    
    available_functions = {"recipe_ingredients": recipe_ingredients}  
    function_name = response_message.tool_calls[0].function.name
    
    function_to_call = available_functions[function_name]
    logging.info(f"function_to_call: {function_to_call}")

    function_args = json.loads(response_message.tool_calls[0].function.arguments)
    logging.info(f"function_args: {function_args}")

    function_response = function_to_call(text, function_args)

    return {"model_response": response, 
            "function_response": function_response}

In [90]:
text = """6 cups thinly sliced apples, 1 tablespoon lemon juice (Optional), ¾ cup white sugar, 2 tablespoons all-purpose flour, ½ teaspoon ground cinnamon, ⅛ teaspoon ground nutmeg, ½ cup raisins (Optional), ½ cup chopped walnuts (Optional), 1 (9 inch) pie shell, ½ cup all-purpose flour, ½ cup packed brown sugar, 3 tablespoons butter"""
result = run_openai_task(labels, text)

 2024-02-24 13:52:08,579 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
 2024-02-24 13:52:08,583 - INFO - function_to_call: <function recipe_ingredients at 0x12ed91260>
 2024-02-24 13:52:08,584 - INFO - function_args: {'product': ['apples', 'lemon juice', 'white sugar', 'all-purpose flour', 'ground cinnamon', 'ground nutmeg', 'raisins', 'chopped walnuts', 'pie shell', 'packed brown sugar', 'butter']}


In [100]:
result['function_response']

['apples',
 'lemon juice',
 'white sugar',
 'all-purpose flour',
 'ground cinnamon',
 'ground nutmeg',
 'raisins',
 'chopped walnuts',
 'pie shell',
 'packed brown sugar',
 'butter']

In [126]:
def word_match_count(arr1, arr2):
    # Split each phrase in both arrays into words and flatten the list of lists
    words1 = set(word for phrase in arr1 for word in phrase.split())
    words2 = set(word for phrase in arr2 for word in phrase.split())
    
    # Find the intersection of the two sets to get the matching words
    matching_words = words1.intersection(words2)
    
    # Return the matching words and their count
    return matching_words, len(matching_words)

In [129]:
recipe_authenticity_result = []
for item1 in chicken_salad_recipe:
    item_recipe = {'id': item1['id'], 'recipe_name':item1['recipe_name'], 'matched_recipes':[]}
    ingredients1 = str(df_gpt4_recipe[df_gpt4_recipe['id'] == str(item1['id'])]['ingredients'].values[0])
    result1 = run_openai_task(labels, ingredients1)
    ingredients1_arr = result1['function_response']
    item_recipe['ingredients'] = ingredients1
    item_recipe['ingredients_ner'] = ingredients1_arr
    for item2 in dataset_chicken_salad_recipe:
        ingredients2 = str(recipe_filter[recipe_filter['id'] == item2['id']]['ingredients'].values[0])
        result2 = run_openai_task(labels, ingredients2)
        ingredients2_arr = result2['function_response'] 
        matched_words,matched_count = word_match_count(ingredients1_arr, ingredients2_arr)
        matched_percentage = (matched_count/len(ingredients2_arr))*100
        item_recipe['matched_recipes'].append({'id':item2['id'], 'recipe_name':item2['recipe_name'],'ingredients':ingredients2,'ingredients_ner':ingredients2_arr, 'matched_percentage':matched_percentage, 'matched_words':matched_words})        
    recipe_authenticity_result.append(item_recipe)

 2024-02-24 14:52:24,941 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
 2024-02-24 14:52:24,944 - INFO - function_to_call: <function recipe_ingredients at 0x12ed91260>
 2024-02-24 14:52:24,944 - INFO - function_args: {'product': ['chicken breasts', 'mixed salad greens', 'strawberries', 'slivered almonds', 'red onion', 'feta cheese', 'balsamic vinegar', 'olive oil', 'Dijon mustard', 'honey', 'Salt', 'pepper']}
 2024-02-24 14:52:25,921 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
 2024-02-24 14:52:25,924 - INFO - function_to_call: <function recipe_ingredients at 0x12ed91260>
 2024-02-24 14:52:25,925 - INFO - function_args: {'product': ['cooked chicken breast', 'dried cherries', 'celery', 'pecans', 'mayonnaise', 'buttermilk', 'salt', 'ground black pepper', 'apples']}
 2024-02-24 14:52:27,708 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
 2024-02-24 14:52:27,712

In [142]:
class MatchedRecipe(BaseModel):
    id: str
    recipe_name: str
    ingredients: str
    ingredients_ner: list = []
    matched_percentage: float
    matched_words: set = []

class RecipeAuthenticity(BaseModel):
    id: str
    recipe_name: str
    ingredients: str
    ingredients_ner: list = []
    matched_recipes:List[MatchedRecipe] = []

class RecipiesAuthenticity(BaseModel):
    recpies_authenticity: List[RecipeAuthenticity] = []

In [143]:
recpies = RecipiesAuthenticity()
for item in recipe_authenticity_result:
    recipe_authenticity = RecipeAuthenticity(id=item['id'], 
                                            recipe_name=item['recipe_name'],
                                            ingredients= item['ingredients'],
                                            ingredients_ner = item['ingredients_ner'])
    for item_matched in item['matched_recipes']:
        recipe_authenticity.matched_recipes.append(MatchedRecipe(id=str(item_matched['id']), 
                                            recipe_name=item_matched['recipe_name'],
                                            ingredients= item_matched['ingredients'],
                                            ingredients_ner = item_matched['ingredients_ner'],
                                            matched_percentage=item_matched['matched_percentage'],
                                            matched_words= item_matched['matched_words']))
    recpies.recpies_authenticity.append(recipe_authenticity)

In [144]:
import json
with open('../data/random_diabetic_recipe/chicken_salad_recipe.json', 'w') as f:
    json.dump(recpies.model_dump(mode='json'), f, indent=4, ensure_ascii=False)