###### Loading environment variables from the .env file

In [7]:
from dotenv import load_dotenv
# Load the variables defined in the project's .env file into the process
# environment (presumably including the USDA_API_KEY read via os.environ below).
load_dotenv('../.vscode/.env')

True

###### Importing requests

In [8]:
import requests

###### URL of the USDA FoodData Central search API

In [9]:
# Search endpoint template: {0} is the search query and {1} the dataType
# filter; requireAllWords=true is always part of the request.
url = "https://api.nal.usda.gov/fdc/v1/foods/search?query={0}&dataType={1}&requireAllWords=true"

###### Declaring the data_types and nutrient_ids

In [10]:
# Values substituted into the `dataType` query parameter of `url`.
data_types = ['Foundation','SR Legacy', 'Branded']
# Nutrient ids that retrieve_energy accepts as an energy entry (it only does a
# membership test against this list).
# NOTE(review): 1008 appears twice — harmless for `in` checks, but confirm
# whether a different id was intended for the second slot.
nutrient_ids = [1008,1008,2047]

###### Importing the os module

In [11]:
import os

In [12]:
# Headers sent with the USDA requests; the key is read from the USDA_API_KEY
# environment variable (os.environ.get yields None when it is not set).
headers = {
  'X-Api-Key': os.environ.get('USDA_API_KEY')
}
# Empty payload handed to requests as `data=`.
payload = {}

In [13]:
# Sample search: query 'cucumber' restricted to the first data type ('Foundation').
response = requests.request("GET", url.format('cucumber', data_types[0]), headers=headers, data=payload)

In [14]:
# Parse the response body as JSON.
resp_json = response.json()

In [15]:
# Display the parsed search response.
resp_json

{'totalHits': 2,
 'currentPage': 1,
 'totalPages': 1,
 'pageList': [1],
 'foodSearchCriteria': {'dataType': ['Foundation'],
  'query': 'cucumber',
  'generalSearchInput': 'cucumber',
  'pageNumber': 1,
  'numberOfResultsPerPage': 50,
  'pageSize': 50,
  'requireAllWords': True,
  'foodTypes': ['Foundation']},
 'foods': [{'fdcId': 2346406,
   'description': 'Cucumber, with peel, raw',
   'commonNames': '',
   'additionalDescriptions': '',
   'dataType': 'Foundation',
   'ndbNumber': 11205,
   'publishedDate': '2022-10-28',
   'foodCategory': 'Vegetables and Vegetable Products',
   'mostRecentAcquisitionDate': '2022-05-02',
   'allHighlightFields': '',
   'score': 418.943,
   'microbes': [],
   'foodNutrients': [{'nutrientId': 1089,
     'nutrientName': 'Iron, Fe',
     'nutrientNumber': '303',
     'unitName': 'MG',
     'derivationCode': 'A',
     'derivationDescription': 'Analytical',
     'derivationId': 1,
     'value': 0.0,
     'foodNutrientSourceId': 1,
     'foodNutrientSourceCo

In [16]:
# Print every nutrient entry of the first returned food whose nutrientId is 2047.
for item in resp_json['foods'][0]['foodNutrients']:
    if item['nutrientId'] != 2047:
        continue
    print(item)

{'nutrientId': 2047, 'nutrientName': 'Energy (Atwater General Factors)', 'nutrientNumber': '957', 'unitName': 'KCAL', 'derivationCode': 'NC', 'derivationDescription': 'Calculated', 'derivationId': 49, 'value': 15.9, 'foodNutrientSourceId': 2, 'foodNutrientSourceCode': '4', 'foodNutrientSourceDescription': 'Calculated or imputed', 'rank': 280, 'indentLevel': 1, 'foodNutrientId': 28912853}


In [17]:
from pydantic import BaseModel

In [18]:
class USDAFoodIngredient(BaseModel):
    """Energy value of one food, as filled in from a USDA search result."""
    # `fdcId` of the matched food.
    fdc_id: int
    # Query string the lookup was made with.
    name: str
    # `nutrientId` of the nutrient entry the energy value was read from.
    nutrition_id: int
    # `value` of that nutrient entry.
    energy: float
    # `unitName` of that nutrient entry (e.g. 'KCAL').
    unit: str 

In [19]:
def http_get(url: str, timeout: float = 30):
    """Send a GET request to ``url`` using the notebook-level ``headers`` and ``payload``.

    Args:
        url: Fully formatted request URL.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error. Without a timeout a stalled connection would block
            the notebook indefinitely.

    Returns:
        The ``requests.Response`` object for the call.
    """
    return requests.request("GET", url, headers=headers, data=payload, timeout=timeout)

In [20]:
class Ingredient(BaseModel):
    """A recipe ingredient together with its NER-extracted name."""
    # Ingredient name as written in the recipe.
    name: str
    # Quantity as written in the recipe (kept as a string).
    quantity: str
    # Name extracted from `name` by the NER step.
    ner_name: str

In [21]:
from typing import Optional
def retrieve_energy(query: str, formatted_url:str)-> Optional[USDAFoodIngredient]:
    """Look up the energy value of the first food returned by a USDA search.

    Args:
        query: Search term; stored as the ``name`` of the result and shown in
            the failure message.
        formatted_url: Complete search URL to request.

    Returns:
        A ``USDAFoodIngredient`` built from the first nutrient entry of the
        first returned food whose ``nutrientId`` is in ``nutrient_ids``, or
        ``None`` when the request fails, no food is returned, or the food has
        no such nutrient entry.
    """
    response = http_get(url=formatted_url)
    if response.status_code != 200:
        # Report the query that failed. The message previously referenced an
        # undefined name, so a failed request raised NameError instead of
        # returning None.
        print(f'Fail to get response for ingredient:{query}')
        return None

    # Treat a missing or empty 'foods' list the same way: nothing was found.
    foods = response.json().get('foods') or []
    if not foods:
        return None

    # Only the top-ranked search hit is considered.
    food_details = foods[0]
    fdc_id = food_details['fdcId']
    for item in food_details.get('foodNutrients') or []:
        if item['nutrientId'] in nutrient_ids:
            return USDAFoodIngredient(name=query,
                                      fdc_id=fdc_id,
                                      nutrition_id=item['nutrientId'],
                                      unit=item['unitName'],
                                      energy=item['value'])
    return None
            

In [22]:
import re
def clean_string(input_string):
    """Normalise an ingredient name before it is used as a search query.

    The text is lower-cased, a fixed list of descriptive words is removed
    (whole-word matches only), 'zucchinis' is rewritten to 'zucchini', and
    runs of whitespace are collapsed to single spaces.
    """
    # Words removed outright, in the order they are applied.
    dropped_words = (
        'fresh', 'grated', 'extra virgin', 'extravirgin', 'virgin', 'lowfat',
        'red', 'green', 'white', 'whites', 'brown', 'black', 'kalamata',
        'stalks', 'leaves', 'cremini', 'slices', 'zest',
    )
    rewrites = [(rf'\b{word}\b', '') for word in dropped_words]
    rewrites.append((r'\bzucchinis\b', 'zucchini'))

    text = input_string.lower()
    for pattern, substitute in rewrites:
        text = re.sub(pattern, substitute, text)

    # split() with no argument drops leading/trailing and repeated whitespace.
    words = text.split()
    return ' '.join(words)
    

In [23]:
# Quick check: the descriptor 'grated' is removed and the text is lower-cased.
clean_string('grated Parmesean')

'parmesean'

In [24]:
def exact_match_query(query:str):
    """Return ``query`` wrapped in double quotes."""
    quote_char = '"'
    return ''.join([quote_char, query, quote_char])

In [66]:
def calculate_energy(query: str):
    """Find the energy value for an ingredient name.

    The name is cleaned with ``clean_string`` and wrapped in double quotes.
    Water / ice cubes and tomatoes are answered from hard-coded values; every
    other ingredient is searched in each entry of ``data_types`` in order and
    the first hit is returned.

    Args:
        query: Ingredient name (typically the NER-extracted name).

    Returns:
        A ``USDAFoodIngredient`` or ``None`` when no data type yields a result.
    """
    import re
    from urllib.parse import quote

    clean_query = clean_string(query)
    format_query = exact_match_query(clean_query)

    # Match 'water' as a whole word only; a plain substring test also caught
    # names such as 'watermelon' or 'watercress' and reported 0 kcal for them.
    if re.search(r'\bwater\b', clean_query) or 'ice cubes' in clean_query:
        return USDAFoodIngredient(name=format_query, fdc_id=174158, nutrition_id=1008, unit='KCAL', energy=0.0)
    # 'cherry tomatoes' already contains 'tomatoes', so one test covers both.
    # NOTE(review): this also matches e.g. 'sundried tomatoes' — confirm that
    # the hard-coded value is intended for those.
    if 'tomatoes' in clean_query:
        return USDAFoodIngredient(name=format_query, fdc_id=2543214, nutrition_id=1008, unit='KCAL', energy=22.0)

    # Percent-encode the query so characters such as '&' or '#' in an
    # ingredient name cannot break the query string.
    encoded_query = quote(format_query, safe='')
    for data_type in data_types:
        formatted_url = url.format(encoded_query, data_type)
        ingredient = retrieve_energy(query=format_query, formatted_url=formatted_url)
        if ingredient is not None:
            return ingredient
    return None

In [26]:
# Quick check of the hard-coded 'water' shortcut.
calculate_energy('water')

USDAFoodIngredient(fdc_id=174158, name='"water"', nutrition_id=1008, energy=0.0, unit='KCAL')

In [27]:
import json
# Read the generated recipes. The encoding is given explicitly so the result
# does not depend on the platform's default text encoding.
with open('../data/generated_recipe_v2/gpt4_1106_recipes.json', encoding='utf-8') as f:
    gpt4_recipe = json.load(f)

In [28]:
from typing import  List
class Nutrition(BaseModel):
    """Optional nutrition figures of a recipe, each kept as a string."""
    fat: Optional[str] = None
    protein: Optional[str] = None
    carbohydrate: Optional[str] = None

class Recipe(BaseModel):
    """One recipe as loaded from the generated-recipes JSON file."""
    id: str
    recipe_name:str
    # Raw ingredient dicts; enrich_recipes_with_ner reads their 'name' and
    # 'quantity' keys.
    ingredients: List[dict]
    directions: str
    nutrition: Nutrition
    # Calorie estimate kept as a string.
    total_calories_estimation: str  

class Recipes(BaseModel):
    """Top-level container holding a list of Recipe objects."""
    recipes: List[Recipe] = []

In [29]:
# Validate the loaded JSON against the Recipes model.
recipes = Recipes(**gpt4_recipe)

In [30]:
import json
import logging
import os

import openai

from typing import Optional
from IPython.display import display, Markdown
from tenacity import retry, wait_random_exponential, stop_after_attempt

# Log INFO and above, prefixed with timestamp and level name.
logging.basicConfig(level=logging.INFO, format=' %(asctime)s - %(levelname)s - %(message)s')

# OpenAI chat model identifier.
OPENAI_MODEL = 'gpt-3.5-turbo-0613'

In [31]:
# Entity types listed in the system prompt and handed to generate_functions.
labels = [
    "person",      # people, including fictional characters
    "fac",         # buildings, airports, highways, bridges
    "org",         # organizations, companies, agencies, institutions
    "gpe",         # geopolitical entities like countries, cities, states
    "loc",         # non-gpe locations
    "product",     # vehicles, foods, apparel, appliances, software, toys
    "event",       # named sports, scientific milestones, historical events
    "work_of_art", # titles of books, songs, movies
    "law",         # named laws, acts, or legislations
    "language",    # any named language
    "date",        # absolute or relative dates or periods
    "time",        # time units smaller than a day
    "percent",     # percentage (e.g., "twenty percent", "18%")
    "money",       # monetary values, including unit
    "quantity",    # measurements, e.g., weight or distance
]

In [32]:
def system_message(labels):
    """Build the system prompt that lists the allowed entity types.

    Args:
        labels: Iterable of entity-type names; they are joined with ', '.

    Returns:
        The prompt text, starting with a newline.
    """
    joined_labels = ", ".join(labels)
    prompt_lines = [
        "",
        "You are an expert in Natural Language Processing. Your task is to identify common Named Entities (NER) in a given text.",
        f"The possible common Named Entities (NER) types are exclusively: ({joined_labels}).",
    ]
    return "\n".join(prompt_lines)


In [33]:
def assisstant_message():
    """Return the one-shot example shown to the model before the real task.

    The example pairs an ingredient list with the expected JSON answer. Two
    defects of the earlier example are corrected: it listed "unsalted water"
    although the text says "unsalted butter", and the JSON object ended with a
    trailing comma, which is not valid JSON.
    """
    # Plain (non-f) string: nothing is interpolated, so braces need no escaping.
    return """
EXAMPLE:
    Text: '1 pie crust (store-bought or homemade), 6 cups thinly sliced apples (such as Granny Smith or a combination of tart and sweet apples), 3/4 cup granulated sugar, 2 tbsp all-purpose flour, 1 tsp ground cinnamon, 1/4 tsp ground nutmeg, 1/4 tsp salt, 1 tsp vanilla extract, 1/2 cup unsalted butter (cold and cut into small pieces), 3/4 cup all-purpose flour (for crumb topping), 1/2 cup brown sugar (packed, for crumb topping), 1/4 tsp baking powder (for crumb topping), 1/4 tsp salt (for crumb topping), Optional: Vanilla ice cream or whipped cream for serving'
    {
        "product": ["pie crust","apples", "granulated sugar", "all-purpose flour", "ground cinnamon", "ground nutmeg", "salt", "vanilla extract", "unsalted butter","brown sugar","baking powder", "vanilla ice cream", "whipped cream"]
    }
--"""

In [34]:
def user_message(text):
    """Wrap ``text`` in the TASK block sent as the user message."""
    task_lines = ["", "TASK:", f"    Text: {text}", ""]
    return "\n".join(task_lines)

In [35]:
def recipe_ingredients(text: str, label_entities: dict) -> list:
    """Return the entities found under the 'product' label.

    Args:
        text: The analysed text (not used by this function).
        label_entities: Mapping of label name to the entities found for it.

    Returns:
        The value stored under ``'product'``.

    Raises:
        KeyError: If ``label_entities`` has no ``'product'`` key.
    """
    products = label_entities['product']
    return products

In [36]:
def generate_functions(labels: list) -> list:
    """Build the tool definition passed as ``tools`` to the chat completion call.

    The single tool, ``recipe_ingredients``, takes an object with one optional
    array-of-strings property per label. The earlier schema declared one
    property whose name was the literal text of a Python expression, so none
    of the labels was actually described to the model.

    Args:
        labels: Iterable of entity-type names.

    Returns:
        A one-element list holding the tool definition.
    """
    label_properties = {
        label: {"type": "array", "items": {"type": "string"}}
        for label in labels
    }
    return [
        {
            "type": "function",
            "function": {
                "name": "recipe_ingredients",
                "description": "Enrich Text with recipe ingredients",
                "parameters": {
                    "type": "object",
                    "properties": label_properties,
                    # Only the declared labels may appear as keys.
                    "additionalProperties": False,
                },
            },
        }
    ]

In [40]:
@retry(wait=wait_random_exponential(min=1, max=10), stop=stop_after_attempt(5))
def run_openai_task(labels, text):
    """Ask the chat model to extract entities from ``text`` via a forced tool call.

    The call is wrapped in ``retry``: any exception triggers another attempt
    (randomised exponential wait, min=1 / max=10), for at most 5 attempts.

    Args:
        labels: Entity-type names used in the system prompt and tool schema.
        text: Text to analyse.

    Returns:
        A dict with ``"model_response"`` (the raw completion object) and
        ``"function_response"`` (the result of calling the selected local
        function with the model's arguments).

    Raises:
        ValueError: If the model's reply carries no tool call.
    """
    messages = [
          {"role": "system", "content": system_message(labels=labels)},
          {"role": "assistant", "content": assisstant_message()},
          {"role": "user", "content": user_message(text=text)}
      ]

    response = openai.chat.completions.create(
        # Use the notebook-level constant instead of repeating the model name.
        model=OPENAI_MODEL,
        messages=messages,
        tools=generate_functions(labels),
        # Force the model to answer through the recipe_ingredients tool.
        tool_choice={"type": "function", "function" : {"name": "recipe_ingredients"}},
        temperature=0,
        frequency_penalty=0,
        presence_penalty=0,
    )

    response_message = response.choices[0].message

    # tool_calls can be absent; fail with a clear message instead of a
    # TypeError from indexing None.
    tool_calls = response_message.tool_calls
    if not tool_calls:
        raise ValueError("Model response contained no tool call")
    tool_call = tool_calls[0]

    available_functions = {"recipe_ingredients": recipe_ingredients}
    function_to_call = available_functions[tool_call.function.name]
    logging.info(f"function_to_call: {function_to_call}")

    function_args = json.loads(tool_call.function.arguments)
    logging.info(f"function_args: {function_args}")

    function_response = function_to_call(text, function_args)

    return {"model_response": response,
            "function_response": function_response}

In [41]:
# Try the NER call on a single ingredient name.
text = """fresh ginger"""
result = run_openai_task(labels, text)

 2024-04-14 18:52:00,239 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
 2024-04-14 18:52:00,248 - INFO - function_to_call: <function recipe_ingredients at 0x11544c0e0>
 2024-04-14 18:52:00,249 - INFO - function_args: {'product': ['fresh ginger']}


In [42]:
# Display the raw model response together with the extracted entities.
result

{'model_response': ChatCompletion(id='chatcmpl-9E5nkswRJVdo1JVQzC2AsIqtJuc9T', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=None, role='assistant', function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_f5OknKEThxzo3n7lz3KwwPX8', function=Function(arguments='{\n    "product": ["fresh ginger"]\n}', name='recipe_ingredients'), type='function')]))], created=1713145920, model='gpt-3.5-turbo-0613', object='chat.completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=10, prompt_tokens=382, total_tokens=392)),
 'function_response': ['fresh ginger']}

In [43]:
from typing import  Optional


class EnrichRecipe(BaseModel):
    """A recipe whose ingredients are typed Ingredient objects (with ner_name)."""
    id: str
    recipe_name:str
    ingredients: List[Ingredient]
    directions: str
    nutrition: Nutrition
    total_calories_estimation: str  

class EnrichRecipes(BaseModel):
    """Top-level container holding a list of EnrichRecipe objects."""
    recipes: List[EnrichRecipe] = []

In [44]:
class IngredientWithEngergy(BaseModel):
    """An ingredient with its NER name and, when available, a USDA energy lookup."""
    name: str
    quantity: str
    ner_name: str
    # None when no USDA energy value was found for the ingredient.
    usda_food_ingredient:Optional[USDAFoodIngredient]=None

In [45]:
class EnrichRecipeWithEnergy(BaseModel):
    """A recipe whose ingredients carry the optional USDA energy lookup."""
    id: str
    recipe_name:str
    ingredients: List[IngredientWithEngergy]
    directions: str
    nutrition: Nutrition
    total_calories_estimation: str 

class EnrichRecipesWithEnergy(BaseModel):
    """Top-level container holding a list of EnrichRecipeWithEnergy objects."""
    recipes: List[EnrichRecipeWithEnergy] = []

In [55]:
def enrich_recipes_with_ner(recipes:Recipes):
    """Attach an NER-extracted name to every ingredient of every recipe.

    For each ingredient dict, ``run_openai_task`` is called on its ``'name'``
    and the first returned entity becomes ``ner_name``.

    Args:
        recipes: The recipes to enrich; each ingredient dict must provide
            ``'name'`` and ``'quantity'``.

    Returns:
        An ``EnrichRecipes`` holding one ``EnrichRecipe`` per input recipe.
    """
    enrich_recipes = EnrichRecipes()
    for recipe in recipes.recipes:
        ingredients_list = []
        for item in recipe.ingredients:
            ner_response = run_openai_task(labels, item['name'])
            ner_ingredients = ner_response['function_response']
            # An empty entity list used to raise IndexError and abort the
            # whole run; fall back to the raw ingredient name instead.
            ner_name = ner_ingredients[0] if ner_ingredients else item['name']
            ingredients_list.append(Ingredient(name=item['name'],
                                               quantity=item['quantity'],
                                               ner_name=ner_name))
        enrich_recipes.recipes.append(
            EnrichRecipe(id=recipe.id,
                         recipe_name=recipe.recipe_name,
                         ingredients=ingredients_list,
                         directions=recipe.directions,
                         nutrition=recipe.nutrition,
                         total_calories_estimation=recipe.total_calories_estimation))
    return enrich_recipes

In [56]:
# Run the NER enrichment over all loaded recipes (one OpenAI call per ingredient).
enrich_recpies = enrich_recipes_with_ner(recipes=recipes)

 2024-04-14 18:54:26,580 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
 2024-04-14 18:54:26,584 - INFO - function_to_call: <function recipe_ingredients at 0x11544c0e0>
 2024-04-14 18:54:26,584 - INFO - function_args: {'product': ['chickpeas']}
 2024-04-14 18:54:26,872 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
 2024-04-14 18:54:26,875 - INFO - function_to_call: <function recipe_ingredients at 0x11544c0e0>
 2024-04-14 18:54:26,876 - INFO - function_args: {'product': ['cucumber']}
 2024-04-14 18:54:27,423 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
 2024-04-14 18:54:27,426 - INFO - function_to_call: <function recipe_ingredients at 0x11544c0e0>
 2024-04-14 18:54:27,427 - INFO - function_args: {'product': ['cherry tomatoes']}
 2024-04-14 18:54:27,973 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
 2024-04-14 18:54:27

In [57]:
import json
# Persist the NER-enriched recipes. ensure_ascii=False writes non-ASCII
# characters verbatim, so the file is opened as UTF-8 explicitly; the platform
# default encoding may not be able to encode them.
with open('../data/generated_recipe_v2/gpt4_1106_ner_recipes.json', 'w', encoding='utf-8') as f:
    json.dump(enrich_recpies.model_dump(mode='json'), f, indent=4, ensure_ascii=False)

In [58]:
import json
# Reload the NER-enriched recipes. The encoding is given explicitly so that
# reading does not depend on the platform's default text encoding.
with open('../data/generated_recipe_v2/gpt4_1106_ner_recipes.json', encoding='utf-8') as f:
    gpt4_recipe_ner = json.load(f)


In [59]:
# Validate the reloaded JSON against the EnrichRecipes model.
recipes_ner = EnrichRecipes(**gpt4_recipe_ner)

In [67]:
# Attach a USDA energy lookup to every ingredient of every NER-enriched recipe.
enrich_recipes_with_energy = EnrichRecipesWithEnergy()
# Ingredients repeat across recipes; remember successful lookups so each
# distinct NER name costs at most one successful round of USDA requests.
# Misses are not cached, so a later occurrence is looked up again.
energy_by_ner_name = {}
for recipe in recipes_ner.recipes:
    ingredients_list = []
    for item in recipe.ingredients:
        usda_food_ingredient = energy_by_ner_name.get(item.ner_name)
        if usda_food_ingredient is None:
            usda_food_ingredient = calculate_energy(item.ner_name)
            if usda_food_ingredient is not None:
                energy_by_ner_name[item.ner_name] = usda_food_ingredient
        print(f'Engergy calculated for:{item.ner_name}')
        ingredients_list.append(
            IngredientWithEngergy(name=item.name,
                                  quantity=item.quantity,
                                  ner_name=item.ner_name,
                                  usda_food_ingredient=usda_food_ingredient))
    enrich_recipes_with_energy.recipes.append(
        EnrichRecipeWithEnergy(id=recipe.id,
                               recipe_name=recipe.recipe_name,
                               ingredients=ingredients_list,
                               directions=recipe.directions,
                               nutrition=recipe.nutrition,
                               total_calories_estimation=recipe.total_calories_estimation))

Engergy calculated for:chickpeas
Engergy calculated for:cucumber
Engergy calculated for:cherry tomatoes
Engergy calculated for:red onion
Engergy calculated for:Kalamata olives
Engergy calculated for:feta cheese
Engergy calculated for:extravirgin olive oil
Engergy calculated for:lemon juice
Engergy calculated for:fresh parsley
Engergy calculated for:dried oregano
Engergy calculated for:salt
Engergy calculated for:black pepper
Engergy calculated for:cremini mushrooms
Engergy calculated for:extravirgin olive oil
Engergy calculated for:garlic
Engergy calculated for:spinach
Engergy calculated for:feta cheese
Engergy calculated for:black olives
Engergy calculated for:sundried tomatoes
Engergy calculated for:fresh basil
Engergy calculated for:almond meal
Engergy calculated for:lemon zest
Engergy calculated for:salt
Engergy calculated for:black pepper
Engergy calculated for:plain Greek yogurt
Engergy calculated for:extravirgin olive oil
Engergy calculated for:lemon juice
Engergy calculated for

In [68]:
# Number of recipes that were enriched.
len(enrich_recipes_with_energy.recipes)

10

In [69]:
import json
# Persist the energy-enriched recipes. ensure_ascii=False writes non-ASCII
# characters verbatim, so the file is opened as UTF-8 explicitly; the platform
# default encoding may not be able to encode them.
with open('../data/generated_recipe_v2/gpt4_1106_enrich_recipes.json', 'w', encoding='utf-8') as f:
    json.dump(enrich_recipes_with_energy.model_dump(mode='json'), f, indent=4, ensure_ascii=False)