###### Loading environment variables from the .env file

In [124]:
# Load environment variables from the project's .env file. The USDA_API_KEY read later via
# os.environ.get is presumably defined there — confirm.
from dotenv import load_dotenv
load_dotenv('../.vscode/.env')

True

###### Importing requests

In [125]:
import requests

###### URL of the API

In [126]:
# Search endpoint template: {0} is filled with the search query and {1} with the dataType filter.
url = "https://api.nal.usda.gov/fdc/v1/foods/search?query={0}&dataType={1}&requireAllWords=true"

###### Declaring the data_types and nutrient_ids

In [127]:
# USDA dataType filters, in the order calculate_energy tries them.
data_types = ['Foundation', 'Branded']
# Nutrient ids accepted as the energy value of a food. 2047 is
# 'Energy (Atwater General Factors)' in the sample response; 1008 is presumably the
# generic 'Energy' entry — TODO confirm against the USDA nutrient list.
nutrient_ids = [1008, 2047]

###### Importing the os module

In [128]:
import os

In [129]:
# Request headers: the API key is read from the USDA_API_KEY environment variable
# (os.environ.get yields None when the variable is unset).
headers = {
  'X-Api-Key': os.environ.get('USDA_API_KEY')
}
# Empty request body, passed as `data=` on the GET requests in this notebook.
payload = {}

In [130]:
# Sample search: 'cucumber' restricted to the first data type.
response = requests.get(url.format('cucumber', data_types[0]), headers=headers, data=payload)

In [131]:
# Decode the JSON body of the sample search response.
resp_json = response.json()

In [132]:
# Display the decoded response to inspect its structure ('foods' -> 'foodNutrients').
resp_json

{'totalHits': 2,
 'currentPage': 1,
 'totalPages': 1,
 'pageList': [1],
 'foodSearchCriteria': {'dataType': ['Foundation'],
  'query': 'cucumber',
  'generalSearchInput': 'cucumber',
  'pageNumber': 1,
  'numberOfResultsPerPage': 50,
  'pageSize': 50,
  'requireAllWords': True,
  'foodTypes': ['Foundation']},
 'foods': [{'fdcId': 2346406,
   'description': 'Cucumber, with peel, raw',
   'commonNames': '',
   'additionalDescriptions': '',
   'dataType': 'Foundation',
   'ndbNumber': 11205,
   'publishedDate': '2022-10-28',
   'foodCategory': 'Vegetables and Vegetable Products',
   'mostRecentAcquisitionDate': '2022-05-02',
   'allHighlightFields': '',
   'score': 418.943,
   'microbes': [],
   'foodNutrients': [{'nutrientId': 1089,
     'nutrientName': 'Iron, Fe',
     'nutrientNumber': '303',
     'unitName': 'MG',
     'derivationCode': 'A',
     'derivationDescription': 'Analytical',
     'derivationId': 1,
     'value': 0.0,
     'foodNutrientSourceId': 1,
     'foodNutrientSourceCo

In [133]:
# Print only the nutrient entry with id 2047 from the first search hit.
for item in resp_json['foods'][0]['foodNutrients']:
    if item['nutrientId'] != 2047:
        continue
    print(item)

{'nutrientId': 2047, 'nutrientName': 'Energy (Atwater General Factors)', 'nutrientNumber': '957', 'unitName': 'KCAL', 'derivationCode': 'NC', 'derivationDescription': 'Calculated', 'derivationId': 49, 'value': 15.9, 'foodNutrientSourceId': 2, 'foodNutrientSourceCode': '4', 'foodNutrientSourceDescription': 'Calculated or imputed', 'rank': 280, 'indentLevel': 1, 'foodNutrientId': 28912853}


In [134]:
from pydantic import BaseModel

In [135]:
class USDAFoodIngredient(BaseModel):
    """Energy entry of one food returned by the USDA search.

    Instances are built by ``retrieve_energy`` from the first search hit.
    """
    # 'fdcId' of the matched food.
    fdc_id: int
    # The search query used for the lookup (not the USDA description).
    name: str
    # 'nutrientId' of the nutrient entry the energy value was taken from.
    nutrition_id: int
    # 'value' of that nutrient entry.
    energy: float
    # 'unitName' of that nutrient entry (e.g. 'KCAL').
    unit: str 

In [136]:
def http_get(url:str, timeout: float = 30):
    """Send a GET request to ``url`` with the notebook's shared headers and payload.

    Args:
        url: Fully formatted request URL.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which can hang the whole
            notebook on a stalled connection.

    Returns:
        The ``requests.Response`` object; the status code is not checked here.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within ``timeout``.
    """
    return requests.request("GET", url, headers=headers, data=payload, timeout=timeout)

In [137]:
from typing import Optional
def retrieve_energy(query: str, formatted_url:str)-> Optional[USDAFoodIngredient]:
    """Fetch the energy entry of the first USDA search hit for ``query``.

    Args:
        query: Ingredient name that was searched; stored as ``name`` on the result.
        formatted_url: Complete search URL to request.

    Returns:
        A ``USDAFoodIngredient`` built from the first nutrient of the first hit
        whose ``nutrientId`` is listed in ``nutrient_ids``, or ``None`` when the
        request fails, no food is found, or no such nutrient is present.
    """
    response = http_get(url=formatted_url)
    if response.status_code != 200:
        # Report the query that failed; `query` is the only ingredient name in scope here.
        print(f'Fail to get response for ingredient:{query}')
        return None
    # Treat a missing or null 'foods' list the same as an empty result.
    foods = response.json().get('foods') or []
    if not foods:
        return None
    # Only the first (top-listed) hit is considered.
    food_details = foods[0]
    fdc_id = food_details['fdcId']
    for item in food_details.get('foodNutrients') or []:
        if item.get('nutrientId') in nutrient_ids:
            return USDAFoodIngredient(
                name=query,
                fdc_id=fdc_id,
                nutrition_id=item['nutrientId'],
                unit=item['unitName'],
                energy=item['value'],
            )
    return None
            

In [138]:
def calculate_energy(query: str):
    """Look up the energy entry for ``query``, trying each USDA data type in order.

    Args:
        query: Ingredient name to search for.

    Returns:
        The first non-``None`` result of ``retrieve_energy`` across ``data_types``,
        or ``None`` when no data type yields a match.
    """
    # Local import keeps this cell self-contained.
    from urllib.parse import quote

    # Percent-encode the query so characters such as '&', '#' or '+' cannot
    # change the structure of the URL's query string.
    encoded_query = quote(query, safe='')
    for data_type in data_types:
        formatted_url = url.format(encoded_query, data_type)
        ingredient = retrieve_energy(query=query, formatted_url=formatted_url)
        if ingredient is not None:
            return ingredient
    return None

In [139]:
import json
# Read the recipe file explicitly as UTF-8 so the result does not depend on the
# platform's default text encoding.
with open('../data/generated_recipe_v2/gpt4_1106_recipes.json', encoding='utf-8') as f:
    gpt4_recipe = json.load(f)

In [142]:
from typing import  List
class Nutrition(BaseModel):
    """Nutrition facts of a recipe; every field is an optional string defaulting to None."""
    fat: Optional[str] = None
    protein: Optional[str] = None
    carbohydrate: Optional[str] = None

class Recipe(BaseModel):
    """One recipe as loaded from the input JSON file."""
    id: str
    recipe_name:str
    # Raw ingredient dicts; the sample data uses 'name' and 'quantity' keys.
    ingredients: List[dict]
    directions: str
    nutrition: Nutrition
    # Stored as a string; not parsed into a number.
    total_calories_estimation: str  

class Recipes(BaseModel):
    """Top-level container matching the JSON file layout: a 'recipes' list of Recipe objects."""
    recipes: List[Recipe] = []

In [143]:
# Validate the loaded JSON dict against the Recipes model.
recipes = Recipes(**gpt4_recipe)

In [144]:
import json
import logging
import os

import openai
import wikipedia

from typing import Optional
from IPython.display import display, Markdown
from tenacity import retry, wait_random_exponential, stop_after_attempt

# Log INFO and above, formatted as timestamp, level and message.
logging.basicConfig(level=logging.INFO, format=' %(asctime)s - %(levelname)s - %(message)s')

# Name of the OpenAI chat model.
OPENAI_MODEL = 'gpt-3.5-turbo-0613'

In [145]:
# Entity types offered to the model; passed to system_message and generate_functions.
labels = [
    "person",      # people, including fictional characters
    "fac",         # buildings, airports, highways, bridges
    "org",         # organizations, companies, agencies, institutions
    "gpe",         # geopolitical entities like countries, cities, states
    "loc",         # non-gpe locations
    "product",     # vehicles, foods, apparel, appliances, software, toys
    "event",       # named sports, scientific milestones, historical events
    "work_of_art", # titles of books, songs, movies
    "law",         # named laws, acts, or legislations
    "language",    # any named language
    "date",        # absolute or relative dates or periods
    "time",        # time units smaller than a day
    "percent",     # percentage (e.g., "twenty percent", "18%")
    "money",       # monetary values, including unit
    "quantity",    # measurements, e.g., weight or distance
]

In [146]:
def system_message(labels):
    """Build the system prompt that restricts the model to the given entity types.

    Args:
        labels: Iterable of entity type names; joined with ", " inside parentheses.

    Returns:
        The prompt text, starting with a newline.
    """
    allowed_types = ", ".join(labels)
    return (
        "\n"
        "You are an expert in Natural Language Processing. Your task is to identify common Named Entities (NER) in a given text.\n"
        f"The possible common Named Entities (NER) types are exclusively: ({allowed_types})."
    )


In [147]:
def assisstant_message():
    """Return the few-shot example shown to the model: an ingredient text and its labelled entities.

    The example answer is valid JSON (no trailing comma) and every listed
    product appears in the example text, so the demonstration is consistent
    with what the model is asked to produce.
    """
    # Plain (non f-) string: there is nothing to interpolate, so braces need no escaping.
    return """
EXAMPLE:
    Text: '1 pie crust (store-bought or homemade), 6 cups thinly sliced apples (such as Granny Smith or a combination of tart and sweet apples), 3/4 cup granulated sugar, 2 tbsp all-purpose flour, 1 tsp ground cinnamon, 1/4 tsp ground nutmeg, 1/4 tsp salt, 1 tsp vanilla extract, 1/2 cup unsalted butter (cold and cut into small pieces), 3/4 cup all-purpose flour (for crumb topping), 1/2 cup brown sugar (packed, for crumb topping), 1/4 tsp baking powder (for crumb topping), 1/4 tsp salt (for crumb topping), Optional: Vanilla ice cream or whipped cream for serving'
    {
        "product": ["pie crust","apples", "granulated sugar", "all-purpose flour", "ground cinnamon", "ground nutmeg", "salt", "vanilla extract", "unsalted butter","brown sugar","baking powder", "vanilla ice cream", "whipped cream"]
    }
--"""

In [148]:
def user_message(text):
    """Build the user prompt that carries the text to analyse.

    Args:
        text: The text to label; interpolated after "Text: ".

    Returns:
        The prompt text, starting and ending with a newline.
    """
    header = "\nTASK:\n"
    return f"{header}    Text: {text}\n"

In [149]:
def recipe_ingredients(text: str, label_entities: dict) -> list:
    """Return the entities labelled as ``product``.

    Args:
        text: The analysed text; accepted but not used.
        label_entities: Mapping of label name to the entities found for it.

    Returns:
        The value stored under the ``'product'`` key.

    Raises:
        KeyError: If ``label_entities`` has no ``'product'`` key.
    """
    products = label_entities['product']
    return products

In [150]:
def generate_functions(labels: list) -> list:
    """Build the tool definition that makes the model return entities grouped by label.

    Args:
        labels: Entity type names; each becomes one property of the tool's
            parameter object.

    Returns:
        A one-element list holding the ``recipe_ingredients`` function tool,
        whose parameters accept an array of strings for every label and
        nothing else.
    """
    return [
        {
            "type": "function",
            "function": {
                "name": "recipe_ingredients",
                "description": "Enrich Text with recipe ingredients",
                "parameters": {
                    "type": "object",
                    # One explicit property per label. With additionalProperties
                    # disabled, a label must be declared here to be a valid key.
                    "properties": {
                        label: {"type": "array", "items": {"type": "string"}}
                        for label in labels
                    },
                    "additionalProperties": False,
                },
            },
        }
    ]

In [151]:
@retry(wait=wait_random_exponential(min=1, max=10), stop=stop_after_attempt(5))
def run_openai_task(labels, text):
    """Ask the chat model to label ``text`` and return the extracted ingredients.

    The call forces the ``recipe_ingredients`` tool, parses the tool-call
    arguments as JSON and passes them to the matching local function. Any
    exception triggers a retry (random exponential wait between 1 and 10
    seconds, at most 5 attempts).

    Args:
        labels: Entity type names offered to the model.
        text: The text to analyse.

    Returns:
        A dict with ``model_response`` (the raw completion object) and
        ``function_response`` (the value returned by the local function).
    """
    messages = [
        {"role": "system", "content": system_message(labels=labels)},
        {"role": "assistant", "content": assisstant_message()},
        {"role": "user", "content": user_message(text=text)},
    ]

    response = openai.chat.completions.create(
        model=OPENAI_MODEL,  # single source of truth for the model name
        messages=messages,
        tools=generate_functions(labels),
        tool_choice={"type": "function", "function": {"name": "recipe_ingredients"}},
        temperature=0,
        frequency_penalty=0,
        presence_penalty=0,
    )

    response_message = response.choices[0].message
    tool_calls = response_message.tool_calls
    if not tool_calls:
        # Fail with a clear message instead of an opaque TypeError on None[0].
        raise ValueError("Model response contains no tool call")
    tool_call = tool_calls[0]

    # Map the tool name chosen by the model to the local implementation.
    available_functions = {"recipe_ingredients": recipe_ingredients}
    function_to_call = available_functions[tool_call.function.name]
    logging.info(f"function_to_call: {function_to_call}")

    function_args = json.loads(tool_call.function.arguments)
    logging.info(f"function_args: {function_args}")

    function_response = function_to_call(text, function_args)

    return {"model_response": response,
            "function_response": function_response}

In [160]:
# Run the extraction on a single ingredient string.
text = """fresh ginger"""
result = run_openai_task(labels, text)

 2024-04-08 23:26:48,514 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
 2024-04-08 23:26:48,518 - INFO - function_to_call: <function recipe_ingredients at 0x12f1f8ae0>
 2024-04-08 23:26:48,519 - INFO - function_args: {'product': ['fresh ginger']}


In [153]:
# Inspect the raw model response together with the extracted ingredient names.
# NOTE(review): the saved output mentions 'lemon' although the preceding cell sets text to
# 'fresh ginger' — the cells appear to have been executed out of order; re-run to refresh.
result

{'model_response': ChatCompletion(id='chatcmpl-9Bz9ZNd1nqCCn0kVECPiA8KJV91FS', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=None, role='assistant', function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_XEHQjBgpUTlP0UIVmgfUh0S9', function=Function(arguments='{\n    "product": ["lemon"]\n}', name='recipe_ingredients'), type='function')]))], created=1712643709, model='gpt-3.5-turbo-0613', object='chat.completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=10, prompt_tokens=383, total_tokens=393)),
 'function_response': ['lemon']}

In [154]:
# Ingredients of the third recipe (index 2), used as a small sample.
sample_ingredients = gpt4_recipe['recipes'][2]['ingredients']

In [155]:
# Display the sample: a list of dicts with 'name' and 'quantity' keys.
sample_ingredients

[{'name': 'plain Greek yogurt', 'quantity': '1 cup'},
 {'name': 'extravirgin olive oil', 'quantity': '2 tablespoons'},
 {'name': 'lemon juice', 'quantity': '2 tablespoons'},
 {'name': 'garlic (minced)', 'quantity': '1 clove'},
 {'name': 'Dijon mustard', 'quantity': '1 teaspoon'},
 {'name': 'honey (or a sugar substitute for a lower carb option)',
  'quantity': '1 teaspoon'},
 {'name': 'dried oregano', 'quantity': '1/2 teaspoon'},
 {'name': 'salt', 'quantity': '1/4 teaspoon'},
 {'name': 'black pepper', 'quantity': '1/8 teaspoon'}]

In [156]:
# Resolve every sample ingredient: extract the bare ingredient name with the model,
# then look up its energy entry in the USDA database.
ingredients_list = []
for item in sample_ingredients:
    ner_response = run_openai_task(labels,item['name'])
    ner_ingredients = ner_response['function_response']
    if ner_ingredients:
        # Only the first extracted name is used for the lookup.
        ingredient = calculate_energy(ner_ingredients[0])
    else:
        # An empty extraction would otherwise raise IndexError; record "no match" instead.
        logging.warning("No ingredient extracted from %r; skipping USDA lookup", item['name'])
        ingredient = None
    ingredients_list.append(ingredient)

 2024-04-08 23:23:11,355 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
 2024-04-08 23:23:11,358 - INFO - function_to_call: <function recipe_ingredients at 0x12f1f8ae0>
 2024-04-08 23:23:11,359 - INFO - function_args: {'product': ['plain Greek yogurt']}
 2024-04-08 23:23:12,211 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
 2024-04-08 23:23:12,214 - INFO - function_to_call: <function recipe_ingredients at 0x12f1f8ae0>
 2024-04-08 23:23:12,215 - INFO - function_args: {'product': ['extravirgin olive oil']}
 2024-04-08 23:23:13,345 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
 2024-04-08 23:23:13,348 - INFO - function_to_call: <function recipe_ingredients at 0x12f1f8ae0>
 2024-04-08 23:23:13,348 - INFO - function_args: {'product': ['lemon juice']}
 2024-04-08 23:23:14,621 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
 2

In [157]:
# Display the lookup results for the sample ingredients.
ingredients_list

[USDAFoodIngredient(fdc_id=330137, name='plain Greek yogurt', nutrition_id=1008, energy=61.0, unit='KCAL'),
 USDAFoodIngredient(fdc_id=2654005, name='extravirgin olive oil', nutrition_id=1008, energy=800.0, unit='KCAL'),
 USDAFoodIngredient(fdc_id=2438529, name='lemon juice', nutrition_id=1008, energy=0.0, unit='KCAL'),
 USDAFoodIngredient(fdc_id=1104647, name='garlic', nutrition_id=1008, energy=143.0, unit='KCAL'),
 USDAFoodIngredient(fdc_id=2099042, name='Dijon mustard', nutrition_id=1008, energy=100.0, unit='KCAL'),
 USDAFoodIngredient(fdc_id=1548359, name='honey', nutrition_id=1008, energy=286.0, unit='KCAL'),
 USDAFoodIngredient(fdc_id=2098976, name='dried oregano', nutrition_id=1008, energy=357.0, unit='KCAL'),
 USDAFoodIngredient(fdc_id=1492094, name='salt', nutrition_id=1008, energy=0.0, unit='KCAL'),
 USDAFoodIngredient(fdc_id=2157235, name='black pepper', nutrition_id=1008, energy=0.0, unit='KCAL')]

In [158]:
from typing import  Optional
class Ingredient(BaseModel):
    """A recipe ingredient, optionally enriched with its USDA energy entry."""
    name: str
    quantity: str
    # Result of the USDA lookup; None when no entry is attached.
    usda_food_ingredient:Optional[USDAFoodIngredient] = None

class EnrichRecipe(BaseModel):
    """Same fields as Recipe, but with ingredients as Ingredient models instead of raw dicts."""
    id: str
    recipe_name:str
    ingredients: List[Ingredient]
    directions: str
    nutrition: Nutrition
    # Stored as a string; not parsed into a number.
    total_calories_estimation: str  

class EnrichRecipes(BaseModel):
    """Top-level container for the enriched output: a 'recipes' list of EnrichRecipe objects."""
    recipes: List[EnrichRecipe] = []

In [159]:
# Enrich every recipe: for each ingredient, extract the bare name with the model and
# attach the USDA energy entry found for it (None when nothing is found).
enrich_recipes = EnrichRecipes()
for recipe in recipes.recipes:
    ingredients_list=[]
    for item in recipe.ingredients:
        ner_response = run_openai_task(labels,item['name'])
        ner_ingredients = ner_response['function_response']
        if ner_ingredients:
            # Only the first extracted name is used for the lookup.
            usda_food_ingredient = calculate_energy(ner_ingredients[0])
        else:
            # An empty extraction would otherwise raise IndexError and abort the whole run.
            logging.warning("No ingredient extracted from %r; skipping USDA lookup", item['name'])
            usda_food_ingredient = None
        ingredient = Ingredient(name=item['name'],quantity=item['quantity'], usda_food_ingredient=usda_food_ingredient)
        ingredients_list.append(ingredient)
    enrich_recipe = EnrichRecipe(id=recipe.id,
                                recipe_name=recipe.recipe_name,
                                ingredients=ingredients_list,
                                directions=recipe.directions,
                                nutrition=recipe.nutrition,
                                total_calories_estimation=recipe.total_calories_estimation)
    enrich_recipes.recipes.append(enrich_recipe)

 2024-04-08 23:24:28,916 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
 2024-04-08 23:24:28,919 - INFO - function_to_call: <function recipe_ingredients at 0x12f1f8ae0>
 2024-04-08 23:24:28,919 - INFO - function_args: {'product': ['chickpeas']}
 2024-04-08 23:24:29,820 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
 2024-04-08 23:24:29,824 - INFO - function_to_call: <function recipe_ingredients at 0x12f1f8ae0>
 2024-04-08 23:24:29,825 - INFO - function_args: {'product': ['cucumber']}
 2024-04-08 23:24:30,557 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
 2024-04-08 23:24:30,562 - INFO - function_to_call: <function recipe_ingredients at 0x12f1f8ae0>
 2024-04-08 23:24:30,563 - INFO - function_args: {'product': ['cherry tomatoes']}
 2024-04-08 23:24:31,988 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
 2024-04-08 23:24:31

In [161]:
import json
# ensure_ascii=False writes non-ASCII characters verbatim, so pin the file encoding to
# UTF-8 instead of relying on the platform default (which may be unable to encode them).
with open('../data/generated_recipe_v2/gpt4_1106_enrich_recipes.json', 'w', encoding='utf-8') as f:
    json.dump(enrich_recipes.model_dump(mode='json'), f, indent=4, ensure_ascii=False)