# NLP and Nutition Information
This notebook is used to get nutrition information such as Fat, Calories, Sugar, Carbohydrates, and Protein.

In [1]:
# import dependencies
import pandas as pd
import numpy as np
import re
from re import search

# import dependencies
import requests
import json

# import API key
from config import API_KEY

In [2]:
# import the raw csv
recipes_df_raw = pd.read_csv('technical_list.csv', encoding="utf-8")
recipes_df = recipes_df_raw.copy()
recipes_df_raw.head

<bound method NDFrame.head of                                                  title           judge  \
0                   Rav Gill’s Macaron Snack-Cessories    Ravneet Gill   
1    Liam Charles’s Biscuit Card Tower & Deck of Cards    Liam Charles   
2                 Rav Gill’s Cherry & Almond Deco Roll    Ravneet Gill   
3               Liam Charles’s Yorkshire Pudding Wraps    Liam Charles   
4          Rav Gill’s Churros with Banana Butterscotch    Ravneet Gill   
..                                                 ...             ...   
124                    Mary Berry’s Hazelnut Dacquoise  Paul Hollywood   
125                           Mary Berry’s Religieuses  Paul Hollywood   
126                  Paul Hollywood’s Apricot Couronne  Paul Hollywood   
127          Mary Berry’s Tuiles with Chocolate Mousse  Paul Hollywood   
128                      Mary Berry’s Charlotte Royale  Paul Hollywood   

            makes              difficulty hands_on_time baking_time  \
0         

In [62]:
# Formats for ingredients
# 1g unsalted butter
# 1 tsp vanilla extract
# slivered pistachios
# 2 large eggs
# 2 dill pickles, quartered lengthways
# juice of 1/2 lemon

def split_ingredient(ingredient):
    '''
    The split_ingredients funcion takes an ingredient in a string format. The function then splits the ingredient into quantity, measurement, and any comments like temperature.
    '''
    comment = 'na'
    quantity = 'na'
    measurement = 'na'
    item = 'na'

    # if the ingredient contains a comment (need to deal when the comment is before the recipe, like in 50)
    if ',' in ingredient:
        comment = ingredient.split(',')[1]
        ingredient = ingredient.split(',')[0]

    # if ingredient is eggs
    if 'egg' in ingredient:
        quantity = re.findall('\d+', ingredient)
        measurement = re.findall('\d+(.*) ', ingredient)
        item = 'egg'


    # if measurement is weight or volume
    elif re.match('^\d+[a-zA-Z]',ingredient) is not None:
        quantity = re.findall('\d+', ingredient)[0]
        measurement = re.findall('^\d+([a-zA-Z]+) ', ingredient)[0]
        item = re.findall('\d+[a-zA-Z]+ (.*)', ingredient)[0]

    # Juice of (still need zest of)
    elif 'juice of' in ingredient:
        item = re.findall('juice of [^a-z]+([a-z ]+)', ingredient, re.IGNORECASE)[0]
        if 'zest' in ingredient:
            comment = 'juice and zest'
        else:
            comment = 'juice'
        if re.findall('[0-9]+', ingredient) is not None:
            quantity = re.findall('[0-9]+', ingredient)[0]
        else:
            quantity = '1'


    # if ingredient uses tsp or tbsp
    elif 'tbsp' in ingredient: 
        quantity = ingredient.split(' ',1)[0]
        measurement = 'tbsp'
        item = re.findall('tbsp (.*)', ingredient)[0]

    elif 'tsp' in ingredient:
        quantity = ingredient.split(' ')[0]
        measurement = 'tsp'
        item = re.findall('tsp (.*)', ingredient)[0]
    
    # if it just a number and ingredient (2 pickles)
    elif re.match('^[0-9]+/*[0-9]* ', ingredient) is not None:
        quantity = re.findall('^([0-9]+/*[0-9]*)', ingredient)[0]
        item = re.findall('^[0-9]+/*[0-9]* (.*)', ingredient)[0]
    
    # a pinch of something
    elif search('pinch', ingredient) is not None:
        measurement = re.findall('(.*pinch)', ingredient)[0]
        item = re.findall('.*pinch of (.*)', ingredient)[0]
        if re.match('^[0-9]', ingredient) is not None:
            quantity = re.findall('^([0-9])', ingredient)[0]

    # Other
    else:
        return ingredient
    
    # format into dict
    ingredient_list_split = {'quantity':quantity, 'measurement':measurement, 'ingredient':item, 'comment':comment}
    
    return ingredient_list_split

In [4]:
# still need to deal with line 22 in the csv with the <br>
def seperate_ingredients(ingredient_string):
    '''
    The seperate_ingredients funcion takes an ingredient list with each ingredient inside a <p></p> tag and returns a list containing all the ingredients in a string format.
    '''
    ingredient_list = re.findall('<p>(.*?)</p>', ingredient_string)
    
    return ingredient_list

In [5]:
test_ingredient = '200ml whole milk'
test_ingredient_split = (split_ingredient(test_ingredient))
test_item = test_ingredient_split['ingredient']
print(test_item)

whole milk


# Nutrtion API

In [46]:
# set api key
api_key = API_KEY

def get_fdcid (food_item):
    '''
    Gets the fdcid for a food. Uses the first foundation item found.
    '''
    # create base URL
    search_request_url = 'https://api.nal.usda.gov/fdc/v1/search?api_key='

    # make call
    response = requests.get(f'{search_request_url}{api_key}&query={food_item}&datatype=Foundation')

    # parse the returned JSON
    parsed = json.loads(response.content)

    # Select first fdcId
    fdcId = parsed['foods'][0]['fdcId']

    return(fdcId)


In [68]:
def get_macros(item):
    # find the fdcid
    item_fdcid = get_fdcid(item)

    # create the url base
    requested_nutr_url = f'https://api.nal.usda.gov/fdc/v1/food/'

    # make call
    response = requests.get(f'{requested_nutr_url}{item_fdcid}?api_key={api_key}')

    # parse the returned json
    parsed_nutr = json.loads(response.content)

    # select the nutrients you'd like
    nutrients = ['Total lipid (fat)', 'Protein', 'Energy' , 'Cholesterol', 'Sugars, total including NLEA']
    nutr_return = {'Total lipid (fat)': 0.0, 'Protein': 0.0, 'Energy': 0.0, 'Cholesterol': 0.0, 'Sugars, total including NLEA': 0.0}

    # get the serving size
    #nutr_return['Serving Size'] = parsed_nutr['servingSize']
    #nutr_return['Serving Size Unit'] = parsed_nutr['servingSizeUnit']

    # parse the JSON for the nutrients and append them to the dict
    for nutrient in nutrients:
        for nutr in parsed_nutr['foodNutrients']:
            if nutr['nutrient']['name'] == nutrient:
                nutr_return[nutrient] = nutr['amount']
        

    return (nutr_return)

    



In [70]:
item = (get_macros('Ground Almonds'))
print(item)

KeyError: 'foods'

In [69]:
# seperate the ingredients on the p tag
ingredient_list = seperate_ingredients(recipes_df['ingredients'][1])

# for each ingredient split it using nlp
ingredients = []
for ingredient in ingredient_list:
        ingredients.append(split_ingredient(ingredient))

# for each ingredient, get the nutrition information and add to total
nutrition_totals = {'Total lipid (fat)': 0.0, 'Protein': 0.0, 'Energy': 0.0, 'Cholesterol': 0.0, 'Sugars, total including NLEA': 0.0}
for ing in ingredients:
    macros = get_macros(ing['ingredient'])
    print(macros)
    
    # update macro totals
    nutrition_totals['Total lipid (fat)'] += macros['Total lipid (fat)']
    nutrition_totals['Protein'] += macros['Protein']
    nutrition_totals['Energy'] += macros['Energy']
    nutrition_totals['Cholesterol'] += macros['Cholesterol']
    nutrition_totals['Sugars, total including NLEA'] += macros['Sugars, total including NLEA']

print(nutrition_totals)

{'Total lipid (fat)': 3.5, 'Protein': 9.1, 'Energy': 1594.0, 'Cholesterol': 0.0, 'Sugars, total including NLEA': 2.21}
{'Total lipid (fat)': 0.0, 'Protein': 0.0, 'Energy': 59.0, 'Cholesterol': 0.0, 'Sugars, total including NLEA': 0.0}


KeyError: 'foods'

In [9]:
recipes_df['ing_split'] = ""
for i in range(recipes_df.size):
    ingredient_list = seperate_ingredients(recipes_df['ingredients'][i])
    ingredients = []
    for ingredient in ingredient_list:
        ingredients.append(split_ingredient(ingredient))
    
    recipes_df['ing_split'][i] = ingredients

IndexError: list index out of range

In [38]:
ingredient_list = seperate_ingredients(recipes_df['ingredients'][90])
for i in ingredient_list:
    print(split_ingredient(i))

it has eggs yo
{'quantity': '225', 'measurement': 'g', 'ingredient': 'caster sugar', 'comment': ' plus extra for sprinkling'}
{'quantity': '225', 'measurement': 'g', 'ingredient': 'self-raising flour', 'comment': 'na'}
{'quantity': '1', 'measurement': 'tsp', 'ingredient': 'baking powder', 'comment': 'na'}
{'quantity': '225', 'measurement': 'g', 'ingredient': 'unsalted butter', 'comment': ' softened'}


In [18]:
recipes_df['ingredients'][23]

'[<p>375g strong white bread flour </p>, <p>5g fast-action dried yeast</p>, <p>2 tsp caster sugar</p>, <p>1 tsp fine salt</p>, <p>240ml lukewarm water </p>, <p>Red, orange, yellow, green and blue food-colouring paste</p>, <p>1 tsp bicarbonate of soda </p>]'