# Using model to detect ingredient list section from post

Applying NER model to posts and then using heuristics to identify the ingredients list section from the post.

In [1]:
import pandas as pd
import numpy as np
import spacy

import sys
sys.path.append("/Users/maxkirwan/Desktop/Uni/Data Science MSc/Data Science Project/nutrition-insta")
import functions

In [2]:
posts1 = pd.read_csv("/Users/maxkirwan/Desktop/Uni/Data Science MSc/Data Science Project/nutrition-insta/Instagram Data Scraping/Phantom Buster/recipe_posts.csv")
posts2 = pd.read_csv("/Users/maxkirwan/Desktop/Uni/Data Science MSc/Data Science Project/nutrition-insta/Instagram Data Scraping/Phantom Buster/recipe_posts_2.csv")
posts3 = pd.read_csv("/Users/maxkirwan/Desktop/Uni/Data Science MSc/Data Science Project/nutrition-insta/Instagram Data Scraping/Phantom Buster/recipe_posts_3.csv")

posts = pd.concat([posts1,posts2,posts3])

In [3]:
# Getting english posts
posts = functions.get_english_posts(posts)

Detecting language of each post...
Language detection complete.
Time taken: 0:07:40.709580


In [6]:
# Preprocessing text descriptions
posts['description_preprocessed'] = posts['description'].apply(functions.preprocess_text)

In [8]:
def includes_ingredient_list(text):
    
    text_str = str(text)
    if ("ingredients:" in text or "ingredients :" in text) and text_str.count('\n') > 8:
        return True
        
    else:
        return False
    
# Adding includes_ingredient_list binary column
posts['includes_ingredient_list'] = posts['description_preprocessed'].apply(includes_ingredient_list)
# Keeping only posts with ingredient_list
posts = posts[posts['includes_ingredient_list']]

In [None]:
posts = pd.read_csv('preprocessed_descriptions.csv')

In [4]:
nlp = spacy.load("./model-best")

# Merging entities into single tokens
nlp.add_pipe("merge_entities")

<function spacy.pipeline.functions.merge_entities(doc: spacy.tokens.doc.Doc)>

In [10]:
doc = nlp(posts['description_preprocessed'][3])
colors = {
    "MEASUREMENT": "#bfeeb7",
    "INGREDIENT": "#feca74",
    "QUANTITY": "#e4e7d2"
}
options = {"ents": list(colors), "colors": colors}
spacy.displacy.render(doc, style="ent", options=options, jupyter=True)

In [30]:
def get_ingredient_triplets(text):
    
    '''
    Function to identify entity triplets which relate to unique ingredients.
    These triplets come in the form consecutive QUANTITY, MEASUREMENT, INGREDIENT entities.
    '''
    
    doc = nlp(text)
    ingredient_triplets = []
    
    for i in range(len(doc)-10):
        
        if doc[i].ent_type_ == 'QUANTITY' and doc[i+1].ent_type_ == 'MEASUREMENT':
            
            quantity_index = i
            measurement_index = i+1
                
            for j in range(i+1,i+8):

                if doc[j].ent_type_ == 'INGREDIENT':

                    ingredient_index = j
                    ingredient_triplets.append((quantity_index,measurement_index,ingredient_index))
                    break
                    
                    
    def ingredient_triplets_to_list(triplets):

        dict_list = []

        for i, j, k in triplets:

            ing_dict = {}

            ing_dict['ingredient'] = doc[k]
            ing_dict['quantity'] = doc[i]
            ing_dict['measurement'] = doc[j]

            dict_list.append(ing_dict)

        return dict_list
        
    return ingredient_triplets_to_list(ingredient_triplets)

In [31]:
get_ingredient_triplets(doc)

[{'ingredient': oil, 'quantity': 1/4, 'measurement': cup},
 {'ingredient': milk, 'quantity': 1/4, 'measurement': cup},
 {'ingredient': sugar, 'quantity': 1/2, 'measurement': cup},
 {'ingredient': vanilla, 'quantity': 2, 'measurement': teaspoon},
 {'ingredient': flour, 'quantity': 2, 'measurement': cup},
 {'ingredient': cinnamon, 'quantity': 1, 'measurement': teaspoon},
 {'ingredient': baking soda, 'quantity': 1, 'measurement': teaspoon},
 {'ingredient': salt, 'quantity': 1/2, 'measurement': teaspoon}]

In [32]:
# Getting ingredient triplets for all posts
posts['ingredient_list'] = posts['description_preprocessed'].apply(get_ingredient_triplets)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  posts['ingredient_list'] = posts['description_preprocessed'].apply(get_ingredient_triplets)


In [33]:
for i in posts['ingredient_list'][0:5]:
    print(i,"\n\n")

[{'ingredient': oats, 'quantity': 1/2, 'measurement': cup}, {'ingredient': milk, 'quantity': 1/2, 'measurement': cup}, {'ingredient': water, 'quantity': 1/2, 'measurement': cup}, {'ingredient': sugar, 'quantity': 1, 'measurement': teaspoon}, {'ingredient': peanut butter, 'quantity': 1, 'measurement': tablespoon}, {'ingredient': banana, 'quantity': 1, 'measurement': tablespoon}] 


[{'ingredient': pesto, 'quantity': 2, 'measurement': teaspoon}] 


[{'ingredient': ginger, 'quantity': 1, 'measurement': tablespoon}, {'ingredient': tamari, 'quantity': 6, 'measurement': tablespoon}, {'ingredient': hemp seeds, 'quantity': 1, 'measurement': tablespoon}, {'ingredient': water, 'quantity': 2, 'measurement': tablespoon}, {'ingredient': oil, 'quantity': 1, 'measurement': tablespoon}] 


[{'ingredient': mushrooms, 'quantity': 200, 'measurement': gram}, {'ingredient': sugar, 'quantity': 20, 'measurement': gram}, {'ingredient': baking powder, 'quantity': 6, 'measurement': gram}, {'ingredient': salt, '

In [34]:
# Average length of ingredient list
np.mean([len(ing_list) for ing_list in list(posts['ingredient_list'])])

4.017964071856287

In [35]:
# Number of empty ingredient lists
[len(ing_list) for ing_list in list(posts['ingredient_list'])].count(0)

59

### Getting list of ingredients

In [41]:
def get_ingredients(ing_list):
    return [ing['ingredient'] for ing in ing_list]

In [43]:
# Get list of ingredients for all posts
posts['ingredients'] = posts['ingredient_list'].apply(get_ingredients)

In [44]:
posts

Unnamed: 0,postUrl,profileUrl,username,fullName,commentCount,likeCount,pubDate,description,location,imgUrl,...,isSidecar,sidecarMedias,videoUrl,viewCount,language,score,description_preprocessed,includes_ingredient_list,ingredient_list,ingredients
81,https://www.instagram.com/p/Cgd_iR_vT2d/,https://www.instagram.com/all.about.oats,all.about.oats,Anushka Lodhi,0,2,2022-07-26T08:23:14.000Z,Chocolate fudge protein oatmeal🤎 💪\nIngredient...,"Ghaziabad, India",https://scontent-lhr8-2.cdninstagram.com/v/t51...,...,True,3.0,,,en,0.999996,chocolate fudge protein oatmeal \ningredients:...,True,"[{'ingredient': oats, 'quantity': 1/2, 'measur...","[oats, milk, water, sugar, peanut butter, banana]"
104,https://www.instagram.com/p/Cgd8bLXDaCq/,https://www.instagram.com/a_m_eats,a_m_eats,Alice & Meg 🍴,3,12,2022-07-26T07:56:03.000Z,⁣Caprese Chicken with Pesto 🌿🍅⠀\n⠀\nThis dish ...,"Glasgow, United Kingdom",https://scontent-lhr8-1.cdninstagram.com/v/t51...,...,True,3.0,,,en,0.999997,⁣caprese chicken with pesto ⠀\n⠀\nthis dish is...,True,"[{'ingredient': pesto, 'quantity': 2, 'measure...",[pesto]
169,https://www.instagram.com/p/CgdylqtOy9s/,https://www.instagram.com/hescottwellness,hescottwellness,"Natasha Hescott, RDN, CDN",0,1,2022-07-26T06:30:06.000Z,Looking for a fast recipe to make for lunch? T...,,https://scontent-lhr8-1.cdninstagram.com/v/t51...,...,False,,,,en,0.999996,looking for a fast recipe to make for lunch? t...,True,"[{'ingredient': ginger, 'quantity': 1, 'measur...","[ginger, tamari, hemp seeds, water, oil]"
172,https://www.instagram.com/p/CgdyENyLgmv/,https://www.instagram.com/rainbowpiatto,rainbowpiatto,Rainbow Piatto,1,14,2022-07-26T06:25:32.000Z,Sweet & Savoury Mushroom Scones~🍄 This easy an...,Singapore / Singapura / 新加坡 / சிங்கப்பூர்,https://scontent-lhr8-1.cdninstagram.com/v/t51...,...,False,,,,en,0.999998,sweet & savoury mushroom scones~ this easy and...,True,"[{'ingredient': mushrooms, 'quantity': 200, 'm...","[mushrooms, sugar, baking powder, salt, butter..."
226,https://www.instagram.com/p/CgdpvTNvrWt/,https://www.instagram.com/foodiesfood_court,foodiesfood_court,Foodies Food Court,43,1819,2022-07-26T05:12:47.000Z,Paneer Tikka Recipe😍\nSave it to try later♥️\n...,Bihar,https://scontent-lhr8-2.cdninstagram.com/v/t51...,...,True,2.0,,,en,0.999996,paneer tikka recipe\nsave it to try later\n\ni...,True,"[{'ingredient': flour, 'quantity': 1/2, 'measu...","[flour, carom seeds, ginger, red mirch, cumin ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3208,https://www.instagram.com/p/CgCX2p_DSsu/,https://www.instagram.com/alwayshungryinlondon,alwayshungryinlondon,𝐇𝐚𝐧𝐧𝐚𝐡 𝐃𝐉,111,6861,2022-07-15T14:57:00.000Z,Prawn Taco Bowl\n—————————————————————-\nPerfe...,"London, United Kingdom",https://scontent-cdg2-1.cdninstagram.com/v/t51...,...,False,,,,en,0.999997,prawn taco bowl\n—————————————————————-\nperfe...,True,"[{'ingredient': prawns, 'quantity': 90, 'measu...","[prawns, garlic powder, chilli, coriander, ric..."
3389,https://www.instagram.com/p/ChFyI8OLgWy/,https://www.instagram.com/everyday_homecooking,everyday_homecooking,JESS WHARTON | FOODIE,2,23,2022-08-10T19:15:48.000Z,"SALMON, BUTTERNUT SQUASH, SWEET POTATO AND KAL...",,https://scontent-cdt1-1.cdninstagram.com/v/t51...,...,False,,,,en,0.999995,"salmon, butternut squash, sweet potato and kal...",True,"[{'ingredient': rice, 'quantity': 1, 'measurem...","[rice, paprika, paprika, garlic powder, turmer..."
3463,https://www.instagram.com/p/ChFdeY_P89I/,https://www.instagram.com/masterclassuk,masterclassuk,MasterClass,1,11,2022-08-10T16:15:14.000Z,Are you feeling lazy? We have just the recipe ...,,https://scontent-cdt1-1.cdninstagram.com/v/t51...,...,False,,,,en,0.999995,are you feeling lazy? we have just the recipe ...,True,"[{'ingredient': pasta, 'quantity': 200, 'measu...","[pasta, salt, bread, olive, butter, hazelnuts,..."
3556,https://www.instagram.com/p/ChFKUXyoR-t/,https://www.instagram.com/ketoguide24,ketoguide24,keto diet | meal plan,2,21,2022-08-10T13:27:50.000Z,🍽 Servings: 1\n⠀ ⠀\nIngredients:\n⠀\n1/4 cup s...,USA,https://scontent-cdt1-1.cdninstagram.com/v/t51...,...,False,,,,en,0.999998,servings: 1 \n⠀ ⠀\ningredients:\n⠀\n1/4 cup sh...,True,"[{'ingredient': cheddar, 'quantity': 1/4, 'mea...","[cheddar, flour]"


## Using USDA API

In [47]:
import requests

headers = {
    # Already added when you pass json= but not when you pass data=
    # 'Content-Type': 'application/json',
}

params = {
    'api_key': 'XBZBkznaG3lFCfED92u489nCZFlm9zdBNdH6lCEZ',
}

json_data = {
    'query': 'Cheddar cheese',
    'dataType': [
        'Branded',
    ],
    'sortBy': 'fdcId',
    'sortOrder': 'desc',
}

response = requests.post('https://api.nal.usda.gov/fdc/v1/foods/list', params=params, headers=headers, json=json_data)

In [80]:
json_data = {
    'query': 'oats',
    'dataType': [
        'Foundation',
        'SR Legacy'
    ],
    'pageSize' : 1,
    'pageNumber' : 1,
    'sortBy': 'fdcId',
    'sortOrder': 'desc',
}

response = requests.post('https://api.nal.usda.gov/fdc/v1/foods/list', params=params, headers=headers, json=json_data)

In [81]:
print(len([nutrient['name'] for nutrient in response.json()[0]['foodNutrients']]))
[nutrient['name'] for nutrient in response.json()[0]['foodNutrients']]

58


['Thiamin',
 'Riboflavin',
 'Niacin',
 'Vitamin B-6',
 'Biotin',
 'Folate, total',
 'Vitamin B-12',
 'Water',
 'Galactose',
 'Fiber, total dietary',
 'Tryptophan',
 'Threonine',
 'Isoleucine',
 'Leucine',
 'Lysine',
 'Methionine',
 'Calcium, Ca',
 'Iron, Fe',
 'Phenylalanine',
 'Tyrosine',
 'Magnesium, Mg',
 'Valine',
 'Phosphorus, P',
 'Arginine',
 'Potassium, K',
 'Histidine',
 'Sodium, Na',
 'Alanine',
 'Zinc, Zn',
 'Aspartic acid',
 'Glutamic acid',
 'Glycine',
 'Copper, Cu',
 'Proline',
 'Serine',
 'Hydroxyproline',
 'Manganese, Mn',
 'Molybdenum, Mo',
 'Selenium, Se',
 'Cysteine',
 'Retinol',
 'Vitamin D2 (ergocalciferol)',
 'Vitamin D3 (cholecalciferol)',
 'Nitrogen',
 'Total lipid (fat)',
 'Ash',
 'Sucrose',
 'Glucose',
 'Fructose',
 'Lactose',
 'Maltose',
 'Protein',
 'Carbohydrate, by difference',
 'Vitamin A',
 'Vitamin D (D2 + D3)',
 'Energy (Atwater General Factors)',
 'Vitamin D (D2 + D3), International Units',
 'Sugars, Total NLEA']

In [145]:
def get_usda_info_json(ingredient):
    
    headers = {}
    params = {'api_key': 'XBZBkznaG3lFCfED92u489nCZFlm9zdBNdH6lCEZ'}
    
    json_data = {
        'query': ingredient,
        'dataType': [
            'Foundation',
            'SR Legacy'
        ],
        'pageSize' : 1,
        'pageNumber' : 1,
        'sortBy': 'dataType.keyword',
        'sortOrder': 'asc',
    }

    response = requests.post('https://api.nal.usda.gov/fdc/v1/foods/list', params=params, headers=headers, json=json_data)
    
    return response.json()

In [93]:
get_usda_info_json('peanut butter')

{'fdcId': 2262072,
 'description': 'Peanut butter, creamy',
 'dataType': 'Foundation',
 'publicationDate': '2022-04-28',
 'ndbNumber': '16098',
 'foodNutrients': [{'number': '717',
   'name': 'Daidzin',
   'amount': 0.649,
   'unitName': 'MG',
   'derivationCode': 'A',
   'derivationDescription': 'Analytical'},
  {'number': '718',
   'name': 'Genistin',
   'amount': 1.93,
   'unitName': 'MG',
   'derivationCode': 'A',
   'derivationDescription': 'Analytical'},
  {'number': '719',
   'name': 'Glycitin',
   'amount': 0.412,
   'unitName': 'MG',
   'derivationCode': 'A',
   'derivationDescription': 'Analytical'},
  {'number': '645',
   'name': 'Fatty acids, total monounsaturated',
   'amount': 30.7,
   'unitName': 'G',
   'derivationCode': 'A',
   'derivationDescription': 'Analytical'},
  {'number': '646',
   'name': 'Fatty acids, total polyunsaturated',
   'amount': 9.78,
   'unitName': 'G',
   'derivationCode': 'A',
   'derivationDescription': 'Analytical'},
  {'number': '404',
   'name

In [101]:
get_usda_info_json('banana')['foodNutrients']

[{'number': '322',
  'name': 'Carotene, alpha',
  'amount': 8.0,
  'unitName': 'UG',
  'derivationCode': 'A',
  'derivationDescription': 'Analytical'},
 {'number': '335',
  'name': 'Cryptoxanthin, alpha',
  'amount': 0.0,
  'unitName': 'UG',
  'derivationCode': 'A',
  'derivationDescription': 'Analytical'},
 {'number': '334',
  'name': 'Cryptoxanthin, beta',
  'amount': 0.0,
  'unitName': 'UG',
  'derivationCode': 'A',
  'derivationDescription': 'Analytical'},
 {'number': '321.1',
  'name': 'cis-beta-Carotene',
  'amount': 1.0,
  'unitName': 'UG',
  'derivationCode': 'A',
  'derivationDescription': 'Analytical'},
 {'number': '338.3',
  'name': 'cis-Lutein/Zeaxanthin',
  'amount': 0.0,
  'unitName': 'UG',
  'derivationCode': 'A',
  'derivationDescription': 'Analytical'},
 {'number': '337.1',
  'name': 'cis-Lycopene',
  'amount': 0.0,
  'unitName': 'UG',
  'derivationCode': 'A',
  'derivationDescription': 'Analytical'},
 {'number': '321',
  'name': 'Carotene, beta',
  'amount': 10.0,
  '

In [181]:
def calc_nutrient_levels_recipe(ing_list):
    '''
    USDA food lookup for each ingredient in recipe.
    Then totals levels of required nutrients.
    To give total nutrient levels for whole recipe.
    '''
    
    energy = 0         # KCAL
    protein = 0        # G
    carbohydrates = 0  # G
    sugars = 0         # G
    sodium = 0         # MG
    fiber = 0          # G
    
    # Making sure we don't count dupliacte energy values
    energy_dup = 0
    
    for ing in ing_list:
        
        # Making sure we don't count duplicate energy values
        energy_dup = False
        
        # Query ingredient against FoodData Central database
        response = get_usda_info_json(str(ing))
        
        # If query returned a response
        if response:
        
            # Get list of nutrients
            nutrients = response[0]['foodNutrients']

            for nutrient in nutrients:

                if nutrient['name'] == 'Sugars, Total NLEA':
                    sugars += nutrient['amount']
                    print("added",ing,'//',nutrient['name'],nutrient['amount'])

                elif nutrient['name'] == 'Carbohydrate, by difference':
                    carbohydrates += nutrient['amount']
                    print("added",ing,'//',nutrient['name'],nutrient['amount'])

                elif nutrient['name'] == 'Sodium, Na':
                    sodium += nutrient['amount']
                    print("added",ing,'//',nutrient['name'],nutrient['amount'])

                elif nutrient['name'] == 'Protein':
                    protein += nutrient['amount']
                    print("added",ing,'//',nutrient['name'],nutrient['amount'])

                elif 'Energy' in nutrient['name'] and energy_dup==False:
                    energy += nutrient['amount']
                    energy_dup = True
                    print("added",ing,'//',nutrient['name'],nutrient['amount'])

                elif nutrient['name'] == 'Fiber, total dietary':
                    fiber += nutrient['amount']
                    print("added",ing,'//',nutrient['name'],nutrient['amount'])
                
                
    nutrient_levels = {'energy': energy,
                       'protein' : protein,
                       'carbohydrates' : carbohydrates,
                       'sugars' : sugars,
                       'sodium' : sodium,
                       'fiber' : fiber
                      }
    
    return nutrient_levels



def convert_dict_to_series_values(nutrient_levels_dict):
    
    return pd.Series(nutrient_levels_dict.values())

In [179]:
calc_nutrient_levels_recipe(posts['ingredients'].iloc[0])

added oats // Sodium, Na 3.62
added oats // Fiber, total dietary 10.5
added oats // Carbohydrate, by difference 69.9
added oats // Protein 13.2
added oats // Energy (Atwater General Factors) 389
added milk // Protein 7.81
added milk // Sodium, Na 105
added milk // Energy 657
added milk // Carbohydrate, by difference 6.86
added water // Energy 90.0
added water // Protein 19.0
added water // Sodium, Na 219
added water // Carbohydrate, by difference 0.08
added water // Sugars, Total NLEA 0.0
added sugar // Energy 1610.0
added sugar // Protein 0.0
added sugar // Sodium, Na 1.0
added sugar // Carbohydrate, by difference 99.6
added sugar // Sugars, Total NLEA 99.8
added peanut butter // Fiber, total dietary 6.32
added peanut butter // Sodium, Na 221
added peanut butter // Protein 24.0
added peanut butter // Carbohydrate, by difference 22.7
added peanut butter // Energy (Atwater General Factors) 632
added banana // Fiber, total dietary 1.7
added banana // Protein 0.73
added banana // Carbohyd

{'energy': 3463.0,
 'protein': 64.74,
 'carbohydrates': 219.23999999999998,
 'sugars': 115.6,
 'sodium': 549.62,
 'fiber': 18.52}

### Adding nutrient levels to (a slice of) the dataframe

In [198]:
small_posts = posts.head(50)

In [199]:
nutrient_cols = ['energy','protein','carbohydrates','sugars','sodium','fiber']

small_posts[nutrient_cols] = small_posts['ingredients'].apply(calc_nutrient_levels_recipe).apply(convert_dict_to_series)

added oats // Sodium, Na 3.62
added oats // Fiber, total dietary 10.5
added oats // Carbohydrate, by difference 69.9
added oats // Protein 13.2
added oats // Energy (Atwater General Factors) 389
added milk // Protein 7.81
added milk // Sodium, Na 105
added milk // Energy 657
added milk // Carbohydrate, by difference 6.86
added water // Energy 90.0
added water // Protein 19.0
added water // Sodium, Na 219
added water // Carbohydrate, by difference 0.08
added water // Sugars, Total NLEA 0.0
added sugar // Energy 1610.0
added sugar // Protein 0.0
added sugar // Sodium, Na 1.0
added sugar // Carbohydrate, by difference 99.6
added sugar // Sugars, Total NLEA 99.8
added peanut butter // Fiber, total dietary 6.32
added peanut butter // Sodium, Na 221
added peanut butter // Protein 24.0
added peanut butter // Carbohydrate, by difference 22.7
added peanut butter // Energy (Atwater General Factors) 632
added banana // Fiber, total dietary 1.7
added banana // Protein 0.73
added banana // Carbohyd

added pasta // Fiber, total dietary 1.8
added pasta // Carbohydrate, by difference 8.05
added pasta // Energy 45.0
added pasta // Sodium, Na 419
added pasta // Protein 1.41
added pasta // Sugars, Total NLEA 5.5
added sugar // Energy 1610.0
added sugar // Protein 0.0
added sugar // Sodium, Na 1.0
added sugar // Carbohydrate, by difference 99.6
added sugar // Sugars, Total NLEA 99.8
added basil // Carbohydrate, by difference 2.65
added basil // Energy 23.0
added basil // Fiber, total dietary 1.6
added basil // Sodium, Na 4.0
added basil // Protein 3.15
added cashews // Protein 18.2
added cashews // Sodium, Na 12.0
added cashews // Energy 2310.0
added cashews // Carbohydrate, by difference 30.2
added cashews // Fiber, total dietary 3.3
added pasta // Fiber, total dietary 1.8
added pasta // Carbohydrate, by difference 8.05
added pasta // Energy 45.0
added pasta // Sodium, Na 419
added pasta // Protein 1.41
added pasta // Sugars, Total NLEA 5.5
added butter // Fiber, total dietary 9.72
adde

added basil // Carbohydrate, by difference 2.65
added basil // Energy 23.0
added basil // Fiber, total dietary 1.6
added basil // Sodium, Na 4.0
added basil // Protein 3.15
added salt // Sodium, Na 524
added salt // Sugars, Total NLEA 0.58
added pepper // Sodium, Na 0.0
added pepper // Fiber, total dietary 0.942
added pepper // Protein 0.715
added pepper // Carbohydrate, by difference 4.78
added pepper // Energy (Atwater General Factors) 22.9
added salt // Sodium, Na 524
added salt // Sugars, Total NLEA 0.58
added pepper // Sodium, Na 0.0
added pepper // Fiber, total dietary 0.942
added pepper // Protein 0.715
added pepper // Carbohydrate, by difference 4.78
added pepper // Energy (Atwater General Factors) 22.9
added mushrooms // Sodium, Na 0.891
added mushrooms // Protein 2.18
added mushrooms // Carbohydrate, by difference 6.76
added mushrooms // Energy (Atwater General Factors) 39.8
added potato // Sodium, Na 47.7
added potato // Fiber, total dietary 5.4
added potato // Carbohydrate,

added baking powder // Fiber, total dietary 2.3
added baking powder // Carbohydrate, by difference 49.2
added baking powder // Energy 270
added baking powder // Protein 9.43
added baking powder // Sodium, Na 477
added baking powder // Sugars, Total NLEA 5.34
added baking powder // Fiber, total dietary 2.3
added baking powder // Carbohydrate, by difference 49.2
added baking powder // Energy 270
added baking powder // Protein 9.43
added baking powder // Sodium, Na 477
added baking powder // Sugars, Total NLEA 5.34
added vanilla // Energy 1200.0
added vanilla // Carbohydrate, by difference 12.6
added vanilla // Protein 0.06
added vanilla // Fiber, total dietary 0.0
added vanilla // Sodium, Na 9.0
added ginger // Sodium, Na 13.0
added ginger // Energy 333
added ginger // Protein 1.82
added ginger // Carbohydrate, by difference 17.8
added ginger // Fiber, total dietary 2.0
added tamari // Carbohydrate, by difference 5.57
added tamari // Energy 60.0
added tamari // Protein 10.5
added tamari 

added cinnamon // Carbohydrate, by difference 44.4
added cinnamon // Energy 253
added cinnamon // Protein 7.05
added cinnamon // Fiber, total dietary 3.5
added cinnamon // Sodium, Na 388
added baking soda // Fiber, total dietary 2.3
added baking soda // Carbohydrate, by difference 49.2
added baking soda // Energy 270
added baking soda // Protein 9.43
added baking soda // Sodium, Na 477
added baking soda // Sugars, Total NLEA 5.34
added salt // Sodium, Na 524
added salt // Sugars, Total NLEA 0.58
added parsley // Carbohydrate, by difference 6.33
added parsley // Energy 36.0
added parsley // Protein 2.97
added parsley // Fiber, total dietary 3.3
added parsley // Sodium, Na 56.0
added pasta // Fiber, total dietary 1.8
added pasta // Carbohydrate, by difference 8.05
added pasta // Energy 45.0
added pasta // Sodium, Na 419
added pasta // Protein 1.41
added pasta // Sugars, Total NLEA 5.5
added ghee // Protein 0.0
added ghee // Energy 3770.0
added ghee // Fiber, total dietary 0.0
added ghee 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  small_posts[nutrient_cols] = small_posts['ingredients'].apply(calc_nutrient_levels_recipe).apply(convert_dict_to_series)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  small_posts[nutrient_cols] = small_posts['ingredients'].apply(calc_nutrient_levels_recipe).apply(convert_dict_to_series)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.h

In [201]:
small_posts.to_csv("posts_with_nutritional_info.csv")

In [193]:
np.sum([len(ing) for ing in posts.head(100)['ingredients']])

451

In [202]:
small_posts

Unnamed: 0,postUrl,profileUrl,username,fullName,commentCount,likeCount,pubDate,description,location,imgUrl,...,description_preprocessed,includes_ingredient_list,ingredient_list,ingredients,energy,protein,carbohydrates,sugars,sodium,fiber
81,https://www.instagram.com/p/Cgd_iR_vT2d/,https://www.instagram.com/all.about.oats,all.about.oats,Anushka Lodhi,0,2,2022-07-26T08:23:14.000Z,Chocolate fudge protein oatmeal🤎 💪\nIngredient...,"Ghaziabad, India",https://scontent-lhr8-2.cdninstagram.com/v/t51...,...,chocolate fudge protein oatmeal \ningredients:...,True,"[{'ingredient': oats, 'quantity': 1/2, 'measur...","[oats, milk, water, sugar, peanut butter, banana]",3463.0,64.74,219.24,115.6,549.62,18.52
104,https://www.instagram.com/p/Cgd8bLXDaCq/,https://www.instagram.com/a_m_eats,a_m_eats,Alice & Meg 🍴,3,12,2022-07-26T07:56:03.000Z,⁣Caprese Chicken with Pesto 🌿🍅⠀\n⠀\nThis dish ...,"Glasgow, United Kingdom",https://scontent-lhr8-1.cdninstagram.com/v/t51...,...,⁣caprese chicken with pesto ⠀\n⠀\nthis dish is...,True,"[{'ingredient': pesto, 'quantity': 2, 'measure...",[pesto],1560.0,4.16,6.93,0.0,1030.0,2.1
169,https://www.instagram.com/p/CgdylqtOy9s/,https://www.instagram.com/hescottwellness,hescottwellness,"Natasha Hescott, RDN, CDN",0,1,2022-07-26T06:30:06.000Z,Looking for a fast recipe to make for lunch? T...,,https://scontent-lhr8-1.cdninstagram.com/v/t51...,...,looking for a fast recipe to make for lunch? t...,True,"[{'ingredient': ginger, 'quantity': 1, 'measur...","[ginger, tamari, hemp seeds, water, oil]",1095.0,52.32,40.55,3.14,6354.0,13.1
172,https://www.instagram.com/p/CgdyENyLgmv/,https://www.instagram.com/rainbowpiatto,rainbowpiatto,Rainbow Piatto,1,14,2022-07-26T06:25:32.000Z,Sweet & Savoury Mushroom Scones~🍄 This easy an...,Singapore / Singapura / 新加坡 / சிங்கப்பூர்,https://scontent-lhr8-1.cdninstagram.com/v/t51...,...,sweet & savoury mushroom scones~ this easy and...,True,"[{'ingredient': mushrooms, 'quantity': 200, 'm...","[mushrooms, sugar, baking powder, salt, butter...",3440.8,63.91,200.32,105.72,1004.883,21.74
226,https://www.instagram.com/p/CgdpvTNvrWt/,https://www.instagram.com/foodiesfood_court,foodiesfood_court,Foodies Food Court,43,1819,2022-07-26T05:12:47.000Z,Paneer Tikka Recipe😍\nSave it to try later♥️\n...,Bihar,https://scontent-lhr8-2.cdninstagram.com/v/t51...,...,paneer tikka recipe\nsave it to try later\n\ni...,True,"[{'ingredient': flour, 'quantity': 1/2, 'measu...","[flour, carom seeds, ginger, red mirch, cumin ...",2269.9,67.205,253.4,17.2,1358.0,54.102
230,https://www.instagram.com/p/CgdmRS0DWm4/,https://www.instagram.com/corner_to_discover,corner_to_discover,Shivangi Gupta,4,102,2022-07-26T05:04:54.000Z,MASALA PANEER 🥰\n\nTo set the paneer why to us...,,,...,masala paneer \n\nto set the paneer why to use...,True,"[{'ingredient': milk, 'quantity': 1, 'measurem...","[milk, oregano, chili flakes, jeera powder]",1521.0,29.45,238.66,0.0,161.0,96.2
251,https://www.instagram.com/p/Cgdl5Uehh6u/,https://www.instagram.com/mixin._.goodness,mixin._.goodness,Sunaina,2,47,2022-07-26T04:39:12.000Z,Paneer Aaloo mix Paratha 👩‍🍳😃😋\n\nIngredients:...,,https://scontent-lhr8-1.cdninstagram.com/v/t51...,...,paneer aaloo mix paratha \n\ningredients: pane...,True,[],[],0.0,0.0,0.0,0.0,0.0,0.0
278,https://www.instagram.com/p/CgdgJ2eA8Tl/,https://www.instagram.com/sri_vani_cooking_cha...,sri_vani_cooking_channel,Srivani's Kitchen,1,9,2022-07-26T03:49:32.000Z,"Healthy chutney powder to serve with Idli, dos...",,,...,"healthy chutney powder to serve with idli, dos...",True,"[{'ingredient': flax seeds, 'quantity': 1/2, '...","[flax seeds, coriander, urad, sesame, red chil...",1905.0,58.29,101.53,10.32,1731.6,67.07
279,https://www.instagram.com/p/CgdfLOSrAUG/,https://www.instagram.com/enzo_mapua,enzo_mapua,enzo mapua,3,48,2022-07-26T03:40:28.000Z,Boscaiola! @rumourhasit9 had cooked this for m...,,https://scontent-lhr8-2.cdninstagram.com/v/t51...,...,boscaiola! had cooked this for my family and i...,True,"[{'ingredient': tomato, 'quantity': 1/3, 'meas...","[tomato, cream, garlic, parsley, butter, parme...",1507.0,65.056,83.68,5.57,2297.996,18.491
291,https://www.instagram.com/p/CgddMjDMIQx/,https://www.instagram.com/chefproto,chefproto,Zoya Ahmed,2,30,2022-07-26T03:23:11.000Z,Hi besties! I thought it was time I’d share on...,,https://scontent-lhr8-2.cdninstagram.com/v/t51...,...,hi besties! i thought it was time i’d share on...,True,"[{'ingredient': sugar, 'quantity': 2, 'measure...","[sugar, basil, cashews]",3943.0,21.35,132.45,99.8,17.0,4.9


In [206]:
small_posts[['fullName', 'commentCount', 'likeCount', 'description', 'ingredient_list', 'energy', 'protein', 'carbohydrates', 'sugars', 'sodium', 'fiber']].head(6)

Unnamed: 0,fullName,commentCount,likeCount,description,ingredient_list,energy,protein,carbohydrates,sugars,sodium,fiber
81,Anushka Lodhi,0,2,Chocolate fudge protein oatmeal🤎 💪\nIngredient...,"[{'ingredient': oats, 'quantity': 1/2, 'measur...",3463.0,64.74,219.24,115.6,549.62,18.52
104,Alice & Meg 🍴,3,12,⁣Caprese Chicken with Pesto 🌿🍅⠀\n⠀\nThis dish ...,"[{'ingredient': pesto, 'quantity': 2, 'measure...",1560.0,4.16,6.93,0.0,1030.0,2.1
169,"Natasha Hescott, RDN, CDN",0,1,Looking for a fast recipe to make for lunch? T...,"[{'ingredient': ginger, 'quantity': 1, 'measur...",1095.0,52.32,40.55,3.14,6354.0,13.1
172,Rainbow Piatto,1,14,Sweet & Savoury Mushroom Scones~🍄 This easy an...,"[{'ingredient': mushrooms, 'quantity': 200, 'm...",3440.8,63.91,200.32,105.72,1004.883,21.74
226,Foodies Food Court,43,1819,Paneer Tikka Recipe😍\nSave it to try later♥️\n...,"[{'ingredient': flour, 'quantity': 1/2, 'measu...",2269.9,67.205,253.4,17.2,1358.0,54.102
230,Shivangi Gupta,4,102,MASALA PANEER 🥰\n\nTo set the paneer why to us...,"[{'ingredient': milk, 'quantity': 1, 'measurem...",1521.0,29.45,238.66,0.0,161.0,96.2
