# Introduction

After joining the ingredients_df with their respective food_id in the density_db, we need to do a similar matching of their respective food_portion entries.

# Process

This will follow a very similar process as with the food_df joining.

There are a few points to keep in mind whilst doing this. We aren't actually trying to match the units, instead it's the type of ingredient eg. cinnamon ground vs. whole stick. The units themselves matter up to their 'unit_type' ie. weight, volume, or whole. If the ingredient is given in weight, we don't need this density information. Whereas if it's given in volume, we need to match it with a volume portion in order to calculate its density. The same goes for a 'whole' unit, to get an idea of its portion size.

# Setup

In [1]:
#| default_exp density.portion_match

In [2]:
#| export
from pyprojroot import here
root = here()
import sys
sys.path.append(str(root))

In [3]:
#| export
import pandas as pd
import numpy as np
import seaborn as sns

import nltk
import spacy
from spacy.matcher import Matcher
from spacy.util import filter_spans

from thefuzz import fuzz

from sentence_transformers import SentenceTransformer
from sentence_transformers.util import cos_sim
import torch
from sklearn.metrics.pairwise import cosine_similarity

import json
from itertools import groupby
import re
import string

from tqdm import tqdm
tqdm.pandas()

from recipe_dataset.utils.utils import *

In [4]:
pd.options.mode.chained_assignment = None  # default='warn'

In [5]:
ingredients_df = pd.read_feather('../../data/local/recipe/partial/ingredients/0.feather')
food_df = pd.read_feather('../../data/local/density/full/food/0.feather')
food_portion_df = pd.read_feather('../../data/local/density/full/food_portion/0.feather')
food_ids = pd.read_feather('../../data/local/density/partial/food_ids/0.feather')

In [6]:
ingredients_df[['comment', 'unit']] = ingredients_df[['comment', 'unit']].fillna('')
ingredients_df = ingredients_df.join(food_ids)

In [7]:
food_portion_df.rename({'remainder_tags':'unit_remainders'}, inplace=True, axis=1)

In [8]:
#| export
with open(f'{root}/config/unit_conversions.json') as f:
    unit_list = json.load(f)

# Food Portion Selection

The main function selecting the appropriate food_portion.

Following the two principles highlighted above, we are first looking at the *unit_type* for a match. The process for weight/volume types are self-explanatory, whereas portions are where more complexity arrises.

In [9]:
ingredient = ingredients_df.iloc[0]
ingredient

name.name                                           butter
name.description                          land lake butter
quantity                                             0.667
unit                                                  cups
comment                                                   
preparation                                       softened
ingredient_string    2/3 cup Land O Lakes Butter, softened
unit_tags                                            [cup]
unit_remainders                                         []
unit_type                                           volume
food_id                                          2345703.0
Name: (1746116, 0), dtype: object

#todo - if no unit AND quantity then that is simply a serving/portion of the item.

In [10]:
#| export
def select_food_portion(ingredient, food_portion_df, debug=False):

    if pd.isnull(ingredient['food_id']): return pd.NA

    portions = food_portion_df.loc[ingredient['food_id'],:]
    debug_info = {
        'tag_filter_results': [],
        'description_search_words': '',
        'description_search_results': [],
    }

    if ingredient['unit_type'] == 'volume':
        # if volume given, find (any) other volume portion
        searched = portions.index[portions['unit_tags'].apply(lambda x: any([i in unit_list['volume'].keys() for i in x]))]
        debug_info['tag_filter_results'].append({'filter': 'volume', 'indices': searched})
        if not searched.empty: portions = portions.loc[searched]
    elif ingredient['unit_type'] == 'weight':
        # if weight given, we don't need any matches
        searched = portions.index[portions['unit_tags'].apply(lambda x: any([i in unit_list['weight'].keys() for i in x]))]
        debug_info['tag_filter_results'].append({'filter': 'weight', 'indices': searched})
        if not searched.empty: portions = portions.loc[searched]
    else:
        # if volume nor weight given
        # test: removing results with units
        searched = portions.index[portions['unit_tags'].apply(lambda x: not any([i not in unit_list['portion'].keys() for i in x]))]
        if not searched.empty: portions = portions.loc[searched]
        # first we want to check if units match
        if len(ingredient['unit_tags']):
            searched = portions.index[portions['unit_tags'].apply(lambda x: any([i in ingredient['unit_tags'] for i in x]))]
            debug_info['tag_filter_results'].append({'filter': 'portion_unit_match', 'indices': searched})
            if not searched.empty: portions = portions.loc[searched]
        # then check if remainders match
        if len(ingredient['unit_remainders']):
            searched = portions.index[portions['unit_remainders'].apply(lambda x: any([i in ingredient['unit_remainders'] for i in x]))]
            debug_info['tag_filter_results'].append({'filter': 'portion_unit_remainder_match', 'indices': searched})
            if not searched.empty: portions = portions.loc[searched]
        # if 'whole' portion specified
        if not len(ingredient['unit_tags']) or any([i == 'whole' for i in ingredient['unit_tags']]):
            # if 'whole' explicitly specified:
            searched = portions.index[portions['unit_tags'].apply(lambda x: any([i == 'whole' for i in x]))]
            debug_info['tag_filter_results'].append({'filter': 'portion_whole_explicit', 'indices': searched})
            if not searched.empty: portions = portions.loc[searched]
            else:
                searched = portions.index[(portions['unit_tags'].str.len() == 0)]
                if not searched.empty: 
                    portions = portions.loc[searched]
                    # select the one with the largest gram weight (#todo factor in `amount` too)
                    searched = portions.index[portions['gram_weight'] == portions['gram_weight'].max()]
                    if not searched.empty: portions = portions.loc[searched]

    # searching for ingredient terms
    search_cols = ['name.description', 'comment', 'unit']
    search_words = []
    for col, value in ingredient[search_cols].items():
        if not pd.isnull(value): 
            search_words.extend(ingredient[col].split(' '))
        else:
            if col == 'unit': 
                search_words.extend(['whole', 'serving', 'portion']) 

    search_words.reverse()

    debug_info['description_search_words'] = search_words

    for search_word in search_words:
        for col in ['unit_tags', 'unit_remainders']:
            searched = portions.index[portions[col].apply(lambda x: search_word in x)]
            if not searched.empty:
                portions = portions.loc[searched]
                debug_info['description_search_results'].append({'search_word': search_word, 'indices': searched})

    # finding globals
    default_words = ['raw', 'regular', 'normal', 'medium']
    for word in default_words:
        searched = portions['description'].str.contains(word)
        if searched.any():
            portions = portions[searched]
            debug_info['tag_filter_results'].append({'filter': 'default_units', 'indices': searched.index})
    
    return portions.iloc[0].name if not debug else (portions.iloc[0].name, debug_info)

In [11]:
ingredient

name.name                                           butter
name.description                          land lake butter
quantity                                             0.667
unit                                                  cups
comment                                                   
preparation                                       softened
ingredient_string    2/3 cup Land O Lakes Butter, softened
unit_tags                                            [cup]
unit_remainders                                         []
unit_type                                           volume
food_id                                          2345703.0
Name: (1746116, 0), dtype: object

In [12]:
food_portion_df.loc[ingredient['food_id']]

Unnamed: 0_level_0,seq_num,amount,gram_weight,description,unit_tags,unit_remainders,unit_type,portion_amount,portion_unit
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
287267,1,1.0,224.0,cup,[cup],[],volume,,
287268,2,1.0,14.0,tablespoon,[tablespoon],[],volume,,
287269,3,1.0,7.0,pat,[],[pat],portion,,
287270,4,1.0,5.0,individual container,[],"[individual, container]",portion,,
287271,5,1.0,7.0,slice bread/roll,[piece],"[bread, roll]",portion,,
287272,6,1.0,14.0,regular sandwich,[whole],[sandwich],portion,,
287273,7,1.0,28.0,large sandwich,[whole],[sandwich],portion,,


In [13]:
selected_portion, debug_info = select_food_portion(ingredient, food_portion_df, True)
food_portion_df.loc[ingredient['food_id'], selected_portion]

seq_num                 1
amount                1.0
gram_weight         224.0
description           cup
unit_tags           [cup]
unit_remainders        []
unit_type          volume
portion_amount        NaN
portion_unit         None
Name: (2345703, 287267), dtype: object

In [14]:
debug_info

{'tag_filter_results': [{'filter': 'volume',
   'indices': Index([287267, 287268], dtype='int64', name='id')}],
 'description_search_words': ['cups', '', 'butter', 'lake', 'land'],
 'description_search_results': []}

In [15]:
ingredients_df[ingredients_df['food_id'].isna()]

Unnamed: 0_level_0,Unnamed: 1_level_0,name.name,name.description,quantity,unit,comment,preparation,ingredient_string,unit_tags,unit_remainders,unit_type,food_id
recipe,ingredient,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1828339,3,long grain brown rice,long grain brown rice,175.0,g,,,175 g long grain brown rice,[gram],[],weight,
1828339,6,dried apricot,dried apricot,100.0,g,,finely chopped,"100 g dried apricots, finely chopped",[gram],[],weight,
1828339,7,sultana,sultana,50.0,g,,,50 g sultanas,[gram],[],weight,
1703,3,mincemeat,mincemeat,1.0,jar,,,1 jar mincemeat,[],[jar],portion,
1262123,0,linguine,linguine,4.0,oz,,,4 oz linguine,[ounce],[],weight,
...,...,...,...,...,...,...,...,...,...,...,...,...
599284,1,potato,frozen shredded hash brown potato,1.0,lb,,completely thawed,"1 lb. frozen shredded hash brown potatoes, com...",[pound],[],weight,
1357213,3,cinnamin,cinnamin,0.5,tsps,,,1/2 tsp. cinnamin,[teaspoon],[],volume,
2006319,2,raspberry,raspberry,8.0,ounces,,,8 ounces raspberries,[ounce],[],weight,
2006319,3,blackberry,blackberry,8.0,ounces,,,8 ounces blackberries,[ounce],[],weight,


In [16]:
food_portion_ids = ingredients_df.progress_apply(select_food_portion, args=(food_portion_df,), axis=1)

  0%|                                                                                                                                                                                                                                          | 0/2450 [00:00<?, ?it/s]

  1%|██▉                                                                                                                                                                                                                             | 32/2450 [00:00<00:07, 317.62it/s]

  5%|███████████▎                                                                                                                                                                                                                   | 124/2450 [00:00<00:03, 668.51it/s]

  9%|███████████████████▊                                                                                                                                                                                                           | 218/2450 [00:00<00:02, 790.73it/s]

 13%|████████████████████████████▏                                                                                                                                                                                                  | 309/2450 [00:00<00:02, 834.18it/s]

 16%|████████████████████████████████████▊                                                                                                                                                                                          | 404/2450 [00:00<00:02, 873.25it/s]

 20%|████████████████████████████████████████████▊                                                                                                                                                                                  | 492/2450 [00:00<00:02, 874.56it/s]

 24%|████████████████████████████████████████████████████▉                                                                                                                                                                          | 581/2450 [00:00<00:02, 878.26it/s]

 27%|█████████████████████████████████████████████████████████████▏                                                                                                                                                                 | 672/2450 [00:00<00:02, 887.48it/s]

 31%|█████████████████████████████████████████████████████████████████████▋                                                                                                                                                         | 766/2450 [00:00<00:01, 902.33it/s]

 35%|██████████████████████████████████████████████████████████████████████████████                                                                                                                                                 | 857/2450 [00:01<00:01, 901.81it/s]

 39%|██████████████████████████████████████████████████████████████████████████████████████▎                                                                                                                                        | 948/2450 [00:01<00:01, 900.25it/s]

 43%|██████████████████████████████████████████████████████████████████████████████████████████████▌                                                                                                                               | 1044/2450 [00:01<00:01, 916.39it/s]

 47%|███████████████████████████████████████████████████████████████████████████████████████████████████████▉                                                                                                                      | 1147/2450 [00:01<00:01, 947.45it/s]

 51%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                                                                                             | 1242/2450 [00:01<00:01, 938.05it/s]

 55%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                                                    | 1340/2450 [00:01<00:01, 947.53it/s]

 59%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                                                                            | 1435/2450 [00:01<00:01, 943.94it/s]

 63%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                                                                   | 1534/2450 [00:01<00:00, 956.69it/s]

 67%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                                          | 1630/2450 [00:01<00:00, 947.67it/s]

 71%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                               | 1744/2450 [00:01<00:00, 1004.12it/s]

 75%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                      | 1845/2450 [00:02<00:00, 964.96it/s]

 79%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                              | 1942/2450 [00:02<00:00, 959.60it/s]

 83%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                     | 2039/2450 [00:02<00:00, 949.11it/s]

 87%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                            | 2135/2450 [00:02<00:00, 948.12it/s]

 91%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                    | 2230/2450 [00:02<00:00, 932.95it/s]

 95%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊           | 2326/2450 [00:02<00:00, 938.75it/s]

 99%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎  | 2420/2450 [00:02<00:00, 935.35it/s]

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2450/2450 [00:02<00:00, 912.75it/s]




In [17]:
food_portion_ids = food_portion_ids.rename('food_portion_id')

In [18]:
ingredients_df = ingredients_df.join(food_portion_ids)

## Evaluating

Visualising the selections, along with their possible options.

In [19]:
pd.set_option('display.max_rows', None)

In [20]:
ingredients_df.join(food_df['description'], on='food_id').join(food_portion_df[['description','gram_weight']], lsuffix='(food)', rsuffix='(portion)', on=['food_id', 'food_portion_id']).merge(food_portion_df.reset_index().set_index('fdc_id')[['gram_weight', 'description']], left_on='food_id', suffixes=('(portion_selected)', '(portion_options)'), right_index=True).head(100)

Unnamed: 0_level_0,Unnamed: 1_level_0,name.name,name.description,quantity,unit,comment,preparation,ingredient_string,unit_tags,unit_remainders,unit_type,food_id,food_portion_id,description(food),description(portion),gram_weight(portion_selected),gram_weight(portion_options),description
recipe,ingredient,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
1746116,0,butter,land lake butter,0.667,cups,,softened,"2/3 cup Land O Lakes Butter, softened",[cup],[],volume,2345703.0,287267,"Butter, NFS",cup,224.0,224.0,cup
1746116,0,butter,land lake butter,0.667,cups,,softened,"2/3 cup Land O Lakes Butter, softened",[cup],[],volume,2345703.0,287267,"Butter, NFS",cup,224.0,14.0,tablespoon
1746116,0,butter,land lake butter,0.667,cups,,softened,"2/3 cup Land O Lakes Butter, softened",[cup],[],volume,2345703.0,287267,"Butter, NFS",cup,224.0,7.0,pat
1746116,0,butter,land lake butter,0.667,cups,,softened,"2/3 cup Land O Lakes Butter, softened",[cup],[],volume,2345703.0,287267,"Butter, NFS",cup,224.0,5.0,individual container
1746116,0,butter,land lake butter,0.667,cups,,softened,"2/3 cup Land O Lakes Butter, softened",[cup],[],volume,2345703.0,287267,"Butter, NFS",cup,224.0,7.0,slice bread/roll
1746116,0,butter,land lake butter,0.667,cups,,softened,"2/3 cup Land O Lakes Butter, softened",[cup],[],volume,2345703.0,287267,"Butter, NFS",cup,224.0,14.0,regular sandwich
1746116,0,butter,land lake butter,0.667,cups,,softened,"2/3 cup Land O Lakes Butter, softened",[cup],[],volume,2345703.0,287267,"Butter, NFS",cup,224.0,28.0,large sandwich
382666,1,butter,butter,1.5,sticks,,,1 1/2 sticks butter,[],[stick],portion,2345703.0,287272,"Butter, NFS",regular sandwich,14.0,224.0,cup
382666,1,butter,butter,1.5,sticks,,,1 1/2 sticks butter,[],[stick],portion,2345703.0,287272,"Butter, NFS",regular sandwich,14.0,14.0,tablespoon
382666,1,butter,butter,1.5,sticks,,,1 1/2 sticks butter,[],[stick],portion,2345703.0,287272,"Butter, NFS",regular sandwich,14.0,7.0,pat


This all looks fine, something more difficult is those with no unit tags:

In [21]:
sample_df = ingredients_df[(ingredients_df['unit_tags'].str.len() == 0)].sample(20, random_state=1330)
sample_df.join(food_df['description'], on='food_id').join(food_portion_df[['description','gram_weight']], lsuffix='(food)', rsuffix='(portion)', on=['food_id', 'food_portion_id']).merge(food_portion_df.reset_index().set_index('fdc_id')[['description','gram_weight']], left_on='food_id', suffixes=('(portion_selected)', '(portion_options)'), right_index=True).head(40)

Unnamed: 0_level_0,Unnamed: 1_level_0,name.name,name.description,quantity,unit,comment,preparation,ingredient_string,unit_tags,unit_remainders,unit_type,food_id,food_portion_id,description(food),description(portion),gram_weight(portion_selected),description,gram_weight(portion_options)
recipe,ingredient,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
1627293,5,egg,egg separated,2.0,,,,2 eggs separated,[],[],portion,171287.0,88378,"Egg, whole, raw, fresh",medium,44.0,large,50.0
1627293,5,egg,egg separated,2.0,,,,2 eggs separated,[],[],portion,171287.0,88378,"Egg, whole, raw, fresh",medium,44.0,extra large,56.0
1627293,5,egg,egg separated,2.0,,,,2 eggs separated,[],[],portion,171287.0,88378,"Egg, whole, raw, fresh",medium,44.0,jumbo,63.0
1627293,5,egg,egg separated,2.0,,,,2 eggs separated,[],[],portion,171287.0,88378,"Egg, whole, raw, fresh",medium,44.0,cup 486 large egg,243.0
1627293,5,egg,egg separated,2.0,,,,2 eggs separated,[],[],portion,171287.0,88378,"Egg, whole, raw, fresh",medium,44.0,medium,44.0
1627293,5,egg,egg separated,2.0,,,,2 eggs separated,[],[],portion,171287.0,88378,"Egg, whole, raw, fresh",medium,44.0,small,38.0
855127,0,eggplant,eggplant,1.0,,,sliced,"1 eggplant, sliced",[],[],portion,2345305.0,285990,"Eggplant, raw",whole,500.0,whole,500.0
855127,0,eggplant,eggplant,1.0,,,sliced,"1 eggplant, sliced",[],[],portion,2345305.0,285990,"Eggplant, raw",whole,500.0,cup,80.0
972254,8,italian seasoning mix,dried italian seasoning mix mixture,,,,,"dried italian seasoning mix, mixture",[],[],portion,168955.0,84041,"Pasta mix, Italian lasagna, unprepared",package,141.0,package,141.0
972254,8,italian seasoning mix,dried italian seasoning mix mixture,,,,,"dried italian seasoning mix, mixture",[],[],portion,168955.0,84041,"Pasta mix, Italian lasagna, unprepared",package,141.0,packet,79.0


Here we want to find cases where ingredients with volume measurements don't have a volume portion match. This is significant because we won't be able to get a weight measurement.

In [22]:
from ast import literal_eval

In [23]:
results_df = ingredients_df.join(food_portion_df, on=['food_id', 'food_portion_id'], rsuffix='(portion)')

results_df[results_df['unit_tags'].apply(lambda x: any([t in unit_list['volume'].keys() for t in x])) & ~(results_df['unit_tags(portion)'].apply(lambda x: any([t in unit_list['volume'].keys() for t in x]) if isinstance(x, np.ndarray) else True))]

Unnamed: 0_level_0,Unnamed: 1_level_0,name.name,name.description,quantity,unit,comment,preparation,ingredient_string,unit_tags,unit_remainders,unit_type,...,food_portion_id,seq_num,amount,gram_weight,description,unit_tags(portion),unit_remainders(portion),unit_type(portion),portion_amount,portion_unit
recipe,ingredient,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1781719,3,light molasses,light molasses,1.0,teaspoon,,,1 teaspoon light molasses,[teaspoon],[],volume,...,278155,3,1.0,30.0,medium,[whole],[],portion,,
165596,5,dark molasses,dark molasses,0.667,cups,,,2/3 cup dark molasses,[cup],[],volume,...,278155,3,1.0,30.0,medium,[whole],[],portion,,


In [24]:
food_df[food_df['description'].str.lower().str.contains('molasses')]

Unnamed: 0_level_0,data_type,description,description_list,description_length,description_list_length,default_word_count,exclusion_word_count,volume_exists,portion_exists
fdc_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
172724,sr_legacy_food,"Cookies, molasses","[cooky, molasses]",17,2,0,0,False,True
2343099,survey_fndds_food,"Bread, cornmeal and molasses","[bread, cornmeal molasses]",28,2,0,0,False,True
2343100,survey_fndds_food,"Bread, cornmeal and molasses, toasted","[bread, cornmeal molasses, toasted]",37,3,0,0,False,True
2343424,survey_fndds_food,"Cookie, molasses","[cookie, molasses]",16,2,0,0,False,True


We can forgive these entries. They are occassional blips of foods that are too specific eg. bread, cookies. #TODO: This might be able to be filtered out in the food_df, using the 'density_exists' field as guidance.

# Saving

In [25]:
food_portion_ids.to_frame().to_feather('../../data/local/density/partial/food_portion_ids/0.feather')

In [26]:
from nbdev import nbdev_export; nbdev_export()