# Introduction

This notebook produces the final per-ingredient information for the dataset: the gram weight of each ingredient and its share of the total weight of its recipe.

Now that we have the densities of all ingredients, we can use them to get each ingredient's concentration in its recipe. This is done by first obtaining the weight of each ingredient, and then using these weights to compute each ingredient's weight ratio within the full recipe.

# Setup

In [1]:
#|default_exp density.finalise

In [2]:
#| export
import pandas as pd
import numpy as np

import json

In [3]:
from tqdm import tqdm
tqdm.pandas()

In [4]:
# Turn off pandas' chained-assignment (SettingWithCopy) warning for the whole session.
pd.options.mode.chained_assignment = None  # default='warn'

In [5]:
# Load the input tables. All paths are relative to the current working directory.
# food_ids and food_portion_ids are joined onto ingredients_df in the next cell;
# food_portion_df is the table get_gram_weight looks portions up in.
ingredients_df = pd.read_feather('../../data/local/recipe/partial/ingredients/0.feather')
food_df = pd.read_feather('../../data/local/density/full/food/0.feather')
food_portion_df = pd.read_feather('../../data/local/density/full/food_portion/0.feather')
food_ids = pd.read_feather('../../data/local/density/partial/food_ids/0.feather')
food_portion_ids = pd.read_feather('../../data/local/density/partial/food_portion_ids/0.feather')

In [6]:
# Attach the columns of food_ids and food_portion_ids to every ingredient row
# (DataFrame.join aligns on the index).
# NOTE: ingredients_df is reassigned here, so re-running this cell without
# reloading the data would try to join the same columns a second time.
ingredients_df = ingredients_df.join(food_ids).join(food_portion_ids)
ingredients_df

Unnamed: 0_level_0,Unnamed: 1_level_0,name.name,name.description,quantity,unit,comment,preparation,ingredient_string,unit_tags,unit_remainders,unit_type,food_id,food_portion_id
recipe,ingredient,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1746116,0,butter,land lake butter,0.667,cups,,softened,"2/3 cup Land O Lakes Butter, softened",[cup],[],volume,2345703.0,287267.0
1746116,1,sugar,sugar,0.5,cups,,,1/2 cup sugar,[cup],[],volume,2345817.0,287772.0
1746116,2,egg,land lake egg,2.0,,(yolks only),,2 Land O Lakes Eggs (yolks only),[],[],portion,171287.0,88378.0
1746116,3,vanilla,vanilla,1.0,teaspoon,,,1 teaspoon vanilla,[teaspoon],[],volume,172236.0,90134.0
1746116,4,flour,all-purpose flour,1.5,cups,,,1 1/2 cups all-purpose flour,[cup],[],volume,169761.0,85466.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
931097,9,red onion,red onion,0.25,cups,,finely chopped,"1/4 cup red onion, finely chopped",[cup],[],volume,2345315.0,286030.0
931097,10,red bell pepper,red bell pepper,0.25,cups,,chopped,1/4 cup chopped red bell pepper,[cup],[],volume,2345321.0,286059.0
931097,11,jasmine rice,jasmine rice,1.0,cup,,,1 cup Jasmine rice,[cup],[],volume,169756.0,85461.0
931097,12,chicken,reduced-sodium chicken broth,1.5,cups,,,1 1/2 cups reduced-sodium chicken broth,[cup],[],volume,2341341.0,270046.0


In [7]:
# Fill missing values with pd.NA (they print as <NA> in the row shown further down).
ingredients_df = ingredients_df.fillna(pd.NA)

In [8]:
#| export
# Unit conversion table, indexed as unit_list[unit_type][unit_tag]['conversion'].
# The encoding is pinned because open() otherwise uses a locale-dependent default.
with open('../config/unit_conversions.json', encoding='utf-8') as f:
    unit_list = json.load(f)

# Weight Calculation

Using this information we want to calculate the gram weight of each ingredient.

In [9]:
ingredient = ingredients_df.iloc[0]
ingredient

name.name                                           butter
name.description                          land lake butter
quantity                                             0.667
unit                                                  cups
comment                                               <NA>
preparation                                       softened
ingredient_string    2/3 cup Land O Lakes Butter, softened
unit_tags                                            [cup]
unit_remainders                                         []
unit_type                                           volume
food_id                                          2345703.0
food_portion_id                                   287267.0
Name: (1746116, 0), dtype: object

The ingredient's weight is calculated according to the ingredient's `unit_type`, i.e. weight, volume or portion.

In [10]:
#| export
def _first_known_unit(unit_tags, unit_table):
    """Return the first tag of `unit_tags` that is a key of `unit_table`, or None."""
    return next((tag for tag in unit_tags if tag in unit_table), None)


def get_gram_weight(ingredient, food_portions=None, units=None):
    """Return the weight in grams of one ingredient row, or `pd.NA` if unknown.

    The kind of unit found in the ingredient's `unit_tags` selects the
    calculation:

    * weight unit: `quantity` times the unit's `conversion` factor;
    * volume unit: `quantity` times the unit's `conversion` factor times a
      density derived from the matched food portion. The portion must itself
      be described by a volume unit, otherwise `pd.NA` is returned;
    * neither: `quantity` times the `gram_weight` of the matched portion.

    Parameters
    ----------
    ingredient : pd.Series
        Row providing `food_id`, `food_portion_id`, `unit_tags` and
        `quantity`. The row is only read, never modified.
    food_portions : pd.DataFrame, optional
        Portion table looked up with `.loc[food_id, food_portion_id]`; the
        columns `unit_tags`, `gram_weight`, `amount` and `portion_amount`
        are read. Defaults to the module-level `food_portion_df`.
    units : dict, optional
        Conversion table indexed as `units[unit_type][unit_tag]['conversion']`;
        the `'weight'` and `'volume'` unit types are consulted. Defaults to
        the module-level `unit_list`.

    Returns
    -------
    float or pd.NA
        `pd.NA` when the ingredient has no matched food / food portion, or
        when a volume quantity cannot be converted because the portion is not
        a volume measure.
    """
    if food_portions is None:
        food_portions = food_portion_df
    if units is None:
        units = unit_list

    # Ingredients without a matched food portion are reported as unknown, even
    # when their own unit is a weight (same as the previous behaviour).
    if pd.isnull(ingredient['food_id']) or pd.isnull(ingredient['food_portion_id']):
        return pd.NA

    portion = food_portions.loc[ingredient['food_id'], ingredient['food_portion_id']]

    weight_units = units.get('weight', {})
    volume_units = units.get('volume', {})

    # A weight unit takes precedence over a volume unit.
    weight_unit = _first_known_unit(ingredient['unit_tags'], weight_units)
    if weight_unit is not None:
        return weight_units[weight_unit]['conversion'] * ingredient['quantity']

    volume_unit = _first_known_unit(ingredient['unit_tags'], volume_units)
    if volume_unit is None:
        # Whole / portion measurement: the portion's gram weight is per unit of quantity.
        return portion['gram_weight'] * ingredient['quantity']

    portion_volume_unit = _first_known_unit(portion['unit_tags'], volume_units)
    if portion_volume_unit is None:
        # The portion is not a volume measure, so no density can be derived (NA for now).
        return pd.NA

    # Volume the portion's gram weight refers to, in multiples of the portion's unit.
    # TODO: portion_amount could default to 1, or be folded into `amount` when
    # the portion dataframe is created.
    portion_quantity = portion['amount']
    if pd.notnull(portion['portion_amount']):
        portion_quantity = portion_quantity * portion['portion_amount']

    # Grams per converted volume unit of the conversion table.
    density = portion['gram_weight'] / (portion_quantity * volume_units[portion_volume_unit]['conversion'])
    return volume_units[volume_unit]['conversion'] * density * ingredient['quantity']

In [11]:
assert get_gram_weight(ingredient) > 100

In [12]:
ingredients_df.iloc[2]

name.name                                         egg
name.description                        land lake egg
quantity                                          2.0
unit                                             <NA>
comment                                  (yolks only)
preparation                                      <NA>
ingredient_string    2 Land O Lakes Eggs (yolks only)
unit_tags                                          []
unit_remainders                                    []
unit_type                                     portion
food_id                                      171287.0
food_portion_id                               88378.0
Name: (1746116, 2), dtype: object

In [13]:
# Compute the gram weight row by row (axis=1) with a tqdm progress bar;
# get_gram_weight returns pd.NA for rows whose weight cannot be determined.
ingredients_df['gram_weight'] = ingredients_df.progress_apply(get_gram_weight, axis=1)

  0%|                                                                                                                                                                                                                                                | 0/2450 [00:00<?, ?it/s]

 12%|████████████████████████████▍                                                                                                                                                                                                       | 306/2450 [00:00<00:00, 3056.11it/s]

 25%|█████████████████████████████████████████████████████████▌                                                                                                                                                                          | 619/2450 [00:00<00:00, 3097.92it/s]

 38%|███████████████████████████████████████████████████████████████████████████████████████▍                                                                                                                                            | 940/2450 [00:00<00:00, 3145.69it/s]

 52%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                                                                            | 1283/2450 [00:00<00:00, 3255.16it/s]

 66%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                                             | 1612/2450 [00:00<00:00, 3267.36it/s]

 80%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                              | 1951/2450 [00:00<00:00, 3308.43it/s]

 93%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍               | 2282/2450 [00:00<00:00, 3306.13it/s]

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2450/2450 [00:00<00:00, 3253.43it/s]




In [14]:
ingredients_df

Unnamed: 0_level_0,Unnamed: 1_level_0,name.name,name.description,quantity,unit,comment,preparation,ingredient_string,unit_tags,unit_remainders,unit_type,food_id,food_portion_id,gram_weight
recipe,ingredient,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1746116,0,butter,land lake butter,0.667,cups,,softened,"2/3 cup Land O Lakes Butter, softened",[cup],[],volume,2345703.0,287267.0,149.408
1746116,1,sugar,sugar,0.5,cups,,,1/2 cup sugar,[cup],[],volume,2345817.0,287772.0,100.0
1746116,2,egg,land lake egg,2.0,,(yolks only),,2 Land O Lakes Eggs (yolks only),[],[],portion,171287.0,88378.0,88.0
1746116,3,vanilla,vanilla,1.0,teaspoon,,,1 teaspoon vanilla,[teaspoon],[],volume,172236.0,90134.0,4.2
1746116,4,flour,all-purpose flour,1.5,cups,,,1 1/2 cups all-purpose flour,[cup],[],volume,169761.0,85466.0,187.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
931097,9,red onion,red onion,0.25,cups,,finely chopped,"1/4 cup red onion, finely chopped",[cup],[],volume,2345315.0,286030.0,40.0
931097,10,red bell pepper,red bell pepper,0.25,cups,,chopped,1/4 cup chopped red bell pepper,[cup],[],volume,2345321.0,286059.0,37.5
931097,11,jasmine rice,jasmine rice,1.0,cup,,,1 cup Jasmine rice,[cup],[],volume,169756.0,85461.0,185.0
931097,12,chicken,reduced-sodium chicken broth,1.5,cups,,,1 1/2 cups reduced-sodium chicken broth,[cup],[],volume,2341341.0,270046.0,202.5


In [15]:
ingredients_df[ingredients_df['gram_weight'] == 0]

Unnamed: 0_level_0,Unnamed: 1_level_0,name.name,name.description,quantity,unit,comment,preparation,ingredient_string,unit_tags,unit_remainders,unit_type,food_id,food_portion_id,gram_weight
recipe,ingredient,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1


In [16]:
# Sanity check: a gram weight of exactly 0 is only acceptable when the quantity itself is 0.
assert ingredients_df[(ingredients_df['quantity'] != 0) & (ingredients_df['gram_weight'] == 0)].empty

In [17]:
# Ingredients that have a quantity but no gram weight, shown next to the
# description of their food portion (looked up by food_id / food_portion_id).
ingredients_df[ingredients_df['gram_weight'].isna() & ingredients_df['quantity'].notna()].join(food_portion_df['description'], on=['food_id', 'food_portion_id'])

Unnamed: 0_level_0,Unnamed: 1_level_0,name.name,name.description,quantity,unit,comment,preparation,ingredient_string,unit_tags,unit_remainders,unit_type,food_id,food_portion_id,gram_weight,description
recipe,ingredient,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1828339,3,long grain brown rice,long grain brown rice,175.0,g,,,175 g long grain brown rice,[gram],[],weight,,,,
1828339,6,dried apricot,dried apricot,100.0,g,,finely chopped,"100 g dried apricots, finely chopped",[gram],[],weight,,,,
1828339,7,sultana,sultana,50.0,g,,,50 g sultanas,[gram],[],weight,,,,
1703,3,mincemeat,mincemeat,1.0,jar,,,1 jar mincemeat,[],[jar],portion,,,,
1262123,0,linguine,linguine,4.0,oz,,,4 oz linguine,[ounce],[],weight,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
599284,1,potato,frozen shredded hash brown potato,1.0,lb,,completely thawed,"1 lb. frozen shredded hash brown potatoes, com...",[pound],[],weight,,,,
1357213,3,cinnamin,cinnamin,0.5,tsps,,,1/2 tsp. cinnamin,[teaspoon],[],volume,,,,
2006319,2,raspberry,raspberry,8.0,ounces,,,8 ounces raspberries,[ounce],[],weight,,,,
2006319,3,blackberry,blackberry,8.0,ounces,,,8 ounces blackberries,[ounce],[],weight,,,,


In [18]:
food_portion_df.loc[2343304.0]

Unnamed: 0_level_0,seq_num,amount,gram_weight,description,unit_tags,unit_remainders,unit_type,portion_amount,portion_unit
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
277580,13,1.0,33.0,small,[whole],[],portion,,
277581,14,1.0,45.0,medium,[whole],[],portion,,
277582,15,1.0,71.0,large,[whole],[],portion,,
277583,16,1.0,104.0,extra large,[whole],[extra],portion,,


# Weight Concentration

Ultimately we'll want to measure the concentration of ingredients in each recipe.

In [19]:
ingredient = ingredients_df.iloc[0]
ingredient

name.name                                           butter
name.description                          land lake butter
quantity                                             0.667
unit                                                  cups
comment                                               <NA>
preparation                                       softened
ingredient_string    2/3 cup Land O Lakes Butter, softened
unit_tags                                            [cup]
unit_remainders                                         []
unit_type                                           volume
food_id                                          2345703.0
food_portion_id                                   287267.0
gram_weight                                        149.408
Name: (1746116, 0), dtype: object

In [20]:
#| export
def get_weight_ratio(ingredient, ingredients=None):
    """Return the ingredient's share of the total gram weight of its recipe.

    Parameters
    ----------
    ingredient : pd.Series
        Row of the ingredients table. Its `name` is the row's index label,
        whose first element is the recipe id, and it provides `gram_weight`.
    ingredients : pd.DataFrame, optional
        Table with a `gram_weight` column whose first index level is the
        recipe id. Defaults to the module-level `ingredients_df`.

    Returns
    -------
    float or pd.NA
        `gram_weight` divided by the sum of the recipe's gram weights.
        Missing weights are skipped by the sum, so the ratio is relative to
        the ingredients whose weight is known. `pd.NA` is returned when the
        recipe total is zero or missing, or propagated when the ingredient's
        own weight is `pd.NA`.
    """
    if ingredients is None:
        ingredients = ingredients_df

    recipe_id = ingredient.name[0]
    recipe_weight = ingredients.loc[recipe_id]['gram_weight'].sum()

    # No usable total (e.g. every weight in the recipe is missing): avoid dividing by zero.
    if pd.isnull(recipe_weight) or recipe_weight == 0:
        return pd.NA

    return ingredient['gram_weight'] / recipe_weight

In [21]:
get_weight_ratio(ingredient)

0.08011411847854065

In [22]:
# Compute the weight ratio row by row (axis=1) and store it with pandas'
# nullable Float64 dtype.
ingredients_df['weight_ratio'] = ingredients_df.progress_apply(get_weight_ratio, axis=1).astype('Float64')

  0%|                                                                                                                                                                                                                                                | 0/2450 [00:00<?, ?it/s]

 19%|██████████████████████████████████████████▋                                                                                                                                                                                         | 459/2450 [00:00<00:00, 4584.78it/s]

 38%|█████████████████████████████████████████████████████████████████████████████████████▊                                                                                                                                              | 922/2450 [00:00<00:00, 4610.25it/s]

 57%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                                                                 | 1394/2450 [00:00<00:00, 4657.63it/s]

 76%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                                      | 1863/2450 [00:00<00:00, 4667.11it/s]

 95%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████           | 2332/2450 [00:00<00:00, 4675.10it/s]

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2450/2450 [00:00<00:00, 4648.22it/s]




### Evaluating

In [23]:
ingredients_df['weight_ratio'].groupby('recipe').sum()

recipe
222        1.0
1703       1.0
8981       1.0
13596      1.0
17929      1.0
          ... 
2185822    1.0
2189489    1.0
2195357    1.0
2196831    1.0
2201833    1.0
Name: weight_ratio, Length: 302, dtype: Float64

In [24]:
# Per-recipe sum of the weight ratios; show the recipes whose sum is not (close to) 1.
# The name `_` is read again by the following cells.
_ = ingredients_df['weight_ratio'].groupby('recipe').sum().astype('float')
_[~np.isclose(_, 1)]

recipe
480397    0.0
824709    0.0
Name: weight_ratio, dtype: float64

In [25]:
# All ingredient rows of the recipes whose weight ratios do not sum to 1.
ingredients_df.loc[_.index[~np.isclose(_, 1)]]

Unnamed: 0_level_0,Unnamed: 1_level_0,name.name,name.description,quantity,unit,comment,preparation,ingredient_string,unit_tags,unit_remainders,unit_type,food_id,food_portion_id,gram_weight,weight_ratio
recipe,ingredient,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
480397,0,bisquick,bisquick,3.0,cups,,,3 cup Bisquick,[cup],[],volume,,,,
480397,1,hot sausage,hot sausage,1.0,lb,,,1 lb. hot sausage,[pound],[],weight,,,,
480397,2,sharp cheese,sharp cheese,1.0,lb,,,1 lb. sharp cheese,[pound],[],weight,,,,
824709,0,bread,bread,,,,,bread,[],[],portion,2343328.0,277706.0,,
824709,1,ground cinnamon,ground cinnamon,,,,,ground cinnamon,[],[],portion,172824.0,91502.0,,
824709,2,sugar,sugar,,,,,sugar,[],[],portion,2345817.0,287776.0,,
824709,3,margarine,squeeze margarine,,,,,squeeze margarine,[],[],portion,2345707.0,287304.0,,


We didn't get a weight for any ingredient in these recipes, so their weight ratios sum to 0 instead of 1. Let's remove these recipes.

In [26]:
# Drop every row of the recipes whose weight ratios do not sum to 1.
# The labels are recipe ids, so the index level is named explicitly; pandas
# emits a performance warning when dropping from a non-lexsorted MultiIndex
# without a `level` argument.
ingredients_df = ingredients_df.drop(_.index[~np.isclose(_, 1)], level='recipe')

  ingredients_df = ingredients_df.drop(_.index[~np.isclose(_,np.full((len(_)), 1))])


# Saving

In [27]:
# Persist only the weight_ratio column, written as a one-column DataFrame.
ingredients_df['weight_ratio'].to_frame().to_feather('../../data/local/density/partial/weights/0.feather')

In [28]:
from nbdev import nbdev_export; nbdev_export()