# Introduction

The incomplete nutrition data from the branded product description is mostly summary data. One strategy to match-up the USDA data is to break the data into protein, carbohydrates, and vitamins, and to scale those components to best match the branded product. The result is a three scalar valued correction factor to be applied to the USDA description to best match the branded product description.



# To Do



In [3]:
# Import all of the helper libraries 

from scipy.optimize import minimize
from scipy.optimize import Bounds
from scipy.optimize import least_squares
import pandas as pd
import numpy as np
import os
import json
import requests
from math import e
import matplotlib.pyplot as plt
import seaborn as sns
from math import log10
from ipysheet import from_dataframe, to_dataframe
from decimal import *
#%matplotlib inline

In [4]:
#!pip install seaborn
#!pip install ipysheet
#!pip install ipywidgets

In [5]:
# Setup the notebook context

data_dir = '../data'
pd.set_option('max_columns', 70)


# Our Data

The [tables](https://docs.google.com/spreadsheets/d/1UOx5ZkNMjcwybx9TgFS_EaQnjbokqDx6K9NW0mRDGlg/edit#gid=6244197120) containing our ingredients nutrition profile are held in Google Sheets.
The sheet names are "Ingredients" and "Nutrition Profile"


In [9]:
# Download our nutrition profile data from Google Sheets
#!(cd ..; make sync_data_from_google)

nutrition_profile_df = pd.read_csv(f'{data_dir}/raw/nutrition_profile.csv', index_col=0, verbose=True).transpose()
ingredients_df = pd.read_csv(f'{data_dir}/raw/ingredients.csv', index_col=0, verbose=True).transpose()

# convert all values to float
for col in ingredients_df.columns:
    ingredients_df[col] = ingredients_df[col].astype(float)

#sheet = from_dataframe(ingredients_df)
#sheet

Tokenization took: 0.07 ms
Type conversion took: 0.22 ms
Parser memory cleanup took: 0.00 ms
Tokenization took: 0.01 ms
Type conversion took: 0.13 ms
Parser memory cleanup took: 0.00 ms


In [7]:
from ipysheet import sheet, cell, row, column, cell_range
from ipywidgets import interact, interactive, fixed, interact_manual
sheet1 = sheet(rows=3, columns=4)
sheet1

Sheet(layout=Layout(height='auto', width='auto'))

In [24]:
n = Decimal(1E0)
s = n * (Decimal(100)/Decimal(30)).quantize(Decimal('0.1'))
s.as_tuple()
#s.quantize(Decimal('0.1'))
#Decimal('1.41421356').quantize(Decimal('1.000'))
10 ** Decimal('1E0').as_tuple().exponent / 2.
#(10 ** 0) / 2.

0.5

# Branded Product Nutrition Definitions

The following dataframe holds each branded product nutrition data, preserving the precision from the food nutrition label. This allows the nutrition data to be scaled to a common 100g mass for comparison.

In [120]:
branded_nut = {
    'Serving Size (g)': [Decimal('30E0'), Decimal('40E0'), Decimal('33E0'), Decimal('28E0')],
    'Energy (kcal)': [Decimal('110E0'), Decimal('160E0'), Decimal('130E0'), Decimal('105E0')],
    'Total lipid (fat) (g)': [Decimal('1E0'), Decimal('3E0'), Decimal('0.5E0'), Decimal('2E0')],
    'Fatty acids, total polyunsaturated (g)': [Decimal('1E0'), np.NaN, np.NaN, np.NaN],
    'Carbohydrate, by difference (g)': [Decimal('23E0'), Decimal('26E0'), Decimal('27E0'), Decimal('0E0')],
    'Fiber, total dietary (g)': [Decimal('2E0'), Decimal('4E0'), Decimal('3E0'), Decimal('0E0')],
    'Sugars, total including NLEA (g)': [Decimal('1E0'), Decimal('0E0'), Decimal('0E0'), Decimal('0E0')],
    'Protein (g)': [Decimal('3E0'), Decimal('7E0'), Decimal('3E0'), Decimal('23E0')],
    'Calcium, Ca (mg)': [Decimal('20E0'), Decimal('20E0'), Decimal('9E0'), Decimal('52E0')],
    'Iron, Fe (mg)': [Decimal('4E-1'), Decimal('1.8E0'), Decimal('1E0'), Decimal('1.3E0')],
    'Potassium, K (mg)': [Decimal('95E0'), np.NaN, Decimal('108E0'), np.NaN],
    'Sodium, Na (mg)': [np.NaN, np.NaN, np.NaN, Decimal('336E0')],
    'Folic acid (µg)': [Decimal('40E0'), np.NaN, np.NaN, np.NaN],
    'Folate, DFE (µg)': [Decimal('66E0'), np.NaN, np.NaN, np.NaN],
}
branded_name = ['maseca white', 'oat flour', 'barley flour', 'pea protein']
branded = pd.DataFrame.from_dict(branded_nut, orient='index', columns=branded_name)
branded 


Unnamed: 0,maseca white,oat flour,barley flour,pea protein
Serving Size (g),30.0,40.0,33.0,28.0
Energy (kcal),110.0,160.0,130.0,105.0
Total lipid (fat) (g),1.0,3.0,0.5,2.0
"Fatty acids, total polyunsaturated (g)",1.0,,,
"Carbohydrate, by difference (g)",23.0,26.0,27.0,0.0
"Fiber, total dietary (g)",2.0,4.0,3.0,0.0
"Sugars, total including NLEA (g)",1.0,0.0,0.0,0.0
Protein (g),3.0,7.0,3.0,23.0
"Calcium, Ca (mg)",20.0,20.0,9.0,52.0
"Iron, Fe (mg)",0.4,1.8,1.0,1.3


In [115]:
# Scale a branded product along with the provided precision to a standard 100g sample mass
def compare_brand_usda(branded_name, usda_name):
    product = branded[branded_name][branded[branded_name].notnull()]
    serv_size = product['Serving Size (g)']
    getcontext().prec = 6
    br_min = product.apply(lambda x: (x - Decimal(10 ** x.as_tuple().exponent / 2.)) * Decimal(100)/serv_size)
    br_max = product.apply(lambda x: (x + Decimal(10 ** x.as_tuple().exponent / 2.)) * Decimal(100)/serv_size)

    jmi = ing_df.loc[usda_name].align(br_min,join='inner')
    jmx = ing_df.loc[usda_name].align(br_max,join='inner')
    jtrg = ing_df.loc[usda_name].align(product.apply(lambda x: x * Decimal(100)/serv_size))
    jcom = pd.DataFrame((jmi[0], jmi[1], jmx[1], jtrg[1]), ['usda','min','max','target']).transpose()
    jcom['pct_diff'] = jcom.apply(lambda x: (Decimal(x['target']) - Decimal(x['usda'])) / Decimal(x['usda']) * Decimal(100), axis=1)
    return jcom[jcom.apply(lambda x: x['usda'] < x['min'] or x['usda'] > x['max'], axis=1)]



In [91]:
ingredient_map = {
    'wheat bran': '785757',
    'oat flour': '169741',
    'corn flour': '169696',
    'whey protein isolate': '173177',
    'soy protein isolate': '174276',
    'lecithin': '171426',
    'canola oil': '172336',
    'barley flour': '169739',
    'flaxseed': '169414',
    'guar gum': '169045',
    'xanthan gum': '563344',
}

def get_fdc(fdc_id):
    key = 'ranhDhgRXsjucJQHSk9zMeX2XSS02iUQvzDBAvpG'
    payload={'api_key': key}
    url = f'https://api.nal.usda.gov/fdc/v1/food/{fdc_id}'
    r = requests.get(url, params=payload)
    def nutrient_decode(el):
        if 'nutrient' in el:
            if 'amount' in el:
                return((f'{el["nutrient"]["name"]} ({el["nutrient"]["unitName"]})', el['amount']))
            else:
                return(None)
        return(el)

    res = json.loads(r.text, object_hook=nutrient_decode)
    return pd.Series(dict(filter(lambda el: el is not None, res['foodNutrients'])), name=res['description'])
    
ing_list = []
for (ing_name, fdc_id) in ingredient_map.items():
    ing_list.append(get_fdc(fdc_id))
ing_df = pd.DataFrame(ing_list)
updated_ingredients = ing_df.copy()


# Merged Branded Products With USDA Data

The base USDA definition is scaled and updaded with data taken from the branded products. Some branded products fortify with vitamins or minerals. The following steps are based on the vendor provied data and evidence

In [92]:
#
# Corn Masa Flour see: bit.ly/3e55oMx
#
c = compare_brand_usda('maseca white', 'Corn flour, masa, unenriched, white')
c
updated_ingredients.loc['Corn flour, masa, unenriched, white']['Folate, DFE (µg)'] = c.loc['Folate, DFE (µg)']['target']
updated_ingredients.loc['Corn flour, masa, unenriched, white']['Folic acid (µg)'] = c.loc['Folic acid (µg)']['target']
updated_ingredients.loc['Corn flour, masa, unenriched, white']['Calcium, Ca (mg)'] = c.loc['Calcium, Ca (mg)']['target']
updated_ingredients.loc['Corn flour, masa, unenriched, white']['Potassium, K (mg)'] = c.loc['Potassium, K (mg)']['target']


In [101]:
#
# Oat Flour
#
c = compare_brand_usda('oat flour', 'Oat flour, partially debranned')
c
updated_ingredients.loc['Oat flour, partially debranned']['Protein (g)'] = c.loc['Protein (g)']['target']
updated_ingredients.loc['Oat flour, partially debranned']['Total lipid (fat) (g)'] = c.loc['Total lipid (fat) (g)']['target']
updated_ingredients.loc['Oat flour, partially debranned']['Fiber, total dietary (g)'] = c.loc['Fiber, total dietary (g)']['target']
updated_ingredients.loc['Oat flour, partially debranned']['Calcium, Ca (mg)'] = c.loc['Calcium, Ca (mg)']['target']
updated_ingredients.loc['Oat flour, partially debranned']['Iron, Fe (mg)'] = c.loc['Iron, Fe (mg)']['target']

In [117]:
#
# Barley Flour: See for details: https://www.bobsredmill.com/barley-flour.html
# 
# The BRM Carbohydrate is 9.8% greater than the USDA definition. Assuming the BRM contains more endosperm content
# The BRM Energy is 14% greater than the USDA definition. Assuming that is due to the above

c = compare_brand_usda('barley flour', 'Barley flour or meal')
c
updated_ingredients.loc['Barley flour or meal']['Carbohydrate, by difference (g)'] = c.loc['Carbohydrate, by difference (g)']['target']
updated_ingredients.loc['Barley flour or meal']['Energy (kcal)'] = c.loc['Energy (kcal)']['target']
updated_ingredients.loc['Barley flour or meal']['Calcium, Ca (mg)'] = c.loc['Calcium, Ca (mg)']['target']
updated_ingredients.loc['Barley flour or meal']['Potassium, K (mg)'] = c.loc['Potassium, K (mg)']['target']


In [None]:
#
# Pea Protein: See for details: https://nuts.com/cookingbaking/powders/protein-powders/pea-protein-powder.html
# 
# The BRM Carbohydrate is 9.8% greater than the USDA definition. Assuming the BRM contains more endosperm content
# The BRM Energy is 14% greater than the USDA definition. Assuming that is due to the above
# References:
# https://www.nowfoods.com/sports-nutrition/pea-protein-organic-powder
# https://www.onnit.com/academy/pea-protein-powder/

c = compare_brand_usda('pea protein', 'Barley flour or meal')
c
updated_ingredients.loc['Barley flour or meal']['Carbohydrate, by difference (g)'] = c.loc['Carbohydrate, by difference (g)']['target']

In [None]:
'''
o Lecithin (g) - http://www.bulkfoods.com/health-food-distributor/4040-lecithin-powder-22-pounds.html
o Canola Oil (g)
Xanthan Gum (g)
Guar Gum (g)
Salt (g)
P. Chloride (g)
 Ascorbic Acid (g)
Vit K (pill)
Multi vit (pill)
Calcium + D (pill)
Choline Bitartrate
- Asprin (pill)
- Beano (pill)
Flax Seed Meal (g)
Wheat Bran (g) - http://www.bulkfoods.com/wheat-grain-distributor/1804-wheat-bran-20-pounds.html
o Whey Protein (g) - http://www.bulkfoods.com/protain-powder-distributor/543-Whey-Protein-Concentrate-25-pounds.html
o Pea Protein (g) - https://nuts.com/cookingbaking/powders/protein-powders/pea-protein-powder.html
o Soy Protein (g) - http://www.bulkfoods.com/protain-powder-distributor/2996-soy-isolate-90-percent-protein-25-pounds.html
x Oat Flour (g) - https://www.amazon.com/Bobs-Red-Mill-Gluten-Flour/dp/B0121EDPTK
x Barley Flour (g) - https://www.bobsredmill.com/barley-flour.html
x Corn Flour (g)

key: x: complete, o: need to create update entry, -: ignore
'''

In [102]:
#sorted(ing_df.loc['Corn flour, masa, unenriched, white'].index)
#print(str(sorted(ing_df.columns)))
ing_df

Unnamed: 0,Protein (g),Total lipid (fat) (g),"Carbohydrate, by difference (g)",Energy (kcal),"Alcohol, ethyl (g)",Water (g),Caffeine (mg),Theobromine (mg),"Sugars, total including NLEA (g)","Fiber, total dietary (g)","Calcium, Ca (mg)","Iron, Fe (mg)","Magnesium, Mg (mg)","Phosphorus, P (mg)","Potassium, K (mg)","Sodium, Na (mg)","Zinc, Zn (mg)","Copper, Cu (mg)","Selenium, Se (µg)",Retinol (µg),"Vitamin A, RAE (µg)","Carotene, beta (µg)","Carotene, alpha (µg)",Vitamin E (alpha-tocopherol) (mg),Vitamin D (D2 + D3) (µg),"Cryptoxanthin, beta (µg)",Lycopene (µg),Lutein + zeaxanthin (µg),"Vitamin C, total ascorbic acid (mg)",Thiamin (mg),Riboflavin (mg),Niacin (mg),Vitamin B-6 (mg),"Folate, total (µg)",Vitamin B-12 (µg),...,"Tocopherol, beta (mg)","Tocopherol, gamma (mg)","Tocopherol, delta (mg)","Tocotrienol, alpha (mg)","Tocotrienol, beta (mg)","Tocotrienol, gamma (mg)","Tocotrienol, delta (mg)",Vitamin K (Dihydrophylloquinone) (µg),Vitamin K (Menaquinone-4) (µg),15:0 (g),17:0 (g),20:0 (g),22:0 (g),14:1 (g),15:1 (g),17:1 (g),18:1 c (g),"18:2 n-6 c,c (g)","18:3 n-3 c,c,c (ALA) (g)","18:3 n-6 c,c,c (g)","20:2 n-6 c,c (g)",20:3 (g),"Fatty acids, total trans-monoenoic (g)",18:1 t (g),"18:2 t,t (g)","Fatty acids, total trans-polyenoic (g)",Stigmasterol (mg),Campesterol (mg),Beta-sitosterol (mg),Lactose (g),Maltose (g),Galactose (g),24:0 (g),24:1 c (g),Hydroxyproline (g)
"Wheat bran, unprocessed",15.55,4.25,64.51,216.0,0.0,9.89,0.0,0.0,0.41,42.8,73.0,10.57,611.0,1013.0,1182.0,2.0,7.27,0.998,77.6,0.0,0.0,6.0,0.0,1.49,0.0,0.0,0.0,240.0,0.0,0.523,0.577,13.578,1.303,79.0,0.0,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"Oat flour, partially debranned",14.66,9.12,65.7,404.0,0.0,8.55,0.0,0.0,0.8,6.5,55.0,4.0,144.0,452.0,371.0,19.0,3.2,0.437,34.0,0.0,0.0,0.0,0.0,0.7,0.0,0.0,0.0,180.0,0.0,0.692,0.125,1.474,0.125,32.0,0.0,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"Corn flour, masa, unenriched, white",8.46,3.69,76.59,363.0,0.0,9.79,0.0,0.0,1.61,6.4,138.0,1.47,93.0,231.0,262.0,5.0,1.8,0.209,10.5,0.0,0.0,2.0,0.0,0.12,0.0,2.0,0.0,6.0,0.0,0.223,0.097,1.634,0.475,29.0,0.0,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"Beverages, Whey protein powder isolate",58.14,1.16,29.07,359.0,0.0,0.86,0.0,0.0,1.16,0.0,698.0,1.26,233.0,581.0,872.0,372.0,8.72,1.163,40.7,872.0,872.0,1.0,0.0,7.85,0.0,0.0,0.0,0.0,34.9,0.872,0.988,11.628,1.163,233.0,3.49,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Soy protein isolate,88.32,3.39,0.0,335.0,0.0,4.98,0.0,0.0,0.0,0.0,178.0,14.5,39.0,776.0,81.0,1005.0,4.03,1.599,0.8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.176,0.1,1.438,0.1,176.0,0.0,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"Oil, soybean lecithin",0.0,100.0,0.0,763.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.18,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"Oil, canola",0.0,100.0,0.0,884.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,17.46,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.01,27.34,0.99,0.03,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.65,0.33,0.0,0.0,0.0,61.714,18.64,9.137,0.0,0.0,0.0,0.03,0.03,0.365,0.365,3.0,241.0,413.0,,,,,,
Barley flour or meal,10.5,1.6,74.52,345.0,0.0,12.11,0.0,0.0,0.8,10.1,32.0,2.68,96.0,296.0,309.0,4.0,2.0,0.343,37.7,0.0,0.0,0.0,0.0,0.57,0.0,0.0,0.0,160.0,0.0,0.37,0.114,6.269,0.396,8.0,0.0,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"Seeds, flaxseed",18.29,42.16,28.88,534.0,0.0,6.96,0.0,0.0,1.55,27.3,255.0,5.73,392.0,642.0,813.0,30.0,4.34,1.22,25.4,0.0,0.0,0.0,0.0,0.31,0.0,0.0,0.0,651.0,0.6,1.644,0.161,3.08,0.473,87.0,0.0,...,0.0,19.95,0.35,0.0,0.0,0.0,0.0,0.0,,0.005,0.018,0.052,0.052,,,,,,,,0.007,,,,,,11.0,45.0,90.0,0.0,0.0,0.0,0.031,0.064,0.175
"Gums, seed gums (includes locust bean, guar)",4.6,0.5,77.3,332.0,0.0,15.0,0.0,0.0,0.0,77.3,294.0,0.0,0.0,0.0,0.0,125.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [None]:
d = {'a':'b'}
d.items()
l = [d]
list(d.keys())[0]
tuple(d)
tuple(d.items())

# Problem Setup

Let's cast our data into the from $\vec{y} = A \vec{x} + \vec{b}$ where $A$ is our ingredients data, $\vec{x}$ is the quantity of each ingredient for our recipe, and $\vec{b}$ is the nutrition profile.
The problem to be solved is to find the quantity of each ingredient which will optimally satisfy the nutrition profile, or in our model, to minimize: $|A \vec{x} - \vec{b}|$.

There are some nutrients we only want to track, but not optimize. For example, we want to know how much cholesterol is contained in our recipe, but we don't want to constrain our result to obtain a specific amount of cholesterol as a goal. The full list of ingredients are named: A_full, and b_full. The values to optimized are named: A and b

In [None]:
b_full = nutrition_profile_df
A_full = ingredients_df.transpose()
A = ingredients_df.transpose()[nutrition_profile_df.loc['Report Only'] == False]

b_full = nutrition_profile_df.loc['Target']
b = nutrition_profile_df.loc['Target'][nutrition_profile_df.loc['Report Only'] == False]
ul = nutrition_profile_df.loc['UL'][nutrition_profile_df.loc['Report Only'] == False]
rdi = nutrition_profile_df.loc['RDI'][nutrition_profile_df.loc['Report Only'] == False]
weight = nutrition_profile_df.loc['Weight'][nutrition_profile_df.loc['Report Only'] == False]
ul_full = nutrition_profile_df.loc['UL']
rdi_full = nutrition_profile_df.loc['RDI']

In [None]:
weight

# Least Squares Optimization
Start with a simple solver using a least squares optimizer


In [None]:
# Using the scipy least_squares optimizer
# Define a residual function: if y = A x + b -> A x - b is the residual function
k_thres = 0.7
k_rdi = b - ((rdi - b) * k_thres)
k_ul =  b + ((ul - b) * k_thres)

def limit_scale(df):
    r = ((df['y'] - df['b']) /  df['b']) * df['weight']
    k_thres = 0.7
    if df['y'] < df['k_rdi']:
        r *= 10
        if df['y'] < df['rdi']:
            r *= 10
    elif df['y'] >= df['b']:
        if df['k_ul'] is np.NaN:
            return(0.0)
        elif df['y'] > df['k_ul']:
            r *= 10
        if df['y'] > df['ul']:
            r *= 10
    return(r)

scale_limits = pd.DataFrame({'rdi': rdi, 'k_rdi': k_rdi, 'b': b, 'k_ul': k_ul, 'ul': ul, 'weight': weight})

def residuals(x, *args, **kwargs):
    y0 = A.dot(x.transpose())
    res0 = pd.concat([scale_limits, pd.Series(y0,scale_limits.index, name='y')], axis=1)
    res1 = res0.apply(limit_scale, axis=1)
    return(res1)

In [None]:
# Constrain ingredients before the optimization process. Many of the ingredients are required for non-nutritional purposes 
# or are being limited to enhance flavor

bounds_df = pd.DataFrame(index=ingredients_df.index, data={'lower': 0, 'upper': np.inf})             
bounds_df.loc['Guar gum'] = [0.9 * .01, 0.9 * .01 + .0001]
bounds_df.loc['Xanthan Gum'] = [0.9 * .01, 0.9 * .01 + .0001]
bounds_df.loc['Alpha-galactosidase enzyme (Beano)'] = [1.0, 1.0 + .0001]
bounds_df.loc['Multivitamin'] = [1.0, 1.0 + .0001]
bounds_df.loc['Corn flour, nixtamalized'] = [0, 1.0]
bounds_df.loc['Whey protein'] = [0,0.15]
bounds_df.loc['Ascorbic acid'] = [0.01, 0.01 + .0001]
lower = list(bounds_df.lower.values)
upper = list(bounds_df.upper.values)

# x0 is our initial guess at a solution
x0 = np.array(lower)

len(lower), len(upper)

In [None]:
solution = least_squares(residuals, x0, jac='2-point', bounds=(lower, upper), method='trf', ftol=1e-08, xtol=1e-08, gtol=1e-08, x_scale=1.0, loss='linear', f_scale=1.0, diff_step=None, tr_solver=None, tr_options={}, jac_sparsity=None, max_nfev=None, verbose=0, args=(), kwargs={})
solution.success, solution.x

In [None]:
A_full.dot(solution.x)

In [None]:
# Scale the ingredient nutrient amounts for the given quantity of each ingredient given by the optimizer
solution_df = A_full.transpose().mul(solution.x, axis=0) # Scale each nutrient vector per ingredient by the amount of the ingredient
solution_df.insert(0, 'Quantity (g)', solution.x * 100) # Scale to 100 g since that is basis for the nutrient quantities

# Add a row showing the sum of the scaled amount of each nutrient
total = solution_df.sum()
total.name = 'Total'
solution_df = solution_df.append(total)

In [None]:
# Plot the macro nutrient profile
# The ratio of Calories for protein:carbohydrates:fat is 4:4:9 kcal/g
pc = solution_df['Protein (g)']['Total'] * 4.0
cc = solution_df['Carbohydrates (g)']['Total'] * 4.0
fc = solution_df['Total Fat (g)']['Total'] * 9.0
tc = pc + cc + fc
p_pct = int(round(pc / tc * 100))
c_pct = int(round(cc / tc * 100))
f_pct = int(round(fc / tc * 100))
(p_pct, c_pct, f_pct)
# create data
names=f'Protein {p_pct}%', f'Carbohydrates {c_pct}%', f'Fat {f_pct}%', 
size=[p_pct, c_pct, f_pct]
 
fig = plt.figure(figsize=(10, 5))

fig.add_subplot(1,2,1)
# Create a circle for the center of the plot
my_circle=plt.Circle( (0,0), 0.5, color='white')

# Give color names
cmap = plt.get_cmap('Spectral')
sm = plt.cm.ScalarMappable(cmap=cmap)
colors = ['yellow','orange','red']
plt.pie(size, labels=names, colors=colors)

#p=plt.gcf()
#p.gca().add_artist(my_circle)
fig.gca().add_artist(my_circle)
#plt.show()

fig.add_subplot(1,2,2)
barWidth = 1
fs = [solution_df['Soluble Fiber (g)']['Total']]
fi = [solution_df['Insoluble Fiber (g)']['Total']]
plt.bar([0], fs, color='red', edgecolor='white', width=barWidth)
plt.bar([0], fi, bottom=fs, color='yellow', edgecolor='white', width=barWidth, label=['Insoluble Fiber (g)'])
 
# Also show the Omega-3, Omega-6 ratio
# Saturated:Monounsaturated:Polyunsaturated ratios

In [None]:
# Prepare data as a whole for plotting by normalizing and scaling
amounts = solution_df
total = A_full.dot(solution.x) #solution_df.loc['Total']

# Normalize as a ratio beyond RDI
norm = (total) / rdi_full
norm_ul = (ul_full) / rdi_full

nuts = pd.concat([pd.Series(norm.values, name='value'), pd.Series(norm.index, name='name')], axis=1)

In [None]:
# Setup categories of nutrients and a common plotting function
vitamins = ['Vitamin A (IU)','Vitamin B6 (mg)','Vitamin B12 (ug)','Vitamin C (mg)','Vitamin D (IU)',
            'Vitamin E (IU)','Vitamin K (ug)','Thiamin (mg)','Riboflavin (mg)','Niacin (mg)','Folate (ug)','Pantothenic Acid (mg)','Biotin (ug)','Choline (mg)']
minerals = ['Calcium (g)','Chloride (g)','Chromium (ug)','Copper (mg)','Iodine (ug)','Iron (mg)',
            'Magnesium (mg)','Manganese (mg)','Molybdenum (ug)','Phosphorus (g)','Potassium (g)','Selenium (ug)','Sodium (g)','Sulfur (g)','Zinc (mg)']
essential_aminoacids = ['Cystine (mg)','Histidine (mg)','Isoleucine (mg)','Leucine (mg)','Lysine (mg)',
                        'Methionine (mg)','Phenylalanine (mg)','Threonine (mg)','Tryptophan (mg)','Valine (mg)']
other_aminoacids = ['Tyrosine (mg)','Arginine (mg)','Alanine (mg)','Aspartic acid (mg)','Glutamic acid (mg)','Glycine (mg)','Proline (mg)','Serine (mg)','Hydroxyproline (mg)']

def plot_group(nut_names, title):
    nut_names_short = [s.split(' (')[0] for s in nut_names] # Snip off the units from the nutrient names
    
    # Create a bar to indicate an upper limit 
    ul_bar = (norm_ul * 1.04)[nut_names]
    ul_bar[ul_full[nut_names].isnull() == True] = 0
    
    # Create a bar to mask the UL bar so just the end is exposed
    ul_mask = norm_ul[nut_names]
    ul_mask[ul_full[nut_names].isnull() == True] = 0
    
    
    n = []  # normalized values for each bar
    for x, mx in zip(norm[nut_names], ul_mask.values):
        if mx == 0: # no upper limit
            if x < 1.0:
                n.append(1.0 - (x / 2.0))
            else:
                n.append(0.50)
        else:
            n.append(1.0 - (log10(x) / log10(mx)))
    clrs = sm.to_rgba(n, norm=False)
    
    g = sns.barplot(x=ul_bar.values, y=nut_names_short, color='red')
    g.set_xscale('log')
    sns.barplot(x=ul_mask.values, y=nut_names_short, color='white')
    bax = sns.barplot(x=norm[nut_names], y=nut_names_short, label="Total", palette=clrs)
    
    # Add a legend and informative axis label
    g.set( ylabel="",xlabel="Nutrient Mass / RDI (Red Band is UL)", title=title)

    #sns.despine(left=True, bottom=True)

In [None]:
# Construct a group of bar charts for each nutrient group

# Setup the colormap for each bar
cmap = plt.get_cmap('Spectral')
sm = plt.cm.ScalarMappable(cmap=cmap)

#fig = plt.figure(figsize=plt.figaspect(3.))
fig = plt.figure(figsize=(20, 20))
fig.add_subplot(4, 1, 1)
plot_group(vitamins,'Vitamin amounts relative to RDI')
fig.add_subplot(4, 1, 2)
plot_group(minerals,'Mineral amounts relative to RDI')
fig.add_subplot(4, 1, 3)
plot_group(essential_aminoacids,'Essential amino acid amounts relative to RDI')
fig.add_subplot(4, 1, 4)
plot_group(other_aminoacids,'Other amino acid amounts relative to RDI')

#fig.show()
fig.tight_layout()

In [None]:
solution_df