# RDI tables in MySQL are of type float64

In [1]:
# Notebook-wide display setup: widen the page container and let pandas show
# every row and column of a DataFrame.
# display/HTML are imported from IPython.display (the public location);
# importing them from IPython.core.display is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container {width:100% !important;}</style>"))
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)


In [2]:
import os
from getpass import getpass
from urllib.parse import quote_plus

from sqlalchemy import create_engine

# Credentials are read from the environment (RDI_DB_USER / RDI_DB_PASSWORD) so the
# password is not stored in the notebook; without RDI_DB_PASSWORD, prompt for it.
rdi_db_user = os.environ.get("RDI_DB_USER", "root")
rdi_db_password = os.environ.get("RDI_DB_PASSWORD") or getpass("MySQL password for rdi_db: ")
# quote_plus keeps special characters in the credentials from corrupting the URL.
rdi_engine = create_engine(
    f"mysql+pymysql://{quote_plus(rdi_db_user)}:{quote_plus(rdi_db_password)}"
    "@localhost/rdi_db?charset=utf8mb4"
)

In [3]:
import glob
import os

# Collect the RDI CSV files and derive one table name per file.
# '*.csv' (with the dot) only matches real .csv files.
filenames_arr = glob.glob(os.path.join('rdi_csv', '*.csv'))
print(filenames_arr)
# basename + splitext works with any path separator or directory depth and removes
# only the trailing extension (split("/")[1] and replace(".csv", "") did neither).
names_arr = [os.path.splitext(os.path.basename(path))[0] for path in filenames_arr]
print(names_arr)

['rdi_csv/elements_upper_rdi.csv', 'rdi_csv/vitamins_upper_rdi.csv', 'rdi_csv/macronutrients_rdi.csv', 'rdi_csv/elements_rdi.csv', 'rdi_csv/macronutrients_dist_range.csv', 'rdi_csv/vitamins_rdi.csv']
['elements_upper_rdi', 'vitamins_upper_rdi', 'macronutrients_rdi', 'elements_rdi', 'macronutrients_dist_range', 'vitamins_rdi']


In [5]:
import pandas as pd

# Load each CSV into a DataFrame, key the frames by table name, and dump them.
df_arr = list(map(pd.read_csv, filenames_arr))
df_dict = dict(zip(names_arr, df_arr))
for table_name, frame in df_dict.items():
    print(f'{table_name}\n{frame}')
    

elements_upper_rdi
   Life-Stage Group    Arsenic Boron (mg/d) Calcium (mg/d)   Chromium  \
0           Infants    Infants      Infants        Infants    Infants   
1            < 6 mo         ND           ND           1000         ND   
2           < 12 mo         ND           ND           1500         ND   
3          Children   Children     Children       Children   Children   
4          1 to 3 y         ND            3           2500         ND   
5          4 to 8 y         ND            6           2500         ND   
6             Males      Males        Males          Males      Males   
7         9 to 13 y         ND           11           3000         ND   
8        14 to 18 y         ND           17          3 000         ND   
9        19 to 30 y         ND           20           2500         ND   
10       31 to 50 y         ND           20           2500         ND   
11         51 to 70         ND           20           2000         ND   
12           > 70 y         ND  

In [8]:
# Upload to MySQL: one table per frame, replacing a table that already exists.
for table_name, frame in df_dict.items():
    frame.to_sql(table_name, con=rdi_engine, if_exists='replace', index=False, chunksize=1000)

In [6]:
# Column names of macronutrients_dist_range contain the Greek letter alpha (U+03B1);
# replace it with the word "alpha" in every column name.
mac_dist_df = df_dict['macronutrients_dist_range']
new_cols = []
for col_name in mac_dist_df.columns.tolist():
    new_cols.append(col_name.replace("\u03B1", "alpha"))
print(new_cols)
mac_dist_df.columns = new_cols
print(mac_dist_df.columns)
df_dict['macronutrients_dist_range'] = mac_dist_df
#upload


['Life-Stage Group', 'Fat', 'Linoleic Acid', 'alpha-Linolenic Acid', 'Carbohydrate', 'Protein']
Index(['Life-Stage Group', 'Fat', 'Linoleic Acid', 'alpha-Linolenic Acid',
       'Carbohydrate', 'Protein'],
      dtype='object')


In [14]:
# Upload the frame with the renamed columns, replacing the existing table.
# The connection is rdi_engine, the engine this notebook creates; the bare name
# `engine` is not defined anywhere in the notebook.
mac_dist_df.to_sql('macronutrients_dist_range', con=rdi_engine, if_exists='replace',
                   chunksize=1000, index=False)

In [16]:
# Cast every column of every frame to str, then print dtypes, table name and contents.
for table_name in list(df_dict):
    df_dict[table_name] = df_dict[table_name].astype(str)
for table_name, frame in df_dict.items():
    print(f'{frame.dtypes}\n{table_name}\n{frame}')
    

Life-Stage Group     object
Arsenic              object
Boron (mg/d)         object
Calcium (mg/d)       object
Chromium             object
Copper (ug/d)        object
Fluoride (mg/d)      object
Iodine (ug/d)        object
Iron (mg/d)          object
Magnesium (mg/d)     object
Manganese (mg/d)     object
Molybdenum (ug/d)    object
Nickel (mg/d)        object
Phosphorus (g/d)     object
Potassium            object
Selenium (ug/d)      object
Silicon              object
Sulfate              object
Vanadium (mg/d)      object
Zinc (mg/d)          object
Sodium               object
Chloride (g/d)       object
dtype: object
elements_upper_rdi
   Life-Stage Group Arsenic Boron (mg/d) Calcium (mg/d) Chromium  \
0           Infants     nan          nan            nan      nan   
1            < 6 mo     nan          nan         1000.0      nan   
2           < 12 mo     nan          nan         1500.0      nan   
3          Children     nan          nan            nan      nan   
4          

### Waypoint: connect to Mongo for CNF

In [8]:
from mongoengine import connect
# Connect mongoengine to the 'cnf' database. As the last expression of the cell,
# the value returned by connect() is echoed as the cell output.
connect('cnf')

MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True, read_preference=Primary())

In [9]:
from mongo_models import (
    CNFFoodName, CNFConversionFactor, CNFNutrientAmount,
    CNFYieldAmount, CNFRefuseAmount, CNFNutrientName
)



In [11]:
# Smoke-test the Mongo connection: print each food whose description matches the
# search term, together with the type of the returned object.
q = 'bean'
if q:
    foods = CNFFoodName.objects.filter(description__icontains=q)
else:
    foods = []
for food in foods:
    print(food.description, type(food))

Vegetable oil, soybean <class 'mongo_models.CNFFoodName'>
Vegetable oil, soybean lecithin <class 'mongo_models.CNFFoodName'>
Shortening, household, soybean (hydrogenated) and palm <class 'mongo_models.CNFFoodName'>
Shortening, multipurpose, hydrogenated soybean and palm <class 'mongo_models.CNFFoodName'>
Shortening, industrial, for frying (liquid), soybean <class 'mongo_models.CNFFoodName'>
Shortening, industrial, for frying (solid), soybean <class 'mongo_models.CNFFoodName'>
Shortening, industrial, for baking (cake), soybean <class 'mongo_models.CNFFoodName'>
Shortening, industrial, for baking (pastry), soybean <class 'mongo_models.CNFFoodName'>
Soup, bean with bacon, canned, condensed <class 'mongo_models.CNFFoodName'>
Soup, bean with ham, ready-to-serve <class 'mongo_models.CNFFoodName'>
Soup, bean with bacon, dehydrated <class 'mongo_models.CNFFoodName'>
Soup, bean with bacon, canned, condensed, water added <class 'mongo_models.CNFFoodName'>
Soup, bean with bacon, dehydrated, water

### Get the nutrients dict from make_meals_callbacks.py and match it with RDI




In [14]:
from dash_utils.Shiny_utils import (rdi_nutrients, make_food_to_id_dict, get_unit_names,
                                         make_foodgroup_df, make_conversions_df, make_nutrients_df,
                                         get_conversions_multiplier, mult_nutrients_df)
# Food-name lookup structures (used in layout for datalist)
food_to_id_dict, food_names_arr, food_ids_arr = make_food_to_id_dict()

# Walk the CNF nutrient collection once, keeping (name, unit) as plain strings
nutrients = CNFNutrientName.objects
name_unit_pairs = [(str(n.name), str(n.unit)) for n in nutrients]

cnf_nutrient_names_all = [pair[0] for pair in name_unit_pairs]
cnf_nutrient_units_all = [pair[1] for pair in name_unit_pairs]
# dict of cnf nutrient names: nutrient units (a repeated name keeps its last unit)
cnf_nutr_dict = dict(name_unit_pairs)

assert len(cnf_nutrient_names_all) == len(cnf_nutrient_units_all)
num_values = len(cnf_nutrient_names_all)
# Base nutrients table to accumulate totals into; every value starts as the string "0"
nuts_totals_dict = {
    "Name": cnf_nutrient_names_all,
    "Value": ["0"] * num_values,
    "Units": cnf_nutrient_units_all,
}

# Micro "u" is \xb5     with g tacked on

In [15]:
# check units
# Replace the micro sign (\xb5) in "\xb5g" with a plain "u", then append "/d"
# to every unit so the strings use the same "<unit>/d" form as the RDI columns.
ascii_units = [unit.replace("\xb5g", "ug") for unit in cnf_nutrient_units_all]
cnf_units_arr = [unit + '/d' for unit in ascii_units]
print(len(cnf_units_arr))
print(cnf_units_arr)


152
['g/d', 'g/d', 'g/d', 'g/d', 'kCal/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'mg/d', 'g/d', 'g/d', 'g/d', 'mg/d', 'mg/d', 'kJ/d', 'g/d', 'g/d', 'g/d', 'mg/d', 'mg/d', 'mg/d', 'mg/d', 'mg/d', 'mg/d', 'mg/d', 'mg/d', 'mg/d', 'ug/d', 'ug/d', 'ug/d', 'ug/d', 'ug/d', 'mg/d', 'IU/d', 'ug/d', 'ug/d', 'ug/d', 'ug/d', 'ug/d', 'mg/d', 'mg/d', 'mg/d', 'mg/d', 'mg/d', 'mg/d', 'mg/d', 'NE/d', 'mg/d', 'mg/d', 'ug/d', 'ug/d', 'ug/d', 'mg/d', 'ug/d', 'ug/d', 'ug/d', 'ug/d', 'mg/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'mg/d', 'mg/d', 'ug/d', 'mg/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'mg/d', 'mg/d', 'mg/d', 'mg/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g/d', 'g

In [18]:
# Build {nutrient name: units} for CNF, with names lower-cased and, where an
# equivalent exists, replaced by the name used in the RDI tables.
cnf_arr = [nut.lower() for nut in nuts_totals_dict['Name']]

# Lower-cased CNF nutrient name -> RDI nutrient name
CNF_TO_RDI_NAME = {
    "fat (total lipids)": 'fat',
    "carbohydrate, total (by difference)": 'carbohydrate',
    "fibre, total dietary": 'fiber',
    "fatty acids, polyunsaturated, 18:2undifferentiated, linoleic, octadecadienoic": 'linoleic acid',
    'fatty acids, polyunsaturated, 18:3undifferentiated, linolenic, octadecatrienoic': 'alpha-linolenic acid',
    "vitamin d (d2 + d3)": 'vitamin d',
    "niacin (nicotinic acid) preformed": 'niacin',
    "dietary folate equivalents": 'folate',
    'vitamin b12, added': 'vitamin b12',
    'choline, total': 'choline',
    'alpha-tocopherol': 'vitamin e',
    'retinol': 'vitamin a',
    'vitamin b-6': 'vitamin b6',
    'vitamin b-12': 'vitamin b12',
}

# dict of {cnf_nut: cnf_units}; names that collide after lower-casing keep the last units
cnf_nuts_dict = dict(zip(cnf_arr, cnf_units_arr))

# Rename while each unit is still attached to its name. Zipping the renamed names
# against the full cnf_units_arr afterwards would pair names with the wrong units
# whenever the dict above had collapsed duplicate names.
with_units_arr = []
renamed_units = []
for nut, unit in cnf_nuts_dict.items():
    with_units_arr.append(CNF_TO_RDI_NAME.get(nut, nut))
    renamed_units.append(unit)

# Several CNF names can map to one RDI name (e.g. both B12 entries); the last one wins.
cnf_nuts_dict = dict(zip(with_units_arr, renamed_units))


In [19]:
# Show the CNF nutrient -> units mapping, one "name:units" pair per line.
for cnf_name in cnf_nuts_dict:
    print(f'{cnf_name}:{cnf_nuts_dict[cnf_name]}')

protein:g/d
fat:g/d
carbohydrate:g/d
ash, total:g/d
energy (kilocalories):kCal/d
starch:g/d
sucrose:g/d
glucose:g/d
fructose:g/d
lactose:g/d
maltose:g/d
alcohol:g/d
oxalic acid:mg/d
moisture:g/d
mannitol:g/d
sorbitol:g/d
caffeine:mg/d
theobromine:mg/d
energy (kilojoules):kJ/d
sugars, total:g/d
galactose:g/d
fiber:g/d
calcium:mg/d
iron:mg/d
magnesium:mg/d
phosphorus:mg/d
potassium:mg/d
sodium:mg/d
zinc:mg/d
copper:mg/d
manganese:mg/d
selenium:ug/d
vitamin a:ug/d
retinol activity equivalents:ug/d
beta carotene:ug/d
alpha carotene:ug/d
vitamin e:mg/d
vitamin d (international units):IU/d
vitamin d2, ergocalciferol:ug/d
vitamin d:ug/d
beta cryptoxanthin:ug/d
lycopene:ug/d
lutein and zeaxanthin:ug/d
beta-tocopherol:mg/d
gamma-tocopherol:mg/d
delta-tocopherol:mg/d
vitamin c:mg/d
thiamin:mg/d
riboflavin:mg/d
niacin:mg/d
total niacin equivalent:NE/d
pantothenic acid:mg/d
vitamin b6:mg/d
biotin:ug/d
total folacin:ug/d
vitamin b12:ug/d
choline:mg/d
vitamin k:ug/d
folic acid:ug/d
naturally occurri

In [36]:
#rename to cnf_arr
#cnf_arr = with_units_arr

# Waypoint: Start here to get rdi tables from MySQL

In [1]:
# RDI df_dict is rebuilt from MySQL rather than the CSVs, since the CSVs have the alpha symbol
import os
from getpass import getpass
from urllib.parse import quote_plus

from sqlalchemy import create_engine, inspect, MetaData, Table, Column, Integer, String
import pymysql

# Credentials are read from the environment (RDI_DB_USER / RDI_DB_PASSWORD) so the
# password is not stored in the notebook; without RDI_DB_PASSWORD, prompt for it.
rdi_db_user = os.environ.get("RDI_DB_USER", "root")
rdi_db_password = os.environ.get("RDI_DB_PASSWORD") or getpass("MySQL password for rdi_db: ")
# quote_plus keeps special characters in the credentials from corrupting the URL.
rdi_engine = create_engine(
    f"mysql+pymysql://{quote_plus(rdi_db_user)}:{quote_plus(rdi_db_password)}"
    "@localhost/rdi_db?charset=utf8mb4"
)  # add echo=True to log the emitted SQL

# Engine.table_names() is deprecated; the inspector is the supported way to list tables.
print(inspect(rdi_engine).get_table_names())



['elements_rdi', 'elements_upper_rdi', 'females_calories', 'macronutrients_dist_range', 'macronutrients_rdi', 'males_calories', 'vitamins_rdi', 'vitamins_upper_rdi']


In [2]:
import pandas as pd
from sqlalchemy import inspect

# Read every table of the database behind rdi_engine into a DataFrame keyed by table name,
# printing each frame's dtypes and contents along the way.
df_dict = {}
# Engine.table_names() is deprecated; list the tables through the inspector instead.
for name in inspect(rdi_engine).get_table_names():
    # The table name comes from the database's own table list, not from user input.
    sql = "SELECT * from " + name
    df = pd.read_sql_query(sql, rdi_engine)
    df_dict[name] = df
    print(f'{name}, {df.dtypes}\n{df}')



elements_rdi, Life-Stage Group     object
Calcium (mg/d)       object
Chromium (ug/d)      object
Copper (ug/d)        object
Fluoride (mg/d)      object
Iodine (ug/d)        object
Iron (mg/d)          object
Magnesium (mg/d)     object
Manganese (mg/d)     object
Molybdenum (ug/d)    object
Phosphorus (mg/d)    object
Selenium (ug/d)      object
Zinc (mg/d)          object
Potassium (mg/d)     object
Sodium (mg/d)        object
Chloride (g/d)       object
dtype: object
   Life-Stage Group Calcium (mg/d) Chromium (ug/d) Copper (ug/d)  \
0           Infants           None            None          None   
1            < 6 mo            200             0.2           200   
2           < 12 mo            260             5.5           220   
3          Children           None            None          None   
4          1 to 3 y            700              11           340   
5          4 to 8 y           1000              15           440   
6             Males           None            No

In [17]:
# NOTE: everything in this cell is inside a string literal, so none of it executes.
# The disabled snippet blanked the life-stage header words repeated across the value
# columns of the two elements tables and wrote both tables back to MySQL.
'''
#edit elements and elements upper tables so lifestage is not across cols, change to NaN
elems_rdi = df_dict['elements_rdi']
elems_upper = df_dict['elements_upper_rdi']

need_change_dict = {'elements_rdi':elems_rdi, 'elements_upper_rdi': elems_upper}

for name, df in need_change_dict.items():
    cols = list(df.columns)
    key_cols = cols[1:]
    for idx, row in df.iterrows():
        for col in key_cols:
            val = df.loc[idx, col]
            if val in ['Infants', 'Children', 'Males', "Females", "Pregnancy", "Lactation"]:
                df.loc[idx, col] = None

for name, df in need_change_dict.items():
    #print(f'{name}\n{df}')
    df.to_sql(name, con=rdi_engine, if_exists='replace', index=False)
            
'''

In [3]:
# Sanity check: number of tables loaded into df_dict and the names they are keyed by.
print(len(df_dict))
print(df_dict.keys())

8
dict_keys(['elements_rdi', 'elements_upper_rdi', 'females_calories', 'macronutrients_dist_range', 'macronutrients_rdi', 'males_calories', 'vitamins_rdi', 'vitamins_upper_rdi'])


# Waypoint: multiply the macro columns by num_days to get RDI amounts for the period

In [19]:
# String-typed copy of the macronutrient RDI table (the frame in df_dict is not modified);
# show its first ten rows.
macro_rdi = df_dict['macronutrients_rdi'].copy().astype(str)
macro_rdi.head(10)

Unnamed: 0,Life-Stage Group,Total Water (L/d),Carbohydrates (g/d),Total Fiber (g/d),Fat (g/d),Linoleic Acid (g/d),alpha-Linolenic Acid (g/d),Protein (g/d)
0,Infants,,,,,,,
1,< 6 mo,0.7,60.0,ND,31,4.4,0.5,9.1
2,< 12 mo,0.8,95.0,ND,30,4.6,0.5,11.0
3,Children,,,,,,,
4,1 to 3 y,1.3,130.0,19,ND,7.0,0.7,13.0
5,4 to 8 y,1.7,130.0,25,ND,10.0,0.9,19.0
6,Males,,,,,,,
7,9 to 13 y,2.4,130.0,31,ND,12.0,1.2,34.0
8,14 to 18 y,3.3,130.0,38,ND,16.0,1.6,52.0
9,19 to 30 y,3.7,130.0,38,ND,17.0,1.6,56.0


In [11]:
#! conda list
! pip install pandas==1.1.4

Collecting pandas==1.1.4
  Downloading pandas-1.1.4-cp37-cp37m-manylinux1_x86_64.whl (9.5 MB)
[K     |████████████████████████████████| 9.5 MB 6.6 MB/s eta 0:00:01     |█████████████████▊              | 5.3 MB 3.1 MB/s eta 0:00:02
Installing collected packages: pandas
  Attempting uninstall: pandas
    Found existing installation: pandas 1.1.1
    Uninstalling pandas-1.1.1:
      Successfully uninstalled pandas-1.1.1
Successfully installed pandas-1.1.4


In [6]:
# Scale the per-day macronutrient amounts to a period of num_days days.
macro_r = macro_rdi.copy().astype(str)
print(macro_r.dtypes)

num_days = 5.
cols = list(macro_r.columns)
key_cols = cols[1:]  # every column after the first (the life-stage label)
# Cells holding one of these strings are not numeric and are left unchanged
# (original note: delete infants (fiber is ND, for fat ND, mult cals by num days))
NOT_NUMERIC = ('ND', 'None', 'nan')
for idx in macro_r.index:
    for col in key_cols:
        curr_val = macro_r.loc[idx, col]
        if curr_val not in NOT_NUMERIC:
            macro_r.loc[idx, col] = float(curr_val) * num_days

print(macro_r.iloc[:10])

'''
col_count = len(list(macro_r.columns))
    for i in range (1, col_count):
        s = pd.Series(macro_r.iloc[:,i])
        if "ND" in s.values:
            continue
        else:
            float_col = rdi_df.iloc[:, i].astype(float)
'''

Life-Stage Group              object
Total Water (L/d)             object
Carbohydrates (g/d)           object
Total Fiber (g/d)             object
Fat (g/d)                     object
Linoleic Acid (g/d)           object
alpha-Linolenic Acid (g/d)    object
Protein (g/d)                 object
dtype: object
  Life-Stage Group Total Water (L/d) Carbohydrates (g/d) Total Fiber (g/d)  \
0          Infants               nan                 nan              None   
1           < 6 mo               3.5                 300                ND   
2          < 12 mo                 4                 475                ND   
3         Children               nan                 nan              None   
4         1 to 3 y               6.5                 650                95   
5         4 to 8 y               8.5                 650               125   
6            Males               nan                 nan              None   
7        9 to 13 y                12                 650          

'\ncol_count = len(list(macro_r.columns))\n    for i in range (1, col_count):\n        s = pd.Series(macro_r.iloc[:,i])\n        if "ND" in s.values:\n            continue\n        else:\n            float_col = rdi_df.iloc[:, i].astype(float)\n'

# Practice indexing into rdi_df's

In [26]:
# Pull the three RDI tables out of df_dict, then print the macronutrient
# table and its row count.
rdi_elems_df, rdi_vits_df, rdi_macros_df = (
    df_dict[table] for table in ('elements_rdi', 'vitamins_rdi', 'macronutrients_rdi')
)

print(rdi_macros_df)
print(rdi_macros_df.shape[0])

   Life-Stage Group  Total Water (L/d)  Carbohydrates (g/d)  \
0           Infants                NaN                  NaN   
1            < 6 mo                0.7                 60.0   
2           < 12 mo                0.8                 95.0   
3          Children                NaN                  NaN   
4          1 to 3 y                1.3                130.0   
5          4 to 8 y                1.7                130.0   
6             Males                NaN                  NaN   
7         9 to 13 y                2.4                130.0   
8        14 to 18 y                3.3                130.0   
9        19 to 30 y                3.7                130.0   
10       31 to 50 y                3.7                130.0   
11         51 to 70                3.7                130.0   
12           > 70 y                3.7                130.0   
13          Females                NaN                  NaN   
14        9 to 13 y                2.1                1

In [30]:
# The 'Life-Stage Group' column mixes section headers (Infants, Children, Males, ...)
# with the age rows listed under each header. Filtering on an age label alone is not
# enough (original note: '31 to 50 y' returns 4 rows), so locate each header row and
# slice out the section between two headers first.
lifestage_col = rdi_macros_df['Life-Stage Group']


def _header_idx(label):
    """Return the index label of the first row whose Life-Stage Group equals `label`.

    Raises IndexError when no row carries that label.
    """
    return rdi_macros_df.index[lifestage_col == label][0]


infants_idx = _header_idx('Infants')
child_idx = _header_idx('Children')
male_idx = _header_idx('Males')
fem_idx = _header_idx('Females')
preg_idx = _header_idx('Pregnancy')
# 'Lacation' (sic): this lookup succeeds against the stored table (the recorded output
# shows index 24), so the spelling must not be corrected here unless the data is fixed too.
lact_idx = _header_idx('Lacation')
print(infants_idx, child_idx, male_idx, fem_idx, preg_idx, lact_idx)

#only lact_idx is [lact_idx:]
lifestage_idxs = [infants_idx,
                  child_idx,
                  male_idx,
                  fem_idx,
                  preg_idx,
                  lact_idx]

#dial into > male and < fem
user_type = 'male'
age_group = '31 to 50 y'
nut = 'protein'

# NOTE(review): the index labels are used as positions here, which presumes the
# frame has the default 0..n-1 index - confirm if the frame is ever re-indexed.
portion = rdi_macros_df.iloc[male_idx:fem_idx, :]
print(portion)

row = portion[portion['Life-Stage Group']==age_group]
print(row)

cols = row.columns
# Compare case-insensitively: the headers are capitalised ('Protein (g/d)') while nut
# is lower-case. A case-sensitive test never matches, and the lookup then only works
# when the wanted column happens to be the last one.
target_col = next((c for c in cols if nut.lower() in str(c).lower()), None)
if target_col is None:
    raise KeyError(f'no column matching {nut!r} in {list(cols)}')
print(target_col)

val = row[target_col].item()
print(val)

# Empty frame with the same columns, with a single value written into the target column
cp_portion = pd.DataFrame(columns=cols)
cp_portion.loc[0, target_col] = 100
print(cp_portion)




0 3 6 13 20 24
   Life-Stage Group  Total Water (L/d)  Carbohydrates (g/d)  \
6             Males                NaN                  NaN   
7         9 to 13 y                2.4                130.0   
8        14 to 18 y                3.3                130.0   
9        19 to 30 y                3.7                130.0   
10       31 to 50 y                3.7                130.0   
11         51 to 70                3.7                130.0   
12           > 70 y                3.7                130.0   

    Total Fiber (g/d)  Fat (g/d)  Linoleic Acid (g/d)  \
6                 NaN        NaN                  NaN   
7                31.0        NaN                 12.0   
8                38.0        NaN                 16.0   
9                38.0        NaN                 17.0   
10               38.0        NaN                 17.0   
11               30.0        NaN                 14.0   
12               30.0        NaN                 14.0   

    alpha-Linolenic Aci

# waypoint, get cols from rdi elem, vits, macros and strip units

In [7]:
# function to take out units
def strip_units(nuts_arr):
    """Split "<name> (<unit>)" strings into a {name: unit} dict.

    Recognised unit markers are '(g/d)', '(mg/d)', '(ug/d)' and '(l/d)', matched
    case-sensitively. For the first marker found in a string, every occurrence of
    that marker is removed, the remainder is stripped of surrounding whitespace,
    and it is stored with the unit (without parentheses). A string with no
    recognised marker is stored unchanged with '' as its unit
    (carotenoid -> alpha-carotene, beta-carotene?).

    Parameters:
        nuts_arr: iterable of nutrient strings.

    Returns:
        dict mapping cleaned name -> unit string, in input order; a repeated
        cleaned name keeps the unit of its last occurrence.
    """
    known_units = ('g/d', 'mg/d', 'ug/d', 'l/d')
    nuts_units_dict = {}
    for nut in nuts_arr:
        for unit in known_units:
            marker = f'({unit})'
            if marker in nut:
                nuts_units_dict[nut.replace(marker, '').strip()] = unit
                break
        else:
            # no unit marker present: keep the name as given
            nuts_units_dict[nut] = ''

    return nuts_units_dict

In [6]:
# Empty lists for the RDI element / vitamin / macro names
rdi_elements, rdi_vitamins, rdi_macros = [], [], []

# The three RDI tables those names are taken from
elements_df = df_dict['elements_rdi']
vitamins_df = df_dict['vitamins_rdi']
macros_df = df_dict['macronutrients_rdi']

# Preview the first rows of each table
for preview in (elements_df.head(), vitamins_df.head(), macros_df.head()):
    print(preview)


  Life-Stage Group Calcium (mg/d) Chromium (ug/d) Copper (ug/d)  \
0          Infants           None            None          None   
1           < 6 mo            200             0.2           200   
2          < 12 mo            260             5.5           220   
3         Children           None            None          None   
4         1 to 3 y            700              11           340   

  Fluoride (mg/d) Iodine (ug/d) Iron (mg/d) Magnesium (mg/d) Manganese (mg/d)  \
0            None          None        None             None             None   
1            0.01           110        0.27               30            0.003   
2             0.5           130          11               75              0.6   
3            None          None        None             None             None   
4             0.7            90           7               80              1.2   

  Molybdenum (ug/d) Phosphorus (mg/d) Selenium (ug/d) Zinc (mg/d)  \
0              None              None    

In [24]:
def _names_and_units(rdi_df):
    """Return (lower-cased column names, {name: units}) for one RDI table.

    The 'Life-Stage Group' column is skipped. Also prints the number of names,
    the number of dict entries, and each "name:units" pair.
    """
    names = [c.lower() for c in rdi_df.columns.tolist() if c != "Life-Stage Group"]
    units_by_name = strip_units(names)
    print(len(names))
    print(len(units_by_name))
    for nut, units in units_by_name.items():
        print(f'{nut}:{units}')
    return names, units_by_name


rdi_elements, rdi_elems_dict = _names_and_units(elements_df)
rdi_vitamins, rdi_vits_dict = _names_and_units(vitamins_df)
rdi_macros, rdi_macros_dict = _names_and_units(macros_df)


#print(f'{rdi_elements}\n{rdi_vitamins}\n{rdi_macros}\n')

15
15
calcium:mg/d
chromium:ug/d
copper:ug/d
fluoride:mg/d
iodine:ug/d
iron:mg/d
magnesium:mg/d
manganese:mg/d
molybdenum:ug/d
phosphorus:mg/d
selenium:ug/d
zinc:mg/d
potassium:mg/d
sodium:mg/d
chloride:g/d
14
14
vitamin a:ug/d
vitamin c:mg/d
vitamin d:ug/d
vitamin e:mg/d
vitamin k:ug/d
thiamin:mg/d
riboflavin:mg/d
niacin:mg/d
vitamin b6:mg/d
folate:ug/d
vitamin b12:ug/d
pantothenic acid:mg/d
biotin:ug/d
choline:mg/d
7
7
total water:l/d
carbohydrates:g/d
total fiber:g/d
fat:g/d
linoleic acid:g/d
alpha-linolenic acid:g/d
protein:g/d


In [35]:
# Show the CNF nutrient -> units mapping, one "name:units" pair per line.
for cnf_name in cnf_nuts_dict:
    print(f'{cnf_name}:{cnf_nuts_dict[cnf_name]}')

protein:g/d
fat:g/d
carbohydrate:g/d
ash, total:g/d
energy (kilocalories):kCal/d
starch:g/d
sucrose:g/d
glucose:g/d
fructose:g/d
lactose:g/d
maltose:g/d
alcohol:g/d
oxalic acid:mg/d
moisture:g/d
mannitol:g/d
sorbitol:g/d
caffeine:mg/d
theobromine:mg/d
energy (kilojoules):kJ/d
sugars, total:g/d
galactose:g/d
fiber:g/d
calcium:mg/d
iron:mg/d
magnesium:mg/d
phosphorus:mg/d
potassium:mg/d
sodium:mg/d
zinc:mg/d
copper:mg/d
manganese:mg/d
selenium:ug/d
retinol:ug/d
retinol activity equivalents:ug/d
beta carotene:ug/d
alpha carotene:ug/d
alpha-tocopherol:mg/d
vitamin d (international units):IU/d
vitamin d2, ergocalciferol:ug/d
vitamin d (d2 + d3):ug/d
beta cryptoxanthin:ug/d
lycopene:ug/d
lutein and zeaxanthin:ug/d
beta-tocopherol:mg/d
gamma-tocopherol:mg/d
delta-tocopherol:mg/d
vitamin c:mg/d
thiamin:mg/d
riboflavin:mg/d
niacin (nicotinic acid) preformed:mg/d
total niacin equivalent:NE/d
pantothenic acid:mg/d
vitamin b-6:mg/d
biotin:ug/d
total folacin:ug/d
vitamin b-12:ug/d
choline, total:mg

In [37]:
# Merge the three RDI {name: units} dicts into one; if a name repeats,
# the units from the later dict replace the earlier ones.
elems_vits_dict = dict(rdi_elems_dict)
elems_vits_dict.update(rdi_vits_dict)
rdi_dict = dict(elems_vits_dict)
rdi_dict.update(rdi_macros_dict)
for rdi_name, rdi_unit in rdi_dict.items():
    print(f'{rdi_name}:{rdi_unit}')

calcium:mg/d
chromium:ug/d
copper:ug/d
fluoride:mg/d
iodine:ug/d
iron:mg/d
magnesium:mg/d
manganese:mg/d
molybdenum:ug/d
phosphorus:mg/d
selenium:ug/d
zinc:mg/d
potassium:mg/d
sodium:mg/d
chloride:g/d
vitamin a:ug/d
vitamin c:mg/d
vitamin d:ug/d
vitamin e:mg/d
vitamin k:ug/d
thiamin:mg/d
riboflavin:mg/d
niacin:mg/d
vitamin b6:mg/d
folate:ug/d
vitamin b12:ug/d
pantothenic acid:mg/d
biotin:ug/d
choline:mg/d
total water:l/d
carbohydrates:g/d
total fiber:g/d
fat:g/d
linoleic acid:g/d
alpha-linolenic acid:g/d
protein:g/d


In [38]:
# deepcopy dict
import copy
# Independent copy of rdi_dict: changes to either dict do not affect the other.
rdi_backup_dict = copy.deepcopy(rdi_dict) 

In [40]:

# Drop the extra word "total" from RDI names (e.g. "total fiber" -> "fiber");
# names without "total" are kept exactly as they are.
def _without_total(name):
    """Remove every 'total' from `name` and strip whitespace; no-op when 'total' is absent."""
    return name.replace('total', '').strip() if 'total' in name else name


rdi_all_dict = {_without_total(nut): units for nut, units in rdi_dict.items()}

print(rdi_all_dict)


{'calcium': 'mg/d', 'chromium': 'ug/d', 'copper': 'ug/d', 'fluoride': 'mg/d', 'iodine': 'ug/d', 'iron': 'mg/d', 'magnesium': 'mg/d', 'manganese': 'mg/d', 'molybdenum': 'ug/d', 'phosphorus': 'mg/d', 'selenium': 'ug/d', 'zinc': 'mg/d', 'potassium': 'mg/d', 'sodium': 'mg/d', 'chloride': 'g/d', 'vitamin a': 'ug/d', 'vitamin c': 'mg/d', 'vitamin d': 'ug/d', 'vitamin e': 'mg/d', 'vitamin k': 'ug/d', 'thiamin': 'mg/d', 'riboflavin': 'mg/d', 'niacin': 'mg/d', 'vitamin b6': 'mg/d', 'folate': 'ug/d', 'vitamin b12': 'ug/d', 'pantothenic acid': 'mg/d', 'biotin': 'ug/d', 'choline': 'mg/d', 'water': 'l/d', 'carbohydrates': 'g/d', 'fiber': 'g/d', 'fat': 'g/d', 'linoleic acid': 'g/d', 'alpha-linolenic acid': 'g/d', 'protein': 'g/d'}


In [42]:
# cnf_nuts_dict vs rdi_all_dict
# Compare the incoming CNF nutrient names with the RDI benchmarks:
#   exacts      - [[{cnf: units}, {rdi: units}], ...] names match and units are identical
#   converts    - same shape; names match but units differ, so a conversion is needed
#   cnf_unfound - [{cnf: units}, ...] CNF nutrients with no RDI counterpart
#   rdi_matched - [{rdi: units}, ...] RDI nutrients that were paired with a CNF nutrient
# A CNF name matches an RDI name when it is a substring of it
# (e.g. 'carbohydrate' matches 'carbohydrates').
exacts = []
converts = []
cnf_unfound = []
rdi_matched = []
for nut, units in cnf_nuts_dict.items():
    matched = False
    for rdi_nut, rdi_units in rdi_all_dict.items():
        if nut not in rdi_nut:
            continue
        matched = True
        pair = [{nut: units}, {rdi_nut: rdi_units}]
        # Units must be compared for equality: a substring test treats 'g/d' as
        # contained in 'mg/d' / 'ug/d' and would file a needed conversion under exacts.
        if units == rdi_units:
            exacts.append(pair)
        else:
            converts.append(pair)
        rdi_matched.append({rdi_nut: rdi_units})

    if not matched:
        cnf_unfound.append({nut: units})

#also need which rdi are not in cnf

print(len(exacts), exacts)
print(f'\n\nconvert: {len(converts)}\n{converts}')
print(f'len rdi_all_dict: {len(rdi_all_dict)}')
print(f'\n\nrdi matched: {len(rdi_matched)}\n{rdi_matched}')
print(f'\n\nno match:{len(cnf_unfound)}\n{cnf_unfound}')
        
        

19 [[{'protein': 'g/d'}, {'protein': 'g/d'}], [{'fat': 'g/d'}, {'fat': 'g/d'}], [{'carbohydrate': 'g/d'}, {'carbohydrates': 'g/d'}], [{'fiber': 'g/d'}, {'fiber': 'g/d'}], [{'calcium': 'mg/d'}, {'calcium': 'mg/d'}], [{'iron': 'mg/d'}, {'iron': 'mg/d'}], [{'magnesium': 'mg/d'}, {'magnesium': 'mg/d'}], [{'phosphorus': 'mg/d'}, {'phosphorus': 'mg/d'}], [{'potassium': 'mg/d'}, {'potassium': 'mg/d'}], [{'sodium': 'mg/d'}, {'sodium': 'mg/d'}], [{'zinc': 'mg/d'}, {'zinc': 'mg/d'}], [{'manganese': 'mg/d'}, {'manganese': 'mg/d'}], [{'selenium': 'ug/d'}, {'selenium': 'ug/d'}], [{'vitamin c': 'mg/d'}, {'vitamin c': 'mg/d'}], [{'thiamin': 'mg/d'}, {'thiamin': 'mg/d'}], [{'riboflavin': 'mg/d'}, {'riboflavin': 'mg/d'}], [{'pantothenic acid': 'mg/d'}, {'pantothenic acid': 'mg/d'}], [{'biotin': 'ug/d'}, {'biotin': 'ug/d'}], [{'vitamin k': 'ug/d'}, {'vitamin k': 'ug/d'}]]


convert: 1
[[{'copper': 'mg/d'}, {'copper': 'ug/d'}]]
len rdi_all_dict: 36


rdi matched: 20
[{'protein': 'g/d'}, {'fat': 'g/d'}, {

In [44]:
# Find which RDI nutrients did not match: keep every RDI name that is not
# contained in the name of any entry of rdi_matched.
matched_names = [nut for match in rdi_matched for nut in match]
rdi_unfound = {
    name: units
    for name, units in rdi_all_dict.items()
    if not any(name in matched for matched in matched_names)
}

print(rdi_unfound)
        
        

{'chromium': 'ug/d', 'fluoride': 'mg/d', 'iodine': 'ug/d', 'molybdenum': 'ug/d', 'chloride': 'g/d', 'vitamin a': 'ug/d', 'vitamin d': 'ug/d', 'vitamin e': 'mg/d', 'niacin': 'mg/d', 'vitamin b6': 'mg/d', 'folate': 'ug/d', 'vitamin b12': 'ug/d', 'choline': 'mg/d', 'water': 'l/d', 'linoleic acid': 'g/d', 'alpha-linolenic acid': 'g/d'}


In [46]:
# Flatten cnf_unfound (a list of {name: units} dicts) into a single dict.
cnf_missing = {}
for pair in cnf_unfound:
    cnf_missing.update(pair)
print(cnf_missing)

{'ash, total': 'g/d', 'energy (kilocalories)': 'kCal/d', 'starch': 'g/d', 'sucrose': 'g/d', 'glucose': 'g/d', 'fructose': 'g/d', 'lactose': 'g/d', 'maltose': 'g/d', 'alcohol': 'g/d', 'oxalic acid': 'mg/d', 'moisture': 'g/d', 'mannitol': 'g/d', 'sorbitol': 'g/d', 'caffeine': 'mg/d', 'theobromine': 'mg/d', 'energy (kilojoules)': 'kJ/d', 'sugars, total': 'g/d', 'galactose': 'g/d', 'retinol': 'ug/d', 'retinol activity equivalents': 'ug/d', 'beta carotene': 'ug/d', 'alpha carotene': 'ug/d', 'alpha-tocopherol': 'mg/d', 'vitamin d (international units)': 'IU/d', 'vitamin d2, ergocalciferol': 'ug/d', 'vitamin d (d2 + d3)': 'ug/d', 'beta cryptoxanthin': 'ug/d', 'lycopene': 'ug/d', 'lutein and zeaxanthin': 'ug/d', 'beta-tocopherol': 'mg/d', 'gamma-tocopherol': 'mg/d', 'delta-tocopherol': 'mg/d', 'niacin (nicotinic acid) preformed': 'mg/d', 'total niacin equivalent': 'NE/d', 'vitamin b-6': 'mg/d', 'total folacin': 'ug/d', 'vitamin b-12': 'ug/d', 'choline, total': 'mg/d', 'folic acid': 'ug/d', 'na

# Waypoint: search for RDI names inside CNF names. The previous pass looked for CNF names inside RDI names, but CNF names are longer, so this direction finds more matches

In [50]:
# Compare rdi_unfound with cnf_missing: pair each unmatched RDI name with every
# unmatched CNF name that contains it; RDI names with no such CNF name go to no_matches.
matches = []
no_matches = []
for name, units in rdi_unfound.items():
    hits = [[{name: units}, {miss: meas}] for miss, meas in cnf_missing.items() if name in miss]
    if hits:
        matches.extend(hits)
    else:
        no_matches.append({name: units})

print(f'matches:\n{matches}\nno matches:\n{no_matches}')

matches:
[[{'vitamin d': 'ug/d'}, {'vitamin d (international units)': 'IU/d'}], [{'vitamin d': 'ug/d'}, {'vitamin d2, ergocalciferol': 'ug/d'}], [{'vitamin d': 'ug/d'}, {'vitamin d (d2 + d3)': 'ug/d'}], [{'niacin': 'mg/d'}, {'niacin (nicotinic acid) preformed': 'mg/d'}], [{'niacin': 'mg/d'}, {'total niacin equivalent': 'NE/d'}], [{'folate': 'ug/d'}, {'naturally occurring folate': 'ug/d'}], [{'folate': 'ug/d'}, {'dietary folate equivalents': 'ug/d'}], [{'vitamin b12': 'ug/d'}, {'vitamin b12, added': 'ug/d'}], [{'choline': 'mg/d'}, {'choline, total': 'mg/d'}]]
no matches:
[{'chromium': 'ug/d'}, {'fluoride': 'mg/d'}, {'iodine': 'ug/d'}, {'molybdenum': 'ug/d'}, {'chloride': 'g/d'}, {'vitamin a': 'ug/d'}, {'vitamin e': 'mg/d'}, {'vitamin b6': 'mg/d'}, {'water': 'l/d'}, {'linoleic acid': 'g/d'}, {'alpha-linolenic acid': 'g/d'}]


RDI nutrients with no match among the CNF names: <br>

{'chromium': 'ug/d'} <br>
{'fluoride': 'mg/d'} <br>
{'iodine': 'ug/d'} <br>
{'molybdenum': 'ug/d'} <br>
{'chloride': 'g/d'} <br>
{'water': 'l/d'} <br>
<br>
RDI nutrients with no automatic match, with a candidate CNF equivalent: <br>
{'vitamin a': 'ug/d'} = 'retinol': 'ug/d' <br>
{'vitamin e': 'mg/d'} = 'alpha-tocopherol': 'mg/d' <br>
{'vitamin b6': 'mg/d'} = 'vitamin b-6': 'mg/d' <br>
{'linoleic acid': 'g/d'} = there are 4 matches when searching for just 'linoleic', so sum them? <br>
{'alpha-linolenic acid': 'g/d'} = also 4 matches when searching for just 'linolenic' <br>



# Done here; the cells below are other, unrelated work

### Make `cnf_elems_dict`, `cnf_vits_dict`, `cnf_macros_dict`

In [120]:
# Split the exactly-matched CNF nutrients into elements / vitamins / macros.
# A CNF nutrient is filed under a category when its name is a substring of at
# least one RDI nutrient name in that category. The three categories are
# checked independently, so a nutrient may land in more than one dict.
cnf_elems_dict, cnf_vits_dict, cnf_macros_dict = {}, {}, {}

# (RDI names of a category, CNF dict that collects that category)
category_targets = (
    (rdi_elems_dict, cnf_elems_dict),
    (rdi_vits_dict, cnf_vits_dict),
    (rdi_macros_dict, cnf_macros_dict),
)

for match in exacts:
    # match[0] is the CNF side of the pair; only its first entry is used.
    cnf_nut, cnf_units = list(match[0].items())[0]
    for rdi_names, cnf_target in category_targets:
        # any() stops at the first hit. The original ended each inner loop
        # body with `continue`, which is a no-op there, so the scan always
        # ran over the whole category even after a match.
        if any(cnf_nut in rdi_nut for rdi_nut in rdi_names):
            cnf_target[cnf_nut] = cnf_units

print(len(cnf_elems_dict), len(cnf_vits_dict), len(cnf_macros_dict))
for cnf_target in (cnf_elems_dict, cnf_vits_dict, cnf_macros_dict):
    for nut, unit in cnf_target.items():
        print(f'{nut}:{unit}')

9 6 4
calcium:mg/d
iron:mg/d
magnesium:mg/d
phosphorus:mg/d
potassium:mg/d
sodium:mg/d
zinc:mg/d
manganese:mg/d
selenium:ug/d
vitamin c:mg/d
thiamin:mg/d
riboflavin:mg/d
pantothenic acid:mg/d
biotin:ug/d
vitamin k:ug/d
protein:g/d
fat:g/d
carbohydrate:g/d
fiber:g/d


In [122]:
# Same categorisation as the dict-based cell, but keeping the full
# [{cnf_nutrient: units}, {rdi_nutrient: units}] pair for every match.
cnf_elems_dicts, cnf_vits_dicts, cnf_macros_dicts = [], [], []

# (RDI names of a category, list that collects that category's pairs)
category_targets = (
    (rdi_elems_dict, cnf_elems_dicts),
    (rdi_vits_dict, cnf_vits_dicts),
    (rdi_macros_dict, cnf_macros_dicts),
)

for match in exacts:
    # match[0] is the CNF side, match[1] the RDI side; only the first entry
    # of each is used.
    cnf_nut, cnf_units = list(match[0].items())[0]
    rdi_nut, rdi_units = list(match[1].items())[0]
    for rdi_names, pairs in category_targets:
        # Append at most once per category. The original `continue` after the
        # append was a no-op, so a CNF name contained in several RDI names of
        # the same category would have been appended once per hit.
        if any(cnf_nut in rdi_n for rdi_n in rdi_names):
            pairs.append([{cnf_nut: cnf_units}, {rdi_nut: rdi_units}])

print(len(cnf_elems_dicts), len(cnf_vits_dicts), len(cnf_macros_dicts))
print(cnf_elems_dicts, cnf_vits_dicts, cnf_macros_dicts)


9 6 4
[[{'calcium': 'mg/d'}, {'calcium': 'mg/d'}], [{'iron': 'mg/d'}, {'iron': 'mg/d'}], [{'magnesium': 'mg/d'}, {'magnesium': 'mg/d'}], [{'phosphorus': 'mg/d'}, {'phosphorus': 'mg/d'}], [{'potassium': 'mg/d'}, {'potassium': 'mg/d'}], [{'sodium': 'mg/d'}, {'sodium': 'mg/d'}], [{'zinc': 'mg/d'}, {'zinc': 'mg/d'}], [{'manganese': 'mg/d'}, {'manganese': 'mg/d'}], [{'selenium': 'ug/d'}, {'selenium': 'ug/d'}]] [[{'vitamin c': 'mg/d'}, {'vitamin c': 'mg/d'}], [{'thiamin': 'mg/d'}, {'thiamin': 'mg/d'}], [{'riboflavin': 'mg/d'}, {'riboflavin': 'mg/d'}], [{'pantothenic acid': 'mg/d'}, {'pantothenic acid': 'mg/d'}], [{'biotin': 'ug/d'}, {'biotin': 'ug/d'}], [{'vitamin k': 'ug/d'}, {'vitamin k': 'ug/d'}]] [[{'protein': 'g/d'}, {'protein': 'g/d'}], [{'fat': 'g/d'}, {'fat': 'g/d'}], [{'carbohydrate': 'g/d'}, {'carbohydrate': 'g/d'}], [{'fiber': 'g/d'}, {'fiber': 'g/d'}]]


In [117]:
# Categorise the exactly-matched CNF nutrients by looking up the RDI side of
# each pair as an exact key in the per-category RDI dicts. The first category
# that contains the key wins (elements, then vitamins, then macros).
# NOTE(review): this cell rebinds cnf_elems_dict / cnf_vits_dict /
# cnf_macros_dict, the same names the substring-based cell assigns, and its
# recorded output (9 6 2) differs from that cell's (9 6 4). When the notebook
# is run top to bottom, this result is the one that survives; confirm that is
# intended.
cnf_elems_dict, cnf_vits_dict, cnf_macros_dict = {}, {}, {}
for match in exacts:
    # match[0] is the CNF side, match[1] the RDI side; only the first entry
    # of each is used.
    cnf_nut, cnf_units = list(match[0].items())[0]
    rdi_nut = list(match[1].keys())[0]
    # Test membership on the dicts directly instead of building a key list
    # for every lookup.
    if rdi_nut in rdi_elems_dict:
        cnf_elems_dict[cnf_nut] = cnf_units
    elif rdi_nut in rdi_vits_dict:
        cnf_vits_dict[cnf_nut] = cnf_units
    elif rdi_nut in rdi_macros_dict:
        cnf_macros_dict[cnf_nut] = cnf_units

print(len(cnf_elems_dict), len(cnf_vits_dict), len(cnf_macros_dict))
for cnf_target in (cnf_elems_dict, cnf_vits_dict, cnf_macros_dict):
    for nut, unit in cnf_target.items():
        print(f'{nut}:{unit}')

9 6 2
calcium:mg/d
iron:mg/d
magnesium:mg/d
phosphorus:mg/d
potassium:mg/d
sodium:mg/d
zinc:mg/d
manganese:mg/d
selenium:ug/d
vitamin c:mg/d
thiamin:mg/d
riboflavin:mg/d
pantothenic acid:mg/d
biotin:ug/d
vitamin k:ug/d
protein:g/d
fat:g/d
