In [1]:
%run imports.py

# Download food nutrient data

It is possible to download the nutrient data directly [1].
However, the latest release contains errors that have only been updated in the web interface.

[1] https://www.ars.usda.gov/northeast-area/beltsville-md/beltsville-human-nutrition-research-center/nutrient-data-laboratory/docs/usda-national-nutrient-database-for-standard-reference/

In [2]:
# Get a list of nutrient ids.
soup = bs4.BeautifulSoup(requests.get('https://ndb.nal.usda.gov/ndb/nutrients/index').text, 'html5lib')
nutrient_ids = [option.get('value') for option in soup.select('select#nutrient3 > option') if option.get('value')]

In [3]:
# For each nutrient, download a DataFrame of foods with that nutrient.
foods = {} if 'foods' not in locals() else foods
for nutrient_id in tqdm_notebook(nutrient_ids, unit='nutrient'):
    if nutrient_id in foods:
        continue
    url = 'https://ndb.nal.usda.gov/ndb/nutrients/report/nutrientsfrm'
    params = {
        'max': 25,
        'offset': 0,
        'totCount': 0,
        'nutrient1': nutrient_id,
        'nutrient2': '',
        'nutrient3': '',
        'subset': 1,  # 0=All foods, 1=Abridged (most common 1000 foods)
        'sort': 'f',
        'measureby': 'm'
    }
    soup = bs4.BeautifulSoup(requests.get(url, params).text, 'html5lib')
    url_csv = 'https://ndb.nal.usda.gov'
    url_csv += [a.get('href') for a in soup.select('a[title="Download in spreadsheet format"]')][0]
    src = requests.get(url_csv).text \
        .replace('Per Measure', '') \
        .replace('"""', '"') \
        .replace(',,\n', '\n') \
        .replace(',\n', '\n')
    df = pd.read_csv(
        io.StringIO(src),
        index_col='NDB_NO',
        skiprows=6)
    df = df[df.iloc[:, -1] > 0]
    foods[nutrient_id] = df




In [4]:
# Merge all nutrient DataFrames together.
nutrients_df = None
for nutrient_id, nutrient_df in tqdm_notebook(foods.items(), unit='nutrient', total=len(foods)):
    if nutrients_df is None:
        nutrients_df = nutrient_df
    else:
        nutrients_df = nutrients_df.combine_first(nutrient_df)




In [5]:
# Reorder the columns and store to disk.
cols_front = ['Description', 'Weight(g)', 'Measure']
cols_back = sorted([col for col in nutrients_df.columns if col not in cols_front])
nutrients_df = nutrients_df[cols_front + cols_back].sort_index()
nutrients_df.to_msgpack('nutrients.msg', compress='zlib')

In [6]:
nutrients_df[[col for col in nutrients_df.columns if ':' not in col]].head(20)

Unnamed: 0_level_0,Description,Weight(g),Measure,Alanine(g),"Alcohol, ethyl(g)",Arginine(g),Ash(g),Aspartic acid(g),Beta-sitosterol(mg),Betaine(mg),Caffeine(mg),"Calcium, Ca(mg)",Campesterol(mg),"Carbohydrate, by difference(g)","Carotene, alpha(µg)","Carotene, beta(µg)",Cholesterol(mg),"Choline, total(mg)","Copper, Cu(mg)","Cryptoxanthin, beta(µg)",Cystine(g),Dihydrophylloquinone(µg),Energy(kJ),Energy(kcal),"Fatty acids, total monounsaturated(g)","Fatty acids, total polyunsaturated(g)","Fatty acids, total saturated(g)","Fatty acids, total trans(g)","Fatty acids, total trans-monoenoic(g)","Fatty acids, total trans-polyenoic(g)","Fiber, total dietary(g)","Fluoride, F(µg)","Folate, DFE(µg)","Folate, food(µg)","Folate, total(µg)",Folic acid(µg),Fructose(g),Galactose(g),Glucose (dextrose)(g),Glutamic acid(g),Glycine(g),Histidine(g),Hydroxyproline(g),"Iron, Fe(mg)",Isoleucine(g),Lactose(g),Leucine(g),Lutein + zeaxanthin(µg),Lycopene(µg),Lysine(g),"Magnesium, Mg(mg)",Maltose(g),"Manganese, Mn(mg)",Menaquinone-4(µg),Methionine(g),Niacin(mg),Pantothenic acid(mg),Phenylalanine(g),"Phosphorus, P(mg)",Phytosterols(mg),"Potassium, K(mg)",Proline(g),Protein(g),Retinol(µg),Riboflavin(mg),"Selenium, Se(µg)",Serine(g),"Sodium, Na(mg)",Starch(g),Stigmasterol(mg),Sucrose(g),"Sugars, total(g)",Theobromine(mg),Thiamin(mg),Threonine(g),"Tocopherol, beta(mg)","Tocopherol, delta(mg)","Tocopherol, gamma(mg)","Tocotrienol, alpha(mg)","Tocotrienol, beta(mg)","Tocotrienol, delta(mg)","Tocotrienol, gamma(mg)",Total lipid (fat)(g),Tryptophan(g),Tyrosine(g),Valine(g),"Vitamin A, IU(IU)","Vitamin A, RAE(µg)",Vitamin B-12(µg),"Vitamin B-12, added(µg)",Vitamin B-6(mg),"Vitamin C, total ascorbic acid(mg)",Vitamin D (D2 + D3)(µg),Vitamin D(IU),Vitamin D2 (ergocalciferol)(µg),Vitamin D3 (cholecalciferol)(µg),Vitamin E (alpha-tocopherol)(mg),"Vitamin E, added(mg)",Vitamin K (phylloquinone)(µg),Water(g),"Zinc, Zn(mg)"
NDB_NO,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1
1001,"Butter, salted",5.0,"1.0 pat (1"" sq, 1/3"" high)",0.001,,0.002,0.11,0.003,,,,1.0,,,,8.0,11.0,0.9,,,,,150.0,36.0,1.051,0.152,2.568,0.164,0.149,0.015,,0.1,,,,,,,,0.009,0.001,0.001,,,0.003,,0.004,,,0.003,,,,,0.001,0.002,0.006,0.002,1.0,,1.0,0.004,0.04,34.0,0.002,0.1,0.002,32.0,,,,,,,0.002,,,,,,,,4.06,0.001,0.002,0.003,125.0,34.0,0.01,,,,,,,,0.12,,0.3,0.79,
1004,"Cheese, blue",28.35,1.0 oz,0.183,,0.202,1.45,0.407,,,,150.0,,0.66,,21.0,21.0,4.4,0.011,,0.03,,419.0,100.0,2.205,0.227,5.293,,,,,,10.0,10.0,10.0,,,,,1.468,0.115,0.215,,0.09,0.319,,0.544,,,0.525,7.0,,0.003,,0.166,0.288,0.49,0.308,110.0,,73.0,0.595,6.07,54.0,0.108,4.1,0.318,325.0,,,,0.14,,0.008,0.223,,,,,,,,8.15,0.088,0.367,0.441,204.0,56.0,0.35,,0.047,,0.1,6.0,,0.1,0.07,,0.7,12.02,0.75
1007,"Cheese, camembert",28.35,1.0 oz,0.232,,0.199,1.04,0.365,,,,110.0,,0.13,,3.0,20.0,4.4,0.006,,0.031,,355.0,85.0,1.991,0.205,4.326,,,,,,18.0,18.0,18.0,,,,,1.187,0.107,0.194,,0.09,0.274,,0.522,,,0.501,6.0,,0.011,,0.16,0.179,0.387,0.313,98.0,,53.0,0.665,5.61,68.0,0.138,4.1,0.316,239.0,,,,0.13,,0.008,0.203,,,,,,,,6.88,0.087,0.325,0.363,232.0,68.0,0.37,,0.064,,0.1,5.0,,0.1,0.06,,0.6,14.69,0.67
1009,"Cheese, cheddar",132.0,"1.0 cup, diced",0.991,,0.722,4.9,2.289,,0.9,,937.0,,4.08,,112.0,131.0,21.8,0.04,,0.162,,2229.0,533.0,12.205,1.876,24.904,1.21,1.21,,,46.1,36.0,36.0,36.0,,,0.13,0.34,6.25,0.722,0.722,,0.18,1.592,0.16,2.559,,,1.353,36.0,,0.036,11.4,0.722,0.078,0.541,1.418,601.0,,100.0,3.296,30.19,436.0,0.565,37.6,1.03,862.0,,,,0.63,,0.038,1.378,,,,,,,,43.97,0.722,1.463,1.853,1639.0,436.0,1.45,,0.087,,0.8,32.0,,0.8,0.94,,3.2,48.87,4.8
1012,"Cheese, cottage, creamed, large or small curd",113.0,4.0 oz,0.434,,0.562,1.59,1.023,,0.8,,94.0,,3.82,,14.0,19.0,20.8,0.033,,0.075,,466.0,111.0,0.879,0.139,1.941,,,,,35.7,14.0,14.0,14.0,,,,,2.941,0.251,0.368,,0.08,0.668,3.02,1.261,,,1.055,9.0,,0.002,1.0,0.304,0.112,0.629,0.652,180.0,,118.0,1.389,12.57,41.0,0.184,11.0,0.722,411.0,,,,3.02,,0.031,0.565,,,,,,,,4.86,0.166,0.683,0.845,158.0,42.0,0.49,,0.052,,0.1,3.0,,0.1,0.09,,,90.16,0.45
1013,"Cheese, cottage, creamed, with fruit",113.0,4.0 oz,0.623,,0.548,1.36,0.812,,,,60.0,,5.21,,16.0,15.0,19.8,0.045,,0.112,,460.0,110.0,1.171,0.14,2.611,,,,0.2,35.7,12.0,12.0,12.0,,,,,2.599,0.261,0.399,,0.18,0.705,,1.233,,,0.971,8.0,,0.003,,0.362,0.169,0.205,0.646,128.0,,102.0,1.39,12.08,42.0,0.16,8.7,0.673,389.0,,,,2.69,,0.037,0.532,,,,,,,,4.35,0.133,0.64,0.742,165.0,43.0,0.6,,0.077,1.6,,,,,0.05,,0.5,89.99,0.37
1014,"Cheese, cottage, nonfat, uncreamed, dry, large...",145.0,1.0 cup (not packed),0.518,,0.67,2.48,1.219,,1.3,,125.0,,9.66,,,10.0,26.0,0.043,,0.088,,439.0,104.0,0.115,0.004,0.245,,,,,45.8,13.0,13.0,13.0,,,,0.42,3.508,0.299,0.439,,0.22,0.796,2.26,1.504,,,1.259,16.0,,0.032,,0.362,0.209,0.647,0.777,276.0,,199.0,1.656,14.99,3.0,0.328,13.6,0.861,539.0,,,,2.68,,0.033,0.674,,,0.01,,,,,0.42,0.199,0.813,1.008,12.0,3.0,0.67,,0.023,,,,,,0.01,,,117.46,0.68
1015,"Cheese, cottage, lowfat, 2% milkfat",113.0,4.0 oz,0.462,,0.528,1.44,1.088,,0.7,,125.0,,5.38,,7.0,14.0,18.4,0.037,,0.07,,384.0,92.0,0.583,0.094,1.396,0.076,0.061,0.015,,,9.0,9.0,9.0,,,0.14,,2.764,0.236,0.346,,0.15,0.628,4.37,1.185,,,0.992,10.0,,0.017,,0.286,0.116,0.592,0.614,170.0,,141.0,1.305,11.81,77.0,0.284,13.4,0.679,348.0,,,,4.52,,0.023,0.531,,,,,,,,2.57,0.156,0.642,0.794,254.0,77.0,0.53,,0.064,,,,,,0.09,,,91.8,0.58
1016,"Cheese, cottage, lowfat, 1% milkfat",113.0,4.0 oz,0.727,,0.638,1.57,0.948,,,,69.0,,3.07,,3.0,5.0,19.8,0.032,,0.13,,342.0,81.0,0.329,0.035,0.729,,,,,35.7,14.0,14.0,14.0,,,,,3.033,0.305,0.466,,0.16,0.823,,1.44,,,1.132,6.0,,0.003,,0.421,0.145,0.243,0.755,151.0,,97.0,1.622,14.0,12.0,0.186,10.2,0.785,459.0,,,,3.07,,0.024,0.622,,,,,,,,1.15,0.156,0.746,0.867,46.0,12.0,0.71,,0.077,,,,,,0.01,,0.1,93.2,0.43
1017,"Cheese, cream",14.5,1.0 tbsp,0.027,,0.034,0.18,0.075,,0.1,,14.0,,0.8,,9.0,15.0,3.9,0.003,,0.006,,213.0,51.0,1.292,0.215,2.931,,,0.033,,,1.0,1.0,1.0,,,,,0.189,0.021,0.025,,0.02,0.047,0.55,0.095,1.0,,0.082,1.0,,0.002,1.3,0.028,0.013,0.075,0.042,16.0,,19.0,0.096,0.89,44.0,0.033,1.2,0.054,46.0,0.05,,,0.55,,0.003,0.034,,,0.06,0.01,,,,4.99,0.01,0.044,0.057,161.0,45.0,0.03,,0.008,,,,,,0.12,,0.3,7.63,0.07


# Download Recommended Daily Allowances

In [7]:
def download_rda_table(url):
    # Download the RDA table at the given url.
    src = requests.get(url).text \
        .replace('Life Stage<br />Group', 'Group') \
        .replace('*', '')
    rda_df = pd.read_html(io.StringIO(src))[0]
    # Integrate group rows into the index.
    group = None
    for i, name in enumerate(rda_df.Group):
        if re.match('^[A-Z]', name):
            group = name
        else:
            rda_df.Group.iat[i] = f'{group}: {name}'
    # Clean up the data and dtypes.
    rda_df = rda_df.set_index('Group').dropna(axis=0, how='all')
    rda_df = rda_df.replace(['ND', 'NDc'], np.nan)
    rda_df = rda_df.replace('([0-9.]+)[^0-9]+', r'\1', regex=True)
    rda_df = rda_df.astype(np.float64)
    return rda_df

In [8]:
rda_elements_df = download_rda_table('https://www.ncbi.nlm.nih.gov/books/NBK56068/table/summarytables.t3/?report=objectonly')
rda_elements_df

Unnamed: 0_level_0,Calcium(mg/d),Chromium(μg/d),Copper(μg/d),Fluoride(mg/d),Iodine(μg/d),Iron(mg/d),Magnesium(mg/d),Manganese(mg/d),Molybdenum(μg/d),Phosphorus(mg/d),Selenium(μg/d),Zinc(mg/d),Potassium(g/d),Sodium(g/d),Chloride(g/d)
Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Infants: 0–6 mo,200.0,0.2,200.0,0.01,110.0,0.27,30.0,0.003,2.0,100.0,15.0,2.0,0.4,0.12,0.18
Infants: 6–12 mo,260.0,5.5,220.0,0.5,130.0,11.0,75.0,0.6,3.0,275.0,20.0,3.0,0.7,0.37,0.57
Children: 1–3 y,700.0,11.0,340.0,0.7,90.0,7.0,80.0,1.2,17.0,460.0,20.0,3.0,3.0,1.0,1.5
Children: 4–8 y,1000.0,15.0,440.0,1.0,90.0,10.0,130.0,1.5,22.0,500.0,30.0,5.0,3.8,1.2,1.9
Males: 9–13 y,1300.0,25.0,700.0,2.0,120.0,8.0,240.0,1.9,34.0,1250.0,40.0,8.0,4.5,1.5,2.3
Males: 14–18 y,1300.0,35.0,890.0,3.0,150.0,11.0,410.0,2.2,43.0,1250.0,55.0,11.0,4.7,1.5,2.3
Males: 19–30 y,1000.0,35.0,900.0,4.0,150.0,8.0,400.0,2.3,45.0,700.0,55.0,11.0,4.7,1.5,2.3
Males: 31–50 y,1000.0,35.0,900.0,4.0,150.0,8.0,420.0,2.3,45.0,700.0,55.0,11.0,4.7,1.5,2.3
Males: 51–70 y,1000.0,30.0,900.0,4.0,150.0,8.0,420.0,2.3,45.0,700.0,55.0,11.0,4.7,1.3,2.0
Males: > 70 y,1200.0,30.0,900.0,4.0,150.0,8.0,420.0,2.3,45.0,700.0,55.0,11.0,4.7,1.2,1.8


In [9]:
rda_vitamins_df = download_rda_table('https://www.ncbi.nlm.nih.gov/books/NBK56068/table/summarytables.t2/?report=objectonly')
rda_vitamins_df

Unnamed: 0_level_0,Vitamin A(μg/d)a,Vitamin C(mg/d),"Vitamin D(μg/d)b,c",Vitamin E(mg/d)d,Vitamin K(μg/d),Thiamin(mg/d),Riboflavin(mg/d),Niacin(mg/d)e,Vitamin B6(mg/d),Folate(μg/d)f,Vitamin B12(μg/d),Pantothenic Acid(mg/d),Biotin(μg/d),Choline(mg/d)g
Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Infants: 0–6 mo,400.0,40.0,10.0,4.0,2.0,0.2,0.3,2.0,0.1,65.0,4.0,1.7,5.0,125.0
Infants: 6–12 mo,500.0,50.0,10.0,5.0,2.5,0.3,0.4,4.0,0.3,80.0,5.0,1.8,6.0,150.0
Children: 1–3 y,300.0,15.0,15.0,6.0,30.0,0.5,0.5,6.0,0.5,150.0,9.0,2.0,8.0,200.0
Children: 4–8 y,400.0,25.0,15.0,7.0,55.0,0.6,0.6,8.0,0.6,200.0,12.0,3.0,12.0,250.0
Males: 9–13 y,600.0,45.0,15.0,11.0,60.0,0.9,0.9,12.0,1.0,300.0,18.0,4.0,20.0,375.0
Males: 14–18 y,900.0,75.0,15.0,15.0,75.0,1.2,1.3,16.0,1.3,400.0,24.0,5.0,25.0,550.0
Males: 19–30 y,900.0,90.0,15.0,15.0,120.0,1.2,1.3,16.0,1.3,400.0,24.0,5.0,30.0,550.0
Males: 31–50 y,900.0,90.0,15.0,15.0,120.0,1.2,1.3,16.0,1.3,400.0,24.0,5.0,30.0,550.0
Males: 51–70 y,900.0,90.0,15.0,15.0,120.0,1.2,1.3,16.0,1.7,400.0,2.4,5.0,30.0,550.0
Males: > 70 y,900.0,90.0,20.0,15.0,120.0,1.2,1.3,16.0,1.7,400.0,2.4,5.0,30.0,550.0


In [10]:
rda_macro_df = download_rda_table('https://www.ncbi.nlm.nih.gov/books/NBK56068/table/summarytables.t4/?report=objectonly')
rda_macro_df

Unnamed: 0_level_0,Total Watera(L/d),Carbohydrate(g/d),Total Fiber(g/d),Fat(g/d),Linoleic Acid(g/d),α-Linolenic Acid(g/d),Proteinb(g/d)
Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Infants: 0–6 mo,0.7,60.0,,31.0,4.4,0.5,9.1
Infants: 6–12 mo,0.8,95.0,,30.0,4.6,0.5,11.0
Children: 1–3 y,1.3,130.0,19.0,,7.0,0.7,13.0
Children: 4–8 y,1.7,130.0,25.0,,10.0,0.9,19.0
Males: 9–13 y,2.4,130.0,31.0,,12.0,1.2,34.0
Males: 14–18 y,3.3,130.0,38.0,,16.0,1.6,52.0
Males: 19–30 y,3.7,130.0,38.0,,17.0,1.6,56.0
Males: 31–50 y,3.7,130.0,38.0,,17.0,1.6,56.0
Males: 51–70 y,3.7,130.0,30.0,,14.0,1.6,56.0
Males: > 70 y,3.7,130.0,30.0,,14.0,1.6,56.0


In [12]:
# Store the RDA of the elements, vitamins and macronutrients to disk.
rda = {
    'elements': rda_elements_df,
    'vitamins': rda_vitamins_df,
    'macronutrients': rda_macro_df
}
rda['macronutrients'].reset_index(inplace=True)
rda['macronutrients'].loc[rda['macronutrients'].Group == 'Lactation: 14–18', 'Group'] += ' y'
rda['macronutrients'].set_index('Group', inplace=True)
rda = pd.concat(rda.values(), axis=1).sort_index()
pd.to_msgpack('rda.msg', rda, compress='zlib')