# Project 2: Subsistence Diets

In [1]:
from  scipy.optimize import linprog as lp

import pandas as pd
import numpy as np
import warnings
import requests

### [A] Dietary Reference Intakes Function

Write a function that takes as arguments the characteristics of a person (e.g., age, sex) and returns a `pandas.Series' of Dietary Reference Intakes (DRI's) or "Recommended Daily Allowances" (RDA) of a variety of nutrients appropriate for your population of interest.

In [2]:
rda = pd.read_csv("rda_carnivore.csv", index_col = 0)

options = ['Child_1_3', 'Female_4_8', 'Male_4_8', 'Female_9_13', 'Male_9_13', 'Female_14_18', 'Male_14_18', 'Female_19_30', 'Male_19_30', 'Female_31_50', 'Male_31_50', 'Female_51U', 'Male_51U', 'carnivore']

bmin = rda.loc[rda['Constraint Type'].isin(['RDA', 'AI'])]
bmax = rda.loc[rda['Constraint Type'].isin(['UL'])]

In [3]:
def dietary_ref_intake(age = 20,sex = "Female", data = rda):
    """Takes in age (integer) and sex (string), and returns a Series of dietary reference intakes for the chosen population, you can optionally use a different data frame, the min or max RDAs"""

    if age <= 3:
        col = 'Child_1_3'
    if sex in ["M", "male", "m"]:
        sex = "Male"
    elif sex in ["F", "f", "female"]:
        sex = "Female"
    if age <= 3:
        col = 'Child_1_3'
    elif age >= 51:
        col = sex + "_51U" 
    else:
        age_ranges = [(4,8),(9,13),(14,18),(19,30),(31,50),(51,100)]
        for age_range in age_ranges:
            if age >= age_range[0] and age <= age_range[1]:
                col = sex + '_' + str(age_range[0]) + '_' + str(age_range[1])
    return pd.Series(data[col])  

#### Examples

In [4]:
dietary_ref_intake(age=22,sex='M')

Nutrient
Energy            2400.0
Protein             56.0
Carbohydrate       130.0
Dietary Fiber       33.6
Linoleic Acid       17.0
Linolenic Acid       1.6
Calcium           1000.0
Iron                 8.0
Magnesium          400.0
Phosphorus         700.0
Potassium         4700.0
Sodium            2300.0
Zinc                11.0
Copper               0.9
Selenium            55.0
Vitamin A          900.0
Vitamin E           15.0
Vitamin D           15.0
Vitamin C           90.0
Thiamin              1.2
Riboflavin           1.3
Niacin              16.0
Vitamin B6           1.3
Vitamin B12          2.4
Choline            550.0
Vitamin K          120.0
Folate             400.0
Energy            3100.0
Name: Male_19_30, dtype: float64

In [5]:
dietary_ref_intake(age=80,sex='F', data = bmax)

Nutrient
Sodium    2300.0
Energy    3100.0
Name: Female_51U, dtype: float64

### [A] Data on Prices for Different Foods

Construct a google spreadsheet of the prices of different food products for each diet (frozen food diet, meat diet, fresh food diet, liquid diet, and canned-food diet)

In [6]:
# Define file paths again if they are not available
file_paths = {
    "carnivore": "/home/jovyan/Project2_EEP153/Wilbur Atwater min_cost_data - carnivore_recipes2.csv",
    "canned": "/home/jovyan/Project2_EEP153/Wilbur Atwater min_cost_data - canned_recipes.csv",
    "frozen": "/home/jovyan/Project2_EEP153/Wilbur Atwater min_cost_data - frozen_recipes.csv",
    "fresh": "/home/jovyan/Project2_EEP153/Wilbur Atwater min_cost_data - fresh_recipes.csv",
    "liquid": "/home/jovyan/Project2_EEP153/Wilbur Atwater min_cost_data - liquid_recipes.csv",
    "prices": "/home/jovyan/Project2_EEP153/Wilbur Atwater min_cost_data - prices.csv"
}


# Function to read a dataset
def read_sheet(file_path):
    df = pd.read_csv(file_path, index_col=False)
    df = df.iloc[:, :7].dropna(subset=['parent_foodcode'])
    df = df.reset_index(drop=True)
    return df

# Load prices dataset
prices_df = pd.read_csv(file_paths["prices"])
prices_df['parent_foodcode'] = prices_df['parent_foodcode'].astype(int)  # Convert type for merging

# Function to merge price with a given diet dataset
def read_and_merge_with_prices(diet_name):
    df = read_sheet(file_paths[diet_name])  # Read the diet dataset
    df['parent_foodcode'] = df['parent_foodcode'].astype(int)  # Ensure data type matches for merging
    merged_df = df.merge(prices_df, on="parent_foodcode", how="left")  # Left join to include all diet rows
    return merged_df

# Now run the function without errors
frozen_diet_with_prices = read_and_merge_with_prices("frozen")


In [7]:
#Example of merged diet and price
frozen_diet_with_prices.head()

Unnamed: 0,parent_foodcode,parent_desc,ingred_code,ingred_desc,ingred_wt,year,mod_code,method,method_description,nhanes,price
0,11460150,"Yogurt, frozen, NS as to flavor, lowfat milk",1298,"Yogurt, frozen, flavors other than chocolate, ...",100.0,2013/2014,,2.0,Links to altEC,Extra,0.335298
1,11460160,"Yogurt, frozen, chocolate, lowfat milk",1117,"Yogurt, plain, low fat, 12 grams protein per 8...",81.8,2011/2012,0.0,1.0,Links to FNDDS,,0.27658
2,11460160,"Yogurt, frozen, chocolate, lowfat milk",1117,"Yogurt, plain, low fat, 12 grams protein per 8...",81.8,2013/2014,,1.0,Links to FNDDS,Extra,0.296941
3,11460160,"Yogurt, frozen, chocolate, lowfat milk",1117,"Yogurt, plain, low fat, 12 grams protein per 8...",81.8,2015/2016,,1.0,Links to FNDDS,Extra,0.301143
4,11460160,"Yogurt, frozen, chocolate, lowfat milk",19166,"Cocoa, dry powder, unsweetened, processed with...",5.2,2011/2012,0.0,1.0,Links to FNDDS,,0.27658


### [A] Nutritional Content of Different Foods

Write a function that describes the nutritional content for each diet.

In [8]:
def read_nutrients():
    """
    Reads the nutrients dataset and ensures column formatting is correct.
    """
    nutrients_df = pd.read_csv("/home/jovyan/Project2_EEP153/Wilbur Atwater min_cost_data - nutrients.csv", index_col=False)

    # Strip any spaces from column names to avoid merge issues
    nutrients_df.columns = nutrients_df.columns.str.strip()

    # Print columns for debugging
    print("Nutrients dataset columns:", nutrients_df.columns)

    return nutrients_df

# Load the nutrients dataset once
nutrients_df = read_nutrients()

Nutrients dataset columns: Index(['ingred_code', 'Ingredient description', 'Capric acid', 'Lauric acid',
       'Myristic acid', 'Palmitic acid', 'Palmitoleic acid', 'Stearic acid',
       'Oleic acid', 'Linoleic Acid', 'Linolenic Acid', 'Stearidonic acid',
       'Eicosenoic acid', 'Arachidonic acid', 'Eicosapentaenoic acid',
       'Erucic acid', 'Docosapentaenoic acid', 'Docosahexaenoic acid',
       'Butyric acid', 'Caproic acid', 'Caprylic acid', 'Alcohol', 'Caffeine',
       'Calcium', 'Carbohydrate', 'Carotene, alpha', 'Carotene, beta',
       'Cholesterol', 'Choline', 'Copper', 'Cryptoxanthin, beta', 'Energy',
       'Fatty acids, total monounsaturated',
       'Fatty acids, total polyunsaturated', 'Fatty acids, total saturated',
       'Dietary Fiber', 'Folate, DFE', 'Folate, food', 'Folate', 'Folic acid',
       'Iron', 'Lutein + zeaxanthin', 'Lycopene', 'Magnesium', 'Niacin',
       'Phosphorus', 'Potassium', 'Protein', 'Retinol', 'Riboflavin',
       'Selenium', 'Sodium', '

In [9]:
def get_diet_nutritional_info(diet_name, nutrients_df):
    """
    Fetches nutrient information for foods in a specified diet by using the ingred_code
    and merging with an existing nutrients dataset.

    Parameters:
        diet_name (str): The diet category (e.g., "frozen", "canned").
        nutrients_df (pd.DataFrame): The dataset containing nutrient information for each ingred_code.

    Returns:
        pd.DataFrame: Nutritional content for the diet, formatted with:
                      - Rows as nutrients (e.g., Protein, Zinc, Water).
                      - Columns as food items in the diet.
    """
    # Read the specific diet dataset
    diet_df = read_sheet(file_paths[diet_name])  

    # Ensure `ingred_code` exists in both datasets
    if "ingred_code" not in diet_df.columns:
        raise ValueError(f"Column 'ingred_code' not found in {diet_name} dataset.")
    
    if "ingred_code" not in nutrients_df.columns:
        raise ValueError("Column 'ingred_code' not found in nutrients dataset.")

    # Merge diet data with nutrient information using `ingred_code`
    merged_df = diet_df.merge(nutrients_df, on="ingred_code", how="left")

    # Add a column for the diet name
    merged_df["Diet"] = diet_name  

    # Pivot the table: Rows = Nutrients, Columns = Food Items
    nutrient_table = merged_df.set_index(["Diet", "parent_desc"]).drop(columns=["ingred_code"]).T

    return nutrient_table

In [10]:
##example for frozen
nutritional_info_df = get_diet_nutritional_info("frozen", nutrients_df)

# Show first 10 rows to verify the diet name
nutritional_info_df.head(10)

Diet,frozen,frozen,frozen,frozen,frozen,frozen,frozen,frozen,frozen,frozen,frozen,frozen,frozen,frozen,frozen,frozen,frozen,frozen,frozen,frozen,frozen
parent_desc,"Yogurt, frozen, NS as to flavor, lowfat milk","Yogurt, frozen, chocolate, lowfat milk","Yogurt, frozen, chocolate, lowfat milk.1","Yogurt, frozen, chocolate, lowfat milk.2","Yogurt, frozen, flavors other than chocolate, lowfat milk","Yogurt, frozen, NS as to flavor, nonfat milk","Yogurt, frozen, NS as to flavor, nonfat milk.1","Yogurt, frozen, NS as to flavor, nonfat milk.2","Yogurt, frozen, chocolate, nonfat milk","Yogurt, frozen, flavors other than chocolate, with sorbet or sorbet-coated",...,Vegetables as ingredient in curry,Vegetables as ingredient in curry.1,Sauce as ingredient in hamburgers,Sauce as ingredient in hamburgers.1,Sauce as ingredient in hamburgers.2,Industrial oil as ingredient in food,Industrial oil as ingredient in food.1,Industrial oil as ingredient in food.2,Industrial oil as ingredient in food.3,"Coleslaw dressing, light"
parent_foodcode,11460150,11460160,11460160,11460160,11460170,11460190,11460190,11460190,11460200,11460250,...,99997810,99997810,99998130,99998130,99998130,99998210,99998210,99998210,99998210,83208000
ingred_desc,"Yogurt, frozen, flavors other than chocolate, ...","Yogurt, plain, low fat, 12 grams protein per 8...","Cocoa, dry powder, unsweetened, processed with...","Sugars, granulated","Yogurt, frozen, flavors other than chocolate, ...","Yogurt, plain, skim milk, 13 grams protein per...","Sugars, granulated","Frozen yogurts, chocolate, nonfat milk, sweete...","Frozen yogurts, chocolate, nonfat milk, sweete...","Ice creams, vanilla, light",...,"Tomatoes, red, ripe, raw, year round average","Potatoes, baked, flesh and skin, without salt","Mustard, prepared, yellow","Salad dressing, mayonnaise, regular",Catsup,"Oil, industrial, canola, high oleic","Oil, industrial, soy, low linolenic","Oil, industrial, soy, ultra low linolenic","Oil, industrial, soy, fully hydrogenated","Salad Dressing, coleslaw, reduced fat"
ingred_wt,100.0,81.8,5.2,13.0,100.0,44.0,13.0,44.0,100.0,70.0,...,15.0,25.0,10.0,30.0,60.0,25.0,25.0,25.0,25.0,100.0
Ingredient description,"Yogurt, frozen, flavors other than chocolate, ...","Yogurt, plain, low fat, 12 grams protein per 8...","Cocoa, dry powder, unsweetened, processed with...","Sugars, granulated","Yogurt, frozen, flavors other than chocolate, ...","Yogurt, plain, skim milk, 13 grams protein per...","Sugars, granulated","Frozen yogurts, chocolate, nonfat milk, sweete...","Frozen yogurts, chocolate, nonfat milk, sweete...","Ice creams, vanilla, light",...,"Tomatoes, red, ripe, raw, year round average","Potatoes, baked, flesh and skin, without salt","Mustard, prepared, yellow","Salad dressing, mayonnaise, regular",Catsup,"Oil, industrial, canola, high oleic","Oil, industrial, soy, low linolenic","Oil, industrial, soy, ultra low linolenic","Oil, industrial, soy, fully hydrogenated","Salad Dressing, coleslaw dressing, reduced fat"
Capric acid,0.069,0.044,0.0,0.0,0.069,0.005,0.0,0.018,0.018,0.117,...,0.0,0.001,0.005,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Lauric acid,0.085,0.053,0.0,0.0,0.085,0.006,0.0,0.009,0.009,0.127,...,0.0,0.004,0.002,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Myristic acid,0.262,0.163,0.02,0.0,0.262,0.019,0.0,0.071,0.071,0.471,...,0.0,0.001,0.004,0.055,0.0,0.0,0.095,0.095,0.0,0.02
Palmitic acid,0.685,0.422,3.55,0.0,0.685,0.049,0.0,0.241,0.241,1.26,...,0.02,0.022,0.117,7.909,0.01,3.621,9.815,10.187,10.005,1.96
Palmitoleic acid,0.054,0.034,0.0,0.0,0.054,0.004,0.0,0.018,0.018,0.07,...,0.001,0.001,0.008,0.088,0.001,0.191,0.0,0.095,0.0,0.08
Stearic acid,0.247,0.151,4.08,0.0,0.247,0.018,0.0,0.104,0.104,0.57,...,0.008,0.005,0.038,3.099,0.004,2.01,4.308,3.638,83.094,1.0


#### Nutrition matrix to use in solving the linear program:

In [11]:
# create lower bounds and upper bounds.
def get_b(group) :
    bmin = rda.loc[rda['Constraint Type'].isin(['RDA', 'AI']), group]
    bmax = rda.loc[rda['Constraint Type'].isin(['UL']), group]
    b = pd.concat([bmin, -bmax])
    return b

## Solutions: 
#### We found solutions for liquid, canned, and frozen diets. Carnivore and fresh diets yeilded no solution. 

# Carnivorous Diet Solution

In [12]:
carn_recipes = pd.read_csv("Wilbur Atwater min_cost_data - carnivore_recipes2.csv")
nutrition = pd.read_csv("Wilbur Atwater min_cost_data - nutrients.csv")
# from fndds diet problem: normalize weights to percentage terms. 
carn_recipes['ingred_wt'] = carn_recipes['ingred_wt']/carn_recipes.groupby(['parent_foodcode'])['ingred_wt'].transform("sum")

# we're going to extend the recipes data frame to include the nutrient profiles of its ingredients (in 100g)
carn_df = carn_recipes.merge(nutrition, how="left", on="ingred_code")

# multiply all nutrients per 100g of an ingredient by the weight of that ingredient in a recipe.
numeric_cols = list(carn_df.select_dtypes(include=["number"]).columns)
numeric_cols.remove("ingred_wt")
carn_df[numeric_cols] = carn_df[numeric_cols].mul(carn_df["ingred_wt"], axis=0)

# sum nutrients of food codes (over the multiple ingredients)
# python tip: one can merge dictionaries dict1 dict2 using **, that is: dict_merge = {**dict1, **dict2}. 
#The ** effectively "unpacks" the key value pairs in each dictionary
carn_df = carn_df.groupby('parent_foodcode').agg({**{col: "sum" for col in numeric_cols},
                                        "parent_desc": "first"})

carn_df.index.name = "recipe_id"

food_names_2 = carn_df["parent_desc"]
print(food_names_2.head())
carn_df.head()

recipe_id
3462.515152                   Beef burgundy
4624.304849                     Paella, NFS
4851.119509                     Oyster stew
5061.267196    Jambalaya with meat and rice
5116.786264             Paella with seafood
Name: parent_desc, dtype: object


Unnamed: 0_level_0,parent_foodcode,ingred_code,Capric acid,Lauric acid,Myristic acid,Palmitic acid,Palmitoleic acid,Stearic acid,Oleic acid,Linoleic Acid,...,"Vitamin B-12, added",Vitamin B6,Vitamin C,Vitamin D,Vitamin E,"Vitamin E, added",Vitamin K,Water,Zinc,parent_desc
recipe_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3462.515152,3462.515152,0.258368,0.0,0.0,0.0,3.8e-05,0.0,3e-05,0.00012,0.00015,...,0.0,0.000152,0.006565,0.0,0.000216,0.0,0.079401,0.000976,0.00046,Beef burgundy
4624.304849,4624.304849,0.343779,2.3e-05,2.1e-05,0.000206,0.000217,1.4e-05,6.1e-05,0.000188,0.000168,...,0.0,0.000223,0.002028,0.0,0.000223,0.0,0.136126,0.001574,0.000531,"Paella, NFS"
4851.119509,4851.119509,0.347299,6e-06,1.6e-05,5e-06,9.1e-05,1.3e-05,5.6e-05,0.000111,0.000119,...,0.0,5e-05,0.0,0.0,0.000178,0.0,0.028006,0.002132,0.000204,Oyster stew
5061.267196,5061.267196,0.377702,2.2e-05,4.3e-05,2.8e-05,0.000318,0.0,5.2e-05,8.7e-05,9.2e-05,...,0.0,0.000102,0.009248,0.0,0.001384,0.0,0.317126,0.001441,0.001143,Jambalaya with meat and rice
5116.786264,5116.786264,0.380531,2.5e-05,2.3e-05,0.000228,0.00024,1.5e-05,6.7e-05,0.000208,0.000186,...,0.0,0.000247,0.002245,0.0,0.000247,0.0,0.150678,0.001742,0.000587,Paella with seafood


In [13]:
prices_carn = prices_df[["parent_foodcode", "year", "price"]]

prices_carn = prices_carn.set_index(["year", "parent_foodcode"])

# we'll focus on the latest price data
prices_carn = prices_carn.xs("2017/2018", level="year")

# drop rows of prices where the price is "NA"
prices_carn = prices_carn.dropna(subset="price")
common_recipes = carn_df.index.intersection(prices_carn.index)

# python tip: given a list of indices, "loc" both subsets and sorts. 

carn_df = carn_df.loc[common_recipes]
prices_carn = prices_carn.loc[common_recipes]

# lets remap the price dataframe index to be the actual food names.
prices_carn.index = prices_carn.index.map(food_names_2)
A_carn_all = carn_df.T

print(f"We have prices for {prices_carn.shape[0]} unique recipes (FNDDS food codes)")

We have prices for 199 unique recipes (FNDDS food codes)


In [14]:
group = "carnivore"
bmin = rda.loc[rda['Constraint Type'].isin(['RDA', 'AI']), group]
bmax = rda.loc[rda['Constraint Type'].isin(['UL']), group]

# reindex ensures we only keep nutrients in bmin/bmax
Amin = A_carn_all.reindex(bmin.index).dropna(how='all')
Amax = A_carn_all.reindex(bmax.index).dropna(how='all')

b_carn = pd.concat([bmin, -bmax])
A_carn = pd.concat([Amin, -Amax])

#python tip: by typing "=" after the variable name inside the curly braces, it formats the output so we don't have to write f"variable = {variable}"
print(f"{bmin.shape=}")
print(f"{Amin.shape=}")
print(f"{bmax.shape=}")
print(f"{Amax.shape=}")
print(f"{b_carn.shape=}")
print(f"{A_carn.shape=}")
print(f"{prices_carn.shape=}")
group = 'carnivore'
tol = 1e-6

result = lp(prices_carn, -A_carn, -b_carn, method="highs")
result

bmin.shape=(26,)
Amin.shape=(26, 199)
bmax.shape=(2,)
Amax.shape=(2, 199)
b_carn.shape=(28,)
A_carn.shape=(28, 199)
prices_carn.shape=(199, 1)


        message: Optimization terminated successfully. (HiGHS Status 7: Optimal)
        success: True
         status: 0
            fun: 15.29181725310329
              x: [ 0.000e+00  0.000e+00 ...  0.000e+00  0.000e+00]
            nit: 27
          lower:  residual: [ 0.000e+00  0.000e+00 ...  0.000e+00
                              0.000e+00]
                 marginals: [ 6.698e-01  1.980e-01 ...  2.023e+01
                              2.312e+01]
          upper:  residual: [       inf        inf ...        inf
                                    inf]
                 marginals: [ 0.000e+00  0.000e+00 ...  0.000e+00
                              0.000e+00]
          eqlin:  residual: []
                 marginals: []
        ineqlin:  residual: [ 4.265e+02  1.517e+02 ...  0.000e+00
                              6.735e+02]
                 marginals: [-0.000e+00 -0.000e+00 ... -1.422e-02
                             -0.000e+00]
 mip_node_count: 0
 mip_dual_bound: 0.0
        mip_

In [15]:
'''
select from 
['Child_1_3', 'Female_4_8', 'Male_4_8', 'Female_9_13', 'Male_9_13', 
'Female_14_18', 'Male_14_18','Female_19_30', 'Male_19_30', 
'Female_31_50', 'Male_31_50', 'Female_51U', 'Male_51U', 'carnivore']
'''

group = 'carnivore'
tol = 1e-6

result_carn = lp(prices_carn, -A_carn, -b_carn, method="highs")
result_carn
print(f"Cost of diet for {group} is ${result_carn.fun:.2f} per day.")
diet = pd.Series(result_carn.x,index=prices_carn.index)

print("\nYou'll be eating (in 100s of grams or milliliters):")
print(round(diet[diet >= tol], 2))

Cost of diet for carnivore is $15.29 per day.

You'll be eating (in 100s of grams or milliliters):
Pork, spareribs, barbecued, with sauce, lean only eaten    0.34
Tuna, fresh, raw                                           4.00
Oysters, raw                                               0.03
Sweet and sour chicken or turkey                           3.83
Kung pao chicken                                           0.14
Egg, yolk only, raw                                        2.07
Bacon bits                                                 0.56
dtype: float64


In [16]:
tab_carn = pd.DataFrame({"Outcome":A_carn.to_numpy()@diet.to_numpy(),"Recommendation":np.abs(b_carn)})
print("\nWith the following nutritional outcomes of interest:")
print(tab_carn)


With the following nutritional outcomes of interest:
                    Outcome  Recommendation
Nutrient                                   
Energy          2426.548533         2000.00
Protein          197.701757           46.00
Carbohydrate     116.060354           78.00
Dietary Fiber           9.8            9.80
Linoleic Acid     36.429659           12.00
Linolenic Acid     4.068006            1.10
Calcium               525.0          525.00
Iron              18.123637           18.00
Magnesium        273.523868          263.50
Phosphorus       2647.54625          700.00
Potassium            2820.0         2820.00
Zinc              11.946903            8.00
Copper                  0.9            0.90
Selenium         543.490189           55.00
Vitamin A        937.396222          700.00
Vitamin E         13.533267           12.00
Vitamin D         19.097122           15.00
Vitamin C             11.25           11.25
Thiamin            1.560385            1.10
Riboflavin         1.8

#### No solution for carnivore diet