# Project 2: Subsistence Diets

In [12]:
import os

# Never commit credentials to a notebook: read the key from the environment.
# Set APIKEY (e.g. `export APIKEY=...`) before starting Jupyter. An empty
# string is used when it is unset so that this cell still runs.
apikey = os.environ.get("APIKEY", "")

In [13]:
# %pip install pandas

import numpy as np
import pandas as pd
# `lp` is the solver called in the diet-solution cells below
from scipy.optimize import linprog as lp

### [A] Dietary Reference Intakes Function

Write a function that takes as arguments the characteristics of a person (e.g., age, sex) and returns a `pandas.Series` of Dietary Reference Intakes (DRIs) or "Recommended Daily Allowances" (RDAs) of a variety of nutrients appropriate for your population of interest.

In [14]:
# Reference-intake table; the first CSV column becomes the row index.
rda = pd.read_csv("rda.csv", index_col=0)

# Demographic groups available as columns of `rda`.
options = [
    'Child_1_3',
    'Female_4_8', 'Male_4_8',
    'Female_9_13', 'Male_9_13',
    'Female_14_18', 'Male_14_18',
    'Female_19_30', 'Male_19_30',
    'Female_31_50', 'Male_31_50',
    'Female_51U', 'Male_51U',
]

# Rows that act as minimum requirements (RDA / AI) and as upper limits (UL).
bmin = rda[rda['Constraint Type'].isin(['RDA', 'AI'])]
bmax = rda[rda['Constraint Type'].isin(['UL'])]

In [15]:
def dietary_ref_intake(age = 20,sex = "Female", data = rda):
    """Return the dietary reference intakes for a person of the given age and sex.

    Parameters
    ----------
    age : int or float
        Age in years. Only completed years are used, so 8.5 is treated as 8.
        Ages of 3 or below map to the 'Child_1_3' group.
    sex : str
        "Male" / "Female", or the aliases "M" / "F" / "male" / "female" in any
        letter case. Ignored for ages of 3 or below. Any other value is used
        unchanged as the column prefix.
    data : pandas.DataFrame
        Table with one column per demographic group (e.g. 'Male_19_30').
        Defaults to the full `rda` table; pass `bmin` or `bmax` to get only
        the minimum or maximum constraints.

    Returns
    -------
    pandas.Series
        The column of `data` for the matching demographic group.

    Raises
    ------
    KeyError
        If the resulting group name is not a column of `data`.
    ValueError
        If no age bracket can be determined (e.g. `age` is NaN).
    """
    # Work with completed years so fractional ages (8.5, 50.5, ...) fall into a
    # bracket instead of slipping through the gaps between the integer ranges.
    years = age // 1

    # Young children share one group regardless of sex.
    if years <= 3:
        return pd.Series(data['Child_1_3'])

    # Accept the common spellings case-insensitively; leave anything else alone
    # so that an unknown label still surfaces as a KeyError from the lookup.
    if isinstance(sex, str):
        aliases = {"m": "Male", "male": "Male", "f": "Female", "female": "Female"}
        sex = aliases.get(sex.strip().lower(), sex)

    if years >= 51:
        col = sex + "_51U"
    else:
        brackets = [(4,8),(9,13),(14,18),(19,30),(31,50)]
        bracket = next(((lo, hi) for lo, hi in brackets if lo <= years <= hi), None)
        if bracket is None:
            raise ValueError(f"Cannot determine an age bracket for age={age!r}")
        col = sex + f"_{bracket[0]}_{bracket[1]}"
    return pd.Series(data[col])

#### Examples

In [16]:
# A 22-year-old male falls in the 19-30 bracket, so the 'Male_19_30' column is returned.
dietary_ref_intake(age=22,sex='M')

Nutrient
Energy            2400.0
Protein             56.0
Carbohydrate       130.0
Dietary Fiber       33.6
Linoleic Acid       17.0
Linolenic Acid       1.6
Calcium           1000.0
Iron                 8.0
Magnesium          400.0
Phosphorus         700.0
Potassium         4700.0
Sodium_max        2300.0
Zinc                11.0
Copper               0.9
Selenium            55.0
Vitamin A          900.0
Vitamin E           15.0
Vitamin D           15.0
Vitamin C           90.0
Thiamin              1.2
Riboflavin           1.3
Niacin              16.0
Vitamin B6           1.3
Vitamin B12          2.4
Choline            550.0
Vitamin K          120.0
Folate             400.0
Energy_max        3100.0
Name: Male_19_30, dtype: float64

In [17]:
# Passing `bmax` (the 'UL' rows of rda) returns only the upper-limit constraints.
dietary_ref_intake(age=80,sex='F', data = bmax)

Nutrient
Sodium_max    2300.0
Energy_max    3100.0
Name: Female_51U, dtype: float64

### [A] Data on Prices for Different Foods

Construct a Google spreadsheet of the prices of different food products for each diet (frozen-food diet, meat diet, fresh-food diet, liquid diet, and canned-food diet).

In [102]:
# Location of each exported sheet, keyed by diet name (plus the shared price table).
file_paths = {
    key: f"~/Documents/GitHub/Project2_EEP153/Wilbur Atwater min_cost_data - {sheet}.csv"
    for key, sheet in (
        ("carnivore", "carnivore_recipes"),
        ("canned", "canned_recipes"),
        ("frozen", "frozen_recipes"),
        ("fresh", "fresh_recipes"),
        ("liquid", "liquid_recipes"),
        ("prices", "prices"),
    )
}


def read_sheet(file_path):
    """Load a recipe sheet, keeping its first seven columns and the rows that have a food code.

    Rows whose `parent_foodcode` is missing are discarded and the remaining
    rows are renumbered from 0.
    """
    raw = pd.read_csv(file_path, index_col=False)
    leading_cols = raw.iloc[:, :7]
    with_code = leading_cols.dropna(subset=['parent_foodcode'])
    return with_code.reset_index(drop=True)

# Price table shared by every diet; the food code is cast to int so it has the
# same dtype as the recipe sheets when the two are merged.
prices_df = pd.read_csv(file_paths["prices"]).astype({'parent_foodcode': int})

def read_and_merge_with_prices(diet_name):
    """Return the recipe rows of `diet_name` with the matching price columns attached.

    The diet sheet is looked up in `file_paths`, its `parent_foodcode` is cast
    to int to match `prices_df`, and the two are left-joined on that column so
    every recipe row is kept even when no price is available.
    """
    diet_rows = read_sheet(file_paths[diet_name]).astype({'parent_foodcode': int})
    return pd.merge(diet_rows, prices_df, on="parent_foodcode", how="left")

# Build the merged recipe/price table for the frozen diet.
frozen_diet_with_prices = read_and_merge_with_prices("frozen")

In [104]:
# Preview the first rows of the merged frozen-diet / price table.
frozen_diet_with_prices.head()

Unnamed: 0,parent_foodcode,parent_desc,ingred_code,ingred_desc,ingred_wt,year,mod_code,method,method_description,nhanes,price
0,11460150,"Yogurt, frozen, NS as to flavor, lowfat milk",1298,"Yogurt, frozen, flavors other than chocolate, ...",100.0,2013/2014,,2.0,Links to altEC,Extra,0.335298
1,11460160,"Yogurt, frozen, chocolate, lowfat milk",1117,"Yogurt, plain, low fat, 12 grams protein per 8...",81.8,2011/2012,0.0,1.0,Links to FNDDS,,0.27658
2,11460160,"Yogurt, frozen, chocolate, lowfat milk",1117,"Yogurt, plain, low fat, 12 grams protein per 8...",81.8,2013/2014,,1.0,Links to FNDDS,Extra,0.296941
3,11460160,"Yogurt, frozen, chocolate, lowfat milk",1117,"Yogurt, plain, low fat, 12 grams protein per 8...",81.8,2015/2016,,1.0,Links to FNDDS,Extra,0.301143
4,11460160,"Yogurt, frozen, chocolate, lowfat milk",19166,"Cocoa, dry powder, unsweetened, processed with...",5.2,2011/2012,0.0,1.0,Links to FNDDS,,0.27658


### [A] Nutritional Content of Different Foods

Write a function that describes the nutritional content for each diet.

In [None]:
def read_nutrients(file_path="~/Documents/GitHub/Project2_EEP153/Wilbur Atwater min_cost_data - nutrients.csv"):
    """
    Reads the nutrients dataset and strips whitespace from its column names.

    Parameters:
        file_path (str): CSV file to read. Defaults to the project's nutrients
                         sheet, so existing calls without arguments are unchanged.

    Returns:
        pd.DataFrame: The nutrients table with cleaned column names.
    """
    nutrients_df = pd.read_csv(file_path, index_col=False)

    # Strip any spaces from column names to avoid merge issues
    nutrients_df.columns = nutrients_df.columns.str.strip()

    # Print columns for debugging
    print("Nutrients dataset columns:", nutrients_df.columns)

    return nutrients_df

# Load the nutrients dataset once; it is passed to get_diet_nutritional_info below.
nutrients_df = read_nutrients()

In [None]:
def get_diet_nutritional_info(diet_name, nutrients_df):
    """
    Join one diet's recipe sheet to the nutrient table and return the result transposed.

    Parameters:
        diet_name (str): Key into `file_paths` (e.g., "frozen", "canned").
        nutrients_df (pd.DataFrame): Nutrient table; must contain an `ingred_code` column.

    Returns:
        pd.DataFrame: The left-merged table, transposed. Each recipe row becomes a
                      column labelled by (diet name, parent_desc); every other
                      column of the merge except `ingred_code` becomes a row.

    Raises:
        ValueError: If `ingred_code` is missing from the diet sheet or from `nutrients_df`.
    """
    diet_df = read_sheet(file_paths[diet_name])

    # The join key has to exist on both sides; check the diet sheet first.
    if "ingred_code" not in diet_df.columns:
        raise ValueError(f"Column 'ingred_code' not found in {diet_name} dataset.")

    if "ingred_code" not in nutrients_df.columns:
        raise ValueError("Column 'ingred_code' not found in nutrients dataset.")

    labelled = (
        diet_df
        .merge(nutrients_df, on="ingred_code", how="left")  # keep every recipe row
        .assign(Diet=diet_name)                             # tag rows with the diet
        .set_index(["Diet", "parent_desc"])
        .drop(columns=["ingred_code"])
    )

    return labelled.transpose()

In [None]:
# Example: nutritional table for the frozen diet
nutritional_info_df = get_diet_nutritional_info("frozen", nutrients_df)

# Display the first 10 rows; the column labels carry the diet name and food description
nutritional_info_df.head(10)

Liquid Solution
==============

In [None]:
liquid_recipes = pd.read_csv("Wilbur Atwater min_cost_data - liquid_recipes.csv")
nutrition = pd.read_csv("Wilbur Atwater min_cost_data - nutrients.csv")

# from fndds diet problem: normalize weights so each recipe's ingredient weights sum to 1.
liquid_recipes['ingred_wt'] = liquid_recipes['ingred_wt']/liquid_recipes.groupby(['parent_foodcode'])['ingred_wt'].transform("sum")

# extend the recipes data frame with the nutrient profile (per 100g) of each ingredient
liquid_df = liquid_recipes.merge(nutrition, how="left", on="ingred_code")

# Scale every nutrient column by the ingredient's share of the recipe.
# The identifier columns are numeric codes, so select_dtypes picks them up too;
# they must be excluded, otherwise parent_foodcode is multiplied by the weight
# and the groupby below no longer groups ingredients of the same recipe together.
non_nutrient_cols = {"ingred_wt", "parent_foodcode", "ingred_code"}
numeric_cols = [col for col in liquid_df.select_dtypes(include=["number"]).columns
                if col not in non_nutrient_cols]
liquid_df[numeric_cols] = liquid_df[numeric_cols].mul(liquid_df["ingred_wt"], axis=0)

# sum nutrients of food codes (over the multiple ingredients), keeping one description per recipe
liquid_df = liquid_df.groupby('parent_foodcode').agg({**{col: "sum" for col in numeric_cols},
                                        "parent_desc": "first"})

liquid_df.index.name = "recipe_id"

# recipe_id -> human-readable name, used later to relabel the price table
food_names = liquid_df["parent_desc"]
print(food_names.head())
liquid_df.head()

In [None]:
# Index prices by (year, food code) so one survey wave can be sliced out.
prices_liquid = (
    prices_df.loc[:, ["parent_foodcode", "year", "price"]]
    .set_index(["year", "parent_foodcode"])
)
print(prices_liquid.index.levels[0])

# Keep only the latest wave and discard recipes without a price.
prices_liquid = (
    prices_liquid
    .xs("2017/2018", level="year")
    .dropna(subset="price")
)

# Restrict both tables to the recipes they have in common, in the same order.
common_recipes = liquid_df.index.intersection(prices_liquid.index)
liquid_df, prices_liquid = liquid_df.loc[common_recipes], prices_liquid.loc[common_recipes]

# Relabel the prices with the food names; nutrients become the rows of the matrix.
prices_liquid = prices_liquid.set_axis(prices_liquid.index.map(food_names), axis=0)
A_liquid_all = liquid_df.transpose()


print(f"We have prices for {prices_liquid.shape[0]} unique recipes (FNDDS food codes)")

In [None]:
# Demographic group to plan for: any column of the rda dataframe.
'''
select from 
['Child_1_3', 'Female_4_8', 'Male_4_8', 'Female_9_13', 'Male_9_13', 
'Female_14_18', 'Male_14_18','Female_19_30', 'Male_19_30', 
'Female_31_50', 'Male_31_50', 'Female_51U', 'Male_51U']
'''
group = "Female_19_30"

# Lower bounds are the RDA/AI rows for this group, upper bounds the UL rows.
bmin = rda[group][rda['Constraint Type'].isin(['RDA', 'AI'])]
bmax = rda[group][rda['Constraint Type'].isin(['UL'])]

# Keep only the nutrient rows that appear in the bounds (all-missing rows are dropped).
Amin = A_liquid_all.reindex(bmin.index).dropna(how='all')
Amax = A_liquid_all.reindex(bmax.index).dropna(how='all')

# Stack minimum rows on top of the negated maximum rows.
A = pd.concat([Amin, -Amax])
b = pd.concat([bmin, -bmax])

# Shape report, one "name.shape=(...)" line per object.
print("\n".join(
    f"{name}.shape={frame.shape!r}"
    for name, frame in {
        "bmin": bmin,
        "Amin": Amin,
        "bmax": bmax,
        "Amax": Amax,
        "b": b,
        "A": A,
        "prices_liquid": prices_liquid,
    }.items()
))

In [None]:
# `group` is deliberately NOT re-assigned here: A and b were built for the group
# chosen in the constraint cell, and overriding it would only change the label
# printed below, not the problem being solved.
tol = 1e-6

# Minimise total cost; the constraint matrix and bounds are passed negated
# because the solver expects "<=" constraints while ours are "A x >= b".
result = lp(prices_liquid, -A, -b, method="highs")

# On failure the solver may leave result.x / result.fun unset, which would only
# show up as a confusing formatting error below, so fail with its own message.
if not result.success:
    raise RuntimeError(f"No diet could be computed for {group}: {result.message}")

print(f"Cost of diet for {group} is ${result.fun:.2f} per day.")
diet = pd.Series(result.x,index=prices_liquid.index)

# Only report foods that enter the diet in a non-negligible quantity.
print("\nYou'll be eating (in 100s of grams or milliliters):")
print(round(diet[diet >= tol], 2))

In [None]:
# A stacks the minimum rows on top of the *negated* maximum rows, so the raw
# product A @ diet is negative for the upper-limit nutrients. Take absolute
# values, as is done for b, so both columns are on the same positive scale and
# binding upper limits can be detected by comparing them.
tab = pd.DataFrame({"Outcome":np.abs((A.to_numpy()@diet.to_numpy()).astype(float)),"Recommendation":np.abs(b)})
print("\nWith the following nutritional outcomes of interest:")
print(tab)

In [None]:
print("\nConstraining nutrients are:")
# Column-wise difference of `tab`: the second column is Recommendation minus Outcome.
excess = tab.diff(axis=1).iloc[:,1]
# A nutrient is reported when that gap is smaller than `tol` in absolute value.
print(excess.loc[np.abs(excess) < tol].index.tolist())

Fresh Diet Solution
===================

In [None]:
fresh_recipes = pd.read_csv("Wilbur Atwater min_cost_data - fresh_recipes.csv")
nutrition = pd.read_csv("Wilbur Atwater min_cost_data - nutrients.csv")

# from fndds diet problem: normalize weights so each recipe's ingredient weights sum to 1.
fresh_recipes['ingred_wt'] = fresh_recipes['ingred_wt']/fresh_recipes.groupby(['parent_foodcode'])['ingred_wt'].transform("sum")

# extend the recipes data frame with the nutrient profile (per 100g) of each ingredient
fresh_df = fresh_recipes.merge(nutrition, how="left", on="ingred_code")

# Scale every nutrient column by the ingredient's share of the recipe.
# The identifier columns are numeric codes, so select_dtypes picks them up too;
# they must be excluded, otherwise parent_foodcode is multiplied by the weight
# and the groupby below no longer groups ingredients of the same recipe together.
non_nutrient_cols = {"ingred_wt", "parent_foodcode", "ingred_code"}
numeric_cols = [col for col in fresh_df.select_dtypes(include=["number"]).columns
                if col not in non_nutrient_cols]
fresh_df[numeric_cols] = fresh_df[numeric_cols].mul(fresh_df["ingred_wt"], axis=0)

# sum nutrients of food codes (over the multiple ingredients), keeping one description per recipe
fresh_df = fresh_df.groupby('parent_foodcode').agg({**{col: "sum" for col in numeric_cols},
                                        "parent_desc": "first"})

fresh_df.index.name = "recipe_id"

# recipe_id -> human-readable name, used later to relabel the price table
food_names_2 = fresh_df["parent_desc"]
print(food_names_2.head())
fresh_df.head()

In [None]:
# Latest-wave prices indexed by food code, without the unpriced recipes.
prices_fresh = (
    prices_df.loc[:, ["parent_foodcode", "year", "price"]]
    .set_index(["year", "parent_foodcode"])
    .xs("2017/2018", level="year")
    .dropna(subset="price")
)

# Restrict both tables to the recipes they have in common, in the same order.
common_recipes = fresh_df.index.intersection(prices_fresh.index)
fresh_df, prices_fresh = fresh_df.loc[common_recipes], prices_fresh.loc[common_recipes]

# Relabel the prices with the food names; nutrients become the rows of the matrix.
prices_fresh = prices_fresh.set_axis(prices_fresh.index.map(food_names_2), axis=0)
A_fresh_all = fresh_df.transpose()

print(f"We have prices for {prices_fresh.shape[0]} unique recipes (FNDDS food codes)")

In [None]:
# pick a demographic (column from rda dataframe); select from
# ['Child_1_3', 'Female_4_8', 'Male_4_8', 'Female_9_13', 'Male_9_13',
#  'Female_14_18', 'Male_14_18', 'Female_19_30', 'Male_19_30',
#  'Female_31_50', 'Male_31_50', 'Female_51U', 'Male_51U']
group = "Female_19_30"

# Build the bounds for `group` in this cell instead of reusing whatever
# bmin/bmax an earlier cell left behind, so that changing `group` here
# actually changes the problem being solved.
bmin = rda.loc[rda['Constraint Type'].isin(['RDA', 'AI']), group]
bmax = rda.loc[rda['Constraint Type'].isin(['UL']), group]

# reindex ensures we only keep nutrients in bmin/bmax
Amin = A_fresh_all.reindex(bmin.index).dropna(how='all')
Amax = A_fresh_all.reindex(bmax.index).dropna(how='all')

# minimum rows stacked on top of the negated maximum rows
b = pd.concat([bmin, -bmax])
A_fresh = pd.concat([Amin, -Amax])

# shape report for a quick consistency check
print(f"{bmin.shape=}")
print(f"{Amin.shape=}")
print(f"{bmax.shape=}")
print(f"{Amax.shape=}")
print(f"{b.shape=}")
# report the matrix built in this cell (previously printed the liquid-diet `A`)
print(f"{A_fresh.shape=}")
print(f"{prices_fresh.shape=}")

In [None]:
# NOTE: `group` and `tol` are assigned here but not read anywhere in this cell.
group = 'Female_19_30'
tol = 1e-6

# Solve the fresh-diet problem with prices_fresh as the cost vector; the
# constraint matrix and bounds are passed negated. Assumes `lp` is
# scipy.optimize.linprog (constraints of the form A_ub @ x <= b_ub) — TODO confirm.
result = lp(prices_fresh, -A_fresh, -b, method="highs")
# Bare last expression so the notebook displays the solver result.
result

Frozen Diet
===========

In [None]:
frozen_recipes = pd.read_csv("Wilbur Atwater min_cost_data - frozen_recipes.csv")
nutrition = pd.read_csv("Wilbur Atwater min_cost_data - nutrients.csv")

# from fndds diet problem: normalize weights so each recipe's ingredient weights sum to 1.
frozen_recipes['ingred_wt'] = frozen_recipes['ingred_wt']/frozen_recipes.groupby(['parent_foodcode'])['ingred_wt'].transform("sum")

# extend the recipes data frame with the nutrient profile (per 100g) of each ingredient
frozen_df = frozen_recipes.merge(nutrition, how="left", on="ingred_code")

# Scale every nutrient column by the ingredient's share of the recipe.
# The identifier columns are numeric codes, so select_dtypes picks them up too;
# they must be excluded, otherwise parent_foodcode is multiplied by the weight
# and the groupby below no longer groups ingredients of the same recipe together.
non_nutrient_cols = {"ingred_wt", "parent_foodcode", "ingred_code"}
numeric_cols = [col for col in frozen_df.select_dtypes(include=["number"]).columns
                if col not in non_nutrient_cols]
frozen_df[numeric_cols] = frozen_df[numeric_cols].mul(frozen_df["ingred_wt"], axis=0)

# sum nutrients of food codes (over the multiple ingredients), keeping one description per recipe
frozen_df = frozen_df.groupby('parent_foodcode').agg({**{col: "sum" for col in numeric_cols},
                                        "parent_desc": "first"})

frozen_df.index.name = "recipe_id"

# recipe_id -> human-readable name, used later to relabel the price table
food_names_2 = frozen_df["parent_desc"]
print(food_names_2.head())
frozen_df.head()

In [None]:
# Latest-wave prices indexed by food code, without the unpriced recipes.
prices_frozen = (
    prices_df.loc[:, ["parent_foodcode", "year", "price"]]
    .set_index(["year", "parent_foodcode"])
    .xs("2017/2018", level="year")
    .dropna(subset="price")
)

# Restrict both tables to the recipes they have in common, in the same order.
common_recipes = frozen_df.index.intersection(prices_frozen.index)
frozen_df, prices_frozen = frozen_df.loc[common_recipes], prices_frozen.loc[common_recipes]

# Relabel the prices with the food names; nutrients become the rows of the matrix.
prices_frozen = prices_frozen.set_axis(prices_frozen.index.map(food_names_2), axis=0)
A_frozen_all = frozen_df.transpose()

print(f"We have prices for {prices_frozen.shape[0]} unique recipes (FNDDS food codes)")

In [None]:
# pick a demographic (column from rda dataframe); select from
# ['Child_1_3', 'Female_4_8', 'Male_4_8', 'Female_9_13', 'Male_9_13',
#  'Female_14_18', 'Male_14_18', 'Female_19_30', 'Male_19_30',
#  'Female_31_50', 'Male_31_50', 'Female_51U', 'Male_51U']
group = "Female_19_30"

# Build the bounds for `group` in this cell instead of reusing whatever
# bmin/bmax an earlier cell left behind, so that changing `group` here
# actually changes the problem being solved.
bmin = rda.loc[rda['Constraint Type'].isin(['RDA', 'AI']), group]
bmax = rda.loc[rda['Constraint Type'].isin(['UL']), group]

# reindex ensures we only keep nutrients in bmin/bmax
Amin = A_frozen_all.reindex(bmin.index).dropna(how='all')
Amax = A_frozen_all.reindex(bmax.index).dropna(how='all')

# minimum rows stacked on top of the negated maximum rows
b = pd.concat([bmin, -bmax])
A_frozen = pd.concat([Amin, -Amax])

# shape report for a quick consistency check
print(f"{bmin.shape=}")
print(f"{Amin.shape=}")
print(f"{bmax.shape=}")
print(f"{Amax.shape=}")
print(f"{b.shape=}")
# report the matrix built in this cell (previously printed the liquid-diet `A`)
print(f"{A_frozen.shape=}")
print(f"{prices_frozen.shape=}")

In [None]:
# NOTE: `group` and `tol` are assigned here but not read anywhere in this cell.
group = 'Female_19_30'
tol = 1e-6

# Solve the frozen-diet problem with prices_frozen as the cost vector; the
# constraint matrix and bounds are passed negated. Assumes `lp` is
# scipy.optimize.linprog (constraints of the form A_ub @ x <= b_ub) — TODO confirm.
result = lp(prices_frozen, -A_frozen, -b, method="highs")
# Bare last expression so the notebook displays the solver result.
result

Carnivorous Solution
==================

In [None]:
carn_recipes = pd.read_csv("Wilbur Atwater min_cost_data - carnivore_recipes.csv")
nutrition = pd.read_csv("Wilbur Atwater min_cost_data - nutrients.csv")

# from fndds diet problem: normalize weights so each recipe's ingredient weights sum to 1.
carn_recipes['ingred_wt'] = carn_recipes['ingred_wt']/carn_recipes.groupby(['parent_foodcode'])['ingred_wt'].transform("sum")

# extend the recipes data frame with the nutrient profile (per 100g) of each ingredient
carn_df = carn_recipes.merge(nutrition, how="left", on="ingred_code")

# Scale every nutrient column by the ingredient's share of the recipe.
# The identifier columns are numeric codes, so select_dtypes picks them up too;
# they must be excluded, otherwise parent_foodcode is multiplied by the weight
# and the groupby below no longer groups ingredients of the same recipe together.
non_nutrient_cols = {"ingred_wt", "parent_foodcode", "ingred_code"}
numeric_cols = [col for col in carn_df.select_dtypes(include=["number"]).columns
                if col not in non_nutrient_cols]
carn_df[numeric_cols] = carn_df[numeric_cols].mul(carn_df["ingred_wt"], axis=0)

# sum nutrients of food codes (over the multiple ingredients), keeping one description per recipe
carn_df = carn_df.groupby('parent_foodcode').agg({**{col: "sum" for col in numeric_cols},
                                        "parent_desc": "first"})

carn_df.index.name = "recipe_id"

# recipe_id -> human-readable name, used later to relabel the price table
food_names_2 = carn_df["parent_desc"]
print(food_names_2.head())
carn_df.head()

In [None]:
# Latest-wave prices indexed by food code, without the unpriced recipes.
prices_carn = (
    prices_df.loc[:, ["parent_foodcode", "year", "price"]]
    .set_index(["year", "parent_foodcode"])
    .xs("2017/2018", level="year")
    .dropna(subset="price")
)

# Restrict both tables to the recipes they have in common, in the same order.
common_recipes = carn_df.index.intersection(prices_carn.index)
carn_df, prices_carn = carn_df.loc[common_recipes], prices_carn.loc[common_recipes]

# Relabel the prices with the food names; nutrients become the rows of the matrix.
prices_carn = prices_carn.set_axis(prices_carn.index.map(food_names_2), axis=0)
A_carn_all = carn_df.transpose()

print(f"We have prices for {prices_carn.shape[0]} unique recipes (FNDDS food codes)")