# Project 2: Subsistence Diets

In [133]:
from  scipy.optimize import linprog as lp

import pandas as pd
import numpy as np
import warnings
import requests

### [A] Dietary Reference Intakes Function

Write a function that takes as arguments the characteristics of a person (e.g., age, sex) and returns a `pandas.Series' of Dietary Reference Intakes (DRI's) or "Recommended Daily Allowances" (RDA) of a variety of nutrients appropriate for your population of interest.

In [98]:
rda = pd.read_csv("rda.csv", index_col = 0)

options = ['Child_1_3', 'Female_4_8', 'Male_4_8', 'Female_9_13', 'Male_9_13', 'Female_14_18', 'Male_14_18', 'Female_19_30', 'Male_19_30', 'Female_31_50', 'Male_31_50', 'Female_51U', 'Male_51U']

bmin = rda.loc[rda['Constraint Type'].isin(['RDA', 'AI'])]
bmax = rda.loc[rda['Constraint Type'].isin(['UL'])]

In [99]:
def dietary_ref_intake(age = 20,sex = "Female", data = rda):
    """Takes in age (integer) and sex (string), and returns a Series of dietary reference intakes for the chosen population, you can optionally use a different data frame, the min or max RDAs"""

    if age <= 3:
        col = 'Child_1_3'
    if sex in ["M", "male", "m"]:
        sex = "Male"
    elif sex in ["F", "f", "female"]:
        sex = "Female"
    if age <= 3:
        col = 'Child_1_3'
    elif age >= 51:
        col = sex + "_51U" 
    else:
        age_ranges = [(4,8),(9,13),(14,18),(19,30),(31,50),(51,100)]
        for age_range in age_ranges:
            if age >= age_range[0] and age <= age_range[1]:
                col = sex + '_' + str(age_range[0]) + '_' + str(age_range[1])
    return pd.Series(data[col])  

#### Examples

In [100]:
dietary_ref_intake(age=22,sex='M')

Nutrient
Energy            2400.0
Protein             56.0
Carbohydrate       130.0
Dietary Fiber       33.6
Linoleic Acid       17.0
Linolenic Acid       1.6
Calcium           1000.0
Iron                 8.0
Magnesium          400.0
Phosphorus         700.0
Potassium         4700.0
Sodium            2300.0
Zinc                11.0
Copper               0.9
Selenium            55.0
Vitamin A          900.0
Vitamin E           15.0
Vitamin D           15.0
Vitamin C           90.0
Thiamin              1.2
Riboflavin           1.3
Niacin              16.0
Vitamin B6           1.3
Vitamin B12          2.4
Choline            550.0
Vitamin K          120.0
Folate             400.0
Energy            3100.0
Name: Male_19_30, dtype: float64

In [101]:
dietary_ref_intake(age=80,sex='F', data = bmax)

Nutrient
Sodium    2300.0
Energy    3100.0
Name: Female_51U, dtype: float64

### [A] Data on Prices for Different Foods

Construct a google spreadsheet of the prices of different food products for each diet (frozen food diet, meat diet, fresh food diet, liquid diet, and canned-food diet)

In [102]:
# Define file paths again if they are not available
file_paths = {
    "carnivore": "~/Documents/GitHub/Project2_EEP153/Wilbur Atwater min_cost_data - carnivore_recipes.csv",
    "canned": "~/Documents/GitHub/Project2_EEP153/Wilbur Atwater min_cost_data - canned_recipes.csv",
    "frozen": "~/Documents/GitHub/Project2_EEP153/Wilbur Atwater min_cost_data - frozen_recipes.csv",
    "fresh": "~/Documents/GitHub/Project2_EEP153/Wilbur Atwater min_cost_data - fresh_recipes.csv",
    "liquid": "~/Documents/GitHub/Project2_EEP153/Wilbur Atwater min_cost_data - liquid_recipes.csv",
    "prices": "~/Documents/GitHub/Project2_EEP153/Wilbur Atwater min_cost_data - prices.csv"
}


# Function to read a dataset
def read_sheet(file_path):
    df = pd.read_csv(file_path, index_col=False)
    df = df.iloc[:, :7].dropna(subset=['parent_foodcode'])
    df = df.reset_index(drop=True)
    return df

# Load prices dataset
prices_df = pd.read_csv(file_paths["prices"])
prices_df['parent_foodcode'] = prices_df['parent_foodcode'].astype(int)  # Convert type for merging

# Function to merge price with a given diet dataset
def read_and_merge_with_prices(diet_name):
    df = read_sheet(file_paths[diet_name])  # Read the diet dataset
    df['parent_foodcode'] = df['parent_foodcode'].astype(int)  # Ensure data type matches for merging
    merged_df = df.merge(prices_df, on="parent_foodcode", how="left")  # Left join to include all diet rows
    return merged_df

# Now run the function without errors
frozen_diet_with_prices = read_and_merge_with_prices("frozen")


In [103]:
#Example of merged diet and price
frozen_diet_with_prices.head()

Unnamed: 0,parent_foodcode,parent_desc,ingred_code,ingred_desc,ingred_wt,year,mod_code,method,method_description,nhanes,price
0,11460150,"Yogurt, frozen, NS as to flavor, lowfat milk",1298,"Yogurt, frozen, flavors other than chocolate, ...",100.0,2013/2014,,2.0,Links to altEC,Extra,0.335298
1,11460160,"Yogurt, frozen, chocolate, lowfat milk",1117,"Yogurt, plain, low fat, 12 grams protein per 8...",81.8,2011/2012,0.0,1.0,Links to FNDDS,,0.27658
2,11460160,"Yogurt, frozen, chocolate, lowfat milk",1117,"Yogurt, plain, low fat, 12 grams protein per 8...",81.8,2013/2014,,1.0,Links to FNDDS,Extra,0.296941
3,11460160,"Yogurt, frozen, chocolate, lowfat milk",1117,"Yogurt, plain, low fat, 12 grams protein per 8...",81.8,2015/2016,,1.0,Links to FNDDS,Extra,0.301143
4,11460160,"Yogurt, frozen, chocolate, lowfat milk",19166,"Cocoa, dry powder, unsweetened, processed with...",5.2,2011/2012,0.0,1.0,Links to FNDDS,,0.27658


### [A] Nutritional Content of Different Foods

Write a function that describes the nutritional content for each diet.

In [104]:
def read_nutrients():
    """
    Reads the nutrients dataset and ensures column formatting is correct.
    """
    nutrients_df = pd.read_csv("~/Documents/GitHub/Project2_EEP153/Wilbur Atwater min_cost_data - nutrients.csv", index_col=False)

    # Strip any spaces from column names to avoid merge issues
    nutrients_df.columns = nutrients_df.columns.str.strip()

    # Print columns for debugging
    print("Nutrients dataset columns:", nutrients_df.columns)

    return nutrients_df

# Load the nutrients dataset once
nutrients_df = read_nutrients()

Nutrients dataset columns: Index(['ingred_code', 'Ingredient description', 'Capric acid', 'Lauric acid',
       'Myristic acid', 'Palmitic acid', 'Palmitoleic acid', 'Stearic acid',
       'Oleic acid', 'Linoleic Acid', 'Linolenic Acid', 'Stearidonic acid',
       'Eicosenoic acid', 'Arachidonic acid', 'Eicosapentaenoic acid',
       'Erucic acid', 'Docosapentaenoic acid', 'Docosahexaenoic acid',
       'Butyric acid', 'Caproic acid', 'Caprylic acid', 'Alcohol', 'Caffeine',
       'Calcium', 'Carbohydrate', 'Carotene, alpha', 'Carotene, beta',
       'Cholesterol', 'Choline', 'Copper', 'Cryptoxanthin, beta', 'Energy',
       'Fatty acids, total monounsaturated',
       'Fatty acids, total polyunsaturated', 'Fatty acids, total saturated',
       'Dietary Fiber', 'Folate, DFE', 'Folate, food', 'Folate', 'Folic acid',
       'Iron', 'Lutein + zeaxanthin', 'Lycopene', 'Magnesium', 'Niacin',
       'Phosphorus', 'Potassium', 'Protein', 'Retinol', 'Riboflavin',
       'Selenium', 'Sodium', '

In [105]:
def get_diet_nutritional_info(diet_name, nutrients_df):
    """
    Fetches nutrient information for foods in a specified diet by using the ingred_code
    and merging with an existing nutrients dataset.

    Parameters:
        diet_name (str): The diet category (e.g., "frozen", "canned").
        nutrients_df (pd.DataFrame): The dataset containing nutrient information for each ingred_code.

    Returns:
        pd.DataFrame: Nutritional content for the diet, formatted with:
                      - Rows as nutrients (e.g., Protein, Zinc, Water).
                      - Columns as food items in the diet.
    """
    # Read the specific diet dataset
    diet_df = read_sheet(file_paths[diet_name])  

    # Ensure `ingred_code` exists in both datasets
    if "ingred_code" not in diet_df.columns:
        raise ValueError(f"Column 'ingred_code' not found in {diet_name} dataset.")
    
    if "ingred_code" not in nutrients_df.columns:
        raise ValueError("Column 'ingred_code' not found in nutrients dataset.")

    # Merge diet data with nutrient information using `ingred_code`
    merged_df = diet_df.merge(nutrients_df, on="ingred_code", how="left")

    # Add a column for the diet name
    merged_df["Diet"] = diet_name  

    # Pivot the table: Rows = Nutrients, Columns = Food Items
    nutrient_table = merged_df.set_index(["Diet", "parent_desc"]).drop(columns=["ingred_code"]).T

    return nutrient_table

In [106]:
##example for frozen
nutritional_info_df = get_diet_nutritional_info("frozen", nutrients_df)

# Show first 10 rows to verify the diet name
nutritional_info_df.head(10)

Diet,frozen,frozen,frozen,frozen,frozen,frozen,frozen,frozen,frozen,frozen,frozen,frozen,frozen,frozen,frozen,frozen,frozen,frozen,frozen,frozen,frozen
parent_desc,"Yogurt, frozen, NS as to flavor, lowfat milk","Yogurt, frozen, chocolate, lowfat milk","Yogurt, frozen, chocolate, lowfat milk.1","Yogurt, frozen, chocolate, lowfat milk.2","Yogurt, frozen, flavors other than chocolate, lowfat milk","Yogurt, frozen, NS as to flavor, nonfat milk","Yogurt, frozen, NS as to flavor, nonfat milk.1","Yogurt, frozen, NS as to flavor, nonfat milk.2","Yogurt, frozen, chocolate, nonfat milk","Yogurt, frozen, flavors other than chocolate, with sorbet or sorbet-coated",...,Vegetables as ingredient in curry,Vegetables as ingredient in curry.1,Sauce as ingredient in hamburgers,Sauce as ingredient in hamburgers.1,Sauce as ingredient in hamburgers.2,Industrial oil as ingredient in food,Industrial oil as ingredient in food.1,Industrial oil as ingredient in food.2,Industrial oil as ingredient in food.3,"Coleslaw dressing, light"
parent_foodcode,11460150,11460160,11460160,11460160,11460170,11460190,11460190,11460190,11460200,11460250,...,99997810,99997810,99998130,99998130,99998130,99998210,99998210,99998210,99998210,83208000
ingred_desc,"Yogurt, frozen, flavors other than chocolate, ...","Yogurt, plain, low fat, 12 grams protein per 8...","Cocoa, dry powder, unsweetened, processed with...","Sugars, granulated","Yogurt, frozen, flavors other than chocolate, ...","Yogurt, plain, skim milk, 13 grams protein per...","Sugars, granulated","Frozen yogurts, chocolate, nonfat milk, sweete...","Frozen yogurts, chocolate, nonfat milk, sweete...","Ice creams, vanilla, light",...,"Tomatoes, red, ripe, raw, year round average","Potatoes, baked, flesh and skin, without salt","Mustard, prepared, yellow","Salad dressing, mayonnaise, regular",Catsup,"Oil, industrial, canola, high oleic","Oil, industrial, soy, low linolenic","Oil, industrial, soy, ultra low linolenic","Oil, industrial, soy, fully hydrogenated","Salad Dressing, coleslaw, reduced fat"
ingred_wt,100.0,81.8,5.2,13.0,100.0,44.0,13.0,44.0,100.0,70.0,...,15.0,25.0,10.0,30.0,60.0,25.0,25.0,25.0,25.0,100.0
Ingredient description,"Yogurt, frozen, flavors other than chocolate, ...","Yogurt, plain, low fat, 12 grams protein per 8...","Cocoa, dry powder, unsweetened, processed with...","Sugars, granulated","Yogurt, frozen, flavors other than chocolate, ...","Yogurt, plain, skim milk, 13 grams protein per...","Sugars, granulated","Frozen yogurts, chocolate, nonfat milk, sweete...","Frozen yogurts, chocolate, nonfat milk, sweete...","Ice creams, vanilla, light",...,"Tomatoes, red, ripe, raw, year round average","Potatoes, baked, flesh and skin, without salt","Mustard, prepared, yellow","Salad dressing, mayonnaise, regular",Catsup,"Oil, industrial, canola, high oleic","Oil, industrial, soy, low linolenic","Oil, industrial, soy, ultra low linolenic","Oil, industrial, soy, fully hydrogenated","Salad Dressing, coleslaw dressing, reduced fat"
Capric acid,0.069,0.044,0.0,0.0,0.069,0.005,0.0,0.018,0.018,0.117,...,0.0,0.001,0.005,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Lauric acid,0.085,0.053,0.0,0.0,0.085,0.006,0.0,0.009,0.009,0.127,...,0.0,0.004,0.002,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Myristic acid,0.262,0.163,0.02,0.0,0.262,0.019,0.0,0.071,0.071,0.471,...,0.0,0.001,0.004,0.055,0.0,0.0,0.095,0.095,0.0,0.02
Palmitic acid,0.685,0.422,3.55,0.0,0.685,0.049,0.0,0.241,0.241,1.26,...,0.02,0.022,0.117,7.909,0.01,3.621,9.815,10.187,10.005,1.96
Palmitoleic acid,0.054,0.034,0.0,0.0,0.054,0.004,0.0,0.018,0.018,0.07,...,0.001,0.001,0.008,0.088,0.001,0.191,0.0,0.095,0.0,0.08
Stearic acid,0.247,0.151,4.08,0.0,0.247,0.018,0.0,0.104,0.104,0.57,...,0.008,0.005,0.038,3.099,0.004,2.01,4.308,3.638,83.094,1.0


#### Nutrition matrix to use in solving the linear program:

In [107]:
# create lower bounds and upper bounds.
def get_b(group) :
    bmin = rda.loc[rda['Constraint Type'].isin(['RDA', 'AI']), group]
    bmax = rda.loc[rda['Constraint Type'].isin(['UL']), group]
    b = pd.concat([bmin, -bmax])
    return b

## Solutions: 
#### We found solutions for liquid, canned, and frozen diets. Carnivore and fresh diets yeilded no solution. 

# Liquid Diet Solution

In [108]:
liquid_recipes = pd.read_csv("Wilbur Atwater min_cost_data - liquid_recipes.csv")
nutrition = pd.read_csv("Wilbur Atwater min_cost_data - nutrients.csv")
# from fndds diet problem: normalize weights to percentage terms. 
liquid_recipes['ingred_wt'] = liquid_recipes['ingred_wt']/liquid_recipes.groupby(['parent_foodcode'])['ingred_wt'].transform("sum")

# we're going to extend the recipes data frame to include the nutrient profiles of its ingredients (in 100g)
liquid_df = liquid_recipes.merge(nutrition, how="left", on="ingred_code")

# multiply all nutrients per 100g of an ingredient by the weight of that ingredient in a recipe.
numeric_cols = list(liquid_df.select_dtypes(include=["number"]).columns)
numeric_cols.remove("ingred_wt")
liquid_df[numeric_cols] = liquid_df[numeric_cols].mul(liquid_df["ingred_wt"], axis=0)

# sum nutrients of food codes (over the multiple ingredients)
# python tip: one can merge dictionaries dict1 dict2 using **, that is: dict_merge = {**dict1, **dict2}. 
#The ** effectively "unpacks" the key value pairs in each dictionary
liquid_df = liquid_df.groupby('parent_foodcode').agg({**{col: "sum" for col in numeric_cols},
                                        "parent_desc": "first"})

liquid_df.index.name = "recipe_id"

food_names = liquid_df["parent_desc"]
print(food_names.head())
liquid_df.head()

recipe_id
353.309772                                   Fruit smoothie, NFS
353.903556            Fruit smoothie, with whole fruit and dairy
382.332989     Fruit smoothie, with whole fruit and dairy, ad...
1258.921519                  Yogurt parfait, low fat, with fruit
1262.662207                        Yogurt, whole milk, baby food
Name: parent_desc, dtype: object


Unnamed: 0_level_0,parent_foodcode,ingred_code,Capric acid,Lauric acid,Myristic acid,Palmitic acid,Palmitoleic acid,Stearic acid,Oleic acid,Linoleic Acid,...,"Vitamin B-12, added",Vitamin B6,Vitamin C,Vitamin D,Vitamin E,"Vitamin E, added",Vitamin K,Water,Zinc,parent_desc
recipe_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
353.309772,353.309772,30.560832,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.305814,0.0,0.0,0.0,0.0,0.0,"Fruit smoothie, NFS"
353.903556,353.903556,30.612167,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.306328,0.0,0.0,0.0,0.0,0.0,"Fruit smoothie, with whole fruit and dairy"
382.332989,382.332989,33.071245,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.330935,0.0,0.0,0.0,0.0,0.0,"Fruit smoothie, with whole fruit and dairy, ad..."
1258.921519,1258.921519,109.913989,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.099879,0.0,0.0,0.0,0.0,0.0,"Yogurt parfait, low fat, with fruit"
1262.662207,1262.662207,109.913989,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.099879,0.0,0.0,0.0,0.0,0.0,"Yogurt, whole milk, baby food"


In [109]:
prices_liquid = prices_df[["parent_foodcode", "year", "price"]]

prices_liquid = prices_liquid.set_index(["year", "parent_foodcode"])
print(prices_liquid.index.levels[0])

# we'll focus on the latest price data
prices_liquid = prices_liquid.xs("2017/2018", level="year")

# drop rows of prices where the price is "NA"
prices_liquid = prices_liquid.dropna(subset="price")
common_recipes = liquid_df.index.intersection(prices_liquid.index)

# python tip: given a list of indices, "loc" both subsets and sorts. 
liquid_df = liquid_df.loc[common_recipes]
prices_liquid = prices_liquid.loc[common_recipes]

# lets remap the price dataframe index to be the actual food names.
prices_liquid.index = prices_liquid.index.map(food_names)
A_liquid_all = liquid_df.T

print(f"We have prices for {prices_liquid.shape[0]} unique recipes (FNDDS food codes)")

Index(['2011/2012', '2013/2014', '2015/2016', '2017/2018'], dtype='object', name='year')
We have prices for 381 unique recipes (FNDDS food codes)


In [110]:
# pick a demographic (column from rda dataframe)
'''
select from 
['Child_1_3', 'Female_4_8', 'Male_4_8', 'Female_9_13', 'Male_9_13', 
'Female_14_18', 'Male_14_18','Female_19_30', 'Male_19_30', 
'Female_31_50', 'Male_31_50', 'Female_51U', 'Male_51U']
'''
group = "Female_19_30"
bmin = rda.loc[rda['Constraint Type'].isin(['RDA', 'AI']), group]
bmax = rda.loc[rda['Constraint Type'].isin(['UL']), group]

# reindex ensures we only keep nutrients in bmin/bmax
Amin = A_liquid_all.reindex(bmin.index).dropna(how='all')
Amax = A_liquid_all.reindex(bmax.index).dropna(how='all')

b_liquid = get_b(group)
A_liquid = pd.concat([Amin, -Amax])

#python tip: by typing "=" after the variable name inside the curly braces, it formats the output so we don't have to write f"variable = {variable}"
print(f"{bmin.shape=}")
print(f"{Amin.shape=}")
print(f"{bmax.shape=}")
print(f"{Amax.shape=}")
print(f"{b_liquid.shape=}")
print(f"{A_liquid.shape=}")
print(f"{prices_liquid.shape=}")

bmin.shape=(26,)
Amin.shape=(26, 381)
bmax.shape=(2,)
Amax.shape=(2, 381)
b_liquid.shape=(28,)
A_liquid.shape=(28, 381)
prices_liquid.shape=(381, 1)


In [111]:
'''
select from 
['Child_1_3', 'Female_4_8', 'Male_4_8', 'Female_9_13', 'Male_9_13', 
'Female_14_18', 'Male_14_18','Female_19_30', 'Male_19_30', 
'Female_31_50', 'Male_31_50', 'Female_51U', 'Male_51U']
'''

group = 'Female_19_30'
tol = 1e-6

result_liquid = lp(prices_liquid, -A_liquid, -b_liquid, method="highs")
result_liquid
print(f"Cost of diet for {group} is ${result_liquid.fun:.2f} per day.")
diet = pd.Series(result_liquid.x,index=prices_liquid.index)

print("\nYou'll be eating (in 100s of grams or milliliters):")
print(round(diet[diet >= tol], 2))

Cost of diet for Female_19_30 is $4.63 per day.

You'll be eating (in 100s of grams or milliliters):
Soy milk, light, chocolate                                                              20.17
Orange juice, 100%,  freshly squeezed                                                    0.72
Vegetable noodle soup, reduced sodium, canned, prepared with water or ready-to-serve     5.72
Corn oil                                                                                 0.75
Molasses                                                                                 0.06
Tea, iced, brewed, black, unsweetened                                                   11.20
Nutritional powder mix, high protein (Herbalife)                                         0.09
dtype: float64


In [112]:
tab_liquid = pd.DataFrame({"Outcome":A_liquid.to_numpy()@diet.to_numpy(),"Recommendation":np.abs(b_liquid)})
print("\nWith the following nutritional outcomes of interest:")
print(tab_liquid)


With the following nutritional outcomes of interest:
                    Outcome  Recommendation
Nutrient                                   
Energy               2000.0          2000.0
Protein            58.93592            46.0
Carbohydrate      234.71626           130.0
Dietary Fiber          28.0            28.0
Linoleic Acid      48.96892            12.0
Linolenic Acid     2.092766             1.1
Calcium         2676.872671          1000.0
Iron              18.613537            18.0
Magnesium        476.001765           310.0
Phosphorus      2098.015412           700.0
Potassium            4700.0          4700.0
Zinc               8.368895             8.0
Copper             4.877076             0.9
Selenium               55.0            55.0
Vitamin A       1588.448858           700.0
Vitamin E         18.783996            15.0
Vitamin D         25.281843            15.0
Vitamin C              75.0            75.0
Thiamin            1.342018             1.1
Riboflavin         4.6

In [113]:
print("\nConstraining nutrients are:")
excess = tab_liquid.diff(axis=1).iloc[:,1]
print(excess.loc[np.abs(excess) < tol].index.tolist())


Constraining nutrients are:
['Energy', 'Dietary Fiber', 'Potassium', 'Selenium', 'Vitamin C', 'Folate']


# Canned Diet Solution

In [114]:
can_recipes = pd.read_csv("Wilbur Atwater min_cost_data - canned_recipes.csv")
# from fndds diet problem: normalize weights to percentage terms. 
can_recipes["ingred_wt"] = pd.to_numeric(can_recipes["ingred_wt"], errors="coerce")

can_recipes['ingred_wt'] = can_recipes['ingred_wt']/can_recipes.groupby(['parent_foodcode'])['ingred_wt'].transform("sum")

# we're going to extend the recipes data frame to include the nutrient profiles of its ingredients (in 100g)
can_recipes["ingred_code"] = can_recipes["ingred_code"].astype(str)
nutrition["ingred_code"] = nutrition["ingred_code"].astype(str)
can_df = can_recipes.merge(nutrition, how="left", on="ingred_code")

# multiply all nutrients per 100g of an ingredient by the weight of that ingredient in a recipe.
numeric_cols = list(can_df.select_dtypes(include=["number"]).columns)
numeric_cols.remove("ingred_wt")
can_df[numeric_cols] = can_df[numeric_cols].mul(can_df["ingred_wt"], axis=0)

# sum nutrients of food codes (over the multiple ingredients)
# python tip: one can merge dictionaries dict1 dict2 using **, that is: dict_merge = {**dict1, **dict2}. 
#The ** effectively "unpacks" the key value pairs in each dictionary
can_df = can_df.groupby('parent_foodcode').agg({**{col: "sum" for col in numeric_cols},
                                        "parent_desc": "first"})

can_df.index.name = "recipe_id"

food_names_can = can_df["parent_desc"]
print(food_names.head())
can_df.head()

recipe_id
353.309772                                   Fruit smoothie, NFS
353.903556            Fruit smoothie, with whole fruit and dairy
382.332989     Fruit smoothie, with whole fruit and dairy, ad...
1258.921519                  Yogurt parfait, low fat, with fruit
1262.662207                        Yogurt, whole milk, baby food
Name: parent_desc, dtype: object


Unnamed: 0_level_0,Capric acid,Lauric acid,Myristic acid,Palmitic acid,Palmitoleic acid,Stearic acid,Oleic acid,Linoleic Acid,Linolenic Acid,Stearidonic acid,...,"Vitamin B-12, added",Vitamin B6,Vitamin C,Vitamin D,Vitamin E,"Vitamin E, added",Vitamin K,Water,Zinc,parent_desc
recipe_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
14710100,0.056328,0.052383,0.275363,0.812222,0.068006,0.339095,0.680682,0.061338,0.033125,0.0,...,0.0,0.028404,0.125851,0.605551,0.298006,0.0,2.45081,85.563055,0.284704,"Cheddar cheese soup, home recipe, canned or re..."
21401400,0.02,0.02,0.33,2.59,0.43,1.29,4.24,0.32,0.09,0.0,...,0.0,0.38,0.0,0.2,0.17,0.0,1.5,61.56,6.67,"Beef, roast, canned"
21416150,0.1,0.07,0.42,3.04,0.8,2.29,5.0,0.42,0.21,0.0,...,0.0,0.13,0.0,0.2,0.15,0.0,1.6,57.72,3.57,"Corned beef, canned, ready-to-eat"
22311500,0.01115,0.00595,0.0512,1.2976,0.2114,0.62335,2.64355,0.6089,0.05375,0.0,...,0.0,0.4291,0.0,0.6,0.2005,0.0,0.0,71.8895,1.8115,"Ham, smoked or cured, canned, NS as to fat eaten"
22311510,0.01115,0.00595,0.0512,1.2976,0.2114,0.62335,2.64355,0.6089,0.05375,0.0,...,0.0,0.4291,0.0,0.6,0.2005,0.0,0.0,71.8895,1.8115,"Ham, smoked or cured, canned, lean and fat eaten"


In [115]:
prices_can = prices_df[["parent_foodcode", "year", "price"]]
prices_can = prices_can.set_index(["year", "parent_foodcode"])

prices_can = prices_can.xs("2017/2018", level="year")
prices_can = prices_can.dropna(subset="price")
can_df.index = can_df.index.astype(str)
prices_can.index = prices_can.index.astype(str)

common_can_recipes = can_df.index.intersection(prices_can.index)


print(common_can_recipes)

can_df = can_df.loc[common_can_recipes]
prices_can = prices_can.loc[common_can_recipes]

prices_can.index = prices_can.index.map(food_names_can)
A_can_all = can_df.T

print(f"We have prices for {prices_can.shape[0]} unique recipes (FNDDS food codes)")

Index(['14710100', '21416150', '24198570', '25221910', '25230530', '25230540',
       '25230550', '26100180', '26101180', '26121180',
       ...
       '95312410', '95312560', '95312600', '95312700', '95313200', '95320200',
       '95320500', '95322200', '95322500', '95330100'],
      dtype='object', length=209)
We have prices for 209 unique recipes (FNDDS food codes)


In [116]:
group = "Female_19_30"

# reindex ensures we only keep nutrients in bmin/bmax
Amin = A_can_all.reindex(bmin.index).dropna(how='all')
Amax = A_can_all.reindex(bmax.index).dropna(how='all')

b = pd.concat([bmin, -bmax])
A_can = pd.concat([Amin, -Amax])

#python tip: by typing "=" after the variable name inside the curly braces, it formats the output so we don't have to write f"variable = {variable}"
print(f"{bmin.shape=}")
print(f"{Amin.shape=}")
print(f"{bmax.shape=}")
print(f"{Amax.shape=}")
print(f"{b.shape=}")
print(f"{A_can.shape=}")
print(f"{prices_can.shape=}")

bmin.shape=(26,)
Amin.shape=(26, 209)
bmax.shape=(2,)
Amax.shape=(2, 209)
b.shape=(28,)
A_can.shape=(28, 209)
prices_can.shape=(209, 1)


In [117]:
group = 'Female_19_30'
tol = 1e-6

result_canned = lp(prices_can, -A_can, -b, method="highs")
result_canned
print(f"Cost of diet for {group} is ${result_canned.fun:.2f} per day.")
diet = pd.Series(result_canned.x,index=prices_can.index)

print("\nYou'll be eating (in 100s of grams or milliliters):")
print(round(diet[diet >= tol], 2))

Cost of diet for Female_19_30 is $5.40 per day.

You'll be eating (in 100s of grams or milliliters):
Kidney beans, from canned, reduced sodium                                 2.70
Refried beans, from canned, reduced sodium                                6.02
Coconut cream, canned, sweetened                                          1.49
Orange juice, 100%, with calcium added, canned, bottled or in a carton    5.88
Nutritional powder mix, high protein (Herbalife)                          0.91
dtype: float64


In [118]:
tab = pd.DataFrame({"Outcome":A_can.to_numpy()@diet.to_numpy(),"Recommendation":np.abs(b)})
print("\nWith the following nutritional outcomes of interest:")
print(tab)


With the following nutritional outcomes of interest:
                    Outcome  Recommendation
Nutrient                                   
Energy               2000.0          2000.0
Protein           97.820619            46.0
Carbohydrate     284.221252           130.0
Dietary Fiber     38.122663            28.0
Linoleic Acid          12.0            12.0
Linolenic Acid     2.361702             1.1
Calcium         1726.516146          1000.0
Iron              33.272533            18.0
Magnesium        831.644199           310.0
Phosphorus      2433.207667           700.0
Potassium            4700.0          4700.0
Zinc              23.393584             8.0
Copper             4.022198             0.9
Selenium         125.803897            55.0
Vitamin A       1710.103308           700.0
Vitamin E         18.627736            15.0
Vitamin D              15.0            15.0
Vitamin C        272.546382            75.0
Thiamin            2.977777             1.1
Riboflavin         2.8

In [119]:
print("\nConstraining nutrients are:")
excess = tab.diff(axis=1).iloc[:,1]
print(excess.loc[np.abs(excess) < tol].index.tolist())


Constraining nutrients are:
['Energy', 'Linoleic Acid', 'Potassium', 'Vitamin D']


Fresh Diet Solution
===================

In [120]:
fresh_recipes = pd.read_csv("Wilbur Atwater min_cost_data - fresh_recipes.csv")
nutrition = pd.read_csv("Wilbur Atwater min_cost_data - nutrients.csv")
# from fndds diet problem: normalize weights to percentage terms. 
fresh_recipes['ingred_wt'] = fresh_recipes['ingred_wt']/fresh_recipes.groupby(['parent_foodcode'])['ingred_wt'].transform("sum")

# we're going to extend the recipes data frame to include the nutrient profiles of its ingredients (in 100g)
fresh_df = fresh_recipes.merge(nutrition, how="left", on="ingred_code")

# multiply all nutrients per 100g of an ingredient by the weight of that ingredient in a recipe.
numeric_cols = list(fresh_df.select_dtypes(include=["number"]).columns)
numeric_cols.remove("ingred_wt")
fresh_df[numeric_cols] = fresh_df[numeric_cols].mul(fresh_df["ingred_wt"], axis=0)

# sum nutrients of food codes (over the multiple ingredients)
# python tip: one can merge dictionaries dict1 dict2 using **, that is: dict_merge = {**dict1, **dict2}. 
#The ** effectively "unpacks" the key value pairs in each dictionary
fresh_df = fresh_df.groupby('parent_foodcode').agg({**{col: "sum" for col in numeric_cols},
                                        "parent_desc": "first"})

fresh_df.index.name = "recipe_id"

food_names_2 = fresh_df["parent_desc"]
print(food_names_2.head())
fresh_df.head()

recipe_id
11169.565415                           Tuna, fresh, coated, fried
11199.028926     Tuna, fresh, coated, baked or broiled, fat added
11725.577014    Tuna, fresh, coated, baked or broiled, no adde...
17236.529516               Fresh corn custard, Puerto Rican style
89940.382826                          Onions, from fresh, creamed
Name: parent_desc, dtype: object


Unnamed: 0_level_0,parent_foodcode,ingred_code,Capric acid,Lauric acid,Myristic acid,Palmitic acid,Palmitoleic acid,Stearic acid,Oleic acid,Linoleic Acid,...,"Vitamin B-12, added",Vitamin B6,Vitamin C,Vitamin D,Vitamin E,"Vitamin E, added",Vitamin K,Water,Zinc,parent_desc
recipe_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
11169.565415,11169.565415,0.866979,1.5e-05,4e-05,1.3e-05,0.000228,3.3e-05,0.00014,0.000276,0.000296,...,0.0,0.000124,0.0,0.0,0.000444,0.0,0.069914,0.005321,0.000508,"Tuna, fresh, coated, fried"
11199.028926,11199.028926,0.869266,1.5e-05,4e-05,1.3e-05,0.000228,3.3e-05,0.00014,0.000277,0.000297,...,0.0,0.000125,0.0,0.0,0.000445,0.0,0.070098,0.005335,0.00051,"Tuna, fresh, coated, baked or broiled, fat added"
11725.577014,11725.577014,0.910137,1.6e-05,4.2e-05,1.3e-05,0.000239,3.5e-05,0.000147,0.00029,0.000311,...,0.0,0.00013,0.0,0.0,0.000466,0.0,0.073394,0.005586,0.000534,"Tuna, fresh, coated, baked or broiled, no adde..."
17236.529516,17236.529516,2.61598,1.3e-05,1.3e-05,5.2e-05,0.000522,0.000117,0.000144,0.009179,0.004073,...,0.0,0.00047,0.027399,0.0,0.003262,0.0,0.0,0.012878,0.007176,"Fresh corn custard, Puerto Rican style"
89940.382826,89940.382826,2.441264,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000239,0.000119,"Onions, from fresh, creamed"


In [121]:
prices_fresh = prices_df[["parent_foodcode", "year", "price"]]

prices_fresh = prices_fresh.set_index(["year", "parent_foodcode"])

# we'll focus on the latest price data
prices_fresh = prices_fresh.xs("2017/2018", level="year")

# drop rows of prices where the price is "NA"
prices_fresh = prices_fresh.dropna(subset="price")
common_recipes = fresh_df.index.intersection(prices_fresh.index)

# python tip: given a list of indices, "loc" both subsets and sorts. 

fresh_df = fresh_df.loc[common_recipes]
prices_fresh = prices_fresh.loc[common_recipes]

# lets remap the price dataframe index to be the actual food names.
prices_fresh.index = prices_fresh.index.map(food_names_2)
A_fresh_all = fresh_df.T

print(f"We have prices for {prices_fresh.shape[0]} unique recipes (FNDDS food codes)")

We have prices for 7 unique recipes (FNDDS food codes)


In [122]:
# pick a demographic (column from rda dataframe)
'''
select from 
['Child_1_3', 'Female_4_8', 'Male_4_8', 'Female_9_13', 'Male_9_13', 
'Female_14_18', 'Male_14_18','Female_19_30', 'Male_19_30', 
'Female_31_50', 'Male_31_50', 'Female_51U', 'Male_51U']
'''
group = "Female_19_30"

# reindex ensures we only keep nutrients in bmin/bmax
Amin = A_fresh_all.reindex(bmin.index).dropna(how='all')
Amax = A_fresh_all.reindex(bmax.index).dropna(how='all')

b_fresh = pd.concat([bmin, -bmax])
A_fresh = pd.concat([Amin, -Amax])

#python tip: by typing "=" after the variable name inside the curly braces, it formats the output so we don't have to write f"variable = {variable}"
print(f"{bmin.shape=}")
print(f"{Amin.shape=}")
print(f"{bmax.shape=}")
print(f"{Amax.shape=}")
print(f"{b_fresh.shape=}")
print(f"{A_fresh.shape=}")
print(f"{prices_fresh.shape=}")

bmin.shape=(26,)
Amin.shape=(26, 7)
bmax.shape=(2,)
Amax.shape=(2, 7)
b_fresh.shape=(28,)
A_fresh.shape=(28, 7)
prices_fresh.shape=(7, 1)


In [123]:
group = 'Female_19_30'
tol = 1e-6

result_fresh = lp(prices_fresh, -A_fresh, -b_fresh, method="highs")
result_fresh

       message: The problem is infeasible. (HiGHS Status 8: model_status is Infeasible; primal_status is None)
       success: False
        status: 2
           fun: None
             x: None
           nit: 0
         lower:  residual: None
                marginals: None
         upper:  residual: None
                marginals: None
         eqlin:  residual: None
                marginals: None
       ineqlin:  residual: None
                marginals: None

### We do not have enough recipes to solve the fresh diet problem. 

# Frozen Diet Solution

In [124]:
frozen_recipes = pd.read_csv("Wilbur Atwater min_cost_data - frozen_recipes.csv")
nutrition = pd.read_csv("Wilbur Atwater min_cost_data - nutrients.csv")
# from fndds diet problem: normalize weights to percentage terms. 
frozen_recipes['ingred_wt'] = frozen_recipes['ingred_wt']/frozen_recipes.groupby(['parent_foodcode'])['ingred_wt'].transform("sum")

# we're going to extend the recipes data frame to include the nutrient profiles of its ingredients (in 100g)
frozen_df = frozen_recipes.merge(nutrition, how="left", on="ingred_code")

# multiply all nutrients per 100g of an ingredient by the weight of that ingredient in a recipe.
numeric_cols = list(frozen_df.select_dtypes(include=["number"]).columns)
numeric_cols.remove("ingred_wt")
frozen_df[numeric_cols] = frozen_df[numeric_cols].mul(frozen_df["ingred_wt"], axis=0)

# sum nutrients of food codes (over the multiple ingredients)
# python tip: one can merge dictionaries dict1 dict2 using **, that is: dict_merge = {**dict1, **dict2}. 
#The ** effectively "unpacks" the key value pairs in each dictionary
frozen_df = frozen_df.groupby('parent_foodcode').agg({**{col: "sum" for col in numeric_cols},
                                        "parent_desc": "first"})

frozen_df.index.name = "recipe_id"

food_names_2 = frozen_df["parent_desc"]
print(food_names_2.head())
frozen_df.head()

recipe_id
2810.663675                                   Pie, yogurt, frozen
4845.977321     Chicken and vegetable entree with rice, diet f...
14417.205754    Chicken, fried, with potatoes, vegetable, dess...
21165.685045    Chicken, fried, with potatoes, vegetable, dess...
21884.276827    Chicken in butter sauce with potatoes and vege...
Name: parent_desc, dtype: object


Unnamed: 0_level_0,parent_foodcode,ingred_code,Capric acid,Lauric acid,Myristic acid,Palmitic acid,Palmitoleic acid,Stearic acid,Oleic acid,Linoleic Acid,...,"Vitamin B-12, added",Vitamin B6,Vitamin C,Vitamin D,Vitamin E,"Vitamin E, added",Vitamin K,Water,Zinc,parent_desc
recipe_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2810.663675,2810.663675,52.632292,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.526677,0.0,0.0,0.0,0.0,0.0,"Pie, yogurt, frozen"
4845.977321,4845.977321,0.352475,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.4e-05,1.7e-05,"Chicken and vegetable entree with rice, diet f..."
14417.205754,14417.205754,2.361813,0.0,0.0,2.4e-05,0.004319,2.3e-05,0.003163,0.019814,0.0114,...,0.0,5e-06,0.000102,0.0,0.004611,0.0,0.047646,0.008464,0.0,"Chicken, fried, with potatoes, vegetable, dess..."
21165.685045,21165.685045,13.815966,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.003761,8e-06,"Chicken, fried, with potatoes, vegetable, dess..."
21884.276827,21884.276827,1.591757,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000156,7.8e-05,Chicken in butter sauce with potatoes and vege...


In [125]:
prices_frozen = prices_df[["parent_foodcode", "year", "price"]]

prices_frozen = prices_frozen.set_index(["year", "parent_foodcode"])

# we'll focus on the latest price data
prices_frozen = prices_frozen.xs("2017/2018", level="year")

# drop rows of prices where the price is "NA"
prices_frozen = prices_frozen.dropna(subset="price")
common_recipes = frozen_df.index.intersection(prices_frozen.index)

# python tip: given a list of indices, "loc" both subsets and sorts. 

frozen_df = frozen_df.loc[common_recipes]
prices_frozen = prices_frozen.loc[common_recipes]

# lets remap the price dataframe index to be the actual food names.
prices_frozen.index = prices_frozen.index.map(food_names_2)
A_frozen_all = frozen_df.T

print(f"We have prices for {prices_frozen.shape[0]} unique recipes (FNDDS food codes)")

We have prices for 164 unique recipes (FNDDS food codes)


In [126]:
# pick a demographic (column from rda dataframe)
'''
select from 
['Child_1_3', 'Female_4_8', 'Male_4_8', 'Female_9_13', 'Male_9_13', 
'Female_14_18', 'Male_14_18','Female_19_30', 'Male_19_30', 
'Female_31_50', 'Male_31_50', 'Female_51U', 'Male_51U']
'''
group = "Female_19_30"

# reindex ensures we only keep nutrients in bmin/bmax
Amin = A_frozen_all.reindex(bmin.index).dropna(how='all')
Amax = A_frozen_all.reindex(bmax.index).dropna(how='all')

b = pd.concat([bmin, -bmax])
A_frozen = pd.concat([Amin, -Amax])

#python tip: by typing "=" after the variable name inside the curly braces, it formats the output so we don't have to write f"variable = {variable}"
print(f"{bmin.shape=}")
print(f"{Amin.shape=}")
print(f"{bmax.shape=}")
print(f"{Amax.shape=}")
print(f"{b.shape=}")
print(f"{A_frozen.shape=}")
print(f"{prices_frozen.shape=}")

bmin.shape=(26,)
Amin.shape=(26, 164)
bmax.shape=(2,)
Amax.shape=(2, 164)
b.shape=(28,)
A_frozen.shape=(28, 164)
prices_frozen.shape=(164, 1)


In [127]:
group = 'Female_19_30'
tol = 1e-6

result_frozen = lp(prices_frozen, -A_frozen, -b, method="highs")
print(f"Cost of diet for {group} is ${result_frozen.fun:.2f} per day.")
diet = pd.Series(result_frozen.x,index=prices_frozen.index)

print("\nYou'll be eating (in 100s of grams or milliliters):")
print(round(diet[diet >= tol], 2))

Cost of diet for Female_19_30 is $5.80 per day.

You'll be eating (in 100s of grams or milliliters):
Frozen yogurt, chocolate                             8.06
Potato tots, frozen, baked                           0.02
Sweet potato fries, frozen, baked                    1.36
Tea, iced, brewed, black, unsweetened               11.40
Fruit juice drink, with high vitamin C               4.27
Nutritional powder mix, high protein (Herbalife)     1.14
Nutritional powder mix, protein, soy based, NFS      0.19
dtype: float64


In [128]:
tab = pd.DataFrame({"Outcome":A_frozen.to_numpy()@diet.to_numpy(),"Recommendation":np.abs(b)})
print("\nWith the following nutritional outcomes of interest:")
print(tab)


With the following nutritional outcomes of interest:
                    Outcome  Recommendation
Nutrient                                   
Energy               2000.0          2000.0
Protein           99.062407            46.0
Carbohydrate     303.252954           130.0
Dietary Fiber          28.0            28.0
Linoleic Acid          12.0            12.0
Linolenic Acid       1.6924             1.1
Calcium         1733.767522          1000.0
Iron              33.880887            18.0
Magnesium        872.455421           310.0
Phosphorus      2703.583733           700.0
Potassium            4700.0          4700.0
Zinc              25.718525             8.0
Copper             4.181898             0.9
Selenium          115.88982            55.0
Vitamin A       3052.933563           700.0
Vitamin E         22.128296            15.0
Vitamin D              15.0            15.0
Vitamin C        202.216979            75.0
Thiamin            2.951564             1.1
Riboflavin         4.2

In [129]:
print("\nConstraining nutrients are:")
excess = tab.diff(axis=1).iloc[:,1]
print(excess.loc[np.abs(excess) < tol].index.tolist())


Constraining nutrients are:
['Energy', 'Dietary Fiber', 'Linoleic Acid', 'Potassium', 'Vitamin D', 'Choline']


# Carnivorous Diet Solution

In [130]:
carn_recipes = pd.read_csv("Wilbur Atwater min_cost_data - carnivore_recipes.csv")
nutrition = pd.read_csv("Wilbur Atwater min_cost_data - nutrients.csv")
# from fndds diet problem: normalize weights to percentage terms. 
carn_recipes['ingred_wt'] = carn_recipes['ingred_wt']/carn_recipes.groupby(['parent_foodcode'])['ingred_wt'].transform("sum")

# we're going to extend the recipes data frame to include the nutrient profiles of its ingredients (in 100g)
carn_df = carn_recipes.merge(nutrition, how="left", on="ingred_code")

# multiply all nutrients per 100g of an ingredient by the weight of that ingredient in a recipe.
numeric_cols = list(carn_df.select_dtypes(include=["number"]).columns)
numeric_cols.remove("ingred_wt")
carn_df[numeric_cols] = carn_df[numeric_cols].mul(carn_df["ingred_wt"], axis=0)

# sum nutrients of food codes (over the multiple ingredients)
# python tip: one can merge dictionaries dict1 dict2 using **, that is: dict_merge = {**dict1, **dict2}. 
#The ** effectively "unpacks" the key value pairs in each dictionary
carn_df = carn_df.groupby('parent_foodcode').agg({**{col: "sum" for col in numeric_cols},
                                        "parent_desc": "first"})

carn_df.index.name = "recipe_id"

food_names_2 = carn_df["parent_desc"]
print(food_names_2.head())
carn_df.head()

recipe_id
3462.515152                   Beef burgundy
4624.304849                     Paella, NFS
4851.119509                     Oyster stew
5061.267196    Jambalaya with meat and rice
5116.786264             Paella with seafood
Name: parent_desc, dtype: object


Unnamed: 0_level_0,parent_foodcode,ingred_code,Capric acid,Lauric acid,Myristic acid,Palmitic acid,Palmitoleic acid,Stearic acid,Oleic acid,Linoleic Acid,...,"Vitamin B-12, added",Vitamin B6,Vitamin C,Vitamin D,Vitamin E,"Vitamin E, added",Vitamin K,Water,Zinc,parent_desc
recipe_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3462.515152,3462.515152,0.258368,0.0,0.0,0.0,3.8e-05,0.0,3e-05,0.00012,0.00015,...,0.0,0.000152,0.006565,0.0,0.000216,0.0,0.079401,0.000976,0.00046,Beef burgundy
4624.304849,4624.304849,0.343779,2.3e-05,2.1e-05,0.000206,0.000217,1.4e-05,6.1e-05,0.000188,0.000168,...,0.0,0.000223,0.002028,0.0,0.000223,0.0,0.136126,0.001574,0.000531,"Paella, NFS"
4851.119509,4851.119509,0.347299,6e-06,1.6e-05,5e-06,9.1e-05,1.3e-05,5.6e-05,0.000111,0.000119,...,0.0,5e-05,0.0,0.0,0.000178,0.0,0.028006,0.002132,0.000204,Oyster stew
5061.267196,5061.267196,0.377702,2.2e-05,4.3e-05,2.8e-05,0.000318,0.0,5.2e-05,8.7e-05,9.2e-05,...,0.0,0.000102,0.009248,0.0,0.001384,0.0,0.317126,0.001441,0.001143,Jambalaya with meat and rice
5116.786264,5116.786264,0.380531,2.5e-05,2.3e-05,0.000228,0.00024,1.5e-05,6.7e-05,0.000208,0.000186,...,0.0,0.000247,0.002245,0.0,0.000247,0.0,0.150678,0.001742,0.000587,Paella with seafood


In [131]:
prices_carn = prices_df[["parent_foodcode", "year", "price"]]

prices_carn = prices_carn.set_index(["year", "parent_foodcode"])

# we'll focus on the latest price data
prices_carn = prices_carn.xs("2017/2018", level="year")

# drop rows of prices where the price is "NA"
prices_carn = prices_carn.dropna(subset="price")
common_recipes = carn_df.index.intersection(prices_carn.index)

# python tip: given a list of indices, "loc" both subsets and sorts. 

carn_df = carn_df.loc[common_recipes]
prices_carn = prices_carn.loc[common_recipes]

# lets remap the price dataframe index to be the actual food names.
prices_carn.index = prices_carn.index.map(food_names_2)
A_carn_all = carn_df.T

print(f"We have prices for {prices_carn.shape[0]} unique recipes (FNDDS food codes)")

We have prices for 225 unique recipes (FNDDS food codes)


In [132]:
group = "Female_19_30"



# reindex ensures we only keep nutrients in bmin/bmax
Amin = A_carn_all.reindex(bmin.index).dropna(how='all')
Amax = A_carn_all.reindex(bmax.index).dropna(how='all')

b = pd.concat([bmin, -bmax])
A_carn = pd.concat([Amin, -Amax])

#python tip: by typing "=" after the variable name inside the curly braces, it formats the output so we don't have to write f"variable = {variable}"
print(f"{bmin.shape=}")
print(f"{Amin.shape=}")
print(f"{bmax.shape=}")
print(f"{Amax.shape=}")
print(f"{b.shape=}")
print(f"{A_carn.shape=}")
print(f"{prices_carn.shape=}")
group = 'Female_19_30'
tol = 1e-6

result = lp(prices_carn, -A_carn, -b, method="highs")
result

bmin.shape=(26,)
Amin.shape=(26, 225)
bmax.shape=(2,)
Amax.shape=(2, 225)
b.shape=(28,)
A_carn.shape=(28, 225)
prices_carn.shape=(225, 1)


       message: The problem is infeasible. (HiGHS Status 8: model_status is Infeasible; primal_status is None)
       success: False
        status: 2
           fun: None
             x: None
           nit: 19
         lower:  residual: None
                marginals: None
         upper:  residual: None
                marginals: None
         eqlin:  residual: None
                marginals: None
       ineqlin:  residual: None
                marginals: None

#### No solution for carnivore diet