In [3]:
%run imports.py

# Preprocessing

In [4]:
# Load the per-food nutrient table and the recommended daily allowance (RDA)
# table from their msgpack files.
# NOTE(review): pd.read_msgpack was deprecated in pandas 0.25 and removed in
# pandas 1.0, so this cell needs an older pandas -- confirm the pinned version.
nutrients_df, rda_df = (
    pd.read_msgpack(path) for path in ('nutrients.msg', 'rda.msg')
)

In [5]:
# Convert all measurements to their base unit (grams for masses).
def to_grams(df):
    """Return a copy of ``df`` with every non-object column rescaled to its base unit.

    The unit is read from the column name: an opening parenthesis, an optional
    one-character SI prefix, then one of ``g``, ``J``, ``L``, ``cal`` or ``IU``
    (for example ``'Calcium, Ca(mg)'`` or ``'Energy(kcal)'``). Columns without a
    prefix are passed through untouched. Column names are not changed, so a
    column still labelled ``(mg)`` holds values in grams afterwards.

    The input frame is left unmodified; earlier versions rescaled it in place,
    which silently changed any other reference to the same frame.

    Args:
        df: DataFrame whose non-object columns carry a unit in their name.

    Returns:
        A new DataFrame with the same columns and index as ``df``.

    Raises:
        ValueError: if a non-object column name does not contain exactly one
            recognisable unit, or if its prefix is not one of k, m or micro.
    """
    # Multiplier per SI prefix. The micro sign (U+00B5) and the Greek small
    # letter mu (U+03BC) look identical but are different characters; both
    # occur in the column names, so both are listed (as escapes, to survive
    # editors that normalise one into the other).
    prefix_factors = {'k': 1e3, 'm': 1e-3, '\u00b5': 1e-6, '\u03bc': 1e-6}

    def col_to_grams(col):
        # Capture the optional single prefix character that follows "(".
        si = re.findall(r'\(([^A]?)(?:g|J|L|cal|IU)', col.name)
        if len(si) != 1:
            raise ValueError(f'Bad column format: {si} ({col.name})')
        si = si[0]
        if not si:
            # Already in the base unit: return the column as-is.
            return col
        if si not in prefix_factors:
            raise ValueError(f'Unknown unit: {si} ({col.name})')
        return prefix_factors[si] * col

    converted = df.copy()
    for col in converted.select_dtypes(exclude=['object']).columns:
        converted[col] = col_to_grams(converted[col])
    return converted

# Rescale both tables to base units so their values are directly comparable.
# Caution: the column names keep their original unit labels, so running this
# cell a second time would rescale the already-converted values again.
nutrients_df, rda_df = (to_grams(frame) for frame in (nutrients_df, rda_df))

In [6]:
# Remove RDA columns that are not measured in the food nutrients DataFrame.
# The columns are listed explicitly, grouped by nutrient category.
missing_elements = [
    'Chromium(μg/d)',
    'Iodine(μg/d)',
    'Molybdenum(μg/d)',
    'Chloride(g/d)',
]
missing_vitamins = [
    'Biotin(μg/d)',
]
missing_macronutrients = [
    'Total Watera(L/d)',
]
verified_missing = missing_elements + missing_vitamins + missing_macronutrients
rda_df = rda_df.drop(verified_missing, axis='columns')

In [7]:
# Rename RDA columns to match the food nutrients column names.
# Columns whose name cannot be matched automatically are mapped by hand.
exceptions = {
    # Elements
    # Vitamins
    'Vitamin A(μg/d)a': 'Vitamin A, RAE(µg)',
    'Vitamin D(μg/d)b,c': 'Vitamin D3 (cholecalciferol)(µg)',
    'Vitamin E(mg/d)d': 'Vitamin E (alpha-tocopherol)(mg)',
    'Vitamin B6(mg/d)': 'Vitamin B-6(mg)',
    'Folate(μg/d)f': 'Folate, DFE(µg)',
    'Vitamin B12(μg/d)': 'Vitamin B-12(µg)',
    # Macronutrients
    'Total Fiber(g/d)': 'Fiber, total dietary(g)',
    'Fat(g/d)': 'Total lipid (fat)(g)',
    'Linoleic Acid(g/d)': '18:2 n-6 c,c(g)',  # Part of 'Fatty acids, total polyunsaturated(g)'
    'α-Linolenic Acid(g/d)': '18:3 n-3 c,c,c (ALA)(g)',  # Part of 'Fatty acids, total polyunsaturated(g)'
    'Proteinb(g/d)': 'Protein(g)'
}
columns = {**exceptions}
for col in rda_df.columns:
    if col in exceptions:
        continue
    # The search key is the leading run of lowercase letters/whitespace of the
    # column name, i.e. everything before the first other character.
    leading = re.findall(r'^([a-z\s]+)[^a-z]', col.lower())
    if not leading:
        # Without this guard a name that does not start with letters would
        # surface as an uninformative IndexError.
        raise ValueError(f'Cannot derive a nutrient name from column {col}')
    query = leading[0]
    # Case-insensitive substring search; exactly one nutrient column must match.
    results = [c for c in nutrients_df.columns if query in c.lower()]
    if len(results) != 1:
        raise ValueError(f'Found {results} for "{query}" in column {col}')
    columns[col] = results[0]
rda_df = rda_df.rename(columns=columns)
# Every RDA column must now exist in the food nutrients DataFrame.
unmatched = [col for col in rda_df.columns if col not in nutrients_df.columns]
assert not unmatched, f'RDA columns missing from nutrients_df: {unmatched}'

# MILP model that optimizes health, time, and cost

Possible losses:
- Time (Preparation): Waste
- Time (Shopping): Number of items
- Time (Eating): Meal weight
- Health: Recommended Dietary Allowances (Adequate Intake)
- Money: Cost

In [288]:
# Extract the RDA for the user, skipping nutrients without a value.
me = 'Males: 19–30 y'
# Fluoride is left out because it is very difficult to find in foods.
my_rda = rda_df.loc[me].dropna().drop(['Fluoride, F(µg)'], axis=0)

# Keep only the description, the weight and the nutrients being evaluated,
# treating missing nutrient measurements as zero.
nutrient_cols = my_rda.index.tolist()
pct_rda_df = nutrients_df.fillna(0.)[['Description', 'Weight(g)'] + nutrient_cols].copy()

# Divide each nutrient column by the user's RDA so that every value becomes a
# fraction of the RDA per row (per household measure, following the original note).
rda_fractions = pct_rda_df.loc[:, my_rda.index].divide(my_rda, axis=1)
pct_rda_df.loc[:, my_rda.index] = rda_fractions
pct_rda_df.sample(5)

Unnamed: 0_level_0,Description,Weight(g),"Calcium, Ca(mg)","Copper, Cu(mg)","Iron, Fe(mg)","Magnesium, Mg(mg)","Manganese, Mn(mg)","Phosphorus, P(mg)","Selenium, Se(µg)","Zinc, Zn(mg)","Potassium, K(mg)","Sodium, Na(mg)","Vitamin A, RAE(µg)","Vitamin C, total ascorbic acid(mg)",Vitamin D3 (cholecalciferol)(µg),Vitamin E (alpha-tocopherol)(mg),Vitamin K (phylloquinone)(µg),Thiamin(mg),Riboflavin(mg),Niacin(mg),Vitamin B-6(mg),"Folate, DFE(µg)",Vitamin B-12(µg),Pantothenic acid(mg),"Choline, total(mg)","Carbohydrate, by difference(g)","Fiber, total dietary(g)","18:2 n-6 c,c(g)","18:3 n-3 c,c,c (ALA)(g)",Protein(g)
NDB_NO,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1
11302,"Peas, edible-podded, frozen, unprepared",144.0,0.072,0.121111,0.36,0.0825,0.146957,0.104286,0.018182,0.053636,0.058723,0.004,0.011111,0.352222,0.0,0.0,0.0,0.071667,0.110769,0.045,0.170769,0.145,0.0,0.2074,0.0,0.079769,0.118421,0.0,0.0,0.071964
18001,"Bagels, plain, enriched, with calcium propiona...",99.0,0.217,0.141111,0.44125,0.0725,0.232174,0.14,0.387273,0.074545,0.022553,0.278667,0.0,0.0,0.0,0.492667,0.001667,0.468333,0.262308,0.279375,0.053077,0.4025,0.0,0.0806,0.026364,0.398923,0.042105,0.029235,0.015,0.186607
11284,"Onions, dehydrated flakes",5.0,0.013,0.023333,0.01,0.0125,0.03,0.021429,0.003636,0.008182,0.017234,0.000667,0.0,0.042222,0.0,0.000667,0.001667,0.020833,0.003846,0.003125,0.061538,0.02,0.0,0.0138,0.004909,0.032,0.013158,0.0,0.0,0.008036
15190,"Fish, burbot, cooked, dry heat",90.0,0.058,0.255556,0.12875,0.0925,0.35087,0.328571,0.265455,0.079091,0.099149,0.074667,0.004444,0.0,0.0,0.0,0.0,0.321667,0.119231,0.111,0.239231,0.0025,0.034583,0.0312,0.0,0.0,0.0,0.0,0.0,0.397857
5002,"Chicken, broilers or fryers, meat and skin and...",85.0,0.018,0.084444,0.19,0.045,0.023478,0.191429,0.314545,0.147273,0.034468,0.160667,0.171111,0.003333,0.0,0.0,0.0,0.08,0.162308,0.376312,0.209231,0.08,0.029583,0.1766,0.0,0.059077,0.0,0.0,0.0,0.346607


In [289]:
# Drop exotic or difficult to find foods, identified by their NDB_NO index value.
# 14016 (almond milk) is commented out below, so it is not dropped.
drop = [
    4001,   # Fat, beef tallow
    10115,  # Pork pancreas
    # 14016,  # Almond milk
    14054,  # Almond milk (chocolate)
    14019,  # Tequila sunrise
    9059,   # Breadfruit
    11218,  # Calabash
    19873,  # Frozen novelties (orange)
    14160,  # White wine duplicate
    7014,   # Liver sausage
    11237,  # Kanpyo
    16509,  # LOMA LINDA Linketts
    1096,   # Milk canned with vit D
    11163,  # Collards greens
    19874,  # Frozen novelties
    1127,   # Egg, yolk, raw, frozen, sugared, pasteurized
    18992,  # KELLOGG'S, BEANATURAL, Original 3-Bean Chips
]
# The rows are removed in place, so re-running this cell raises a KeyError
# because the labels are already gone.
pct_rda_df.drop(drop, axis='index', inplace=True)

In [290]:
# Variables.
# Foods and nutrients are addressed by position: food i is row i of pct_rda_df,
# nutrient j is entry j of my_rda (column j + 2 of pct_rda_df).
n_foods = len(pct_rda_df)
n_nutrients = len(my_rda)
max_portions = 50  # Upper bound on the integer portion count of a single food.

# The index collection is passed positionally: its keyword is spelled `indexs`
# here but was renamed to `indices` in newer PuLP releases, and the positional
# form works with both.
# purchases[i] is 1 when food i is part of the plan, 0 otherwise.
purchases = pulp.LpVariable.dicts(
    'purchases',
    range(n_foods),
    lowBound=0,
    upBound=1,
    cat='Binary')
# portions[i] is the whole number of portions of food i.
portions = pulp.LpVariable.dicts(
    'portions',
    range(n_foods),
    lowBound=0,
    upBound=max_portions,
    cat='Integer')
# rda_errors[j] bounds the absolute deviation of nutrient j from 1.0 (= 100% RDA).
rda_errors = pulp.LpVariable.dicts(
    'rda_errors',
    range(n_nutrients),
    lowBound=0,
    upBound=2,
    cat='Continuous')
# Largest of the per-nutrient errors.
max_rda_error = pulp.LpVariable(
    'max_rda_error',
    lowBound=0,
    upBound=2,
    cat='Continuous')
# Sum of the weights (column 'Weight(g)') of all chosen portions.
total_weight = pulp.LpVariable(
    'total_weight',
    lowBound=0,
    upBound=2000,
    cat='Continuous')

# Cost function.
rda_vs_weight = 0.9        # Weight of the RDA error term; the rest goes to total weight.
max_foods = 10             # Maximum number of distinct foods.
max_weight_per_food = 200  # Maximum weight of any single food, in the units of 'Weight(g)'.
prob = pulp.LpProblem('food', pulp.LpMinimize)
# Minimize a blend of the worst nutrient error and the total weight (scaled by 1e-3).
prob += rda_vs_weight * max_rda_error + (1 - rda_vs_weight) * total_weight / 1e3
# Alternative objective (not used): the mean error instead of the maximum error:
#   rda_vs_weight * pulp.lpSum(rda_errors) / len(rda_errors) + (1 - rda_vs_weight) * total_weight / 1e3

# Constraints:
# - Total weight.
prob += total_weight >= pulp.lpSum(pct_rda_df.iat[food, 1] * portions[food] for food in portions)
# - Max RDA error.
for nutrient in rda_errors:
    prob += max_rda_error >= rda_errors[nutrient]
# - No more than max_weight per food.
for food in portions:
    prob += pct_rda_df.iat[food, 1] * portions[food] <= max_weight_per_food
# - No more than max_foods.
prob += pulp.lpSum(purchases[food] for food in purchases) <= max_foods
# - A food can only have portions when it is purchased. The big-M coefficient is
#   the portion upper bound itself, which is the tightest valid value; both
#   variables are looked up by the same key rather than by parallel iteration.
for food in portions:
    prob += portions[food] <= max_portions * purchases[food]
# - RDA error is absolute error per nutrient.
for nutrient in rda_errors:
    raw_error = pulp.lpSum(pct_rda_df.iat[food, nutrient + 2] * portions[food] for food in portions) - 1.
    prob += -rda_errors[nutrient] <= raw_error
    prob += raw_error <= rda_errors[nutrient]
prob.writeLP('food.lp')

In [291]:
# Solve with the COIN_CMD solver under a 30 second limit; the options switch
# off the named solver features.
# NOTE(review): `pulp.solvers.COIN_CMD` and `maxSeconds` are the older PuLP
# spellings -- presumably `pulp.COIN_CMD(timeLimit=...)` on PuLP 2.x; confirm
# against the installed version before changing.
solver = pulp.solvers.COIN_CMD(
    maxSeconds=30,
    options=['feasibilityPump off', 'probing off', 'knapsack off', 'clique off', 'flowcover off'])
prob.solve(solver=solver)
# Name the solver status explicitly: the figures below are reported whatever the
# outcome, and with a time limit the solve may stop before proving optimality.
print(f'Solver status is {pulp.LpStatus[prob.status]}')

# Per-nutrient absolute errors, extracted once and summarized.
error_values = [rda_errors[i].value() for i in rda_errors]
print(f'Med RDA error is {np.median(error_values)}')
print(f'Avg RDA error is {np.mean(error_values)}')
print(f'Max RDA error is {max_rda_error.value()}')
print(f'Total weight is {total_weight.value()}g')

# Portion count per food, in pct_rda_df row order, and a mask of the chosen foods.
x = np.array([portions[p].value() for p in portions])
selected = x > 0
# Chosen foods with the weight of one portion.
pct_rda_df.iloc[:, :2].loc[selected]
# Weight and RDA fractions contributed by each chosen food (scaled by its portions).
pct_rda_df.iloc[:, 1:].loc[selected].multiply(x[selected], axis=0)
# Totals over all chosen foods.
pct_rda_df.iloc[:, 1:].loc[selected].multiply(x[selected], axis=0).sum(axis=0)

0

Med RDA error is 0.18291666499999998
Avg RDA error is 0.2044281917142857
Max RDA error is 0.326
Total weight is 716.3g


Unnamed: 0_level_0,Description,Weight(g)
NDB_NO,Unnamed: 1_level_1,Unnamed: 2_level_1
1097,"Milk, canned, evaporated, nonfat, with added v...",31.9
1130,"Egg, whole, cooked, omelet",15.0
2015,"Spices, curry powder",2.0
4026,"Salad dressing, mayonnaise, soybean and safflo...",13.8
4585,"Margarine-like, margarine-butter blend, soybea...",14.1
9216,"Orange peel, raw",6.0
11192,"Cowpeas (blackeyes), immature seeds, cooked, b...",165.0
15014,"Fish, cisco, smoked",28.35
15039,"Fish, herring, Atlantic, raw",28.35
19423,"Snacks, potato chips, fat-free, made with olestra",28.35


Unnamed: 0_level_0,Weight(g),"Calcium, Ca(mg)","Copper, Cu(mg)","Iron, Fe(mg)","Magnesium, Mg(mg)","Manganese, Mn(mg)","Phosphorus, P(mg)","Selenium, Se(µg)","Zinc, Zn(mg)","Potassium, K(mg)","Sodium, Na(mg)","Vitamin A, RAE(µg)","Vitamin C, total ascorbic acid(mg)",Vitamin D3 (cholecalciferol)(µg),Vitamin E (alpha-tocopherol)(mg),Vitamin K (phylloquinone)(µg),Thiamin(mg),Riboflavin(mg),Niacin(mg),Vitamin B-6(mg),"Folate, DFE(µg)",Vitamin B-12(µg),Pantothenic acid(mg),"Choline, total(mg)","Carbohydrate, by difference(g)","Fiber, total dietary(g)","18:2 n-6 c,c(g)","18:3 n-3 c,c,c (ALA)(g)",Protein(g)
NDB_NO,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1
1097,63.8,0.186,0.011111,0.0225,0.045,0.001739,0.177143,0.029091,0.052727,0.045106,0.049333,0.084444,0.008889,0.08,0.0,0.0,0.023333,0.152308,0.007,0.027692,0.015,0.006667,0.094,0.029091,0.055692,0.0,0.0,0.0,0.086071
1130,60.0,0.028,0.04,0.11,0.02,0.006957,0.142857,0.283636,0.058182,0.015319,0.061333,0.115556,0.0,0.08,0.050667,0.023333,0.016667,0.178462,0.0025,0.064615,0.06,0.018333,0.1544,0.269818,0.003077,0.0,0.0,0.0,0.113571
2015,6.0,0.03,0.08,0.1425,0.0375,0.216522,0.03,0.043636,0.024545,0.014681,0.002,0.0,0.0,0.0,0.1,0.05,0.01,0.009231,0.012188,0.004615,0.0075,0.0,0.0126,0.007091,0.025846,0.086842,0.0,0.009375,0.015536
4026,27.6,0.004,0.0,0.0175,0.0,0.0,0.011429,0.007273,0.003636,0.002128,0.104,0.026667,0.0,0.0,0.405333,0.056667,0.0,0.0,0.000125,0.123077,0.005,0.003333,0.0164,0.0,0.005692,0.0,0.0,0.0,0.005357
4585,56.4,0.004,0.004444,0.005,0.0,0.001739,0.005714,0.0,0.0,0.002553,0.269333,0.511111,0.0,0.0,0.146667,0.406667,0.003333,0.009231,0.00075,0.003077,0.0,0.0,0.0048,0.006545,0.003385,0.0,0.692,0.93,0.002857
9216,54.0,0.09,0.06,0.05625,0.0225,0.0,0.012857,0.016364,0.008182,0.024894,0.0,0.01,0.82,0.0,0.006,0.0,0.0525,0.034615,0.030375,0.076154,0.045,0.0,0.0522,0.0,0.103846,0.142105,0.0,0.0,0.014464
11192,165.0,0.211,0.243333,0.23125,0.215,0.410435,0.12,0.074545,0.154545,0.146809,0.004667,0.073333,0.04,0.0,0.024,0.365833,0.139167,0.187692,0.144687,0.082308,0.525,0.0,0.0508,0.098727,0.257923,0.215789,0.0,0.0,0.093393
15014,28.35,0.007,0.067778,0.0175,0.0125,0.002609,0.061429,0.092727,0.008182,0.01766,0.090667,0.088889,0.0,0.253333,0.004,0.0,0.010833,0.034615,0.040938,0.058462,0.0025,0.050417,0.0174,0.048909,0.0,0.0,0.0,0.0,0.082857
15039,113.4,0.064,0.115556,0.155,0.09,0.017391,0.382857,0.749091,0.101818,0.079149,0.069333,0.035556,0.008889,0.32,0.08,0.0,0.086667,0.203077,0.228,0.264615,0.03,0.646667,0.1464,0.133818,0.0,0.0,0.0,0.0,0.363571
19423,141.75,0.05,0.694444,0.425,0.25,0.473913,0.35,0.027273,0.45,0.35,0.523333,0.0,0.444444,0.0,0.0,0.0,0.3875,0.053846,0.387188,0.173077,0.3,0.0,0.278,0.1,0.708846,0.25,0.0,0.0,0.195536


Weight(g)                             716.300000
Calcium, Ca(mg)                         0.674000
Copper, Cu(mg)                          1.316667
Iron, Fe(mg)                            1.182500
Magnesium, Mg(mg)                       0.692500
Manganese, Mn(mg)                       1.131304
Phosphorus, P(mg)                       1.294286
Selenium, Se(µg)                        1.323636
Zinc, Zn(mg)                            0.861818
Potassium, K(mg)                        0.698298
Sodium, Na(mg)                          1.174000
Vitamin A, RAE(µg)                      0.945556
Vitamin C, total ascorbic acid(mg)      1.322222
Vitamin D3 (cholecalciferol)(µg)        0.733333
Vitamin E (alpha-tocopherol)(mg)        0.816667
Vitamin K (phylloquinone)(µg)           0.902500
Thiamin(mg)                             0.730000
Riboflavin(mg)                          0.863077
Niacin(mg)                              0.853750
Vitamin B-6(mg)                         0.877692
Folate, DFE(µg)     