In [2]:
import pandas as pd
import cvxpy as cp

In [3]:
# CVXPY version: https://kevintcarlberg.net/files/opt_class_icme/7_convexity.pdf

In [4]:
pd.options.display.width = 180

In [67]:
df = pd.read_csv('../data/clean_flyer_data3.csv')

In [104]:
# Various sets of columns
cols = ['name', 'cost_per_serving', 'total_fat', 'protein', 'total_carbs', 'sodium', 'sugars', 'fiber']
vcols = cols + ['vegan']
gfcols = cols + ['gluten_free']
kcols = cols + ['kosher']
macro_nutrient_cols = ['total_fat', 'protein', 'total_carbs']

In [171]:
tdf[tdf.calories == 130]

Unnamed: 0,name,cost_per_serving,total_fat,protein,total_carbs,sodium,sugars,fiber,calories
226,Organic Grass-Fed Uncured Beef Hot Dogs,1.198,90.0,36.0,4.0,310.0,0.0,0.0,130.0
353,Cinnamon Rugelach,0.399,54.0,4.0,72.0,210.0,12.0,0.0,130.0


In [105]:
# List for any foods we want to omit
# exclude_list = ['Organic High Protein Tofu']
exclude_list = []

In [106]:
# subset the data by that which isn't empy
tdf = df.loc[~df[cols].isnull().any(axis=1), cols].copy()
vdf = df.loc[(~df[vcols].isnull().any(axis=1)) & (df.vegan==1), vcols].copy()
gfdf = df.loc[(~df[vcols].isnull().any(axis=1)) & (df.gluten_free==1), gfcols].copy()
kdf = df.loc[(~df[vcols].isnull().any(axis=1)) & (df.kosher==1), kcols].copy()

In [107]:
# Convert grams to calories
tdf['protein'] = tdf.protein * 4
tdf['total_carbs'] = tdf.total_carbs * 4
tdf['total_fat'] = tdf.total_fat * 9

vdf['protein'] = vdf.protein * 4
vdf['total_carbs'] = vdf.total_carbs * 4
vdf['total_fat'] = vdf.total_fat * 9

In [108]:
# Compute the calories from the macronutrients
tdf['calories'] = tdf[macro_nutrient_cols].sum(axis=1)
vdf['calories'] = vdf[macro_nutrient_cols].sum(axis=1)
gfdf['calories'] = gfdf[macro_nutrient_cols].sum(axis=1)
kdf['calories'] = kdf[macro_nutrient_cols].sum(axis=1)

In [109]:
# tdf = tdf[tdf.interest == 1]
# tdf = tdf[[c for c in cols if c != 'calories'] + ['computed_calories']]
tdf = tdf[~tdf.name.isin(exclude_list)]

In [88]:
from tabulate import tabulate

In [110]:
print(tabulate(tdf.head(), tablefmt="pipe", headers="keys"))

|    | name                           |   cost_per_serving |   total_fat |   protein |   total_carbs |   sodium |   sugars |   fiber |   calories |
|---:|:-------------------------------|-------------------:|------------:|----------:|--------------:|---------:|---------:|--------:|-----------:|
|  0 | For the Love of Chocolate Cake |            1.33    |       180   |        16 |           176 |      200 |       31 |       3 |      372   |
|  1 | Pancake Bread                  |            0.49875 |        90   |         8 |           100 |      230 |       15 |       0 |      198   |
|  2 | Pizza Crusts                   |            0.43625 |        22.5 |        20 |           112 |      330 |        1 |       1 |      154.5 |
|  3 | Sprouted Wheat Sourdough       |            0.1995  |         0   |        28 |            56 |      170 |        1 |       2 |       84   |
|  5 | Cold Pressed Green Juice       |            3.99    |         0   |        20 |            80 |      320 

## The Problem

We want to minimize some aspect of our diet (how much we spend, how much salt we intake) subject to some other constraints (like nutritional composition of the food, a budget, etc). We have
 - n: number of products
 - m: number of nutrition dimensions
 - c: a vector of costs for each food
 - A: an n x m matrix of products
 
We're looking for x, which is a vector of length n with components representing the amount of each product we'll include that both satisfies the constraints and minimizes the overall cost


In [90]:
def create_constraints(A, x, diet):
    """
    Takes a list of diet parameters and 
    returns a list of cvxpy constraint 
    expressions
    """
    min_cals = diet[0]
    max_cals = diet[1]
    fat = diet[2]
    protein = diet[3]
    carbs = diet[4]
    max_servings = diet[5]
    
    return [
        A[:, 0]*x >= min_cals,
        A[:, 0]*x <= max_cals,
        A[:, 1]*x >= min_cals * fat,
        A[:, 1]*x <= max_cals * fat,
        A[:, 2]*x >= min_cals * protein,
        A[:, 2]*x <= max_cals * protein,
        A[:, 3]*x >= min_cals * carbs,
        A[:, 3]*x <= max_cals * carbs,
        x >= 0,
        x <= max_servings
    ]

In [91]:
def create_variables(df, cols, objective_col):
    """
    Takes a dataframe with response information and
    returns a tuple of variables and a problem
    """
    n = df.shape[0]
    c = df[objective_col].values
    A = df[cols].values
    
    return n, c, A

In [178]:
def create_problem(n, c, A, diet):
    """
    Takes a set of variables and a diet and
    returns a vector and a problem
    """
    x = cp.Variable(n, integer=True)
    obj = cp.Minimize(c.T*x)
    constr = create_constraints(A, x, diet)
    problem = cp.Problem(obj, constr)
    
    return x, problem

In [184]:
def create_summary(df, cols):
    """
    Takes a dataframe with serving information
    and returns a dataframe of servings and costs
    and total cost
    """
    for col in cols:
        df.loc[:, col] = df[col] * df.optimal_servings
    df['cost'] = df.cost_per_serving * df.optimal_servings
    total_cost = df.cost.sum()
    return df, total_cost

### Minimizing Cost in Line with a Diet

In [189]:
diets = {
    'Balanced': [2200, 2800, .3, .3, .4, 4],
    'Keto': [2200, 2800, .7, .24, .01, 4], # high fat and protein
    'Atkins': [2200, 3200, .45, .45, .1, 4] # low carb
}

In [190]:
def optimize_diets(df, cols, diet, diet_config):
    n, c, A = create_variables(df, cols[1:], 'cost_per_serving')

    print(diet)
    print()
    # Create the problem
    x, problem = create_problem(n, c, A, diet_config)
    # Solve the problem
    opt_value = problem.solve(verbose=True, solver='GLPK_MI')

    print("Problem status: {}".format(problem.status))
    print()

    # Show the summary
    df['optimal_servings'] = x.value
    # Add info about the dual solution here
    # df['dual'] = problem.constraints[0].dual_value
    servings, total_cost = create_summary(df[df.optimal_servings > 0].copy(), cols[1:])

    print(tabulate(servings.sort_values('optimal_servings', ascending=False), tablefmt="pipe", headers="keys"))
    print()
    print(servings[cols[1:]].sum())
    print()
    print("% of calories")
    print(servings[cols].sum() / servings.calories.sum() * 100)
    print()
    print("Total cost: ${}".format(round(total_cost, 2)))
    print()
    print()

In [191]:
diet_cols = ['cost_per_serving', 'calories', 'total_fat', 'protein', 'total_carbs']

In [192]:
for diet, diet_params in diets.items():
    optimize_diets(tdf.copy(), diet_cols, diet, diet_params)

Balanced

Problem status: optimal

|     | name                                              |   cost_per_serving |   total_fat |   protein |   total_carbs |   sodium |   sugars |   fiber |   calories |   optimal_servings |     cost |
|----:|:--------------------------------------------------|-------------------:|------------:|----------:|--------------:|---------:|---------:|--------:|-----------:|-------------------:|---------:|
|   3 | Sprouted Wheat Sourdough                          |           0.1995   |         0   |       112 |           224 |      170 |        1 |       2 |      336   |                  4 | 0.798    |
|  41 | Organic High Protein Tofu                         |           0.498    |       252   |       224 |            48 |       15 |        0 |       0 |      524   |                  4 | 1.992    |
| 370 | Pumpkin Pancake Mixes, Gluten-Full or Gluten-Free |           0.165833 |         0   |        64 |           576 |      380 |       10 |       1 |      640  

### Minimize Sodium with Max Cost of $N / day

In [143]:
def create_sodium_constraints(A, x, diet):
    """
    Takes a list of diet parameters and 
    returns a list of cvxpy constraint 
    expressions
    """
    min_cals = diet[0]
    max_cost = diet[1]
    fat = diet[2]
    protein = diet[3]
    carbs = diet[4]
    sugars = diet[5]
    max_servings = diet[6]
    
    return [
        A[:, 0]*x >= min_cals,
        A[:, 1]*x <= max_cost,
        A[:, 2]*x >= min_cals * fat,
        A[:, 3]*x >= min_cals * protein,
        A[:, 4]*x >= min_cals * carbs,
        A[:, 5]*x <= sugars,        
        x >= 0,
        x <= max_servings
    ]

In [144]:
def optimize_sodium(df, cols, constr_config):
    n, c, A = create_variables(df, cols[1:], 'sodium')
    
    print('Minimize sodium with Cost Constraint')
    print()
    print(constr_config)
    
    # Variable, constraints, and problem
    x = cp.Variable(n, integer=True)
    obj = cp.Minimize(c.T*x)
    constr = create_sodium_constraints(A, x, constr_config)
    problem = cp.Problem(obj, constr)

    # Solve the problem and show the output
    opt_value = problem.solve(verbose=True)
    df['servings'] = x.value
#     df['dual'] = problem.constraints[0].dual_value
    servings, total_cost = create_summary(df[df.servings > 0].copy(), cols)
    
#     for constr in problem.constraints:
#         print(constr.dual_value)
    
    print(servings)
    print()
    print(servings[cols[1:]].sum())
    print()
    print("% of calories")
    print(servings[cols].sum() / servings.computed_calories.sum() * 100)
    print()
    print("Total cost: ${}".format(round(total_cost, 2)))
    print()

In [145]:
min_sodium_cols = ['sodium', 'computed_calories', 'cost_per_serving', 'total_fat', 'protein', 'total_carbs', 'sugars']

In [146]:
sodium_constr_config = [2400, 8, .3, .3, .4, 200, 7]

In [147]:
configs = [sodium_constr_config + [c] for c in range(1, 10)]

In [65]:
optimize_sodium(tdf.copy(), min_sodium_cols, sodium_constr_config)

Minimize sodium with Cost Constraint

[2400, 8, 0.3, 0.3, 0.4, 200, 7]
                                 name  cost_per_serving  total_fat  protein  total_carbs  sodium  sugars  fiber  computed_calories  servings       cost
30           Sprouted Wheat Sourdough          0.798000        0.0    112.0        224.0   170.0     4.0    2.0              336.0       4.0   3.192000
68          Organic High Protein Tofu          3.486000      441.0    392.0         84.0    15.0     0.0    0.0              917.0       7.0  24.402000
81   Organic Blue Corn Tortilla Chips          0.996667      324.0     48.0        272.0    40.0     0.0    2.0              644.0       4.0   3.986667
128                      Banana Chips          0.931250      405.0     20.0        380.0     0.0    15.0    2.0              805.0       5.0   4.656250
528                 Cornish Game Hens          1.773333      288.0    152.0          0.0    70.0     0.0    0.0              440.0       2.0   3.546667

computed_calorie

In [82]:
# for config in configs:
#     optimize_sodium(tdf.copy(), min_sodium_cols, config)