In [127]:
import pandas as pd
import cvxpy as cp

In [128]:
# CVXPY version: https://kevintcarlberg.net/files/opt_class_icme/7_convexity.pdf

In [129]:
pd.options.display.width = 180

In [130]:
df = pd.read_csv('./data/clean_fearless_flyer_with_interest.csv')

In [198]:
cols = ['id', 'name', 'cost_per_serving', 'calories', 'total_fat', 'protein', 'total_carbs', 'sodium', 'sugars']
macro_nutrient_cols = ['total_fat', 'protein', 'total_carbs']

In [199]:
tdf = df.loc[~df[cols].isnull().any(axis=1), cols].copy()

In [200]:
# tdf = tdf[tdf.interest==1]

In [201]:
tdf['protein'] = tdf.protein * 4
tdf['total_carbs'] = tdf.total_carbs * 4
tdf['total_fat'] = tdf.total_fat * 9
# tdf['added_sugars'] = tdf.added_sugars * 4

In [202]:
tdf['computed_calories'] = tdf[macro_nutrient_cols].sum(axis=1)

In [203]:
tdf.head()

Unnamed: 0,id,name,cost_per_serving,calories,total_fat,protein,total_carbs,sodium,sugars,computed_calories
27,4652,For the Love of Chocolate Cake,1.33,350.0,180.0,16.0,176.0,200.0,31.0,372.0
28,4653,Pancake Bread,0.49875,190.0,90.0,8.0,100.0,230.0,15.0,198.0
29,4654,Pizza Crusts,0.43625,150.0,22.5,20.0,112.0,330.0,1.0,154.5
30,4655,Sprouted Wheat Sourdough,0.1995,90.0,0.0,28.0,56.0,170.0,1.0,84.0
32,4657,Cold Pressed Green Juice,3.99,100.0,0.0,20.0,80.0,320.0,11.0,100.0


## The Problem

We want to minimize some aspect of our diet (how much we spend, how much salt we intake) subject to some other constraints (like nutritional composition of the food, a budget, etc). We have
 - n: number of products
 - m: number of nutrition dimensions
 - c: a vector of costs for each food
 - A: an n x m matrix of products
 
We're looking for x, which is a vector of length n with components representing the amount of each product we'll include that both satisfies the constraints and minimizes the overall cost


In [204]:
def create_constraints(A, x, diet):
    """
    Takes a list of diet parameters and 
    returns a list of cvxpy constraint 
    expressions
    """
    min_cals = diet[0]
    max_cals = diet[1]
    fat = diet[2]
    protein = diet[3]
    carbs = diet[4]
    max_servings = diet[5]
    
    return [
        A[:, 0]*x >= min_cals,
        A[:, 0]*x <= max_cals,
        A[:, 1]*x >= min_cals * fat,
        A[:, 1]*x <= max_cals * fat,
        A[:, 2]*x >= min_cals * protein,
        A[:, 2]*x <= max_cals * protein,
        A[:, 3]*x >= min_cals * carbs,
        A[:, 3]*x <= max_cals * carbs,
        x >= 0,
        x <= max_servings
    ]

In [205]:
def create_variables(df, cols, objective_col):
    """
    Takes a dataframe with response information and
    returns a set of variables and a problem
    """
    n = df.shape[0]
    m = len(cols)
    c = df[objective_col].values
    A = df[cols].values
    
    return n, m, c, A

In [206]:
def create_problem(n, m, c, A, diet):
    """
    Takes a set of variables and a diet and
    returns a vector and a problem
    """
    x = cp.Variable(n, integer=True)
    obj = cp.Minimize(c.T*x)
    constr = create_constraints(A, x, diet)
    problem = cp.Problem(obj, constr)
    
    return x, problem

In [207]:
def create_summary(df, cols):
    """
    Takes a dataframe with serving information
    and returns a dataframe of servings and costs
    and total cost
    """
    servings = df.copy()
    for col in cols[1:]:
        servings.loc[:, col] = servings[col] * servings.servings
    servings['cost'] = servings.cost_per_serving * servings.servings
    total_cost = servings.cost.sum()
    return servings, total_cost

### Minimizing Cost in Line with a Diet

In [208]:
diets = {
    'Balanced': [2200, 2500, .3, .3, .4, 3],
    'Keto': [2200, 2500, .7, .24, .01, 3], # high fat and protein
    'Atkins': [2200, 2500, .45, .45, .1, 3], # low low carb
    # Low sodium
    # Max protein given an amount of money to spend budget
    # Minimize sugar for gary taubs constrained by a given amount
    # Maximize fibre
}

In [209]:
def optimize_diets(tdf, cols):
    df = tdf.copy()
    n, m, c, A = create_variables(df, cols[1:], 'cost_per_serving')
    
    for diet, diet_params in diets.items():
        print(diet)
        print()        
        # Create the problem
        x, problem = create_problem(n, m, c, A, diet_params)
        # Solve the problem
        opt_value = problem.solve(verbose=True)
        
        print("Problem status: {}".format(problem.status))
        print()
        
        # Show the summary
        df['servings'] = x.value
        servings, total_cost = create_summary(df[df.servings > 0].copy(), cols)
        
        print(servings.sort_values('servings', ascending=False))
        print()
        print(servings[cols].sum())
        print()
        print("% of calories")
        print(servings[cols].sum() / servings.computed_calories.sum() * 100)
        print()
        print("Total cost: ${}".format(round(total_cost, 2)))
        print()
        print()

In [210]:
diet_cols = ['cost_per_serving', 'computed_calories', 'total_fat', 'protein', 'total_carbs']

In [211]:
optimize_diets(tdf, diet_cols)

Keto

Problem status: optimal

       id                           name  cost_per_serving  calories  total_fat  protein  total_carbs  sodium  sugars  computed_calories  servings    cost
152  4787             Unexpected Cheddar          0.570000     120.0      270.0     84.0          0.0   170.0     0.0              354.0       3.0  1.7100
528  5185              Cornish Game Hens          0.886667     220.0      432.0    228.0          0.0    70.0     0.0              660.0       3.0  2.6600
36   4661          French Fromage Slices          0.623750     100.0      162.0     40.0          0.0   125.0     0.0              202.0       2.0  1.2475
106  4737            Pesto Alla Genovese          0.830000     250.0      468.0     24.0         24.0   840.0     0.0              516.0       2.0  1.6600
409  5061  Organic Grass Fed Ground Beef          1.497500     240.0      306.0    168.0          0.0    75.0     0.0              474.0       2.0  2.9950

cost_per_serving        4.407917
compu

### Keto + Minimum Added Sugars

In [212]:
def optimize_keto_sugar(df, cols):
    n, m, c, A = create_variables(df, cols[1:], 'cost_per_serving')
    
    print('Keto + Minimum Added Sugar')
    print()

    # Variable, constraints, and problem
    x = cp.Variable(n, integer=True)
    obj = cp.Minimize(c.T*x)
    constr = create_constraints(A, x, diets['Keto']) + [A[:, 4]*x <= 1]
    problem = cp.Problem(obj, constr)

    # Solve the problem and show the output
    opt_value = problem.solve(verbose=True)
    df['servings'] = x.value
    servings, total_cost = create_summary(df[df.servings > 0].copy(), cols)

    print(servings)
    print()
    print(servings[cols].sum())
    print()
    print("% of calories")
    print(servings[cols].sum() / servings.computed_calories.sum() * 100)
    print()
    print("Total cost: ${}".format(round(total_cost, 2)))
    print()

In [213]:
keto_sugar_cols = ['cost_per_serving', 'computed_calories', 'total_fat', 'protein', 'total_carbs', 'sugars']

In [214]:
optimize_keto_sugar(tdf.copy(), keto_sugar_cols)

Keto + Minimum Added Sugar

       id                           name  cost_per_serving  calories  total_fat  protein  total_carbs  sodium  sugars  computed_calories  servings    cost
36   4661          French Fromage Slices          0.623750     100.0      162.0     40.0          0.0   125.0     0.0              202.0       2.0  1.2475
106  4737            Pesto Alla Genovese          0.830000     250.0      468.0     24.0         24.0   840.0     0.0              516.0       2.0  1.6600
152  4787             Unexpected Cheddar          0.570000     120.0      270.0     84.0          0.0   170.0     0.0              354.0       3.0  1.7100
409  5061  Organic Grass Fed Ground Beef          1.497500     240.0      306.0    168.0          0.0    75.0     0.0              474.0       2.0  2.9950
528  5185              Cornish Game Hens          0.886667     220.0      432.0    228.0          0.0    70.0     0.0              660.0       3.0  2.6600

cost_per_serving        4.407917
computed

### Minimize Sodium with Max Cost of $8 / day

In [215]:
def create_sodium_constraints(A, x, diet):
    """
    Takes a list of diet parameters and 
    returns a list of cvxpy constraint 
    expressions
    """
    min_cals = diet[0]
    max_cost = diet[1]
    fat = diet[2]
    protein = diet[3]
    carbs = diet[4]
    sugars = diet[5]
    max_servings = diet[6]
    
    return [
        A[:, 0]*x >= min_cals,
        A[:, 1]*x <= max_cost,
        A[:, 2]*x >= min_cals * fat,
        A[:, 3]*x >= min_cals * protein,
        A[:, 4]*x >= min_cals * carbs,
        A[:, 5]*x <= sugars,        
        x >= 0,
        x <= max_servings
    ]

In [216]:
def optimize_sodium(df, cols):
    n, m, c, A = create_variables(df, cols[1:], 'sodium')
    
    print('Keto + Minimum Added Sugar')
    print()
    
    # Variable, constraints, and problem
    x = cp.Variable(n, integer=True)
    obj = cp.Minimize(c.T*x)
    constr = create_sodium_constraints(A, x, [2200, 8, .3, .3, .4, 10, 3])
    problem = cp.Problem(obj, constr)

    # Solve the problem and show the output
    opt_value = problem.solve(verbose=True)
    df['servings'] = x.value
    servings, total_cost = create_summary(df[df.servings > 0].copy(), cols)

    print(servings)
    print()
    print(servings[cols].sum())
    print()
    print("% of calories")
    print(servings[cols].sum() / servings.computed_calories.sum() * 100)
    print()
    print("Total cost: ${}".format(round(total_cost, 2)))
    print()

In [217]:
min_sodium_cols = ['sodium', 'computed_calories', 'cost_per_serving', 'total_fat', 'protein', 'total_carbs', 'sugars']

In [218]:
optimize_sodium(tdf.copy(), min_sodium_cols)

Keto + Minimum Added Sugar

       id                               name  cost_per_serving  calories  total_fat  protein  total_carbs  sodium  sugars  computed_calories  servings      cost
30   4655           Sprouted Wheat Sourdough          0.598500      90.0        0.0     84.0        168.0   170.0     3.0              252.0       3.0  1.795500
55   4683                   Malabari Paratha          0.597000     120.0       94.5     36.0        240.0   120.0     3.0              370.5       3.0  1.791000
68   4698          Organic High Protein Tofu          1.494000     130.0      189.0    168.0         36.0    15.0     0.0              393.0       3.0  4.482000
81   4711   Organic Blue Corn Tortilla Chips          0.747500     130.0      243.0     36.0        204.0    40.0     0.0              483.0       3.0  2.242500
128  4760                       Banana Chips          0.186250     160.0       81.0      4.0         76.0     0.0     3.0              161.0       1.0  0.186250
136  4