# Optimal Trader Joe's Grocery List

In [None]:
pip install pulp --quiet

In [None]:
import pandas as pd
from pulp import LpProblem, LpVariable, LpMinimize, lpSum, LpStatus

## Loading Data

In [None]:
# Load nutrition_prices.csv (path is relative to the working directory) into a DataFrame.
data = pd.read_csv('nutrition_prices.csv')
# Bare expression: rendered as the cell's output when run in a notebook.
data

In [None]:
# Print the frame summary (columns, non-null counts, dtypes) to see what needs cleaning.
data.info()

## Cleaning Data

In [None]:
# Columns to discard before modelling — presumably index columns left behind by
# earlier CSV round-trips (TODO confirm against the data source).
columns_to_drop = ['Unnamed: 0.1', 'Unnamed: 0']
# errors="ignore" keeps this cell working if the CSV is regenerated without
# one of these artefact columns; columns that are present are still dropped.
nutrition_data_cleaned = data.drop(columns=columns_to_drop, errors="ignore")

In [None]:
# Force these columns to numeric dtype in one pass; any entry that cannot be
# parsed as a number becomes NaN (errors='coerce').
numeric_columns = ['dietary_fiber', 'sugars', 'potassium']
nutrition_data_cleaned[numeric_columns] = nutrition_data_cleaned[numeric_columns].apply(
    pd.to_numeric, errors='coerce'
)

# Optional de-duplication by item, currently disabled:
# nutrition_data_cleaned = nutrition_data_cleaned.drop_duplicates(['item']) ## Drop Dups

In [None]:
# Rows missing a value in any of these columns are removed entirely
# (the remaining nutrient columns are zero-filled in a later cell instead).
critical_columns = ['calories', 'protein', 'retail_price', 'calcium', 'iron', 'potassium']
nutrition_data_cleaned = nutrition_data_cleaned.dropna(subset=critical_columns)

In [None]:
# Re-check non-null counts and dtypes after dropping incomplete rows.
nutrition_data_cleaned.info()

In [None]:
# Missing values in these nutrient columns are treated as zero.
columns_to_check = ['total_fat', 'total_carbohydrates', 'vitamin_d', 'sodium', 'dietary_fiber', 'sugars']
# Assign the filled columns back instead of calling fillna(inplace=True) on a
# column selection: that chained in-place form is deprecated in pandas 2.x and
# leaves the frame unchanged once copy-on-write is enabled.
nutrition_data_cleaned[columns_to_check] = nutrition_data_cleaned[columns_to_check].fillna(0)
# NOTE(review): 'cholesterol' and 'saturated_fat' are used as constraint
# coefficients in the model but are neither zero-filled here nor listed in
# critical_columns — confirm they contain no missing values.

In [None]:
# Keep only items whose calories, protein and price are all strictly positive.
strictly_positive = (
    nutrition_data_cleaned[['calories', 'protein', 'retail_price']].gt(0).all(axis=1)
)
nutrition_data_cleaned = nutrition_data_cleaned[strictly_positive]

In [None]:
# Bare expression: shows the cleaned frame as the cell's output in a notebook.
nutrition_data_cleaned

## Optimization Model

#### Model

In [None]:
# Create the PuLP problem; LpMinimize means the objective added later is minimised.
model = LpProblem("Optimal_Grocery_List", LpMinimize)

#### Binary Decision Variables

In [None]:
# One binary variable per remaining row, keyed by the row's index label and
# named "x_<label>": 1 means the item is bought, 0 means it is not.
x = LpVariable.dicts("x", nutrition_data_cleaned.index, cat="Binary")

#### Objective Function to Minimize Cost

In [None]:
# Objective: the summed retail price of every selected item.
model += (
    lpSum(
        price * x[row_label]
        for row_label, price in nutrition_data_cleaned['retail_price'].items()
    ),
    "Total Cost",
)

#### Constraints

In [None]:
# Right-hand-side limits for the model's constraints.
# As applied in the constraint cell: calories, protein, carbohydrate, fiber and
# vitamin_d are minimum totals (>=); every *_max entry and budget are maximum
# totals (<=).
# NOTE(review): units are whatever the CSV columns use (presumably kcal / g / mg
# and dollars) — confirm against the data.
constraints = {
    "calories": 2200,
    "protein": 60,
    "fat_max": 80,
    "carbohydrate": 250,
    "sodium_max": 3000,
    "fiber": 20,
    "sugar_max": 60,
    "cholesterol_max": 300,
    "saturated_fat_max": 20,
    "vitamin_d": 10,
    "budget": 250
}

In [None]:
# Every rule has the same shape: the total of one data column over the
# selected items, compared against a limit taken from `constraints`.
# Each row is (data column, sense, key in `constraints`, constraint name),
# listed in the order the constraints are registered with the model.
constraint_specs = [
    ("calories", ">=", "calories", "Calorie_Constraint"),
    ("protein", ">=", "protein", "Protein_Constraint"),
    ("total_fat", "<=", "fat_max", "Fat_Constraint"),
    ("total_carbohydrates", ">=", "carbohydrate", "Carbohydrate_Constraint"),
    ("sodium", "<=", "sodium_max", "Sodium_Constraint"),
    ("dietary_fiber", ">=", "fiber", "Fiber_Constraint"),
    ("sugars", "<=", "sugar_max", "Sugar_Constraint"),
    ("cholesterol", "<=", "cholesterol_max", "Cholesterol_Constraint"),
    ("saturated_fat", "<=", "saturated_fat_max", "Saturated_Fat_Constraint"),
    ("vitamin_d", ">=", "vitamin_d", "Vitamin_D_Constraint"),
    ("retail_price", "<=", "budget", "Budget_Constraint"),
]

for column, sense, limit_key, constraint_name in constraint_specs:
    column_total = lpSum(
        nutrition_data_cleaned.loc[row_label, column] * x[row_label]
        for row_label in nutrition_data_cleaned.index
    )
    limit = constraints[limit_key]
    if sense == ">=":
        model += column_total >= limit, constraint_name
    else:
        model += column_total <= limit, constraint_name

#### Model Solver

In [None]:
# Solve with PuLP's default solver, then print the status name looked up from
# the numeric status code (e.g. "Optimal").
model.solve()
print("Model Status:", LpStatus[model.status])

In [None]:
# Titles of the items chosen by the solver.
# Solvers report binary variables as floats (e.g. 0.9999999), and value() is
# None for a variable that has not been assigned, so "> 0.5" is used as the
# selection test instead of exact equality with 1.
selected_items = [
    nutrition_data_cleaned.loc[i, 'item_title']
    for i in nutrition_data_cleaned.index
    if (x[i].value() or 0) > 0.5
]
print("Selected Items:")
for item in selected_items:
    print(item)

In [None]:
# Total price of the chosen items.
# "> 0.5" rather than "== 1": solver output for a binary variable is a float
# that may be marginally off 1, and value() is None when unassigned.
total_cost = sum(
    nutrition_data_cleaned.loc[i, 'retail_price']
    for i in nutrition_data_cleaned.index
    if (x[i].value() or 0) > 0.5
)
print(f"Total Cost: ${total_cost:.2f}")

## Top K Solutions (Solution Pool)

In [None]:
# Enumerate up to K grocery lists in order of increasing cost. Each list is
# printed and written to grocery_lists.txt, which is created or overwritten.
import os

solution_count = 0  # lists written so far (previously `iter`, which shadowed the builtin)
K = 5 # fetch top K solutions

# Remove a cost cut left behind by an earlier run of this cell; otherwise a
# re-run would report a more expensive list as the optimal one.
if 'OptimalSol' in model.constraints:
    del model.constraints['OptimalSol']

# The context manager closes the file even if solving or formatting raises.
with open('grocery_lists.txt', 'w') as grocery_lists:
    while solution_count < K:  # only get top K
        model.solve()
        # Stop as soon as no further solution satisfies the constraints
        if LpStatus[model.status] != "Optimal":
            break

        # Row labels of the selected items. "> 0.5" rather than "== 1" because
        # solver output is a float that may be marginally off 1, and value()
        # is None for an unassigned variable.
        chosen = [i for i in nutrition_data_cleaned.index if (x[i].value() or 0) > 0.5]

        # Gather info to save
        selected_items = [nutrition_data_cleaned.loc[i, 'item_title'] for i in chosen]
        if solution_count == 0:
            optimal_item_list = selected_items # store the optimal grocery list to compare alternatives with
        total_cost = sum(nutrition_data_cleaned.loc[i, 'retail_price'] for i in chosen)
        # Macros
        Calories = sum(nutrition_data_cleaned.loc[i, 'calories'] for i in chosen)
        Protein = sum(nutrition_data_cleaned.loc[i, 'protein'] for i in chosen)
        Fat = sum(nutrition_data_cleaned.loc[i, 'total_fat'] for i in chosen)
        Carbohydrates = sum(nutrition_data_cleaned.loc[i, 'total_carbohydrates'] for i in chosen)
        macros = f'MACROS: Calories:{Calories:.0f} | Protein:{Protein:.0f} | Fat:{Fat:.0f} | Carbs:{Carbohydrates:.0f}'

        # Build the text for this solution
        if solution_count == 0:
            title = 'OPTIMAL GROCERY LIST'
            optimal_list = f'{title}: Total Cost: ${total_cost:.2f}\n' + f'{macros} \n-' + '\n-'.join(selected_items) + '\n\n'
        else:
            title = f'ALTERNATIVE GROCERY LIST {solution_count}'
            # Sorted so the report is reproducible from run to run
            removed_items = sorted(set(optimal_item_list) - set(selected_items))
            added_items = sorted(set(selected_items) - set(optimal_item_list))
            optimal_list = f'{title}: Total Cost: ${total_cost:.2f}\n' + f'{macros} \n-' + '\n-'.join(selected_items) + f'\nItems removed: {removed_items}'+ f'\nItems added: {added_items}' + '\n\n'

        grocery_lists.write(optimal_list)
        print(optimal_list)

        # Replace the previous cost cut with one that forces the next solve to
        # cost at least one cent more than this solution.
        # NOTE(review): this also skips any different list with the same total cost.
        if 'OptimalSol' in model.constraints:
            del model.constraints['OptimalSol']
        model += lpSum(nutrition_data_cleaned.loc[i, 'retail_price'] * x[i] for i in nutrition_data_cleaned.index) >= total_cost+0.01, "OptimalSol"
        solution_count += 1

# The location of the solutions is given to the user
print("Solutions Written to grocery_lists.txt")