In [None]:
# We need a more sophisticated approach to find the combination of foods
# that approximates the nutritional goals as closely as possible.
# This is closely related to the "knapsack" (subset-sum) family of problems, which are common in combinatorial optimization.

import pandas as pd
from itertools import combinations

# Read the food/serving table from the Excel workbook
file_path = 'Serving_updated2.xlsx'
df = pd.read_excel(file_path)

# Keep only rows whose serving amount is present and strictly positive.
# .copy() yields an independent frame, so columns added later do not touch df.
df_filtered = df.loc[
    df['metric_serving_amount'].notna() & df['metric_serving_amount'].gt(0)
].copy()

# Nutritional targets for the breakfast and snack meals
breakfast_goal = {
    'calories': 114.675,
    'fat': 1.875,
    'carbohydrate': 1.4,
    'protein': 17.55,
}
snack_goal = {
    'calories': 157.87,
    'fat': 5.35,
    'carbohydrate': 14.16,
    'protein': 12.62,
}

# Helper that scores one food row against a goal: lower means closer
def score_food_item(row, goal):
    """Return the summed absolute deviation of a food's per-unit nutrients from a goal.

    Each of calories, fat, carbohydrate and protein is divided by the row's
    ``metric_serving_amount`` and compared with the matching entry in *goal*
    (a missing goal entry counts as 0). When the serving amount is not
    greater than zero, every per-unit value is taken as 0.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) holding the four
            nutrient fields and ``metric_serving_amount``.
        goal: Mapping of nutrient name to target value.

    Returns:
        The sum of the four absolute differences.
    """
    # NOTE(review): the per-unit values are compared with the goal numbers as
    # given; if the goals are whole-meal totals this mixes units - confirm intent.
    serving_amount = row['metric_serving_amount']
    has_serving = serving_amount > 0

    total_deviation = 0
    for name in ('calories', 'fat', 'carbohydrate', 'protein'):
        if has_serving:
            per_unit = row[name] / serving_amount
        else:
            per_unit = 0
        total_deviation += abs(per_unit - goal.get(name, 0))
    return total_deviation

# Score every food against the breakfast goal and store it as a new column.
# score_food_item only reads the goal, so the goal dict is passed straight through.
df_filtered['breakfast_score'] = df_filtered.apply(
    score_food_item, axis=1, args=(breakfast_goal,)
)

# Same for the snack goal
df_filtered['snack_score'] = df_filtered.apply(
    score_food_item, axis=1, args=(snack_goal,)
)

# Order the foods by score, lowest (closest to the goal) first
sorted_breakfast = df_filtered.sort_values('breakfast_score')
sorted_snack = df_filtered.sort_values('snack_score')

# Function to find a combination of foods that meets the goal as closely as possible
def find_combination(sorted_df, goal, tolerance=1):
    """Search every non-empty subset of rows for the one closest to *goal*.

    Subsets are enumerated by increasing size, in row order. A subset's
    difference is the sum, over the nutrients named in *goal*, of the absolute
    gap between the subset's summed nutrient and the goal value. Each
    improvement is printed as it is found.

    The search visits up to 2**len(sorted_df) - 1 subsets, so it is only
    practical for small frames or when the early exit triggers.

    Args:
        sorted_df: DataFrame with 'calories', 'fat', 'carbohydrate' and
            'protein' columns. Row order determines enumeration order.
        goal: Mapping of nutrient name to target total. Keys must be among
            the four nutrient columns.
        tolerance: The search stops at the first subset whose difference is
            strictly below this value. Defaults to 1.

    Returns:
        The rows of the best subset found, as a DataFrame. An empty frame
        with the same columns is returned when there is nothing to choose
        from (no rows, or no subset with a comparable difference).

    Raises:
        KeyError: If *goal* names a nutrient outside the four columns.
    """
    nutrient_columns = ['calories', 'fat', 'carbohydrate', 'protein']
    column_position = {name: pos for pos, name in enumerate(nutrient_columns)}
    # Resolve each goal entry to (column position, target) once, up front.
    targets = [(column_position[name], target) for name, target in goal.items()]

    # Extract the nutrient matrix once instead of slicing a DataFrame for every
    # subset. Missing values count as 0, as they do in DataFrame.sum().
    nutrient_values = sorted_df[nutrient_columns].fillna(0).to_numpy(dtype=float)
    row_count = len(sorted_df)

    best_positions = None
    best_difference = float('inf')
    for size in range(1, row_count + 1):
        print(f"Trying {size} out of {row_count}")
        # Enumerate row positions rather than index labels, so duplicated
        # labels cannot pull extra rows into a subset.
        for positions in combinations(range(row_count), size):
            totals = nutrient_values[list(positions)].sum(axis=0)
            difference = sum(abs(totals[col] - target) for col, target in targets)

            if difference < best_difference:
                # Build the (comparatively expensive) DataFrame view only for
                # subsets that are actually reported.
                combo_df = sorted_df.iloc[list(positions)]
                total_nutrition = combo_df[nutrient_columns].sum()
                print("New best")
                print(f"combo: {combo_df}")
                print(f"total_nutrition: {total_nutrition}")
                print(f"difference: {difference}")
                best_difference = difference
                best_positions = positions
                # Close enough: stop searching and return this subset
                if difference < tolerance:
                    return combo_df

    if best_positions is None:
        # Nothing was ever selected; return an empty frame rather than crash.
        return sorted_df.iloc[0:0]
    return sorted_df.iloc[list(best_positions)]

# Run the search for each meal
print("Finding best breakfast")
best_breakfast = find_combination(sorted_breakfast, breakfast_goal)

print("Finding best snack")
best_snack = find_combination(sorted_snack, snack_goal)

# Show only the name and the four nutrient columns of each result; the bare
# tuple on the last line is what the notebook cell displays.
print("Printing results")
display_columns = ['food_name', 'calories', 'fat', 'carbohydrate', 'protein']
(best_breakfast[display_columns], best_snack[display_columns])


Finding best breakfast
Trying 1 out of 29
New best
combo:    food_barcode   food_id               food_name  metric_serving_amount  \
9  7.874208e+10  102340.0  Extra Virgin Olive Oil                   15.0   

  metric_serving_unit  serving_description_num  \
9                  ml                      1.0   

  serving_description_measurement  calories   fat  carbohydrate  ...  \
9                            tbsp     120.0  14.0             0  ...   

   potassium  saturated_fat  sodium  sugar  trans_fat  vitamin_a  vitamin_c  \
9        0.0            2.0     0.0    0.0        0.0        NaN        NaN   

   vitamin_d  breakfast_score  snack_score  
9        0.0       126.566667   181.066667  

[1 rows x 27 columns]
total_nutrition: calories        120.0
fat              14.0
carbohydrate      0.0
protein           0.0
dtype: float64
difference: 36.400000000000006
New best
combo:     food_barcode     food_id             food_name  metric_serving_amount  \
19  7.874243e+10  35617266.