# Project idea

Consider an Italian user U with a pantry $$D = \{d_1, d_2, ..., d_d\}$$ made of foods from different categories $$C = \{c_1, c_2, ..., c_c\}$$ where $$c = d$$. The pantry will be built upon asking the user a food from every category that he/she prefers or has always at home to reduce food waste.

In [239]:
import pandas as pd

df = pd.read_csv(r"..\data\raw\nutritional-facts.csv")
df.head()

Unnamed: 0,Food Name,Category Name,Calcium,Calories,Carbs,Cholesterol,Copper,Fats,Fiber,Folate,...,Vitamin D,Vitamin E,Vitamin K,Omega-3 - ALA,Omega-6 - Eicosadienoic acid,Omega-6 - Gamma-linoleic acid,Omega-3 - Eicosatrienoic acid,Omega-6 - Dihomo-gamma-linoleic acid,Omega-6 - Linoleic acid,Omega-6 - Arachidonic acid
0,Apple,Fruits,0.006,52.0,14.0,0.0,3e-05,0.17,2.4,3e-06,...,0.0,0.00018,2.2e-06,,,,,,,
1,Apricot,Fruits,0.013,48.0,11.0,0.0,8e-05,0.39,2.0,9e-06,...,0.0,0.00089,3.3e-06,,,,,,,
2,Dried fruit,Fruits,0.055,241.0,63.0,0.0,0.00034,0.51,7.3,1e-05,...,0.0,0.0043,3.1e-06,,,,,,,
3,Avocado,Fruits,0.012,160.0,8.5,0.0,0.00019,15.0,6.7,8.1e-05,...,0.0,0.0021,2.1e-05,0.11,0.0,0.02,,,,
4,Banana,Fruits,0.005,89.0,23.0,0.0,8e-05,0.33,2.6,2e-05,...,0.0,0.0001,5e-07,,,,,,,


In [240]:
print("Columns:", df.columns)
categories = df["Category Name"].unique()
print("This dataset contains", len(categories), "categories of food.\nThey are:", categories)

Columns: Index(['Food Name', 'Category Name', 'Calcium', 'Calories', 'Carbs',
       'Cholesterol', 'Copper', 'Fats', 'Fiber', 'Folate', 'Iron', 'Magnesium',
       'Monounsaturated Fat', 'Net carbs', 'Omega-3 - DHA', 'Omega-3 - DPA',
       'Omega-3 - EPA', 'Phosphorus', 'Polyunsaturated fat', 'Potassium',
       'Protein', 'Saturated Fat', 'Selenium', 'Sodium', 'Trans Fat',
       'Vitamin A (IU)', 'Vitamin A RAE', 'Vitamin B1', 'Vitamin B12',
       'Vitamin B2', 'Vitamin B3', 'Vitamin B5', 'Vitamin B6', 'Vitamin C',
       'Zinc', 'Choline', 'Fructose', 'Histidine', 'Isoleucine', 'Leucine',
       'Lysine', 'Manganese', 'Methionine', 'Phenylalanine', 'Starch', 'Sugar',
       'Threonine', 'Tryptophan', 'Valine', 'Vitamin D', 'Vitamin E',
       'Vitamin K', 'Omega-3 - ALA', 'Omega-6 - Eicosadienoic acid',
       'Omega-6 - Gamma-linoleic acid', 'Omega-3 - Eicosatrienoic acid',
       'Omega-6 - Dihomo-gamma-linoleic acid', 'Omega-6 - Linoleic acid',
       'Omega-6 - Arachidonic ac

In [241]:
macros = ["Calories", "Carbs", "Fats", "Fiber", "Net carbs", "Protein"]
print(df[df["Category Name"] == "Fruits"][["Food Name"] + macros])

          Food Name  Calories  Carbs   Fats  Fiber  Net carbs  Protein
0             Apple      52.0   14.0   0.17    2.4       11.0     0.26
1           Apricot      48.0   11.0   0.39    2.0        9.1     1.40
2       Dried fruit     241.0   63.0   0.51    7.3       55.0     3.40
3           Avocado     160.0    8.5  15.00    6.7        1.8     2.00
4            Banana      89.0   23.0   0.33    2.6       20.0     1.10
5        Blackberry      43.0    9.6   0.49    5.3        4.3     1.40
6         Blueberry      57.0   14.0   0.33    2.4       12.0     0.74
7            Cherry      50.0   12.0   0.30    1.6       11.0     1.00
8         Cranberry      46.0   12.0   0.13    3.6        8.4     0.46
9           Currant      56.0   14.0   0.20    4.3        9.5     1.40
10            Dates     282.0   75.0   0.39    8.0       67.0     2.50
11             Figs      74.0   19.0   0.30    2.9       16.0     0.75
12      Fruit salad      50.0   13.0   0.03    1.0       12.0     0.51
13    

In [242]:
categories_to_drop = [
    "Sweets", "Soups", "Spices", "Fast Foods", "Meals, Entrees, and Side Dishes", "Baby Foods",
    "Gluten-Free Grains", "Lactose-Free Dairy", "Gluten-Free Baked Products"
]
df.drop(df[df["Category Name"].isin(categories_to_drop)].index, inplace=True)
categories = df["Category Name"].unique()
for category in categories:
    print(category, "has", len(df[df["Category Name"] == category]), "foods.")

Fruits has 44 foods.
Vegetables has 46 foods.
Seafood has 23 foods.
Dairy has 14 foods.
Eggs has 3 foods.
Mushrooms has 4 foods.
Grains has 5 foods.
Legumes has 21 foods.
White Meat has 6 foods.
Cured Meat has 15 foods.
Red Meat has 10 foods.
Oils and Sauces has 19 foods.
Nuts has 14 foods.
Greens has 12 foods.
Beverages has 22 foods.
Baked Products has 17 foods.


In [243]:
preferences = [0, 55, 106, 131, 141, 152, 185, 174, 187, 208, 205, 388, 402, 267, 281, 495]

# for category in categories:
#     # print every food for that category
#     print(df[df["Category Name"] == category][["Food Name"] + macros])
#     # ask the user to input the number of the food they prefer
#     food_number = int(input(f"Please enter the number of the food you prefer from {category}: "))
#     # save the food number in a list
#     preferences.append(food_number)
#     # clear the output before printing the new one
#     print("\n\n\n")

In [244]:
preferences

[0, 55, 106, 131, 141, 152, 185, 174, 187, 208, 205, 388, 402, 267, 281, 495]

In [245]:
preferences = df.loc[preferences]
preferences

Unnamed: 0,Food Name,Category Name,Calcium,Calories,Carbs,Cholesterol,Copper,Fats,Fiber,Folate,...,Vitamin D,Vitamin E,Vitamin K,Omega-3 - ALA,Omega-6 - Eicosadienoic acid,Omega-6 - Gamma-linoleic acid,Omega-3 - Eicosatrienoic acid,Omega-6 - Dihomo-gamma-linoleic acid,Omega-6 - Linoleic acid,Omega-6 - Arachidonic acid
0,Apple,Fruits,0.006,52.0,14.0,0.0,3e-05,0.17,2.4,3e-06,...,0.0,0.00018,2.2e-06,,,,,,,
55,Carrot,Vegetables,0.033,41.0,9.6,0.0,5e-05,0.24,2.8,1.9e-05,...,0.0,0.00066,1.3e-05,,0.0,,,,,
106,Tuna,Seafood,0.004,130.0,0.0,0.047,4e-05,0.59,0.0,2e-06,...,2e-06,0.00029,1e-07,,0.0,,,,,
131,Ricotta,Dairy,0.207,174.0,3.0,0.051,2e-05,13.0,0.0,1.2e-05,...,2e-07,0.00011,1.1e-06,,,,,,,
141,Egg,Eggs,0.05,155.0,1.1,0.373,0.002,11.0,0.0,4.4e-05,...,2.2e-06,0.001,3e-07,,,,,,,
152,Edible mushroom,Mushrooms,0.003,22.0,3.3,0.0,0.00032,0.34,1.0,1.7e-05,...,2e-07,1e-05,0.0,,0.0,,,,,
185,Pasta,Grains,0.006,131.0,25.0,0.033,9e-05,1.1,,6.4e-05,...,,,,,,,,,,
174,Tofu,Legumes,0.683,144.0,2.8,0.0,0.00038,8.7,2.3,2.9e-05,...,0.0,,,,,,,,,
187,Chicken meat,White Meat,0.015,239.0,0.0,0.088,7e-05,14.0,0.0,5e-06,...,0.0,0.00027,2.4e-06,,,,,,,
208,Ham,Cured Meat,0.008,145.0,1.5,0.053,8e-05,5.5,0.0,3e-06,...,8e-07,0.00025,0.0,,,,,,,


Now, we are able to find similar foods for each preference using cosine similarity

In [None]:
import numpy as np
from numpy.linalg import norm
import pandas as pd

def find_top_n_similar_foods(df: pd.DataFrame, preferences: list, n: int = 2) -> dict:

# Ensure no missing values
df.fillna(0, inplace=True)

# Assuming `preferences` is a list of food names
top5_similar_foods = {}

for preference in preferences:
    # Select the row corresponding to the current preference
    A_preferences = df[df["Food Name"] == preference[0]][macros].to_numpy().flatten()
    category_name = preference[1]

    # Calculate cosine similarity for each row in the dataframe within the same category
    similarities = []
    for idx, row in df[df["Category Name"] == category_name].iterrows():
        B = row[macros].to_numpy()

        # Compute norms
        norm_A = norm(A_preferences)
        norm_B = norm(B)

        # Check for zero norms and handle gracefully
        if norm_A == 0 or norm_B == 0:
            cosine = 0  # Default similarity for zero-magnitude vectors
        else:
            cosine = np.dot(A_preferences, B) / (norm_A * norm_B)

        similarities.append((idx, cosine))

    # Sort by similarity score in descending order
    similarities = sorted(similarities, key=lambda x: x[1], reverse=True)

    # Get the top 5 most similar foods excluding the preference itself
    top5 = [(df.loc[idx, "Food Name"], df.loc[idx, macros].to_dict(), score) 
            for idx, score in similarities if df.loc[idx, "Food Name"] != preference[0]][:5]

    # Save results
    top5_similar_foods[preference[1]] = top5

# Display results
for preference, similar_foods in top5_similar_foods.items():
    print(f"\nTop 5 similar foods for {preference}:")
    for food_name, macros_dict, score in similar_foods:
        print(f"- {food_name} (Similarity: {score:.2f})")
        print(f"  Macros: {macros_dict}")



Top 5 similar foods for o:

Top 5 similar foods for a:

Top 5 similar foods for h:

Top 5 similar foods for i:

Top 5 similar foods for r:

Top 5 similar foods for e:

Top 5 similar foods for m:

Top 5 similar foods for s:

Top 5 similar foods for y:

Top 5 similar foods for t:

Top 5 similar foods for u:


The next step involves building a meal using preferences and similarities

In [247]:
top5_similar_foods

{'o': [],
 'a': [],
 'h': [],
 'i': [],
 'r': [],
 'e': [],
 'm': [],
 's': [],
 'y': [],
 't': [],
 'u': []}

In [248]:
food_servings = pd.read_json(r"..\data\raw\food-servings.json")
food_servings

Unnamed: 0,Fruits,Vegetables,Seafood,Lactose-Free Dairy,Dairy,Eggs,Mushrooms,Grains,Legumes,Gluten-Free Grains,White Meat,Cured Meat,Red Meat,Oils and Sauces,Nuts,Greens,Beverages,Gluten-Free Baked Products,Baked Products
serving_size,150g,200g,150g,125ml,125ml,50g,80g,80g,150g,80g,100g,50g,100g,10ml,30g,80g,200ml,50g,50g
frequency_per_week,14,14,2,7,7,2,1,11,3,10,3,1,1,As needed,3,7,"Water frequently, others occasionally",7,7


In [249]:
# let's use this json file as a dynamic dictionary to track the frequencies per week
categories = food_servings.columns
categories = categories.drop([
    "Beverages", "Oils and Sauces", "Lactose-Free Dairy", "Gluten-Free Grains", "Gluten-Free Baked Products"
])

similar_foods = {}

for category in categories:
    frequency = food_servings[category]["frequency_per_week"]
    
    if frequency != 0:
        print(f"You have to eat {category} {frequency} times per week.")

        while frequency != 0:
            # choose a similar food from preferences array
            similar_foods[category] = top5_similar_foods[category]
            frequency -= 1

similar_foods       

You have to eat Fruits 14 times per week.


KeyError: 'Fruits'

In [236]:
import random

# Define the days of the week
days_of_week = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

# Initialize the weekly meal plan dictionary
weekly_meal_plan = {day: [] for day in days_of_week}

# Distribute the preferences and similar foods across the days of the week
for category in categories:
    frequency = food_servings[category]["frequency_per_week"]
    
    if frequency != 0:
        # print(f"You have to eat {category} {frequency} times per week.")
        
        while frequency != 0:
            for day in days_of_week:
                if frequency == 0:
                    break
                # Randomly decide if the preference or similar food should be added to the current day
                if random.choice([True, False]):
                    food_choice = random.choice(preferences)
                    weekly_meal_plan[day].append(food_choice[0])
                    frequency -= 1

# Display the weekly meal plan
for day, meals in weekly_meal_plan.items():
    print(f"{day}: {', '.join(meals) if meals else 'No meals planned'}")


Monday: Tofu, Chicken meat, Pasta, Edible mushroom, Olive oil, Apple, Tofu, Olive oil, Meatball, Pasta, Chicken meat, Carrot, Meatball, Tofu
Tuesday: Chicken meat, Egg, Tofu, Meatball, Tomato sauce, Pasta, Lettuce, Lettuce, Ham, Lettuce, Lettuce, Ham
Wednesday: Tofu, Hazelnut, Meatball, Edible mushroom, Chicken meat, Ricotta, Meatball, Meatball
Thursday: Ham, Lettuce, Edible mushroom, Tofu, Meatball, Ricotta, Ham, Olive oil, Tofu, Hazelnut, Tofu, Italian bread
Friday: Hazelnut, Chicken meat, Ricotta, Tuna, Pasta, Carrot, Ham, Italian bread, Lettuce, Ham
Saturday: Tomato sauce, Lettuce, Egg, Hazelnut, Egg, Egg, Hazelnut, Egg, Egg
Sunday: Tomato sauce, Tomato sauce, Apple, Lettuce, Lettuce, Apple, Meatball, Hazelnut, Italian bread, Carrot, Carrot


In [237]:
# filter preferences per category
preferences_per_category = {}
for category in categories:
    preferences_per_category[category] = [preference for preference in preferences if preference[1] == category]

preferences_per_category

{'Fruits': [array(['Apple', 'Fruits', 0.006, 52.0, 14.0, 0.0, 3e-05, 0.17, 2.4, 3e-06,
         0.0001199999999999, 0.005, 0.01, 11.0, 0.0, 0.0, 0.0, 0.011, 0.05,
         0.107, 0.26, 0.03, 0.0, 0.001, 0.0, 54.0, 3e-06, 2e-05, 0.0, 3e-05,
         8.999999999999999e-05, 6e-05, 4e-05, 0.0046, 4e-05, 0.0034, 5.9,
         1e-05, 1e-05, 1e-05, 1e-05, 4e-05, 0.0, 1e-05, 0.05, 10.0, 1e-05,
         0.0, 1e-05, 0.0, 0.0001799999999999, 2.2e-06, nan, nan, nan, nan,
         nan, nan, nan], dtype=object)],
 'Vegetables': [array(['Carrot', 'Vegetables', 0.033, 41.0, 9.6, 0.0, 5e-05, 0.24, 2.8,
         1.9e-05, 0.0003, 0.012, 0.01, 6.8, 0.0, 0.0, 0.0, 0.035, 0.12,
         0.32, 0.93, 0.04, 1.0000000000000001e-07, 0.069, 0.0, 16706.0,
         0.000835, 7.000000000000001e-05, 0.0, 6e-05, 0.00098, 0.00027,
         0.00014, 0.0059, 0.0002399999999999, 0.0088, 0.55, 4e-05, 8e-05,
         0.0001, 0.0001, 0.00014, 2e-05, 6e-05, 1.4, 4.7, 0.00019, 1e-05,
         7.000000000000001e-05, 0.0, 0.0006

In [252]:
food_categories = {
    "Cured Meat": [
        "Turkey ham",
        "Turkey sausage",
        "Salami",
        "Pork bacon",
        "Ham",
        "Mortadella",
        "Italian sausage"
    ],
    "Dairy": [
        "Mozzarella",
        "Cream cheese",
        "Cheese",
        "Ricotta",
        "Romano cheese"
    ],
    "Lactose-Free Dairy": [
        "Romano cheese",
        "Swiss cheese",
        "Fontina",
        "Parmigiano-Reggiano",
        "Edam",
        "Feta",
        "Provolone"
    ],
    "Grains": [
        "Couscous",
        "Pasta",
        "Wheat bread",
        "Rice"
    ],
    "Legumes": [
        "Tofu",
        "Bean",
        "Tempeh",
        "Soybean",
        "Lentil",
        "Chickpeas",
        "Hummus"
    ],
    "Oil": ["Olive oil"],
    "Sauces": ["Tomato sauce"],
    "Red Meat": [
        "Steak",
        "Ground beef",
        "Pork",
        "Meatball",
        "Beef"
    ],
    "Seafood": [
        "Carp",
        "Mackarel",
        "Smoked salmon",
        "Sturgeon",
        "Fish sticks",
        "Cod",
        "Mussels",
        "Tuna",
        "Salmon",
        "Trout",
        "Shrimp"
    ],
    "White Meat": [
        "Rabbit Meat",
        "Chicken meat",
        "Lamb"
    ]
}
