# How to structure a meal

In [127]:
import pandas as pd

# Load the nutritional facts table. Forward slashes are used so the relative path
# resolves on Windows, macOS and Linux alike (a backslash path only works on Windows).
nutri_facts_common = pd.read_csv('../data/raw/nutritional-facts.csv')
# Number of foods per category, most frequent first.
nutri_facts_common['Category Name'].value_counts()

Category Name
Sweets                             96
Meals, Entrees, and Side Dishes    90
Soups                              67
Seafood                            63
Vegetables                         46
Fast Foods                         45
Fruits                             44
Spices                             43
Legumes                            22
Beverages                          22
Baked Products                     21
Lactose-Free Dairy                 20
Dairy                              20
Oils and Sauces                    19
Nuts                               15
Cured Meat                         15
Greens                             12
Red Meat                           11
Grains                              9
Gluten-Free Grains                  7
White Meat                          6
Gluten-Free Baked Products          5
Mushrooms                           4
Eggs                                4
Baby Foods                          3
Name: count, dtype: int64

The dataset contains 25 categories (22 when excluding Baby Foods, Fast Foods, and Meals, Entrees, and Side Dishes). The goal is to map every category to its serving size and weekly frequency using official documents.

## Baked Products
Includes bread and bread substitutes. Standard portion is 50g for bread (e.g., 1 small bun, 1 slice of bread) and 30g for bread substitutes (e.g., 3-4 rusks, 1 small pack of crackers, 3-4 taralli).

## Meat
Includes fresh red and white meat, as well as preserved meats. Standard portion is 100g for fresh meat (e.g., 1 slice, 4-5 stew pieces) and 50g for preserved meat (e.g., 3-4 slices of prosciutto or salami).

## Sweets
Includes baked sweets and confectionery. Standard portion is 30-50g (e.g., 1 croissant, 2-3 biscuits, 1 slice of cake).

## Vegetables
Includes leafy, cooked, and raw vegetables. Standard portion is 200g (e.g., 2-3 tomatoes, 3-4 carrots, 1 bowl of leafy vegetables).

## Meals, Entrees, and Side Dishes
Includes composed dishes and side dishes. Standard portion ranges from 150g to 300g depending on the dish.

## Fruits
Includes fresh fruit and dried or sugared fruit. Standard portion is 150g for fresh fruit (e.g., 1 medium apple, 2 small plums) and 30g for dried fruit (e.g., 7-8 nuts, 3 dried apricots).

## Beverages
Includes non-alcoholic and alcoholic beverages. Standard portion is 200-330mL (e.g., 1 medium glass of juice, 1 can of soda).

## Seafood
Includes fresh and preserved fish, mollusks, and crustaceans. Standard portion is 150g for fresh (e.g., 1 medium fillet, 3 prawns) and 50g for preserved seafood (e.g., 1 small can of tuna).

## Grains
Includes pasta, rice, and breakfast cereals. Standard portion is 80g for pasta/rice (e.g., 4 tablespoons of rice) and 30g for breakfast cereals (e.g., 6-8 tablespoons of cornflakes).

## Soups
Includes broths and soup-based dishes. Standard portion varies, generally around 250-300g depending on the recipe.

## Greens
Includes leafy greens and herbs. Standard portion is 80g (e.g., 1 large bowl of salad greens).

## Dairy
Includes milk, yogurt, and cheeses. Standard portion is 125mL for milk (e.g., 1 small glass), 125g for yogurt (e.g., 1 small pot), and 100g for fresh cheese (e.g., 1 small mozzarella).

## Oils and Sauces
Includes condiments and cooking oils. Standard portion is 10mL (e.g., 1 tablespoon of olive oil).

## Spices
Includes a variety of spices. Standard portion depends on the spice and recipe (e.g., a pinch to 1 teaspoon).

## Fast Foods
Includes ready-to-eat items. Standard portion varies based on the food type (e.g., 1 small serving of fries or a burger).

## Nuts
Includes whole nuts and seeds. Standard portion is 30g (e.g., 7-8 walnuts, 15-20 almonds).

## Mushrooms
Includes fresh mushrooms. Standard portion is 200g (e.g., 1 medium bowl of cooked mushrooms).

## Baby Foods
Includes baby-specific products. Standard portion varies based on age and product type.

In [128]:
import json
import random

# Step 1: Read the per-category serving information from disk
with open('../data/raw/food-servings.json', 'r') as f:
    data = json.load(f)

# Step 2: Keep only the two fields used below, with defaults for missing keys
categories = {
    name: {
        "serving_size": entry.get("serving_size", "N/A"),
        "frequency_per_week": entry.get("frequency_per_week", 0),
    }
    for name, entry in data.items()
}

# Step 3: Weekly budget still available per category; categories whose
# frequency is not an int are left out of the tracker
remaining_frequency = {
    name: details["frequency_per_week"]
    for name, details in categories.items()
    if isinstance(details["frequency_per_week"], int)
}

def generate_lunch():
    """Assemble one lunch as a ``{category: serving_size}`` mapping.

    One main-dish category is drawn at random among those that still have
    weekly servings left; Vegetables, Fruits and Greens are added while each
    still has servings left; Oils and Sauces is always added.

    Side effects:
        Every category that is used (except Oils and Sauces) has its counter in
        the module-level ``remaining_frequency`` decremented by one, so repeated
        calls consume the weekly budget.

    Returns:
        dict: category name -> serving size taken from ``categories``. The main
        dish is simply omitted when no main-dish category has servings left.
    """
    lunch = {}

    def _use(category):
        # Record the serving and consume one unit of the weekly budget.
        lunch[category] = categories[category]["serving_size"]
        remaining_frequency[category] -= 1

    # Main dish: select from protein or grains
    main_options = ["Seafood", "White Meat", "Legumes", "Grains"]
    available_mains = [opt for opt in main_options if remaining_frequency.get(opt, 0) > 0]
    # random.choice raises IndexError on an empty sequence, which happens as soon
    # as the weekly budget of every main option has been used up.
    if available_mains:
        _use(random.choice(available_mains))

    # Side dishes. .get() is used because a category may be absent from the
    # tracker, in which case it is treated as having no servings left.
    for side in ("Vegetables", "Fruits", "Greens"):
        if remaining_frequency.get(side, 0) > 0:
            _use(side)

    # Oils and Sauces: always included and not counted against the budget.
    lunch["Oils and Sauces"] = categories["Oils and Sauces"]["serving_size"]

    return lunch

# Step 4: Generate and display a sample lunch.
# NOTE: generate_lunch() decrements the counters in remaining_frequency, so
# re-running this cell without re-running the setup above can give a different result.
sample_lunch = generate_lunch()
# Bare last expression: the notebook renders the dict as the cell output.
sample_lunch


{'Legumes': '150g',
 'Vegetables': '200g',
 'Fruits': '150g',
 'Greens': '80g',
 'Oils and Sauces': '10ml'}

In [145]:
import pandas as pd
import random
import json
import datetime

# Step 1: Load the nutritional facts CSV
nutritional_data = pd.read_csv('../data/raw/nutritional-facts.csv')

# Step 2: Load the food servings data
with open('../data/raw/food-servings.json', 'r') as f:
    servings_data = json.load(f)

# Step 3: Load the seasonality data
with open('../data/raw/food-seasonality.json', 'r') as f:
    seasonality_data = json.load(f)

# Step 4: Group the food names of the CSV by category. Despite its name,
# food_to_category maps category -> list of food names. Only categories that
# appear both in the servings data and in the CSV are kept.
# The set of CSV categories is computed once instead of once per category.
csv_categories = set(nutritional_data['Category Name'].unique())
food_to_category = {
    category: nutritional_data.loc[nutritional_data['Category Name'] == category, 'Food Name'].tolist()
    for category in servings_data
    if category in csv_categories
}

# Step 5: Get the current month to filter seasonal foods.
# '%B' yields the full month name in the active locale; the seasonality data is
# presumably keyed by English month names -- TODO confirm.
current_month = datetime.datetime.now().strftime('%B')

# Step 6: Seasonal filter for a category's foods
def get_seasonal_foods(category):
    """Return the foods of ``category`` that are in season this month.

    Reads three module-level names: ``seasonality_data`` (its "Italy" entry is
    looked up by ``current_month``, defaulting to an empty list),
    ``current_month`` and ``food_to_category``.

    Args:
        category: key into ``food_to_category``; an unknown key yields ``[]``.

    Returns:
        list: the category's foods that also appear in this month's seasonal
        list, in the order they have in ``food_to_category``.
    """
    in_season = seasonality_data["Italy"].get(current_month, [])
    candidates = food_to_category.get(category, [])
    matches = []
    for name in candidates:
        if name in in_season:
            matches.append(name)
    return matches

# Step 7: Generate a lunch by selecting foods from categories
def generate_lunch_with_foods():
    """Build one lunch with a concrete food picked for each category.

    A protein main dish and a grain are drawn at random from ``food_to_category``;
    a vegetable and a fruit are drawn from the foods returned by
    ``get_seasonal_foods``; olive oil is always added. A category is skipped when
    its ``frequency_per_week`` in ``servings_data`` is not positive or when there
    is no food to choose from.

    Side effects:
        ``servings_data[category]["frequency_per_week"]`` is decremented in place
        for every category added (except Oils and Sauces), so repeated calls
        consume the weekly budget until the servings data is reloaded.

    Returns:
        dict: category -> ``{"food": ..., "serving_size": ...}``.
    """
    lunch = {}

    def _add(category, food):
        # Record the pick and consume one unit of the category's weekly budget.
        lunch[category] = {"food": food, "serving_size": servings_data[category]["serving_size"]}
        servings_data[category]["frequency_per_week"] -= 1

    # Main dish: one protein category (Grains are handled separately below).
    main_options = ["Seafood", "White Meat", "Legumes"]
    # Keep only options that still have budget AND at least one food to pick:
    # random.choice raises IndexError on an empty sequence, and food_to_category
    # has no entry for categories that are absent from the CSV.
    available_mains = [
        opt for opt in main_options
        if servings_data[opt]["frequency_per_week"] > 0 and food_to_category.get(opt)
    ]
    if available_mains:
        main_choice = random.choice(available_mains)
        _add(main_choice, random.choice(food_to_category[main_choice]))

    # Grains
    grain_foods = food_to_category.get("Grains")
    if servings_data["Grains"]["frequency_per_week"] > 0 and grain_foods:
        _add("Grains", random.choice(grain_foods))

    # Vegetables, then Fruits (both filtered by seasonality)
    for category in ("Vegetables", "Fruits"):
        if servings_data[category]["frequency_per_week"] > 0:
            seasonal = get_seasonal_foods(category)
            if seasonal:
                _add(category, random.choice(seasonal))

    # Oils and Sauces: always included, budget not tracked
    lunch["Oils and Sauces"] = {"food": "Olive oil", "serving_size": servings_data["Oils and Sauces"]["serving_size"]}

    return lunch

# Step 8: Generate and display a sample lunch.
# NOTE: generate_lunch_with_foods() decrements frequency_per_week inside
# servings_data, so re-running only this cell keeps consuming the weekly budget.
sample_lunch = generate_lunch_with_foods()
# Bare last expression: the notebook renders the dict as the cell output.
sample_lunch


{'White Meat': {'food': 'Chicken meat', 'serving_size': '100g'},
 'Grains': {'food': 'Wheat Bread', 'serving_size': '80g'},
 'Vegetables': {'food': 'Fennel', 'serving_size': '200g'},
 'Fruits': {'food': 'Kiwifruit', 'serving_size': '150g'},
 'Oils and Sauces': {'food': 'Olive oil', 'serving_size': '10ml'}}

In [149]:
import pandas as pd
import random
import json
import datetime
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Step 1: Load the nutritional facts CSV
nutritional_data = pd.read_csv('../data/raw/nutritional-facts.csv')

# Step 2: Load the food servings data (reloading also resets any
# frequency_per_week counters that were decremented earlier in the session)
with open('../data/raw/food-servings.json', 'r') as f:
    servings_data = json.load(f)

# Step 3: Load the seasonality data
with open('../data/raw/food-seasonality.json', 'r') as f:
    seasonality_data = json.load(f)

# Step 4: Group the food names of the CSV by category. Despite its name,
# food_to_category maps category -> list of food names. Only categories that
# appear both in the servings data and in the CSV are kept.
# The set of CSV categories is computed once instead of once per category.
csv_categories = set(nutritional_data['Category Name'].unique())
food_to_category = {
    category: nutritional_data.loc[nutritional_data['Category Name'] == category, 'Food Name'].tolist()
    for category in servings_data
    if category in csv_categories
}

# Step 5: Get the current month to filter seasonal foods.
# '%B' yields the full month name in the active locale; the seasonality data is
# presumably keyed by English month names -- TODO confirm.
current_month = datetime.datetime.now().strftime('%B')

# Step 6: Keep only the foods of a category that are in season this month
def get_seasonal_foods(category):
    """Filter a category's foods down to those listed as seasonal.

    Uses the module-level ``seasonality_data["Italy"]`` mapping, looked up with
    ``current_month`` (an empty list is used when the month is missing), and the
    module-level ``food_to_category`` mapping.

    Args:
        category: key into ``food_to_category``; an unknown key yields ``[]``.

    Returns:
        list: foods of the category that are also in this month's seasonal list,
        keeping their ``food_to_category`` order.
    """
    months = seasonality_data["Italy"]
    this_month = months.get(current_month, [])
    foods = food_to_category.get(category, [])
    return list(filter(lambda item: item in this_month, foods))

# Step 7: Find similar ingredients based on cosine similarity
def find_similar_ingredients(base_ingredients, nutritional_df, top_n=3):
    """Suggest the most nutritionally similar same-category foods for each ingredient.

    Args:
        base_ingredients: iterable of food names. Matching against the
            'Food Name' column ignores case and surrounding whitespace.
        nutritional_df: DataFrame with 'Food Name' and 'Category Name' columns;
            every other column is used as a nutrient feature (assumed numeric --
            TODO confirm against the CSV schema). Missing values are filled
            with 0 on a copy; the caller's frame is not modified.
        top_n: maximum number of alternatives returned per ingredient.

    Returns:
        dict: ingredient (as passed in) -> list of ``(food_name, similarity)``
        tuples ordered from most to least similar. The list is empty when the
        ingredient is not found or has no other food in its category. The
        ingredient itself is never part of its own list.
    """
    if nutritional_df.isnull().values.any():
        print("Missing values detected, filling NaN values with 0...")
        nutritional_df = nutritional_df.fillna(0)

    # Normalise the dataset names once rather than once per ingredient.
    normalized_names = nutritional_df['Food Name'].str.strip().str.lower()

    similar_ingredients = {}

    for ingredient in base_ingredients:
        # Strip the query as well so both sides of the comparison are normalised alike.
        matching_rows = nutritional_df[normalized_names == ingredient.strip().lower()]

        if matching_rows.empty:
            print(f"Ingredient '{ingredient}' not found in the dataset!")
            similar_ingredients[ingredient] = []
            continue

        index = matching_rows.index[0]
        ingredient_category = matching_rows['Category Name'].iloc[0]

        category_filtered_df = nutritional_df[nutritional_df['Category Name'] == ingredient_category]
        category_nutrition_matrix = category_filtered_df.drop(columns=['Food Name', 'Category Name']).values

        # Position of the ingredient inside the category slice.
        self_position = int(np.flatnonzero(category_filtered_df.index == index)[0])

        # Only the ingredient's own row of similarities is needed, so compare that
        # single row against the category instead of building a full square matrix.
        category_similarities = cosine_similarity(
            category_nutrition_matrix[self_position:self_position + 1],
            category_nutrition_matrix,
        )[0]

        # Exclude the ingredient by position. Dropping "the first ranked item" is
        # not safe: cosine similarity ignores scale, so another food can tie with
        # (or marginally outrank) the self-match.
        ranked = np.argsort(-category_similarities, kind="stable")
        similar_indices = [i for i in ranked if i != self_position][:max(top_n, 0)]

        similar_ingredients[ingredient] = [
            (category_filtered_df.iloc[i]['Food Name'], category_similarities[i])
            for i in similar_indices
        ]

    return similar_ingredients

# Step 8: Generate a lunch by selecting foods from categories and recommending alternatives
def generate_lunch_with_foods():
    """Build one lunch and attach a nutritionally similar alternative to each pick.

    A main dish is drawn at random from ``food_to_category``; a vegetable and a
    fruit are drawn from the foods returned by ``get_seasonal_foods``. For each
    pick the first result of ``find_similar_ingredients`` is recorded as the
    alternative. Olive oil is always added without an alternative. A category is
    skipped when its ``frequency_per_week`` in ``servings_data`` is not positive
    or when there is no food to choose from.

    Side effects:
        ``servings_data[category]["frequency_per_week"]`` is decremented in place
        for every category added (except Oils and Sauces).

    Returns:
        dict: category -> ``{"original_food", "alternative_food",
        "similarity_score", "serving_size"}``, plus the "Oils and Sauces" entry
        with ``{"food", "serving_size"}``.
    """
    lunch = {}

    def _add_with_alternative(category, food_choice):
        # Record the pick with its best alternative and consume one weekly unit.
        similar_foods = find_similar_ingredients([food_choice], nutritional_data)
        # The food's key can be present but mapped to an empty list (food not
        # matched, or no other food in its category); `.get(key, default)` alone
        # would not cover that case, so fall back on the food itself with score 1.
        candidates = similar_foods.get(food_choice) or [(food_choice, 1)]
        alternative_food, similarity_score = candidates[0]  # Suggest an alternative
        lunch[category] = {
            "original_food": food_choice,
            "alternative_food": alternative_food,
            "similarity_score": similarity_score,
            "serving_size": servings_data[category]["serving_size"]
        }
        servings_data[category]["frequency_per_week"] -= 1

    # Main dish: select from protein or grains
    main_options = ["Seafood", "White Meat", "Legumes", "Grains"]
    # Keep only options that still have budget AND at least one food to pick:
    # random.choice raises IndexError on an empty sequence, and food_to_category
    # has no entry for categories that are absent from the CSV.
    available_mains = [
        opt for opt in main_options
        if servings_data[opt]["frequency_per_week"] > 0 and food_to_category.get(opt)
    ]
    if available_mains:
        main_choice = random.choice(available_mains)
        _add_with_alternative(main_choice, random.choice(food_to_category[main_choice]))

    # Vegetables, then Fruits (filter based on seasonality and suggest alternatives)
    for category in ("Vegetables", "Fruits"):
        if servings_data[category]["frequency_per_week"] > 0:
            seasonal = get_seasonal_foods(category)
            if seasonal:
                _add_with_alternative(category, random.choice(seasonal))

    # Oils and Sauces (no alternatives needed)
    lunch["Oils and Sauces"] = {"food": "Olive oil", "serving_size": servings_data["Oils and Sauces"]["serving_size"]}

    return lunch

# Step 9: Generate and display a sample lunch.
# NOTE: generate_lunch_with_foods() decrements frequency_per_week inside
# servings_data, so re-running only this cell keeps consuming the weekly budget.
sample_lunch = generate_lunch_with_foods()
# Bare last expression: the notebook renders the dict as the cell output.
sample_lunch


Missing values detected, filling NaN values with 0...
Missing values detected, filling NaN values with 0...
Missing values detected, filling NaN values with 0...


{'Legumes': {'original_food': 'Tempeh',
  'alternative_food': 'Soybean',
  'similarity_score': np.float64(0.9974839391006918),
  'serving_size': '150g'},
 'Vegetables': {'original_food': 'Cauliflower',
  'alternative_food': 'Turnip',
  'similarity_score': np.float64(0.995431517048979),
  'serving_size': '200g'},
 'Fruits': {'original_food': 'Grapefruit',
  'alternative_food': 'Apricot',
  'similarity_score': np.float64(0.9999975234985471),
  'serving_size': '150g'},
 'Oils and Sauces': {'food': 'Olive oil', 'serving_size': '10ml'}}