In [None]:
# Call the Keras backend's clear_session() before anything else in the notebook runs
# (presumably to discard Keras state left over from an earlier run in the same kernel — confirm).
# NOTE(review): no other cell in the visible part of this notebook uses TensorFlow/Keras;
# check whether this cell is still needed.
from tensorflow.keras import backend as K
K.clear_session()

# ***`Data Preprocessing`***

In [None]:
# Install the 'rarfile' library to handle RAR file extraction in the environment.
!pip install rarfile

Collecting rarfile
  Downloading rarfile-4.2-py3-none-any.whl.metadata (4.4 kB)
Downloading rarfile-4.2-py3-none-any.whl (29 kB)
Installing collected packages: rarfile
Successfully installed rarfile-4.2


In [None]:
# Import necessary libraries for handling files, downloading, and data manipulation
import os
import pandas as pd
import gdown
import rarfile
import shutil

# Download the RAR file from Google Drive
url = 'https://drive.google.com/uc?id=1a6mvg1ewcz8rNxySb5niXxfDgyOaACGx'
output_rar = '/content/fitnesstan-Dataset.rar'
downloaded_path = gdown.download(url, output_rar, quiet=False)

# Stop here with a clear message if nothing was saved, instead of letting the
# extraction step below fail with an unrelated-looking archive error.
if not downloaded_path or not os.path.isfile(output_rar):
    raise RuntimeError(f"Download failed: {url} was not saved to {output_rar}")

# Extract the RAR file to access the dataset; the context manager closes the archive
# even if extraction raises.
with rarfile.RarFile(output_rar, 'r') as rar_ref:
    rar_ref.extractall('/content')

Downloading...
From: https://drive.google.com/uc?id=1a6mvg1ewcz8rNxySb5niXxfDgyOaACGx
To: /content/fitnesstan-Dataset.rar
100%|██████████| 1.01M/1.01M [00:00<00:00, 15.3MB/s]


In [None]:
import pandas as pd

# Load the dataset to examine its structure
file_path = '/content/fitnesstan-Dataset/nutrition.csv'
nutrition_data = pd.read_csv(file_path)

# info() prints its report itself and returns None, so it is called as a statement
# rather than being embedded in the displayed expression.
nutrition_data.info()

# Rich display of the first few rows and of the summary statistics
display(nutrition_data.head(), nutrition_data.describe())


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8789 entries, 0 to 8788
Data columns (total 77 columns):
 #   Column                       Non-Null Count  Dtype 
---  ------                       --------------  ----- 
 0   Unnamed: 0                   8789 non-null   int64 
 1   name                         8789 non-null   object
 2   serving_size                 8789 non-null   object
 3   calories                     8789 non-null   int64 
 4   total_fat                    8789 non-null   object
 5   saturated_fat                7199 non-null   object
 6   cholesterol                  8789 non-null   object
 7   sodium                       8789 non-null   object
 8   choline                      8789 non-null   object
 9   folate                       8789 non-null   object
 10  folic_acid                   8789 non-null   object
 11  niacin                       8789 non-null   object
 12  pantothenic_acid             8789 non-null   object
 13  riboflavin                   8789

(   Unnamed: 0             name serving_size  calories total_fat saturated_fat  \
 0           0       Cornstarch        100 g       381      0.1g           NaN   
 1           1     Nuts, pecans        100 g       691       72g          6.2g   
 2           2    Eggplant, raw        100 g        25      0.2g           NaN   
 3           3   Teff, uncooked        100 g       367      2.4g          0.4g   
 4           4  Sherbet, orange        100 g       144        2g          1.2g   
 
   cholesterol    sodium  choline     folate  ...      fat  \
 0           0   9.00 mg   0.4 mg   0.00 mcg  ...   0.05 g   
 1           0   0.00 mg  40.5 mg  22.00 mcg  ...  71.97 g   
 2           0   2.00 mg   6.9 mg  22.00 mcg  ...   0.18 g   
 3           0  12.00 mg  13.1 mg          0  ...   2.38 g   
 4         1mg  46.00 mg   7.7 mg   4.00 mcg  ...   2.00 g   
 
   saturated_fatty_acids monounsaturated_fatty_acids  \
 0               0.009 g                     0.016 g   
 1               6.1

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Read the raw nutrition CSV into `nutrition_data` again (same file and variable name as
# the inspection cell earlier in the notebook); the cleaning cells below work on this frame.
file_path = '/content/fitnesstan-Dataset/nutrition.csv'
nutrition_data = pd.read_csv(file_path)

In [None]:
# Drop the 'Unnamed: 0' column (irrelevant index column).
# errors='ignore' keeps the cell re-runnable: on a second run the column is already gone.
nutrition_data = nutrition_data.drop(columns=['Unnamed: 0'], errors='ignore')

# Count missing values per column
missing_values = nutrition_data.isnull().sum()

# Drop columns with more than 50% missing values.
# .copy() makes the result an independent frame so the column assignments below do not
# write into a slice of the previous one.
nutrition_data = nutrition_data.loc[:, missing_values <= (0.5 * len(nutrition_data))].copy()

# Fill missing values in numerical columns with mean
for column in nutrition_data.select_dtypes(include=['float64', 'int64']).columns:
    nutrition_data[column] = nutrition_data[column].fillna(nutrition_data[column].mean())

# For categorical columns, fill missing with mode
for column in nutrition_data.select_dtypes(include=['object']).columns:
    column_modes = nutrition_data[column].mode()
    # mode() is empty when a column holds no values at all; leave such a column untouched
    # instead of failing on the [0] lookup.
    if not column_modes.empty:
        nutrition_data[column] = nutrition_data[column].fillna(column_modes[0])


In [None]:
# Remove duplicate rows (drop_duplicates with its default arguments).
# The index is not reset here, so it may contain gaps after this step.
nutrition_data = nutrition_data.drop_duplicates()

In [None]:
# Normalise column names: lower-case, spaces -> underscores, then strip every character
# that is not a-z, 0-9 or underscore.
# regex=True is required on the last step: without it pandas >= 2.0 treats the pattern as
# literal text, so nothing would ever be stripped.
nutrition_data.columns = (
    nutrition_data.columns
    .str.lower()
    .str.replace(' ', '_', regex=False)
    .str.replace(r'[^a-z0-9_]', '', regex=True)
)

In [None]:
# Write the cleaned frame to disk without its row index and report the location.
cleaned_file_path = '/content/nutrition_cleaned.csv'
nutrition_data.to_csv(path_or_buf=cleaned_file_path, index=False)
print("Cleaned dataset saved at {}".format(cleaned_file_path))


Cleaned dataset saved at /content/nutrition_cleaned.csv


In [None]:
# Reload the cleaned dataset from disk into a separate frame, `cleaned_data`;
# the feature-engineering steps below read from and add columns to this frame.
cleaned_file_path = '/content/nutrition_cleaned.csv'
cleaned_data = pd.read_csv(cleaned_file_path)

In [None]:
# Step 1: Extract numeric weight (grams) from serving_size
if 'serving_size' in cleaned_data.columns:
    # astype(str) lets this cell run both when serving_size still holds text such as
    # "100 g" and when Step 3 below has already converted it to numbers (cell re-run).
    # expand=False makes extract() return a Series instead of a one-column DataFrame.
    # NOTE(review): the pattern takes the first number it finds and ignores the unit —
    # assumes every serving size is given in grams; confirm against the data.
    serving_size_grams = (
        cleaned_data['serving_size']
        .astype(str)
        .str.extract(r'(\d+\.?\d*)', expand=False)
        .astype(float)
    )
    # Fill missing serving sizes with the median
    cleaned_data['serving_size_grams'] = serving_size_grams.fillna(serving_size_grams.median())
else:
    print("Column 'serving_size' is not present in the dataset.")

In [None]:
# Step 2: Create Calories per Gram
# Guard first: without a 'calories' column there is nothing to derive.
if 'calories' not in cleaned_data.columns:
    print("Column 'calories' is not present in the dataset.")
else:
    # Element-wise division of the calorie count by the serving weight from Step 1.
    cleaned_data['calories_per_gram'] = cleaned_data['calories'].div(cleaned_data['serving_size_grams'])

In [None]:
# Step 3: Convert columns to numeric for ratio calculations
# Note that 'serving_size' itself is overwritten with its numeric value here.
columns_to_convert = ['serving_size', 'protein', 'carbohydrate', 'sugars', 'total_fat']

for column in columns_to_convert:
    if column not in cleaned_data.columns:
        print(f"Column '{column}' is not present in the dataset.")
        continue

    values = cleaned_data[column]
    # Text columns (object or pandas string dtype) carry unit suffixes such as "0.1g":
    # strip everything except digits and the decimal point before parsing.
    if not pd.api.types.is_numeric_dtype(values):
        values = values.astype(str).str.replace(r'[^\d.]', '', regex=True)

    # errors='coerce' turns entries that cannot be parsed (e.g. text with no digits, which
    # strips down to an empty string) into NaN instead of aborting the whole conversion.
    cleaned_data[column] = pd.to_numeric(values, errors='coerce').astype(float)


In [None]:
# Step 4: Calculate Macronutrient Ratios
# Each entry is (new column, numerator column, denominator column). Protein, carbs and fat
# are expressed per gram of serving; sugar is expressed as a share of the carbohydrate amount.
# NOTE(review): a zero denominator yields inf/NaN in the new column; the normalisation
# section later replaces inf with NaN only in its own reloaded copy of the data.
ratio_definitions = [
    ('protein_ratio', 'protein', 'serving_size_grams'),
    ('carbs_ratio', 'carbohydrate', 'serving_size_grams'),
    ('sugar_ratio', 'sugars', 'carbohydrate'),
    ('total_fat_ratio', 'total_fat', 'serving_size_grams'),
]
for ratio_column, numerator_column, denominator_column in ratio_definitions:
    cleaned_data[ratio_column] = cleaned_data[numerator_column] / cleaned_data[denominator_column]

In [None]:
# Step 5: Calculate Additional Features (e.g., Saturated Fat Ratio)
if 'saturated_fat' not in cleaned_data.columns:
    print("Column 'saturated_fat' is not present in the dataset.")
else:
    # Strip everything but digits and the decimal point (values look like "6.2g"), parse
    # the remainder as float, then express it per gram of serving.
    saturated_fat_text = cleaned_data['saturated_fat'].str.replace(r'[^\d.]', '', regex=True)
    saturated_fat_numeric = saturated_fat_text.astype(float)
    cleaned_data['saturated_fat_ratio'] = saturated_fat_numeric / cleaned_data['serving_size_grams']

In [None]:
# Step 6: Save the feature-engineered dataset
# Writes `cleaned_data` (original columns plus the derived ratio columns) without the row
# index; later sections of the notebook read this file back from the same path.
feature_engineered_file_path = '/content/nutrition_feature_engineered.csv'
cleaned_data.to_csv(feature_engineered_file_path, index=False)
# Bare last expression: echoes the path as the cell output.
feature_engineered_file_path

'/content/nutrition_feature_engineered.csv'

In [None]:
from sklearn.preprocessing import MinMaxScaler

In [None]:
import pandas as pd

# Load the feature-engineered dataset
# This rebinds `nutrition_data` (previously the cleaned frame) to the feature-engineered
# data; the normalisation cells below modify this frame.
file_path = '/content/nutrition_feature_engineered.csv'
nutrition_data = pd.read_csv(file_path)


In [None]:
# Swap +inf / -inf for NaN throughout the frame before scaling.
infinite_values = [float('inf'), float('-inf')]
nutrition_data = nutrition_data.replace(infinite_values, float('nan'))


In [None]:
# Derived ratio columns that should be scaled, in the order they are scaled.
candidate_columns = (
    'calories_per_gram',
    'protein_ratio',
    'carbs_ratio',
    'sugar_ratio',
    'total_fat_ratio',
    'saturated_fat_ratio',
)

# Keep only the candidates that are actually present in the frame.
columns_to_normalize = [name for name in candidate_columns if name in nutrition_data.columns]


In [None]:
# Apply MinMaxScaler
# The scaler is fitted on the selected ratio columns and those columns are overwritten in
# place with the scaled values; all other columns of `nutrition_data` are left unchanged.
# NOTE(review): with default arguments the output range is presumably [0, 1] — confirm.
scaler = MinMaxScaler()
nutrition_data[columns_to_normalize] = scaler.fit_transform(nutrition_data[columns_to_normalize])

In [None]:
# Save the normalized dataset
# Written without the row index; the content-based filtering section reads this file back.
normalized_file_path = '/content/nutrition_normalized.csv'
nutrition_data.to_csv(normalized_file_path, index=False)
# Bare last expression: echoes the path as the cell output.
normalized_file_path

'/content/nutrition_normalized.csv'

In [None]:
# Mount Google Drive at /content/drive so the next cell can write into it.
# This relies on the google.colab package, so the cell only works inside Colab.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Where the preprocessed data goes inside the mounted Google Drive
destination_path = '/content/drive/My Drive/nutrition_normalized.csv'

# Write the frame (no row index) and confirm the location to the user
nutrition_data.to_csv(path_or_buf=destination_path, index=False)
print("Preprocessed dataset saved to: " + destination_path)

Preprocessed dataset saved to: /content/drive/My Drive/nutrition_normalized.csv


# ***`BMR/TDEE`***

In [None]:
# Multipliers applied to the BMR to obtain the TDEE, keyed by the accepted activity levels.
activity_multipliers = {
    "sedentary": 1.2,
    "light": 1.375,
    "moderate": 1.55,
    "very": 1.725,
    "super": 1.9
}


def calculate_bmr(gender, weight, height, age):
    """Return the Basal Metabolic Rate in calories/day.

    Computes 10*weight + 6.25*height - 5*age, then adds 5 for "male" or subtracts 161
    for "female" (the constants of the Mifflin-St Jeor equation).

    Parameters:
    - gender: "male" or "female" (already stripped and lower-cased).
    - weight: body weight in kg, must be > 0.
    - height: body height in cm, must be > 0.
    - age: age in years, must be > 0.

    Raises:
    - ValueError: for an unknown gender or a non-positive weight, height or age.
    """
    if gender not in ("male", "female"):
        raise ValueError("Invalid gender entered. Please enter 'Male' or 'Female'.")
    if weight <= 0 or height <= 0 or age <= 0:
        # Zero or negative measurements would silently produce a meaningless BMR.
        raise ValueError("Weight, height and age must all be greater than zero.")

    base = 10 * weight + 6.25 * height - 5 * age
    return base + 5 if gender == "male" else base - 161


def calculate_tdee(bmr, activity_level):
    """Return the Total Daily Energy Expenditure: `bmr` times the activity multiplier.

    Parameters:
    - bmr: Basal Metabolic Rate in calories/day.
    - activity_level: one of the keys of `activity_multipliers`.

    Raises:
    - ValueError: if `activity_level` is not a known key.
    """
    if activity_level not in activity_multipliers:
        raise ValueError("Invalid activity level entered. Please enter 'sedentary', 'light', 'moderate', 'very', or 'super'.")
    return bmr * activity_multipliers[activity_level]


# Step 1: Collect User Information
print("Welcome to the Daily Caloric Requirement Calculator!")
print("Please answer the following questions to help determine your caloric needs.")

# Collecting personal details (text answers are normalised to stripped lower-case)
gender = input("Please enter your gender (Male/Female): ").strip().lower()
weight = float(input("Enter your weight in kg: "))
height = float(input("Enter your height in cm: "))
age = int(input("Enter your age in years: "))
activity_level = input("Enter your activity level (choose from 'sedentary', 'light', 'moderate', 'very', 'super'): ").strip().lower()

# Step 2: Calculate BMR based on gender
bmr = calculate_bmr(gender, weight, height, age)
print(f"\nYour Basal Metabolic Rate (BMR) is: {bmr:.2f} calories/day.")

# Step 3: Calculate TDEE based on activity level
# `tdee` is read by the nutritional-goal and recommendation cells further down.
tdee = calculate_tdee(bmr, activity_level)
print(f"Your Total Daily Energy Expenditure (TDEE) based on activity level is: {tdee:.2f} calories/day.")

# Step 4: Display caloric recommendations
print("\nBased on your TDEE, you can adjust your diet for specific goals:")
print(" - To lose weight, aim for a calorie intake slightly below your TDEE.")
print(" - To maintain weight, aim to match your TDEE.")
print(" - To gain weight, increase your calorie intake above your TDEE.")


Welcome to the Daily Caloric Requirement Calculator!
Please answer the following questions to help determine your caloric needs.
Please enter your gender (Male/Female): male
Enter your weight in kg: 72
Enter your height in cm: 172
Enter your age in years: 21
Enter your activity level (choose from 'sedentary', 'light', 'moderate', 'very', 'super'): very

Your Basal Metabolic Rate (BMR) is: 1695.00 calories/day.
Your Total Daily Energy Expenditure (TDEE) based on activity level is: 2923.88 calories/day.

Based on your TDEE, you can adjust your diet for specific goals:
 - To lose weight, aim for a calorie intake slightly below your TDEE.
 - To maintain weight, aim to match your TDEE.
 - To gain weight, increase your calorie intake above your TDEE.


# ***`Approach # 01`***
# ***(Constraint Optimization using Linear Programming (LP))***

In [None]:
# Step 1: Load the preprocessed dataset
import pandas as pd

# Load the dataset (un-normalised feature-engineered data); this rebinds `nutrition_data`.
file_path = "/content/nutrition_feature_engineered.csv"
nutrition_data = pd.read_csv(file_path)

# Display the first few rows of the dataset
print("Dataset Preview:")
display(nutrition_data.head())

# Summary of dataset.
# info() prints its report itself and returns None, so wrapping it in print() would add
# a stray "None" line to the output.
print("\nDataset Summary:")
nutrition_data.info()

Dataset Preview:


Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,caffeine,theobromine,water,serving_size_grams,calories_per_gram,protein_ratio,carbs_ratio,sugar_ratio,total_fat_ratio,saturated_fat_ratio
0,Cornstarch,100.0,381,0.1,0.1g,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,...,0.00 mg,0.00 mg,8.32 g,100.0,3.81,0.0026,0.9127,0.0,0.001,0.001
1,"Nuts, pecans",100.0,691,72.0,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,...,0.00 mg,0.00 mg,3.52 g,100.0,6.91,0.0917,0.1386,0.286436,0.72,0.062
2,"Eggplant, raw",100.0,25,0.2,0.1g,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,...,0.00 mg,0.00 mg,92.30 g,100.0,0.25,0.0098,0.0588,0.60034,0.002,0.001
3,"Teff, uncooked",100.0,367,2.4,0.4g,0,12.00 mg,13.1 mg,0,0,...,0,0,8.82 g,100.0,3.67,0.133,0.7313,0.025161,0.024,0.004
4,"Sherbet, orange",100.0,144,2.0,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,...,0.00 mg,0.00 mg,66.10 g,100.0,1.44,0.011,0.304,0.8,0.02,0.012



Dataset Summary:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8789 entries, 0 to 8788
Data columns (total 83 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   name                         8789 non-null   object 
 1   serving_size                 8789 non-null   float64
 2   calories                     8789 non-null   int64  
 3   total_fat                    8789 non-null   float64
 4   saturated_fat                8789 non-null   object 
 5   cholesterol                  8789 non-null   object 
 6   sodium                       8789 non-null   object 
 7   choline                      8789 non-null   object 
 8   folate                       8789 non-null   object 
 9   folic_acid                   8789 non-null   object 
 10  niacin                       8789 non-null   object 
 11  pantothenic_acid             8789 non-null   object 
 12  riboflavin                   8789 non-null   object 
 13  

In [None]:
# Step 2: Define the Nutritional Goals
# Daily calorie goal, taken from the `tdee` computed in the BMR/TDEE section.
TDEE = tdee

# Share of total calories assigned to each macronutrient (protein / carbs / fat)
protein_percentage, carbs_percentage, fat_percentage = 0.30, 0.50, 0.20

# Convert each calorie share to grams: protein and carbohydrate are divided by 4 kcal
# per gram, fat by 9 kcal per gram.
protein_target = (TDEE * protein_percentage) / 4
carbs_target = (TDEE * carbs_percentage) / 4
fat_target = (TDEE * fat_percentage) / 9

# Report the goals
print(f"Calorie Goal (TDEE): {TDEE} kcal")
for macro_label, macro_grams in (
    ("Protein", protein_target),
    ("Carbs", carbs_target),
    ("Fat", fat_target),
):
    print(f"{macro_label} Target: {macro_grams:.2f} g")


Calorie Goal (TDEE): 2913.09375 kcal
Protein Target: 218.48 g
Carbs Target: 364.14 g
Fat Target: 64.74 g


##### ***`Extra`***
##### ***Heuristic: Uses a trial-and-error scaling mechanism to adjust serving sizes.***

In [None]:
import numpy as np

# User input for number of meals (3-4 meals per day)
num_meals = int(input("Enter the number of meals you want (e.g., 3 or 4): "))
if num_meals <= 0:
    # The per-meal targets below divide by num_meals.
    raise ValueError("The number of meals must be a positive integer.")

# Nutritional constraints per meal: the daily targets split evenly across the meals
meal_calories_target = TDEE / num_meals  # Target calories per meal
protein_target_per_meal = protein_target / num_meals
carbs_target_per_meal = carbs_target / num_meals
fat_target_per_meal = fat_target / num_meals

print(f"\nTarget for each meal:")
print(f"Calories per meal: {meal_calories_target} kcal")
print(f"Protein per meal: {protein_target_per_meal} g")
print(f"Carbs per meal: {carbs_target_per_meal} g")
print(f"Fat per meal: {fat_target_per_meal} g")

# Per-item values from the dataset, as positional arrays (index = row position)
food_items = nutrition_data['name'].values  # 'name' column for food items
calories = nutrition_data['calories'].values
protein = nutrition_data['protein'].values
carbs = nutrition_data['carbohydrate'].values
fat = nutrition_data['total_fat'].values

# Loop through each meal: the user picks items, then all picks are scaled by one common
# factor so that the meal's calories match the per-meal target.
for meal in range(num_meals):
    print(f"\nMeal {meal + 1}: Please select food items for this meal")

    # Row positions of the food items chosen by the user for this meal
    selected_food_indices = []

    # Prompt user to select food items for the meal
    num_food_items = int(input("\nHow many food items do you want to include in this meal? "))
    for i in range(num_food_items):
        # Keep asking until a valid in-range integer is entered
        while True:
            try:
                food_index = int(input(f"Select food item {i+1}: (Enter index between 0 and {len(food_items)-1}) "))
            except ValueError:
                print("Invalid input. Please enter a valid number.")
                continue
            if 0 <= food_index < len(food_items):
                selected_food_indices.append(food_index)
                break
            print("Invalid index. Please try again.")

    if not selected_food_indices:
        # Nothing to scale; without this guard the scaling step would divide by zero.
        print(f"No food items selected for Meal {meal + 1}; skipping this meal.")
        continue

    # Nutritional data for selected food items
    selected_calories = calories[selected_food_indices]
    selected_protein = protein[selected_food_indices]
    selected_carbs = carbs[selected_food_indices]
    selected_fat = fat[selected_food_indices]

    # Display selected items
    print(f"\nSelected food items for Meal {meal + 1}:")
    for idx in selected_food_indices:
        print(f"{food_items[idx]}: Calories = {calories[idx]}, Protein = {protein[idx]}, Carbs = {carbs[idx]}, Fat = {fat[idx]}")

    # Calories of the selection at one serving each
    base_calories = sum(float(value) for value in selected_calories)
    if not base_calories > 0:
        # Zero (or NaN) calories cannot be scaled to a positive target; the original
        # iterative loop would never terminate in this situation.
        print(f"The selected items provide no calories, so Meal {meal + 1} cannot be scaled to the target; skipping this meal.")
        continue

    # Start with one serving of every item. All items share one scaling factor, so a single
    # proportional step reaches the calorie target; selections already within 1 kcal of the
    # target are left at one serving each.
    servings = [1.0] * len(selected_food_indices)
    scaling_factor = 1.0
    if abs(base_calories - meal_calories_target) >= 1:
        scaling_factor = meal_calories_target / base_calories
        servings = [serving * scaling_factor for serving in servings]

    # Total nutritional contributions for the final servings
    total_calories = sum(selected_calories[i] * servings[i] for i in range(len(servings)))
    total_protein = sum(selected_protein[i] * servings[i] for i in range(len(servings)))
    total_carbs = sum(selected_carbs[i] * servings[i] for i in range(len(servings)))
    total_fat = sum(selected_fat[i] * servings[i] for i in range(len(servings)))

    # Display final scaled serving suggestions
    print(f"\nSuggested servings for Meal {meal + 1}:")
    for i, serving in enumerate(servings):
        print(f"{food_items[selected_food_indices[i]]}: {serving:.2f} servings")

    # Display nutritional summary for scaled servings (only calories are matched to the
    # target; the macronutrient lines show how far the selection is from its targets)
    print(f"\nNutritional summary for Meal {meal + 1}:")
    print(f"Calories: {total_calories:.2f} / {meal_calories_target}")
    print(f"Protein: {total_protein:.2f} / {protein_target_per_meal}")
    print(f"Carbs: {total_carbs:.2f} / {carbs_target_per_meal}")
    print(f"Fat: {total_fat:.2f} / {fat_target_per_meal}")

Enter the number of meals you want (e.g., 3 or 4): 3

Target for each meal:
Calories per meal: 971.03125 kcal
Protein per meal: 72.82734375 g
Carbs per meal: 121.37890625 g
Fat per meal: 21.57847222222222 g

Meal 1: Please select food items for this meal

How many food items do you want to include in this meal? 2
Select food item 1: (Enter index between 0 and 8788) 333
Select food item 2: (Enter index between 0 and 8788) 444

Selected food items for Meal 1:
Cookies, fig bars: Calories = 348, Protein = 3.7, Carbs = 70.9, Fat = 7.3
Emu, raw, oyster: Calories = 141, Protein = 22.81, Carbs = 0.0, Fat = 4.9

Suggested servings for Meal 1:
Cookies, fig bars: 1.99 servings
Emu, raw, oyster: 1.99 servings

Nutritional summary for Meal 1:
Calories: 971.03 / 971.03125
Protein: 52.64 / 72.82734375
Carbs: 140.79 / 121.37890625
Fat: 24.23 / 21.57847222222222

Meal 2: Please select food items for this meal

How many food items do you want to include in this meal? 2
Select food item 1: (Enter index b

#### ***`Proceed`***
#### ***Approach # 01 [Constraint Optimization using Linear Programming (LP)]***

In [None]:
import numpy as np
from scipy.optimize import linprog

# Food item attributes.
# The nutrient columns used here are per-gram ratios, so the LP decision variables (and
# therefore every amount read from result.x) are grams of each food, not servings.
food_items = nutrition_data['name']
calories = nutrition_data['calories_per_gram']
protein = nutrition_data['protein_ratio']
carbs = nutrition_data['carbs_ratio']
fat = nutrition_data['total_fat_ratio']

# Daily targets: `tdee` comes from the BMR/TDEE section; protein_target, carbs_target and
# fat_target are reused unchanged from the nutritional-goals cell.
TDEE = tdee
num_meals = int(input("Enter the number of meals you want per day: "))
if num_meals <= 0:
    # The per-meal targets below divide by num_meals.
    raise ValueError("The number of meals per day must be a positive integer.")

# Per-meal targets
meal_calories_target = TDEE / num_meals
protein_target_per_meal = protein_target / num_meals
carbs_target_per_meal = carbs_target / num_meals
fat_target_per_meal = fat_target / num_meals

# Nutritional targets per meal (right-hand side of the equality constraints)
b_eq = np.array([meal_calories_target, protein_target_per_meal, carbs_target_per_meal, fat_target_per_meal])

# Constraint matrix: one row per nutrient, one column per food. It does not change between
# meals, so it is built once instead of on every pass of the loop.
A_eq = np.array([calories, protein, carbs, fat], dtype=float)

# Foods with a NaN/inf coefficient (e.g. from a zero serving weight) would make the solver
# reject the whole problem: zero their coefficients and pin their amount to 0 instead.
finite_mask = np.isfinite(A_eq)
usable_foods = finite_mask.all(axis=0)
A_eq = np.where(finite_mask, A_eq, 0.0)

# Bounds for decision variables (amounts must be non-negative; unusable foods are fixed at 0)
bounds = [(0, None) if usable else (0, 0) for usable in usable_foods]

# Amounts below this many grams are treated as solver round-off and not reported
MIN_REPORTED_GRAMS = 1e-6

# Cumulative grams recommended so far per food (used to penalise repeats)
selected_foods = np.zeros(len(food_items))

# Recommendation system for each meal
for meal in range(num_meals):
    while True:
        print(f"\nGenerating recommendation for Meal {meal + 1}...")

        # Objective: minimise the total amount of food, plus a penalty on previously
        # recommended items to encourage variety.
        c = np.ones(len(food_items)) + 10 * selected_foods

        # Solve the Linear Programming problem
        result = linprog(c, A_eq=A_eq, b_eq=b_eq, bounds=bounds, method='highs')

        if result.success:
            print(f"\nRecommended Meal {meal + 1}:")
            for i, grams in enumerate(result.x):
                if grams > MIN_REPORTED_GRAMS:  # Only include items with a non-zero amount
                    print(f"{food_items.iloc[i]}: {grams:.2f} g")
                    selected_foods[i] += grams  # Update penalty to avoid reuse

            # Nutritional summary for the recommended meal
            total_calories = np.dot(result.x, A_eq[0])
            total_protein = np.dot(result.x, A_eq[1])
            total_carbs = np.dot(result.x, A_eq[2])
            total_fat = np.dot(result.x, A_eq[3])

            print(f"\nNutritional Summary for Meal {meal + 1}:")
            print(f"Calories: {total_calories:.2f} kcal (Target: {meal_calories_target} kcal)")
            print(f"Protein: {total_protein:.2f} g (Target: {protein_target_per_meal} g)")
            print(f"Carbs: {total_carbs:.2f} g (Target: {carbs_target_per_meal} g)")
            print(f"Fat: {total_fat:.2f} g (Target: {fat_target_per_meal} g)")

            # Ask if the user wants to recompute the recommendation; a recompute sees the
            # penalties added by the rejected proposal and therefore picks other foods.
            recompute = input(f"Do you want to recompute Meal {meal + 1}? (yes/no): ").strip().lower()
            if recompute != 'yes':
                break  # Exit the loop if recomputation is not requested
        else:
            print(f"Could not generate a feasible recommendation for Meal {meal + 1}.")
            # NOTE(review): penalties only change after a successful solve, so answering
            # "yes" here re-solves the identical problem.
            recompute = input(f"Do you want to recompute Meal {meal + 1}? (yes/no): ").strip().lower()
            if recompute != 'yes':
                print(f"Skipping Meal {meal + 1}.")
                break


Enter the number of meals you want per day: 4

Generating recommendation for Meal 1...

Recommended Meal 1:
KEEBLER, Waffle Cones: 23.45 servings
Soy protein isolate: 9.25 servings
MURRAY, Vanilla Sugar Wafer: 43.69 servings
Beverages, EAS soy protein powder, ABBOTT: 90.77 servings

Nutritional Summary for Meal 1:
Calories: 728.27 kcal (Target: 728.2734375 kcal)
Protein: 54.62 g (Target: 54.6205078125 g)
Carbs: 91.03 g (Target: 91.0341796875 g)
Fat: 16.18 g (Target: 16.183854166666666 g)
Do you want to recompute Meal 1? (yes/no): yes

Generating recommendation for Meal 1...

Recommended Meal 1:
KEEBLER, Waffle Bowls: 100.52 servings
Egg, dried, white: 22.64 servings
Soy protein isolate, potassium type: 20.32 servings
Egg, glucose reduced, stabilized, dried, whole: 24.44 servings

Nutritional Summary for Meal 1:
Calories: 728.27 kcal (Target: 728.2734375 kcal)
Protein: 54.62 g (Target: 54.6205078125 g)
Carbs: 91.03 g (Target: 91.0341796875 g)
Fat: 16.18 g (Target: 16.183854166666666 g)


# ***`APPROACH # 03`***
## ***Content-Based Filtering Using [K-Nearest Neighbors (KNN) or Cosine Similarity]***

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

def _load_with_numeric_gaps_filled(csv_path):
    """Read `csv_path` and fill gaps in its numeric columns with the column mean.

    +/-inf values are turned into NaN first: a single infinite entry (possible in the ratio
    columns when a denominator is zero) would otherwise make the column mean infinite and
    spread that value into every gap. Non-numeric columns are left untouched.
    """
    frame = pd.read_csv(csv_path).replace([np.inf, -np.inf], np.nan)
    numeric_means = frame.select_dtypes(include=[np.number]).mean()
    # Reset indices so both datasets can be aligned by row position
    return frame.fillna(numeric_means).reset_index(drop=True)


# Load datasets
normalized_dataset = _load_with_numeric_gaps_filled('/content/nutrition_normalized.csv')
feature_engineered_dataset = _load_with_numeric_gaps_filled('/content/nutrition_feature_engineered.csv')

In [None]:
def recommend_meals(user_bmr, user_tdee, meals_per_day, top_n=5):
    """
    Recommends `top_n` food items for each of `meals_per_day` meals by ranking every food
    by cosine similarity between its normalized nutrient ratios and a per-meal user
    profile, handing out the ranked items to the meals without repetition.

    Reads the module-level `normalized_dataset` (features for ranking) and
    `feature_engineered_dataset` (un-normalized values for the returned rows); both are
    expected to share the same row index.

    Parameters:
    - user_bmr: User's Basal Metabolic Rate (BMR). Accepted for interface compatibility;
      it does not influence the result.
    - user_tdee: User's Total Daily Energy Expenditure (TDEE).
    - meals_per_day: Number of meals the user wants to eat in a day (positive integer).
    - top_n: Number of top recommendations per meal (positive integer).

    Returns:
    - meals: List of DataFrames, one per meal, with the columns 'name', 'calories',
      'calories_per_gram', 'protein_ratio', 'carbs_ratio', 'sugar_ratio',
      'total_fat_ratio', 'serving_size' and 'similarity_score'.

    Raises:
    - ValueError: if `meals_per_day` or `top_n` is not positive, or if a feature column is
      missing from the normalized dataset or is not numeric.
    """
    if meals_per_day <= 0:
        raise ValueError("meals_per_day must be a positive integer.")
    if top_n <= 0:
        raise ValueError("top_n must be a positive integer.")

    # Features used for similarity calculation
    feature_columns = ['calories_per_gram', 'protein_ratio', 'carbs_ratio',
                       'sugar_ratio', 'total_fat_ratio']

    # Validate feature columns exist
    if not all(col in normalized_dataset.columns for col in feature_columns):
        raise ValueError("Some feature columns are missing from the normalized dataset.")

    # Extract the normalized feature data
    normalized_features = normalized_dataset[feature_columns].select_dtypes(include=[np.number])
    if normalized_features.shape[1] != len(feature_columns):
        # select_dtypes silently drops non-numeric columns, which would misalign the
        # feature matrix with the user profile below.
        raise ValueError("Some feature columns in the normalized dataset are not numeric.")

    # Calorie budget for one meal
    user_tdee_per_meal = user_tdee / meals_per_day

    # Macronutrient split as a share of the meal's calories
    protein_ratio = 0.30
    carbs_ratio = 0.50
    fat_ratio = 0.20
    sugar_ratio = 0.05   # Minimized sugar ratio (5% or less of daily calories)

    # Grams of each macronutrient per meal
    protein_per_meal = (user_tdee_per_meal * protein_ratio) / 4  # 4 calories per gram of protein
    carbs_per_meal = (user_tdee_per_meal * carbs_ratio) / 4     # 4 calories per gram of carbohydrate
    fat_per_meal = (user_tdee_per_meal * fat_ratio) / 9         # 9 calories per gram of fat
    sugar_per_meal = (user_tdee_per_meal * sugar_ratio) / 4     # Approximation (4 calories per gram of sugar)

    # User profile, in the same order as feature_columns.
    # NOTE(review): every component is proportional to user_tdee_per_meal and cosine
    # similarity ignores vector length, so the ranking is the same for every user_tdee and
    # meals_per_day; only the computed serving sizes depend on them.
    user_profile = np.array([user_tdee_per_meal / 2000,  # Calorie needs normalized to 2000
                             protein_per_meal / 30,   # Protein per meal (normalized for typical meal)
                             carbs_per_meal / 50,     # Carbs per meal (normalized for typical meal)
                             sugar_per_meal / 10,     # Sugar per meal (normalized)
                             fat_per_meal / 12])      # Fat per meal (normalized)

    # Validate profile length matches feature columns
    if len(user_profile) != len(feature_columns):
        raise ValueError("User profile length does not match the number of feature columns.")

    # Compute cosine similarity between user profile and food items
    similarity_scores = cosine_similarity([user_profile], normalized_features).flatten()

    # Add similarity scores to a copy of the dataset for ranking
    ranked_dataset = normalized_dataset.copy()
    ranked_dataset['similarity'] = similarity_scores

    # List to hold meal recommendations
    meals = []

    # Keep track of already recommended items to avoid duplicates
    recommended_items_indices = set()

    for meal_number in range(1, meals_per_day + 1):
        # Filter out already recommended items
        available_items = ranked_dataset.loc[~ranked_dataset.index.isin(recommended_items_indices)]

        # Retrieve top N recommendations sorted by similarity for each meal
        recommendations = available_items.sort_values(by='similarity', ascending=False).head(top_n)

        # Mark these items as recommended to avoid duplicates in future meals
        recommended_items_indices.update(recommendations.index)

        # Take the un-normalized values of the same rows for display; .copy() so the
        # columns added below do not write into a slice of the source frame.
        recommended_items = feature_engineered_dataset.loc[recommendations.index, [
            'name', 'calories', 'calories_per_gram', 'protein_ratio',
            'carbs_ratio', 'sugar_ratio', 'total_fat_ratio']].copy()

        # Number of servings of each single item that would cover the whole per-meal
        # calorie budget. Zero-calorie items get NaN instead of an infinite serving size.
        recommended_items['serving_size'] = (
            user_tdee_per_meal / recommended_items['calories'].replace(0, np.nan)
        )

        # Add similarity scores for reference
        recommended_items['similarity_score'] = recommendations['similarity'].values

        # Append the meal DataFrame to the list
        meals.append(recommended_items)

    return meals


In [None]:
# Print the per-column NaN count of both datasets, each under its own heading.
for report_heading, report_frame in (
    ("NaN values in normalized dataset:", normalized_dataset),
    ("\nNaN values in feature-engineered dataset:", feature_engineered_dataset),
):
    print(report_heading)
    print(report_frame.isnull().sum())


NaN values in normalized dataset:
name                   0
serving_size           0
calories               0
total_fat              0
saturated_fat          0
                      ..
protein_ratio          0
carbs_ratio            0
sugar_ratio            0
total_fat_ratio        0
saturated_fat_ratio    0
Length: 83, dtype: int64

NaN values in feature-engineered dataset:
name                   0
serving_size           0
calories               0
total_fat              0
saturated_fat          0
                      ..
protein_ratio          0
carbs_ratio            0
sugar_ratio            0
total_fat_ratio        0
saturated_fat_ratio    0
Length: 83, dtype: int64


In [None]:
def evaluate_recommendation_system(user_bmr, user_tdee, meals_per_day, top_n=5, calorie_tolerance=0.1):
    """
    Evaluates the recommendation system and prints a report.

    Checks performed:
    - The number of meals generated matches `meals_per_day`.
    - Every meal contains exactly `top_n` recommendations.
    - For every recommended item, `calories * serving_size` lies within
      +/- `calorie_tolerance` of the per-meal calorie target
      (`user_tdee / meals_per_day`).
    - The range of similarity scores over all recommended items.

    Parameters:
    - user_bmr: User's Basal Metabolic Rate (BMR); forwarded to `recommend_meals`.
    - user_tdee: User's Total Daily Energy Expenditure (TDEE).
    - meals_per_day: Number of meals per day; must be greater than zero.
    - top_n: Number of top recommendations expected per meal.
    - calorie_tolerance: Allowed relative deviation from the per-meal calorie
      target (0.1 means 10%).

    Returns:
    - None (prints evaluation results).

    Raises:
    - ValueError: If `meals_per_day` is not greater than zero.
    """
    # Fail early with a clear message instead of a ZeroDivisionError (or a
    # negative calorie target) further down.
    if meals_per_day <= 0:
        raise ValueError("Number of meals must be greater than zero.")

    print("Evaluating Recommendation System...")

    # Generate meal recommendations
    meal_recommendations = recommend_meals(user_bmr=user_bmr, user_tdee=user_tdee, meals_per_day=meals_per_day, top_n=top_n)

    # Initialize variables for evaluation
    per_meal_calorie_target = user_tdee / meals_per_day
    calorie_margin = calorie_tolerance * per_meal_calorie_target
    lower_bound = per_meal_calorie_target - calorie_margin
    upper_bound = per_meal_calorie_target + calorie_margin
    total_meals = len(meal_recommendations)
    similarity_scores = []
    serving_size_validity = True

    # Evaluate each meal
    for i, meal in enumerate(meal_recommendations, start=1):
        print(f"\nEvaluating Meal {i}...")

        # Check if meal has top_n recommendations
        if len(meal) != top_n:
            print(f"WARNING: Meal {i} does not have {top_n} recommendations.")

        similarity_scores.extend(meal['similarity_score'].tolist())

        # Check serving size calculations and calorie alignment for the whole
        # meal at once. NaN/inf products (e.g. from zero-calorie items) fall
        # outside the bounds and are therefore reported.
        recommended_calories = meal['calories'] * meal['serving_size']
        off_target = (~recommended_calories.between(lower_bound, upper_bound)).to_numpy()

        if off_target.any():
            serving_size_validity = False
            # Positional (NumPy) masking so a non-unique meal index cannot break the lookup.
            offending_names = meal['name'].to_numpy()[off_target]
            offending_calories = recommended_calories.to_numpy()[off_target]
            for item_name, item_calories in zip(offending_names, offending_calories):
                print(f"  Issue with {item_name}: Expected calories ~{per_meal_calorie_target}, got {item_calories:.2f}.")

    # Summary of evaluation
    print("\n--- Evaluation Summary ---")
    print(f"Number of meals generated: {total_meals} (Expected: {meals_per_day})")
    if total_meals != meals_per_day:
        print(f"WARNING: Generated {total_meals} meals but {meals_per_day} were requested.")

    if not similarity_scores:
        # min()/max() raise on an empty sequence, and there is nothing to
        # validate, so report that explicitly instead of crashing.
        print("Similarity Score Range: N/A (no recommendations were generated)")
        print("No recommendations were generated, so serving sizes could not be checked.")
        return

    print(f"Similarity Score Range: {min(similarity_scores):.4f} to {max(similarity_scores):.4f}")
    if serving_size_validity:
        print("All serving sizes calculated correctly and align with calorie goals.")
    else:
        print("Some serving sizes did not align with the expected calorie goals.")


In [None]:
# User inputs: reuse the BMR/TDEE values computed earlier in the notebook
example_bmr = bmr
example_tdee = tdee

# Number of recommendations per meal, shared by the recommender and its
# evaluation so the two can never drift apart.
recommendations_per_meal = 5

try:
    meals_per_day = int(input("How many meals do you want to eat in a day? "))

    if meals_per_day <= 0:
        raise ValueError("Number of meals must be greater than zero.")
except ValueError as e:
    print(f"Invalid input: {e}")
    # Re-raise rather than calling exit(): in a notebook exit() shuts down the
    # kernel (losing every loaded dataset), whereas an exception only stops
    # this cell and any "Run All" in progress.
    raise

# Print BMR, TDEE, and meals per day values
print(f"\nUser BMR: {example_bmr} kcal")
print(f"User TDEE: {example_tdee} kcal")
print(f"Meals per day: {meals_per_day}")

# Get recommendations for all meals
meal_recommendations = recommend_meals(
    user_bmr=example_bmr,
    user_tdee=example_tdee,
    meals_per_day=meals_per_day,
    top_n=recommendations_per_meal,
)

# Call the evaluation function (it generates its own set of recommendations
# from the same inputs and prints a report)
evaluate_recommendation_system(
    user_bmr=example_bmr,
    user_tdee=example_tdee,
    meals_per_day=meals_per_day,
    top_n=recommendations_per_meal,
)

# Display recommendations for each meal
for i, meal in enumerate(meal_recommendations, start=1):
    print(f"\nMeal {i} Recommendations:")
    display(meal)

How many meals do you want to eat in a day? 3

User BMR: 1695.0 kcal
User TDEE: 2923.875 kcal
Meals per day: 3
Evaluating Recommendation System...

Evaluating Meal 1...

Evaluating Meal 2...

Evaluating Meal 3...

--- Evaluation Summary ---
Number of meals generated: 3 (Expected: 3)
Similarity Score Range: 0.8402 to 0.8738
All serving sizes calculated correctly and align with calorie goals.

Meal 1 Recommendations:


Unnamed: 0,name,calories,calories_per_gram,protein_ratio,carbs_ratio,sugar_ratio,total_fat_ratio,serving_size,similarity_score
4754,"Soybeans, steamed, cooked, sprouted, mature seeds",81,0.81,0.0847,0.0653,0.079632,0.045,12.032407,0.873757
5851,"Soybeans, with salt, steamed, cooked, sprouted...",81,0.81,0.0847,0.0653,0.06585,0.045,12.032407,0.872717
6630,"MORNINGSTAR FARMS Tomato & Basil Pizza Burger,...",161,1.61,0.155,0.14,0.185714,0.086,6.053571,0.872171
5616,"MORNINGSTAR FARMS California Turk'y Burger, un...",155,1.55,0.149,0.126,0.142857,0.077,6.287903,0.863831
5743,"MORNINGSTAR FARMS Spicy Black Bean Burger, unp...",168,1.68,0.146,0.191,0.094241,0.061,5.801339,0.858128



Meal 2 Recommendations:


Unnamed: 0,name,calories,calories_per_gram,protein_ratio,carbs_ratio,sugar_ratio,total_fat_ratio,serving_size,similarity_score
7048,"Beverages,, 3-2-1 Plan, whey powder, high prot...",368,3.68,0.2787,0.5,0.4118,0.12,2.648438,0.856241
6544,MORNINGSTAR FARMS Chipotle Black Bean Crumbles...,122,1.22,0.144,0.102,0.137255,0.041,7.98873,0.854547
5227,"Drumstick leaves, without salt, drained, boile...",60,0.6,0.0527,0.1115,0.089686,0.009,16.24375,0.844504
4240,"Drumstick leaves, with salt, drained, boiled, ...",60,0.6,0.0527,0.1115,0.089686,0.009,16.24375,0.844504
2668,"Gravy, ready-to-serve, canned, beef",53,0.53,0.0375,0.0481,0.043659,0.024,18.389151,0.843372



Meal 3 Recommendations:


Unnamed: 0,name,calories,calories_per_gram,protein_ratio,carbs_ratio,sugar_ratio,total_fat_ratio,serving_size,similarity_score
6641,"MORNINGSTAR FARMS Lasagna with Veggie Sausage,...",96,0.96,0.071,0.144,0.118056,0.023,10.152344,0.842193
5333,"McDONALD'S, Premium Grilled Chicken Ranch BLT ...",204,2.04,0.167,0.2191,0.237335,0.054,4.777574,0.840869
5355,"MORNINGSTAR FARMS Breakfast Pattie, unprepared...",195,1.95,0.237,0.126,0.174603,0.083,4.998077,0.840783
7268,"Fast Foods, tomato and spread, with lettuce, g...",182,1.82,0.1734,0.1678,0.183552,0.046,5.355082,0.840521
2354,"WORTHINGTON Chili, unprepared, canned",126,1.26,0.104,0.109,0.119266,0.045,7.735119,0.840242
