In [None]:
from tensorflow.keras import backend as K
K.clear_session()

# ***`Data Preprocessing`***

In [None]:
# Install the 'rarfile' library to handle RAR file extraction in the environment.
!pip install rarfile

Collecting rarfile
  Downloading rarfile-4.2-py3-none-any.whl.metadata (4.4 kB)
Downloading rarfile-4.2-py3-none-any.whl (29 kB)
Installing collected packages: rarfile
Successfully installed rarfile-4.2


In [None]:
# Import necessary libraries for handling files, downloading, and data manipulation
import os
import pandas as pd
import gdown
import rarfile
import shutil

# Download the RAR file from Google Drive
url = 'https://drive.google.com/uc?id=1a6mvg1ewcz8rNxySb5niXxfDgyOaACGx'
output_rar = '/content/fitnesstan-Dataset.rar'
gdown.download(url, output_rar, quiet=False)

# Extract the RAR file to access the dataset
with rarfile.RarFile(output_rar, 'r') as rar_ref:
    rar_ref.extractall('/content')

Downloading...
From: https://drive.google.com/uc?id=1a6mvg1ewcz8rNxySb5niXxfDgyOaACGx
To: /content/fitnesstan-Dataset.rar
100%|██████████| 1.01M/1.01M [00:00<00:00, 56.6MB/s]


### ***Cleaning***

In [None]:
import pandas as pd

# Load the dataset to examine its structure
file_path = '/content/fitnesstan-Dataset/nutrition.csv'
nutrition_data = pd.read_csv(file_path)

# Display the first few rows of the dataset
nutrition_data.head(), nutrition_data.info(), nutrition_data.describe()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8789 entries, 0 to 8788
Data columns (total 77 columns):
 #   Column                       Non-Null Count  Dtype 
---  ------                       --------------  ----- 
 0   Unnamed: 0                   8789 non-null   int64 
 1   name                         8789 non-null   object
 2   serving_size                 8789 non-null   object
 3   calories                     8789 non-null   int64 
 4   total_fat                    8789 non-null   object
 5   saturated_fat                7199 non-null   object
 6   cholesterol                  8789 non-null   object
 7   sodium                       8789 non-null   object
 8   choline                      8789 non-null   object
 9   folate                       8789 non-null   object
 10  folic_acid                   8789 non-null   object
 11  niacin                       8789 non-null   object
 12  pantothenic_acid             8789 non-null   object
 13  riboflavin                   8789

(   Unnamed: 0             name serving_size  calories total_fat saturated_fat  \
 0           0       Cornstarch        100 g       381      0.1g           NaN   
 1           1     Nuts, pecans        100 g       691       72g          6.2g   
 2           2    Eggplant, raw        100 g        25      0.2g           NaN   
 3           3   Teff, uncooked        100 g       367      2.4g          0.4g   
 4           4  Sherbet, orange        100 g       144        2g          1.2g   
 
   cholesterol    sodium  choline     folate  ...      fat  \
 0           0   9.00 mg   0.4 mg   0.00 mcg  ...   0.05 g   
 1           0   0.00 mg  40.5 mg  22.00 mcg  ...  71.97 g   
 2           0   2.00 mg   6.9 mg  22.00 mcg  ...   0.18 g   
 3           0  12.00 mg  13.1 mg          0  ...   2.38 g   
 4         1mg  46.00 mg   7.7 mg   4.00 mcg  ...   2.00 g   
 
   saturated_fatty_acids monounsaturated_fatty_acids  \
 0               0.009 g                     0.016 g   
 1               6.1

In [None]:
import pandas as pd
import numpy as np

In [None]:
file_path = '/content/fitnesstan-Dataset/nutrition.csv'
nutrition_data = pd.read_csv(file_path)

In [None]:
# Drop the 'Unnamed: 0' column (irrelevant index column)
nutrition_data = nutrition_data.drop(columns=['Unnamed: 0'])

# Identify missing values
missing_values = nutrition_data.isnull().sum()

# Drop columns with more than 50% missing values
nutrition_data = nutrition_data.loc[:, missing_values <= (0.5 * len(nutrition_data))]

# Fill missing values in numerical columns with mean
for column in nutrition_data.select_dtypes(include=['float64', 'int64']).columns:
    nutrition_data[column] = nutrition_data[column].fillna(nutrition_data[column].mean())

# For categorical columns, fill missing with mode
for column in nutrition_data.select_dtypes(include=['object']).columns:
    nutrition_data[column] = nutrition_data[column].fillna(nutrition_data[column].mode()[0])


In [None]:
nutrition_data = nutrition_data.drop_duplicates()

In [None]:
nutrition_data.columns = nutrition_data.columns.str.lower().str.replace(' ', '_').str.replace('[^a-z0-9_]', '')

In [None]:
cleaned_file_path = '/content/nutrition_cleaned.csv'
nutrition_data.to_csv(cleaned_file_path, index=False)
print(f"Cleaned dataset saved at {cleaned_file_path}")


Cleaned dataset saved at /content/nutrition_cleaned.csv


### ***Feature Engineering***

In [None]:
# Import necessary libraries
import pandas as pd

# Load the cleaned dataset
cleaned_file_path = '/content/nutrition_cleaned.csv'
cleaned_data = pd.read_csv(cleaned_file_path)

# Display the first few rows to verify the dataset
cleaned_data.head()


Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
0,Cornstarch,100 g,381,0.1g,0.1g,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
1,"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
2,"Eggplant, raw",100 g,25,0.2g,0.1g,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
3,"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
4,"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g


In [None]:
# Step 1: Extract numeric weight (grams) from serving_size
if 'serving_size' in cleaned_data.columns:
    cleaned_data['serving_size_grams'] = (
        cleaned_data['serving_size']
        .str.extract(r'(\d+\.?\d*)')  # Extract numeric values
        .astype(float)
    )
    # Fill missing serving sizes with the median
    cleaned_data['serving_size_grams'] = cleaned_data['serving_size_grams'].fillna(
        cleaned_data['serving_size_grams'].median()
    )
else:
    print("Column 'serving_size' is not present in the dataset.")

# Verify the updated column
cleaned_data[['serving_size', 'serving_size_grams']].head()


Unnamed: 0,serving_size,serving_size_grams
0,100 g,100.0
1,100 g,100.0
2,100 g,100.0
3,100 g,100.0
4,100 g,100.0


In [None]:
# Step 2: Create Calories per Gram
if 'calories' in cleaned_data.columns:
    cleaned_data['calories_per_gram'] = (
        cleaned_data['calories'] / cleaned_data['serving_size_grams']
    )
else:
    print("Column 'calories' is not present in the dataset.")

# Verify the new feature
cleaned_data[['calories', 'serving_size_grams', 'calories_per_gram']].head()


Unnamed: 0,calories,serving_size_grams,calories_per_gram
0,381,100.0,3.81
1,691,100.0,6.91
2,25,100.0,0.25
3,367,100.0,3.67
4,144,100.0,1.44


In [None]:
# Step 3: Convert non-numeric columns to numeric for ratio calculations
for column in cleaned_data.select_dtypes(include=['object']).columns:
    # Skip the 'name' column
    if column == 'name':
        continue
    # Replace non-numeric characters and convert to float, handling errors
    cleaned_data[column] = pd.to_numeric(cleaned_data[column].replace(r'[^\d.]', '', regex=True), errors='coerce')

# Verify the data types and updated columns
cleaned_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8789 entries, 0 to 8788
Data columns (total 78 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   name                         8789 non-null   object 
 1   serving_size                 8789 non-null   int64  
 2   calories                     8789 non-null   int64  
 3   total_fat                    8789 non-null   float64
 4   saturated_fat                8789 non-null   float64
 5   cholesterol                  8789 non-null   int64  
 6   sodium                       8789 non-null   float64
 7   choline                      8789 non-null   float64
 8   folate                       8789 non-null   float64
 9   folic_acid                   8789 non-null   float64
 10  niacin                       8789 non-null   float64
 11  pantothenic_acid             8789 non-null   float64
 12  riboflavin                   8789 non-null   float64
 13  thiamin           

In [None]:
# Step 4: Calculate Macronutrient Ratios
if 'serving_size_grams' in cleaned_data.columns:
    cleaned_data['protein_ratio'] = (
        cleaned_data['protein'] / cleaned_data['serving_size_grams']
    )
    cleaned_data['carbs_ratio'] = (
        cleaned_data['carbohydrate'] / cleaned_data['serving_size_grams']
    )
    cleaned_data['sugar_ratio'] = cleaned_data['sugars'] / cleaned_data['carbohydrate']
    # Handle division by zero for sugar_ratio
    cleaned_data['sugar_ratio'] = cleaned_data['sugar_ratio'].replace(
        [float('inf'), -float('inf')], 0
    ).fillna(0)
    cleaned_data['total_fat_ratio'] = (
        cleaned_data['total_fat'] / cleaned_data['serving_size_grams']
    )

# Verify the calculated ratios
cleaned_data[
    ['protein_ratio', 'carbs_ratio', 'sugar_ratio', 'total_fat_ratio']
].head()


Unnamed: 0,protein_ratio,carbs_ratio,sugar_ratio,total_fat_ratio
0,0.0026,0.9127,0.0,0.001
1,0.0917,0.1386,0.286436,0.72
2,0.0098,0.0588,0.60034,0.002
3,0.133,0.7313,0.025161,0.024
4,0.011,0.304,0.8,0.02


In [None]:
# Step 5: Additional Features (e.g., Saturated Fat Ratio)
if 'saturated_fat' in cleaned_data.columns:
    # Check if the column is numeric
    if pd.api.types.is_numeric_dtype(cleaned_data['saturated_fat']):
        # If numeric, directly use the values
        cleaned_data['saturated_fat_ratio'] = (
            cleaned_data['saturated_fat'] / cleaned_data['serving_size_grams']
        )
    else:
        # If not numeric, convert to numeric and then calculate the ratio
        cleaned_data['saturated_fat_ratio'] = (
            cleaned_data['saturated_fat']
            .str.replace(r'[^\d.]', '', regex=True)
            .astype(float)
            / cleaned_data['serving_size_grams']
        )
else:
    print("Column 'saturated_fat' is not present in the dataset.")

# Verify the saturated fat ratio if it exists
if 'saturated_fat_ratio' in cleaned_data.columns:
    cleaned_data[['saturated_fat_ratio']].head()

In [None]:
# Step 6: Save the feature-engineered dataset
feature_engineered_file_path = '/content/nutrition_feature_engineered.csv'
cleaned_data.to_csv(feature_engineered_file_path, index=False)

# Output the file path for reference
print(f"Feature-engineered dataset saved at: {feature_engineered_file_path}")


Feature-engineered dataset saved at: /content/nutrition_feature_engineered.csv


### ***Normalization***

In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Load the feature-engineered dataset
feature_engineered_file_path = '/content/nutrition_feature_engineered.csv'
feature_engineered_data = pd.read_csv(feature_engineered_file_path)

# Display the first few rows to verify the dataset
feature_engineered_data.head()

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,caffeine,theobromine,water,serving_size_grams,calories_per_gram,protein_ratio,carbs_ratio,sugar_ratio,total_fat_ratio,saturated_fat_ratio
0,Cornstarch,100,381,0.1,0.1,0,9.0,0.4,0.0,0.0,...,0.0,0.0,8.32,100.0,3.81,0.0026,0.9127,0.0,0.001,0.001
1,"Nuts, pecans",100,691,72.0,6.2,0,0.0,40.5,22.0,0.0,...,0.0,0.0,3.52,100.0,6.91,0.0917,0.1386,0.286436,0.72,0.062
2,"Eggplant, raw",100,25,0.2,0.1,0,2.0,6.9,22.0,0.0,...,0.0,0.0,92.3,100.0,0.25,0.0098,0.0588,0.60034,0.002,0.001
3,"Teff, uncooked",100,367,2.4,0.4,0,12.0,13.1,0.0,0.0,...,0.0,0.0,8.82,100.0,3.67,0.133,0.7313,0.025161,0.024,0.004
4,"Sherbet, orange",100,144,2.0,1.2,1,46.0,7.7,4.0,0.0,...,0.0,0.0,66.1,100.0,1.44,0.011,0.304,0.8,0.02,0.012


In [None]:
# Step 1: Select features for normalization
# Include only the macronutrient ratios and derived metrics
features_to_normalize = [
    'protein_ratio',
    'carbs_ratio',
    'sugar_ratio',
    'total_fat_ratio',
    'calories_per_gram'
]

# Check which of these features are present in the dataset
features_to_normalize = [
    feature for feature in features_to_normalize if feature in feature_engineered_data.columns
]

# Display the features selected for normalization
print("Features to be normalized:", features_to_normalize)


Features to be normalized: ['protein_ratio', 'carbs_ratio', 'sugar_ratio', 'total_fat_ratio', 'calories_per_gram']


In [None]:
# Step 2: Apply MinMaxScaler to the selected features
scaler = MinMaxScaler()

# Create a copy of the dataset to retain the original
normalized_data = feature_engineered_data.copy()

# Normalize only the selected features
normalized_data[features_to_normalize] = scaler.fit_transform(normalized_data[features_to_normalize])

# Verify the normalized features
normalized_data[features_to_normalize].head()


Unnamed: 0,protein_ratio,carbs_ratio,sugar_ratio,total_fat_ratio,calories_per_gram
0,0.002944,0.9127,0.0,0.001,0.422395
1,0.103827,0.1386,0.204347,0.72,0.766075
2,0.011096,0.0588,0.428291,0.002,0.027716
3,0.150589,0.7313,0.01795,0.024,0.406874
4,0.012455,0.304,0.570732,0.02,0.159645


In [None]:
# Step 3: Retain unnormalized columns (e.g., categorical identifiers or raw features)
# No explicit action needed as non-normalized columns are already intact in `normalized_data`

# Display a preview of the dataset to confirm normalization
normalized_data.head()


Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,caffeine,theobromine,water,serving_size_grams,calories_per_gram,protein_ratio,carbs_ratio,sugar_ratio,total_fat_ratio,saturated_fat_ratio
0,Cornstarch,100,381,0.1,0.1,0,9.0,0.4,0.0,0.0,...,0.0,0.0,8.32,100.0,0.422395,0.002944,0.9127,0.0,0.001,0.001
1,"Nuts, pecans",100,691,72.0,6.2,0,0.0,40.5,22.0,0.0,...,0.0,0.0,3.52,100.0,0.766075,0.103827,0.1386,0.204347,0.72,0.062
2,"Eggplant, raw",100,25,0.2,0.1,0,2.0,6.9,22.0,0.0,...,0.0,0.0,92.3,100.0,0.027716,0.011096,0.0588,0.428291,0.002,0.001
3,"Teff, uncooked",100,367,2.4,0.4,0,12.0,13.1,0.0,0.0,...,0.0,0.0,8.82,100.0,0.406874,0.150589,0.7313,0.01795,0.024,0.004
4,"Sherbet, orange",100,144,2.0,1.2,1,46.0,7.7,4.0,0.0,...,0.0,0.0,66.1,100.0,0.159645,0.012455,0.304,0.570732,0.02,0.012


In [None]:
# Step 4: Save the normalized dataset
normalized_file_path = '/content/nutrition_normalized.csv'
normalized_data.to_csv(normalized_file_path, index=False)

# Output the file path for reference
print(f"Normalized dataset saved at: {normalized_file_path}")


Normalized dataset saved at: /content/nutrition_normalized.csv


# ***`BMR/TDEE`***

In [None]:
# Step 1: Collect User Information
print("Welcome to the Daily Caloric Requirement Calculator!")
print("Please answer the following questions to help determine your caloric needs.")

# Collecting personal details
gender = input("Please enter your gender (Male/Female): ").strip().lower()
weight = float(input("Enter your weight in kg: "))
height = float(input("Enter your height in cm: "))
age = int(input("Enter your age in years: "))
activity_level = input("Enter your activity level (choose from 'sedentary', 'light', 'moderate', 'very', 'super'): ").strip().lower()

# Step 2: Calculate BMR based on gender
if gender == "male":
    bmr = 10 * weight + 6.25 * height - 5 * age + 5
elif gender == "female":
    bmr = 10 * weight + 6.25 * height - 5 * age - 161
else:
    raise ValueError("Invalid gender entered. Please enter 'Male' or 'Female'.")

print(f"\nYour Basal Metabolic Rate (BMR) is: {bmr:.2f} calories/day.")

# Step 3: Calculate BMI
height_m = height / 100  # Convert height to meters
bmi = weight / (height_m ** 2)
print(f"Your Body Mass Index (BMI) is: {bmi:.2f}")

# Step 4: Assess BMI and target a healthy range
bmi_category = ""
if bmi < 18.5:
    bmi_category = "underweight"
    print("BMI Category: Underweight")
elif 18.5 <= bmi < 24.9:
    bmi_category = "normal weight"
    print("BMI Category: Normal Weight")

    # Adjust target BMI to fall between 21 and 24
    if bmi < 21:
        target_bmi = 22.5
        target_weight = target_bmi * (height_m ** 2)
        print(f"You are within the normal BMI range, but your BMI ({bmi:.2f}) is on the lower side.")
        print(f"We recommend aiming for a BMI of {target_bmi:.1f}, which corresponds to a weight of {target_weight:.2f} kg.")
    elif bmi > 24:
        target_bmi = 22.5
        target_weight = target_bmi * (height_m ** 2)
        print(f"You are within the normal BMI range, but your BMI ({bmi:.2f}) is on the higher side.")
        print(f"We recommend aiming for a BMI of {target_bmi:.1f}, which corresponds to a weight of {target_weight:.2f} kg.")
    else:
        print("Your BMI is within the optimal range (21-24). No changes are recommended.")

elif 25 <= bmi < 29.9:
    bmi_category = "overweight"
    print("BMI Category: Overweight")
else:
    bmi_category = "obese"
    print("BMI Category: Obese")

# Step 5: Calculate TDEE based on activity level
activity_multipliers = {
    "sedentary": 1.2,
    "light": 1.375,
    "moderate": 1.55,
    "very": 1.725,
    "super": 1.9
}

if activity_level in activity_multipliers:
    tdee = bmr * activity_multipliers[activity_level]

    # Adjust TDEE based on BMI category
    if bmi_category == "underweight" or (bmi < 21 and bmi_category == "normal weight"):
        tdee *= 1.1  # Increase TDEE by 10% for healthy weight gain
        print("Adjusting TDEE for healthy weight gain...")
    elif bmi_category == "overweight" or bmi_category == "obese" or (bmi > 24 and bmi_category == "normal weight"):
        tdee *= 0.9  # Reduce TDEE by 10% for healthy weight loss
        print("Adjusting TDEE for healthy weight loss...")
    else:
        print("No adjustment needed for TDEE.")

    print(f"Your Total Daily Energy Expenditure (TDEE) after adjustment is: {tdee:.2f} calories/day.")
else:
    raise ValueError("Invalid activity level entered. Please enter 'sedentary', 'light', 'moderate', 'very', or 'super'.")

# Step 6: Display caloric recommendations
print("\nBased on your TDEE, you can adjust your diet for specific goals:")
print(" - To lose weight, aim for a calorie intake slightly below your TDEE.")
print(" - To maintain weight, aim to match your TDEE.")
print(" - To gain weight, increase your calorie intake above your TDEE.")


Welcome to the Daily Caloric Requirement Calculator!
Please answer the following questions to help determine your caloric needs.
Please enter your gender (Male/Female): male
Enter your weight in kg: 72
Enter your height in cm: 172
Enter your age in years: 21
Enter your activity level (choose from 'sedentary', 'light', 'moderate', 'very', 'super'): very

Your Basal Metabolic Rate (BMR) is: 1695.00 calories/day.
Your Body Mass Index (BMI) is: 24.34
BMI Category: Normal Weight
You are within the normal BMI range, but your BMI (24.34) is on the higher side.
We recommend aiming for a BMI of 22.5, which corresponds to a weight of 66.56 kg.
Adjusting TDEE for healthy weight loss...
Your Total Daily Energy Expenditure (TDEE) after adjustment is: 2631.49 calories/day.

Based on your TDEE, you can adjust your diet for specific goals:
 - To lose weight, aim for a calorie intake slightly below your TDEE.
 - To maintain weight, aim to match your TDEE.
 - To gain weight, increase your calorie intake

# ***`Approach # 01`***

## ***Constraint Optimization using Linear Programming (LP)***

In [43]:
# Import necessary libraries
import pandas as pd
import numpy as np
from scipy.optimize import linprog

# Load the normalized dataset
normalized_file_path = '/content/nutrition_normalized.csv'
normalized_data = pd.read_csv(normalized_file_path)

# Display the first few rows to verify the dataset
print(normalized_data.head())


              name  serving_size  calories  total_fat  saturated_fat  \
0       Cornstarch           100       381        0.1            0.1   
1     Nuts, pecans           100       691       72.0            6.2   
2    Eggplant, raw           100        25        0.2            0.1   
3   Teff, uncooked           100       367        2.4            0.4   
4  Sherbet, orange           100       144        2.0            1.2   

   cholesterol  sodium  choline  folate  folic_acid  ...  caffeine  \
0            0     9.0      0.4     0.0         0.0  ...       0.0   
1            0     0.0     40.5    22.0         0.0  ...       0.0   
2            0     2.0      6.9    22.0         0.0  ...       0.0   
3            0    12.0     13.1     0.0         0.0  ...       0.0   
4            1    46.0      7.7     4.0         0.0  ...       0.0   

   theobromine  water  serving_size_grams  calories_per_gram  protein_ratio  \
0          0.0   8.32               100.0           0.422395       

In [44]:
try:
    tdee = tdee
    num_meals_per_day = int(input("How many meals do you want in a day? "))
    print(f"\nYour TDEE: {tdee} kcal")
    print(f"Number of meals per day: {num_meals_per_day}")
except ValueError:
    print("Invalid input. Please enter valid numeric values for TDEE and number of meals.")
    tdee = 0
    num_meals_per_day = 0

# Calculate calories per meal
calories_per_meal = tdee / num_meals_per_day
print(f"\nCalories per meal: {calories_per_meal:.2f} kcal")

How many meals do you want in a day? 3

Your TDEE: 2631.4875 kcal
Number of meals per day: 3

Calories per meal: 877.16 kcal


In [45]:
def recommend_balanced_meal(food_data, tdee_meal, macro_ratios, num_items):
    """
    Recommends food items and their serving sizes to meet TDEE and macronutrient goals.

    Parameters:
        food_data: DataFrame containing food items and their nutritional info.
        tdee_meal: Target calorie intake for the meal.
        macro_ratios: Desired macronutrient distribution (tuple of carbs, fats, protein ratios).
        num_items: Number of food items to include in the meal.

    Returns:
        A DataFrame with recommended food items, serving sizes, and macronutrient breakdown.
    """
    # Step 1: Sample a subset of food items for optimization
    sampled_data = food_data.sample(num_items).reset_index(drop=True)

    # Extract nutritional information
    calories = sampled_data["calories"].values / 100  # per gram
    carbs = sampled_data["carbs_ratio"].values        # per gram
    fats = sampled_data["total_fat_ratio"].values     # per gram
    protein = sampled_data["protein_ratio"].values    # per gram

    # Macronutrient targets
    target_carbs = tdee_meal * macro_ratios[0]
    target_fats = tdee_meal * macro_ratios[1]
    target_protein = tdee_meal * macro_ratios[2]

    # Step 2: Define the optimization problem
    num_foods = len(sampled_data)
    bounds = [(30, 120) for _ in range(num_foods)]  # Serving size: min 0.1g, max 300g per item

    # Objective: Minimize serving size deviation and encourage diversity
    c = np.ones(num_foods)  # Penalize larger servings to distribute load

    # Constraints
    A_eq = [calories]
    b_eq = [tdee_meal]

    A_ub = np.vstack([
        carbs - target_carbs / tdee_meal,
        fats - target_fats / tdee_meal,
        protein - target_protein / tdee_meal
    ])
    b_ub = np.zeros(len(A_ub))  # Deviation target

    # Solve the optimization problem
    res = linprog(c, A_eq=A_eq, b_eq=b_eq, bounds=bounds, method="highs")

    if res.success:
        sampled_data["recommended_serving_size"] = res.x
        sampled_data["calorie_contribution"] = sampled_data["recommended_serving_size"] * calories
        sampled_data["carbs_contribution"] = sampled_data["recommended_serving_size"] * carbs
        sampled_data["fats_contribution"] = sampled_data["recommended_serving_size"] * fats
        sampled_data["protein_contribution"] = sampled_data["recommended_serving_size"] * protein

        return sampled_data[[
            "name", "recommended_serving_size", "calorie_contribution",
            "carbs_contribution", "fats_contribution", "protein_contribution"
        ]]
    else:
        return "Optimization failed to find a solution."


In [46]:
# Initialize penalty weights
penalty_weights = pd.Series([1] * len(normalized_data), index=normalized_data.index)
final_meal_plan = pd.DataFrame()

remaining_tdee = tdee
meal_num = 1

for meal_num in range(1, num_meals_per_day + 1):
    try:
        while True:
            recommended_meal = recommend_balanced_meal(
                normalized_data,
                calories_per_meal,
                (0.45, 0.2, 0.35),  # Adjust these ratios as needed
                num_items=4
            )

            if isinstance(recommended_meal, pd.DataFrame):
                print(f"\nMeal {meal_num} Recommendation:")
                print(recommended_meal)

                user_feedback = input("Are you satisfied with this meal? (yes/no): ").strip().lower()

                if user_feedback == 'yes':
                    final_meal_plan = pd.concat([final_meal_plan, recommended_meal], ignore_index=True)
                    break
                else:
                    penalty_weights.loc[recommended_meal.index] += 10  # Penalize rejected items
            else:
                print("Unable to generate a meal. Check constraints.")
                break
    except ValueError:
        print("Invalid input.")



Meal 1 Recommendation:
                                                name  \
0  Cereals ready-to-eat, KELLOGG'S APPLE JACKS, K...   
1  Pork, braised, cooked, separable lean only, bo...   
2  Salad dressing, regular, commercial, thousand ...   
3                  Peas, raw, sprouted, mature seeds   

   recommended_serving_size  calorie_contribution  carbs_contribution  \
0                     86.07              322.7625            75.91374   
1                     30.00               62.4000             0.00000   
2                    120.00              454.8000            17.56800   
3                     30.00               37.2000             8.13300   

   fats_contribution  protein_contribution  
0            2.92638              4.970075  
1            2.79000              9.860734  
2           42.00000              1.480978  
3            0.21000              2.989130  
Are you satisfied with this meal? (yes/no): yes
Unable to generate a meal. Check constraints.

Meal 3 Re

`Overall Insights:`

The meal planning algorithm has improved in two key areas: meal diversity and calorie deviation. Meal diversity is now properly handled, ensuring variety in food choices, which enhances user satisfaction. Calorie deviation is also within an acceptable range, indicating alignment with the target calorie intake. However, the macronutrient distribution remains a challenge. The algorithm still struggles to accurately meet the desired protein, carb, and fat ratios. To improve, the optimization process should be refined to better balance macronutrients, ensuring both calorie and macronutrient goals are consistently achieved.

# ***`Heuristic`***
##### ***Heuristic: Uses a trial-and-error scaling mechanism to adjust serving sizes.***

In [50]:
import pandas as pd
import numpy as np

# Load the normalized dataset (assuming it's already available in your environment)
# Adjust this based on the correct file path in your project
normalized_file_path = '/content/nutrition_normalized.csv'
nutrition_data = pd.read_csv(normalized_file_path)

# Display the first few rows to verify the dataset
nutrition_data.head()

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,caffeine,theobromine,water,serving_size_grams,calories_per_gram,protein_ratio,carbs_ratio,sugar_ratio,total_fat_ratio,saturated_fat_ratio
0,Cornstarch,100,381,0.1,0.1,0,9.0,0.4,0.0,0.0,...,0.0,0.0,8.32,100.0,0.422395,0.002944,0.9127,0.0,0.001,0.001
1,"Nuts, pecans",100,691,72.0,6.2,0,0.0,40.5,22.0,0.0,...,0.0,0.0,3.52,100.0,0.766075,0.103827,0.1386,0.204347,0.72,0.062
2,"Eggplant, raw",100,25,0.2,0.1,0,2.0,6.9,22.0,0.0,...,0.0,0.0,92.3,100.0,0.027716,0.011096,0.0588,0.428291,0.002,0.001
3,"Teff, uncooked",100,367,2.4,0.4,0,12.0,13.1,0.0,0.0,...,0.0,0.0,8.82,100.0,0.406874,0.150589,0.7313,0.01795,0.024,0.004
4,"Sherbet, orange",100,144,2.0,1.2,1,46.0,7.7,4.0,0.0,...,0.0,0.0,66.1,100.0,0.159645,0.012455,0.304,0.570732,0.02,0.012


In [51]:
# Input the TDEE and number of meals
TDEE = tdee  # Example TDEE, adjust as needed
num_meals = int(input("Enter the number of meals you want (e.g., 3 or 4): "))

# Set macronutrient percentage ranges (customizable)
protein_percentage = 0.25  # 25% of TDEE
carbs_percentage = 0.50    # 50% of TDEE
fat_percentage = 0.25      # 25% of TDEE

# Calculate macro targets in grams
protein_target = (protein_percentage * TDEE) / 4  # 1g protein = 4 kcal
carbs_target = (carbs_percentage * TDEE) / 4     # 1g carbs = 4 kcal
fat_target = (fat_percentage * TDEE) / 9         # 1g fat = 9 kcal

# Calculate per-meal targets
meal_calories_target = TDEE / num_meals
protein_target_per_meal = protein_target / num_meals
carbs_target_per_meal = carbs_target / num_meals
fat_target_per_meal = fat_target / num_meals

# Print targets for each meal
print(f"\nTarget for each meal:")
print(f"Calories per meal: {meal_calories_target:.2f} kcal")
print(f"Protein per meal: {protein_target_per_meal:.2f} g")
print(f"Carbs per meal: {carbs_target_per_meal:.2f} g")
print(f"Fat per meal: {fat_target_per_meal:.2f} g")

# Extract arrays from dataset
food_items = nutrition_data['name']
calories = nutrition_data['calories']
protein = nutrition_data['protein']
carbs = nutrition_data['carbohydrate']
fat = nutrition_data['total_fat']

# Loop through each meal
for meal in range(num_meals):
    print(f"\nMeal {meal + 1}: Please select food items for this meal")

    # Prompt user to select food items
    selected_food_indices = []
    num_food_items = int(input("How many food items do you want to include in this meal? "))
    for i in range(num_food_items):
        while True:
            try:
                food_index = int(input(f"Select food item {i+1}: (Enter index between 0 and {len(food_items) - 1}) "))
                if 0 <= food_index < len(food_items):
                    selected_food_indices.append(food_index)
                    break
                else:
                    print("Invalid index. Please try again.")
            except ValueError:
                print("Invalid input. Please enter a valid number.")

    # Retrieve nutritional data for selected items
    selected_calories = calories.iloc[selected_food_indices].reset_index(drop=True)
    selected_protein = protein.iloc[selected_food_indices].reset_index(drop=True)
    selected_carbs = carbs.iloc[selected_food_indices].reset_index(drop=True)
    selected_fat = fat.iloc[selected_food_indices].reset_index(drop=True)

    # Display selected items
    print(f"\nSelected food items for Meal {meal + 1}:")
    for idx in selected_food_indices:
        print(f"{food_items[idx]}: Calories = {calories.iloc[idx]}, Protein = {protein.iloc[idx]}, Carbs = {carbs.iloc[idx]}, Fat = {fat.iloc[idx]}")

    # Scale servings to match meal calorie and macro targets
    servings = [1.0] * len(selected_food_indices)
    while True:
        total_calories = sum(selected_calories[i] * servings[i] for i in range(len(servings)))
        total_protein = sum(selected_protein[i] * servings[i] for i in range(len(servings)))
        total_carbs = sum(selected_carbs[i] * servings[i] for i in range(len(servings)))
        total_fat = sum(selected_fat[i] * servings[i] for i in range(len(servings)))

        # Check if calories and macros are within acceptable range
        if abs(total_calories - meal_calories_target) < 5:
            if abs(total_protein - protein_target_per_meal) < 5 and abs(total_carbs - carbs_target_per_meal) < 5 and abs(total_fat - fat_target_per_meal) < 2:
                break

        # Adjust servings proportionally
        scaling_factor = meal_calories_target / total_calories
        servings = [serving * scaling_factor for serving in servings]

    # Display final serving suggestions
    print(f"\nSuggested servings for Meal {meal + 1}:")
    for i, serving in enumerate(servings):
        print(f"{food_items[selected_food_indices[i]]}: {serving:.2f} servings")

    # Display nutritional summary
    print(f"\nNutritional summary for Meal {meal + 1}:")
    print(f"Calories: {total_calories:.2f} / {meal_calories_target}")
    print(f"Protein: {total_protein:.2f} / {protein_target_per_meal}")
    print(f"Carbs: {total_carbs:.2f} / {carbs_target_per_meal}")
    print(f"Fat: {total_fat:.2f} / {fat_target_per_meal}")

Enter the number of meals you want (e.g., 3 or 4): 3

Target for each meal:
Calories per meal: 877.16 kcal
Protein per meal: 54.82 g
Carbs per meal: 109.65 g
Fat per meal: 24.37 g

Meal 1: Please select food items for this meal
How many food items do you want to include in this meal? 4
Select food item 1: (Enter index between 0 and 8788) 88
Select food item 2: (Enter index between 0 and 8788) 88
Select food item 3: (Enter index between 0 and 8788) 77
Select food item 4: (Enter index between 0 and 8788) 88

Selected food items for Meal 1:
Mushrooms, raw, enoki: Calories = 37, Protein = 2.66, Carbs = 7.81, Fat = 0.3
Mushrooms, raw, enoki: Calories = 37, Protein = 2.66, Carbs = 7.81, Fat = 0.3
Figs, uncooked, dried: Calories = 249, Protein = 3.3, Carbs = 63.87, Fat = 0.9
Mushrooms, raw, enoki: Calories = 37, Protein = 2.66, Carbs = 7.81, Fat = 0.3


KeyboardInterrupt: 

# ***`APPROACH # 03`***
## ***Content-Based Filtering Using [Cosine Similarity]***

In [52]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Load datasets
normalized_dataset = pd.read_csv('/content/nutrition_normalized.csv')
feature_engineered_dataset = pd.read_csv('/content/nutrition_feature_engineered.csv')

# Fill NaN values for numeric columns only
normalized_dataset.fillna(normalized_dataset.select_dtypes(include=[np.number]).mean(), inplace=True)
feature_engineered_dataset.fillna(feature_engineered_dataset.select_dtypes(include=[np.number]).mean(), inplace=True)

# Reset indices for alignment
normalized_dataset.reset_index(drop=True, inplace=True)
feature_engineered_dataset.reset_index(drop=True, inplace=True)

In [53]:
def recommend_meals(user_bmr, user_tdee, meals_per_day, top_n=5):
    """
    Recommends meals based on user BMR and TDEE divided by meals per day,
    ensuring different items for each meal and more realistic macronutrient distribution.

    Parameters:
    - user_bmr: User's Basal Metabolic Rate (BMR).
    - user_tdee: User's Total Daily Energy Expenditure (TDEE).
    - meals_per_day: Number of meals the user wants to eat in a day.
    - top_n: Number of top recommendations per meal.

    Returns:
    - meals: List of DataFrames, each representing a meal.
    """
    # Features used for similarity calculation
    feature_columns = ['calories_per_gram', 'protein_ratio', 'carbs_ratio',
                       'sugar_ratio', 'total_fat_ratio']

    # Validate feature columns exist
    if not all(col in normalized_dataset.columns for col in feature_columns):
        raise ValueError("Some feature columns are missing from the normalized dataset.")

    # Extract the normalized feature data
    normalized_features = normalized_dataset[feature_columns].select_dtypes(include=[np.number])

    # Adjust user dietary profile for one meal
    user_bmr_per_meal = user_bmr / meals_per_day
    user_tdee_per_meal = user_tdee / meals_per_day

    # Realistic macronutrient profile
    # For simplicity, we assume a balanced distribution for protein, carbs, and fat
    # Adjust ratios as a percentage of TDEE
    protein_ratio = 0.30
    carbs_ratio = 0.50
    fat_ratio = 0.20
    sugar_ratio = 0.05   # Minimized sugar ratio (5% or less of daily calories)

    # Calculate grams of each macronutrient for the user per meal
    protein_per_meal = (user_tdee_per_meal * protein_ratio) / 4  # 4 calories per gram of protein
    carbs_per_meal = (user_tdee_per_meal * carbs_ratio) / 4     # 4 calories per gram of carbohydrate
    fat_per_meal = (user_tdee_per_meal * fat_ratio) / 9         # 9 calories per gram of fat
    sugar_per_meal = (user_tdee_per_meal * sugar_ratio) / 4     # Approximation (4 calories per gram of sugar)

    # Construct the user profile based on macronutrient needs
    user_profile = np.array([user_tdee_per_meal / 2000,  # Calorie needs normalized to 2000
                             protein_per_meal / 30,   # Protein per meal (normalized for typical meal)
                             carbs_per_meal / 50,     # Carbs per meal (normalized for typical meal)
                             sugar_per_meal / 10,     # Sugar per meal (normalized)
                             fat_per_meal / 12])      # Fat per meal (normalized)

    # Validate profile length matches feature columns
    if len(user_profile) != len(feature_columns):
        raise ValueError("User profile length does not match the number of feature columns.")

    # Compute cosine similarity between user profile and food items
    similarity_scores = cosine_similarity([user_profile], normalized_features).flatten()

    # Add similarity scores to a copy of the dataset for ranking
    ranked_dataset = normalized_dataset.copy()
    ranked_dataset['similarity'] = similarity_scores

    # List to hold meal recommendations
    meals = []

    # Keep track of already recommended items to avoid duplicates
    recommended_items_indices = set()

    for meal_number in range(1, meals_per_day + 1):
        # Filter out already recommended items
        available_items = ranked_dataset.loc[~ranked_dataset.index.isin(recommended_items_indices)]

        # Retrieve top N recommendations sorted by similarity for each meal
        recommendations = available_items.sort_values(by='similarity', ascending=False).head(top_n)

        # Mark these items as recommended to avoid duplicates in future meals
        recommended_items_indices.update(recommendations.index)

        # Return relevant columns from the feature_engineered dataset for context
        recommended_items = feature_engineered_dataset.loc[recommendations.index, [
            'name', 'calories', 'calories_per_gram', 'protein_ratio',
            'carbs_ratio', 'sugar_ratio', 'total_fat_ratio']]

        # Calculate serving size to meet per-meal calorie requirement
        recommended_items['serving_size'] = user_tdee_per_meal / recommended_items['calories']

        # Add similarity scores for reference
        recommended_items['similarity_score'] = recommendations['similarity'].values

        # Append the meal DataFrame to the list
        meals.append(recommended_items)

    return meals


In [54]:
# Check for NaN values in the normalized dataset
print("NaN values in normalized dataset:")
print(normalized_dataset.isnull().sum())

# Check for NaN values in the feature-engineered dataset
print("\nNaN values in feature-engineered dataset:")
print(feature_engineered_dataset.isnull().sum())


NaN values in normalized dataset:
name                   0
serving_size           0
calories               0
total_fat              0
saturated_fat          0
                      ..
protein_ratio          0
carbs_ratio            0
sugar_ratio            0
total_fat_ratio        0
saturated_fat_ratio    0
Length: 83, dtype: int64

NaN values in feature-engineered dataset:
name                   0
serving_size           0
calories               0
total_fat              0
saturated_fat          0
                      ..
protein_ratio          0
carbs_ratio            0
sugar_ratio            0
total_fat_ratio        0
saturated_fat_ratio    0
Length: 83, dtype: int64


In [55]:
def evaluate_recommendation_system(user_bmr, user_tdee, meals_per_day, top_n=5):
    """
    Evaluates the recommendation system by:
    - Checking if the system produces the desired number of meals.
    - Ensuring each meal meets the calorie requirements within a margin.
    - Providing statistics on the similarity scores.

    Parameters:
    - user_bmr: User's Basal Metabolic Rate (BMR).
    - user_tdee: User's Total Daily Energy Expenditure (TDEE).
    - meals_per_day: Number of meals per day.
    - top_n: Number of top recommendations per meal.

    Returns:
    - None (prints evaluation results).
    """
    print("Evaluating Recommendation System...")

    # Generate meal recommendations
    meal_recommendations = recommend_meals(user_bmr=user_bmr, user_tdee=user_tdee, meals_per_day=meals_per_day, top_n=top_n)

    # Initialize variables for evaluation
    per_meal_calorie_target = user_tdee / meals_per_day
    calorie_margin = 0.1 * per_meal_calorie_target  # 10% margin
    total_meals = len(meal_recommendations)
    similarity_scores = []
    serving_size_validity = True

    # Evaluate each meal
    for i, meal in enumerate(meal_recommendations, start=1):
        print(f"\nEvaluating Meal {i}...")

        # Check if meal has top_n recommendations
        if len(meal) != top_n:
            print(f"WARNING: Meal {i} does not have {top_n} recommendations.")

        # Check serving size calculations and calorie alignment
        for _, row in meal.iterrows():
            recommended_calories = row['calories'] * row['serving_size']
            similarity_scores.append(row['similarity_score'])

            if not (per_meal_calorie_target - calorie_margin <= recommended_calories <= per_meal_calorie_target + calorie_margin):
                serving_size_validity = False
                print(f"  Issue with {row['name']}: Expected calories ~{per_meal_calorie_target}, got {recommended_calories:.2f}.")

    # Summary of evaluation
    print("\n--- Evaluation Summary ---")
    print(f"Number of meals generated: {total_meals} (Expected: {meals_per_day})")
    print(f"Similarity Score Range: {min(similarity_scores):.4f} to {max(similarity_scores):.4f}")
    if serving_size_validity:
        print("All serving sizes calculated correctly and align with calorie goals.")
    else:
        print("Some serving sizes did not align with the expected calorie goals.")


In [56]:
# User inputs
try:
    example_bmr = bmr
    example_tdee = tdee
    meals_per_day = int(input("How many meals do you want to eat in a day? "))

    if meals_per_day <= 0:
        raise ValueError("Number of meals must be greater than zero.")
except ValueError as e:
    print(f"Invalid input: {e}")
    exit()

# Print BMR, TDEE, and meals per day values
print(f"\nUser BMR: {example_bmr} kcal")
print(f"User TDEE: {example_tdee} kcal")
print(f"Meals per day: {meals_per_day}")

# Get recommendations for all meals
meal_recommendations = recommend_meals(user_bmr=example_bmr, user_tdee=example_tdee, meals_per_day=meals_per_day)
# Call the evaluation function
evaluate_recommendation_system(user_bmr=example_bmr, user_tdee=example_tdee, meals_per_day=meals_per_day, top_n=5)
# Display recommendations for each meal
for i, meal in enumerate(meal_recommendations, start=1):
    print(f"\nMeal {i} Recommendations:")
    display(meal)

How many meals do you want to eat in a day? 3

User BMR: 1695.0 kcal
User TDEE: 2631.4875 kcal
Meals per day: 3
Evaluating Recommendation System...

Evaluating Meal 1...

Evaluating Meal 2...

Evaluating Meal 3...

--- Evaluation Summary ---
Number of meals generated: 3 (Expected: 3)
Similarity Score Range: 0.8402 to 0.8738
All serving sizes calculated correctly and align with calorie goals.

Meal 1 Recommendations:


Unnamed: 0,name,calories,calories_per_gram,protein_ratio,carbs_ratio,sugar_ratio,total_fat_ratio,serving_size,similarity_score
4754,"Soybeans, steamed, cooked, sprouted, mature seeds",81,0.81,0.0847,0.0653,0.079632,0.045,10.829167,0.873757
5851,"Soybeans, with salt, steamed, cooked, sprouted...",81,0.81,0.0847,0.0653,0.06585,0.045,10.829167,0.872717
6630,"MORNINGSTAR FARMS Tomato & Basil Pizza Burger,...",161,1.61,0.155,0.14,0.185714,0.086,5.448214,0.872171
5616,"MORNINGSTAR FARMS California Turk'y Burger, un...",155,1.55,0.149,0.126,0.142857,0.077,5.659113,0.863831
5743,"MORNINGSTAR FARMS Spicy Black Bean Burger, unp...",168,1.68,0.146,0.191,0.094241,0.061,5.221205,0.858128



Meal 2 Recommendations:


Unnamed: 0,name,calories,calories_per_gram,protein_ratio,carbs_ratio,sugar_ratio,total_fat_ratio,serving_size,similarity_score
7048,"Beverages,, 3-2-1 Plan, whey powder, high prot...",368,3.68,0.2787,0.5,0.4118,0.12,2.383594,0.856241
6544,MORNINGSTAR FARMS Chipotle Black Bean Crumbles...,122,1.22,0.144,0.102,0.137255,0.041,7.189857,0.854547
5227,"Drumstick leaves, without salt, drained, boile...",60,0.6,0.0527,0.1115,0.089686,0.009,14.619375,0.844504
4240,"Drumstick leaves, with salt, drained, boiled, ...",60,0.6,0.0527,0.1115,0.089686,0.009,14.619375,0.844504
2668,"Gravy, ready-to-serve, canned, beef",53,0.53,0.0375,0.0481,0.043659,0.024,16.550236,0.843372



Meal 3 Recommendations:


Unnamed: 0,name,calories,calories_per_gram,protein_ratio,carbs_ratio,sugar_ratio,total_fat_ratio,serving_size,similarity_score
6641,"MORNINGSTAR FARMS Lasagna with Veggie Sausage,...",96,0.96,0.071,0.144,0.118056,0.023,9.137109,0.842193
5333,"McDONALD'S, Premium Grilled Chicken Ranch BLT ...",204,2.04,0.167,0.2191,0.237335,0.054,4.299816,0.840869
5355,"MORNINGSTAR FARMS Breakfast Pattie, unprepared...",195,1.95,0.237,0.126,0.174603,0.083,4.498269,0.840783
7268,"Fast Foods, tomato and spread, with lettuce, g...",182,1.82,0.1734,0.1678,0.183552,0.046,4.819574,0.840521
2354,"WORTHINGTON Chili, unprepared, canned",126,1.26,0.104,0.109,0.119266,0.045,6.961607,0.840242
