In [None]:
from tensorflow.keras import backend as K
K.clear_session()

# ***`Data Preprocessing`***

In [None]:
# Install the 'rarfile' library to handle RAR file extraction in the environment.
!pip install rarfile

Collecting rarfile
  Downloading rarfile-4.2-py3-none-any.whl.metadata (4.4 kB)
Downloading rarfile-4.2-py3-none-any.whl (29 kB)
Installing collected packages: rarfile
Successfully installed rarfile-4.2


In [None]:
# Import necessary libraries for handling files, downloading, and data manipulation
import os
import pandas as pd
import gdown
import rarfile
import shutil

# Download the RAR file from Google Drive
url = 'https://drive.google.com/uc?id=1a6mvg1ewcz8rNxySb5niXxfDgyOaACGx'
output_rar = '/content/fitnesstan-Dataset.rar'
gdown.download(url, output_rar, quiet=False)

# Extract the RAR file to access the dataset
with rarfile.RarFile(output_rar, 'r') as rar_ref:
    rar_ref.extractall('/content')

Downloading...
From: https://drive.google.com/uc?id=1a6mvg1ewcz8rNxySb5niXxfDgyOaACGx
To: /content/fitnesstan-Dataset.rar
100%|██████████| 1.01M/1.01M [00:00<00:00, 66.9MB/s]


### ***Cleaning***

In [None]:
import pandas as pd

# Load the dataset to examine its structure
file_path = '/content/fitnesstan-Dataset/nutrition.csv'
nutrition_data = pd.read_csv(file_path)

# Display the first few rows of the dataset
nutrition_data.head(), nutrition_data.info(), nutrition_data.describe()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8789 entries, 0 to 8788
Data columns (total 77 columns):
 #   Column                       Non-Null Count  Dtype 
---  ------                       --------------  ----- 
 0   Unnamed: 0                   8789 non-null   int64 
 1   name                         8789 non-null   object
 2   serving_size                 8789 non-null   object
 3   calories                     8789 non-null   int64 
 4   total_fat                    8789 non-null   object
 5   saturated_fat                7199 non-null   object
 6   cholesterol                  8789 non-null   object
 7   sodium                       8789 non-null   object
 8   choline                      8789 non-null   object
 9   folate                       8789 non-null   object
 10  folic_acid                   8789 non-null   object
 11  niacin                       8789 non-null   object
 12  pantothenic_acid             8789 non-null   object
 13  riboflavin                   8789

(   Unnamed: 0             name serving_size  calories total_fat saturated_fat  \
 0           0       Cornstarch        100 g       381      0.1g           NaN   
 1           1     Nuts, pecans        100 g       691       72g          6.2g   
 2           2    Eggplant, raw        100 g        25      0.2g           NaN   
 3           3   Teff, uncooked        100 g       367      2.4g          0.4g   
 4           4  Sherbet, orange        100 g       144        2g          1.2g   
 
   cholesterol    sodium  choline     folate  ...      fat  \
 0           0   9.00 mg   0.4 mg   0.00 mcg  ...   0.05 g   
 1           0   0.00 mg  40.5 mg  22.00 mcg  ...  71.97 g   
 2           0   2.00 mg   6.9 mg  22.00 mcg  ...   0.18 g   
 3           0  12.00 mg  13.1 mg          0  ...   2.38 g   
 4         1mg  46.00 mg   7.7 mg   4.00 mcg  ...   2.00 g   
 
   saturated_fatty_acids monounsaturated_fatty_acids  \
 0               0.009 g                     0.016 g   
 1               6.1

In [None]:
import pandas as pd
import numpy as np

In [None]:
file_path = '/content/fitnesstan-Dataset/nutrition.csv'
nutrition_data = pd.read_csv(file_path)

In [None]:
# Drop the 'Unnamed: 0' column (irrelevant index column)
nutrition_data = nutrition_data.drop(columns=['Unnamed: 0'])

# Identify missing values
missing_values = nutrition_data.isnull().sum()

# Drop columns with more than 50% missing values
nutrition_data = nutrition_data.loc[:, missing_values <= (0.5 * len(nutrition_data))]

# Fill missing values in numerical columns with mean
for column in nutrition_data.select_dtypes(include=['float64', 'int64']).columns:
    nutrition_data[column] = nutrition_data[column].fillna(nutrition_data[column].mean())

# For categorical columns, fill missing with mode
for column in nutrition_data.select_dtypes(include=['object']).columns:
    nutrition_data[column] = nutrition_data[column].fillna(nutrition_data[column].mode()[0])


In [None]:
nutrition_data = nutrition_data.drop_duplicates()

In [None]:
nutrition_data.columns = nutrition_data.columns.str.lower().str.replace(' ', '_').str.replace('[^a-z0-9_]', '')

In [None]:
cleaned_file_path = '/content/nutrition_cleaned.csv'
nutrition_data.to_csv(cleaned_file_path, index=False)
print(f"Cleaned dataset saved at {cleaned_file_path}")


Cleaned dataset saved at /content/nutrition_cleaned.csv


### ***Feature Engineering***

In [None]:
# Import necessary libraries
import pandas as pd

# Load the cleaned dataset
cleaned_file_path = '/content/nutrition_cleaned.csv'
cleaned_data = pd.read_csv(cleaned_file_path)

# Display the first few rows to verify the dataset
cleaned_data.head()


Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
0,Cornstarch,100 g,381,0.1g,0.1g,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
1,"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
2,"Eggplant, raw",100 g,25,0.2g,0.1g,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
3,"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
4,"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g


In [None]:
# Step 1: Extract numeric weight (grams) from serving_size
if 'serving_size' in cleaned_data.columns:
    cleaned_data['serving_size_grams'] = (
        cleaned_data['serving_size']
        .str.extract(r'(\d+\.?\d*)')  # Extract numeric values
        .astype(float)
    )
    # Fill missing serving sizes with the median
    cleaned_data['serving_size_grams'] = cleaned_data['serving_size_grams'].fillna(
        cleaned_data['serving_size_grams'].median()
    )
else:
    print("Column 'serving_size' is not present in the dataset.")

# Verify the updated column
cleaned_data[['serving_size', 'serving_size_grams']].head()


Unnamed: 0,serving_size,serving_size_grams
0,100 g,100.0
1,100 g,100.0
2,100 g,100.0
3,100 g,100.0
4,100 g,100.0


In [None]:
# Step 2: Create Calories per Gram
if 'calories' in cleaned_data.columns:
    cleaned_data['calories_per_gram'] = (
        cleaned_data['calories'] / cleaned_data['serving_size_grams']
    )
else:
    print("Column 'calories' is not present in the dataset.")

# Verify the new feature
cleaned_data[['calories', 'serving_size_grams', 'calories_per_gram']].head()


Unnamed: 0,calories,serving_size_grams,calories_per_gram
0,381,100.0,3.81
1,691,100.0,6.91
2,25,100.0,0.25
3,367,100.0,3.67
4,144,100.0,1.44


In [None]:
# Step 3: Convert non-numeric columns to numeric for ratio calculations
for column in cleaned_data.select_dtypes(include=['object']).columns:
    # Skip the 'name' column
    if column == 'name':
        continue
    # Replace non-numeric characters and convert to float, handling errors
    cleaned_data[column] = pd.to_numeric(cleaned_data[column].replace(r'[^\d.]', '', regex=True), errors='coerce')

# Verify the data types and updated columns
cleaned_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8789 entries, 0 to 8788
Data columns (total 78 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   name                         8789 non-null   object 
 1   serving_size                 8789 non-null   int64  
 2   calories                     8789 non-null   int64  
 3   total_fat                    8789 non-null   float64
 4   saturated_fat                8789 non-null   float64
 5   cholesterol                  8789 non-null   int64  
 6   sodium                       8789 non-null   float64
 7   choline                      8789 non-null   float64
 8   folate                       8789 non-null   float64
 9   folic_acid                   8789 non-null   float64
 10  niacin                       8789 non-null   float64
 11  pantothenic_acid             8789 non-null   float64
 12  riboflavin                   8789 non-null   float64
 13  thiamin           

In [None]:
# Step 4: Calculate Macronutrient Ratios
if 'serving_size_grams' in cleaned_data.columns:
    cleaned_data['protein_ratio'] = (
        cleaned_data['protein'] / cleaned_data['serving_size_grams']
    )
    cleaned_data['carbs_ratio'] = (
        cleaned_data['carbohydrate'] / cleaned_data['serving_size_grams']
    )
    cleaned_data['sugar_ratio'] = cleaned_data['sugars'] / cleaned_data['carbohydrate']
    # Handle division by zero for sugar_ratio
    cleaned_data['sugar_ratio'] = cleaned_data['sugar_ratio'].replace(
        [float('inf'), -float('inf')], 0
    ).fillna(0)
    cleaned_data['total_fat_ratio'] = (
        cleaned_data['total_fat'] / cleaned_data['serving_size_grams']
    )

# Verify the calculated ratios
cleaned_data[
    ['protein_ratio', 'carbs_ratio', 'sugar_ratio', 'total_fat_ratio']
].head()


Unnamed: 0,protein_ratio,carbs_ratio,sugar_ratio,total_fat_ratio
0,0.0026,0.9127,0.0,0.001
1,0.0917,0.1386,0.286436,0.72
2,0.0098,0.0588,0.60034,0.002
3,0.133,0.7313,0.025161,0.024
4,0.011,0.304,0.8,0.02


In [None]:
# Step 5: Additional Features (e.g., Saturated Fat Ratio)
if 'saturated_fat' in cleaned_data.columns:
    # Check if the column is numeric
    if pd.api.types.is_numeric_dtype(cleaned_data['saturated_fat']):
        # If numeric, directly use the values
        cleaned_data['saturated_fat_ratio'] = (
            cleaned_data['saturated_fat'] / cleaned_data['serving_size_grams']
        )
    else:
        # If not numeric, convert to numeric and then calculate the ratio
        cleaned_data['saturated_fat_ratio'] = (
            cleaned_data['saturated_fat']
            .str.replace(r'[^\d.]', '', regex=True)
            .astype(float)
            / cleaned_data['serving_size_grams']
        )
else:
    print("Column 'saturated_fat' is not present in the dataset.")

# Verify the saturated fat ratio if it exists
if 'saturated_fat_ratio' in cleaned_data.columns:
    cleaned_data[['saturated_fat_ratio']].head()

In [None]:
# Step 6: Save the feature-engineered dataset
feature_engineered_file_path = '/content/nutrition_feature_engineered.csv'
cleaned_data.to_csv(feature_engineered_file_path, index=False)

# Output the file path for reference
print(f"Feature-engineered dataset saved at: {feature_engineered_file_path}")


Feature-engineered dataset saved at: /content/nutrition_feature_engineered.csv


### ***Normalization***

In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Load the feature-engineered dataset
feature_engineered_file_path = '/content/nutrition_feature_engineered.csv'
feature_engineered_data = pd.read_csv(feature_engineered_file_path)

# Display the first few rows to verify the dataset
feature_engineered_data.head()

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,caffeine,theobromine,water,serving_size_grams,calories_per_gram,protein_ratio,carbs_ratio,sugar_ratio,total_fat_ratio,saturated_fat_ratio
0,Cornstarch,100,381,0.1,0.1,0,9.0,0.4,0.0,0.0,...,0.0,0.0,8.32,100.0,3.81,0.0026,0.9127,0.0,0.001,0.001
1,"Nuts, pecans",100,691,72.0,6.2,0,0.0,40.5,22.0,0.0,...,0.0,0.0,3.52,100.0,6.91,0.0917,0.1386,0.286436,0.72,0.062
2,"Eggplant, raw",100,25,0.2,0.1,0,2.0,6.9,22.0,0.0,...,0.0,0.0,92.3,100.0,0.25,0.0098,0.0588,0.60034,0.002,0.001
3,"Teff, uncooked",100,367,2.4,0.4,0,12.0,13.1,0.0,0.0,...,0.0,0.0,8.82,100.0,3.67,0.133,0.7313,0.025161,0.024,0.004
4,"Sherbet, orange",100,144,2.0,1.2,1,46.0,7.7,4.0,0.0,...,0.0,0.0,66.1,100.0,1.44,0.011,0.304,0.8,0.02,0.012


In [None]:
# Step 1: Select features for normalization
# Include only the macronutrient ratios and derived metrics
features_to_normalize = [
    'protein_ratio',
    'carbs_ratio',
    'sugar_ratio',
    'total_fat_ratio',
    'calories_per_gram'
]

# Check which of these features are present in the dataset
features_to_normalize = [
    feature for feature in features_to_normalize if feature in feature_engineered_data.columns
]

# Display the features selected for normalization
print("Features to be normalized:", features_to_normalize)


Features to be normalized: ['protein_ratio', 'carbs_ratio', 'sugar_ratio', 'total_fat_ratio', 'calories_per_gram']


In [None]:
# Step 2: Apply MinMaxScaler to the selected features
scaler = MinMaxScaler()

# Create a copy of the dataset to retain the original
normalized_data = feature_engineered_data.copy()

# Normalize only the selected features
normalized_data[features_to_normalize] = scaler.fit_transform(normalized_data[features_to_normalize])

# Verify the normalized features
normalized_data[features_to_normalize].head()


Unnamed: 0,protein_ratio,carbs_ratio,sugar_ratio,total_fat_ratio,calories_per_gram
0,0.002944,0.9127,0.0,0.001,0.422395
1,0.103827,0.1386,0.204347,0.72,0.766075
2,0.011096,0.0588,0.428291,0.002,0.027716
3,0.150589,0.7313,0.01795,0.024,0.406874
4,0.012455,0.304,0.570732,0.02,0.159645


In [None]:
# Step 3: Retain unnormalized columns (e.g., categorical identifiers or raw features)
# No explicit action needed as non-normalized columns are already intact in `normalized_data`

# Display a preview of the dataset to confirm normalization
normalized_data.head()


Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,caffeine,theobromine,water,serving_size_grams,calories_per_gram,protein_ratio,carbs_ratio,sugar_ratio,total_fat_ratio,saturated_fat_ratio
0,Cornstarch,100,381,0.1,0.1,0,9.0,0.4,0.0,0.0,...,0.0,0.0,8.32,100.0,0.422395,0.002944,0.9127,0.0,0.001,0.001
1,"Nuts, pecans",100,691,72.0,6.2,0,0.0,40.5,22.0,0.0,...,0.0,0.0,3.52,100.0,0.766075,0.103827,0.1386,0.204347,0.72,0.062
2,"Eggplant, raw",100,25,0.2,0.1,0,2.0,6.9,22.0,0.0,...,0.0,0.0,92.3,100.0,0.027716,0.011096,0.0588,0.428291,0.002,0.001
3,"Teff, uncooked",100,367,2.4,0.4,0,12.0,13.1,0.0,0.0,...,0.0,0.0,8.82,100.0,0.406874,0.150589,0.7313,0.01795,0.024,0.004
4,"Sherbet, orange",100,144,2.0,1.2,1,46.0,7.7,4.0,0.0,...,0.0,0.0,66.1,100.0,0.159645,0.012455,0.304,0.570732,0.02,0.012


In [None]:
# Step 4: Save the normalized dataset
normalized_file_path = '/content/nutrition_normalized.csv'
normalized_data.to_csv(normalized_file_path, index=False)

# Output the file path for reference
print(f"Normalized dataset saved at: {normalized_file_path}")


Normalized dataset saved at: /content/nutrition_normalized.csv


# ***`BMR/TDEE`***

In [31]:
# Step 1: Collect User Information
print("Welcome to the Daily Caloric Requirement Calculator!")
print("Please answer the following questions to help determine your caloric needs.")

# Collecting personal details
gender = input("Please enter your gender (Male/Female): ").strip().lower()
weight = float(input("Enter your weight in kg: "))
height = float(input("Enter your height in cm: "))
age = int(input("Enter your age in years: "))
activity_level = input("Enter your activity level (choose from 'sedentary', 'light', 'moderate', 'very', 'super'): ").strip().lower()

# Step 2: Calculate BMR based on gender
if gender == "male":
    bmr = 10 * weight + 6.25 * height - 5 * age + 5
elif gender == "female":
    bmr = 10 * weight + 6.25 * height - 5 * age - 161
else:
    raise ValueError("Invalid gender entered. Please enter 'Male' or 'Female'.")

print(f"\nYour Basal Metabolic Rate (BMR) is: {bmr:.2f} calories/day.")

# Step 3: Calculate BMI
height_m = height / 100  # Convert height to meters
bmi = weight / (height_m ** 2)
print(f"Your Body Mass Index (BMI) is: {bmi:.2f}")

# Step 4: Assess BMI and target a healthy range
bmi_category = ""
if bmi < 18.5:
    bmi_category = "underweight"
    print("BMI Category: Underweight")
elif 18.5 <= bmi < 24.9:
    bmi_category = "normal weight"
    print("BMI Category: Normal Weight")

    # Adjust target BMI to fall between 21 and 24
    if bmi < 21:
        target_bmi = 22.5
        target_weight = target_bmi * (height_m ** 2)
        print(f"You are within the normal BMI range, but your BMI ({bmi:.2f}) is on the lower side.")
        print(f"We recommend aiming for a BMI of {target_bmi:.1f}, which corresponds to a weight of {target_weight:.2f} kg.")
    elif bmi > 24:
        target_bmi = 22.5
        target_weight = target_bmi * (height_m ** 2)
        print(f"You are within the normal BMI range, but your BMI ({bmi:.2f}) is on the higher side.")
        print(f"We recommend aiming for a BMI of {target_bmi:.1f}, which corresponds to a weight of {target_weight:.2f} kg.")
    else:
        print("Your BMI is within the optimal range (21-24). No changes are recommended.")

elif 25 <= bmi < 29.9:
    bmi_category = "overweight"
    print("BMI Category: Overweight")
else:
    bmi_category = "obese"
    print("BMI Category: Obese")

# Step 5: Calculate TDEE based on activity level
activity_multipliers = {
    "sedentary": 1.2,
    "light": 1.375,
    "moderate": 1.55,
    "very": 1.725,
    "super": 1.9
}

if activity_level in activity_multipliers:
    tdee = bmr * activity_multipliers[activity_level]

    # Adjust TDEE based on BMI category
    if bmi_category == "underweight" or (bmi < 21 and bmi_category == "normal weight"):
        tdee *= 1.1  # Increase TDEE by 10% for healthy weight gain
        print("Adjusting TDEE for healthy weight gain...")
    elif bmi_category == "overweight" or bmi_category == "obese" or (bmi > 24 and bmi_category == "normal weight"):
        tdee *= 0.9  # Reduce TDEE by 10% for healthy weight loss
        print("Adjusting TDEE for healthy weight loss...")
    else:
        print("No adjustment needed for TDEE.")

    print(f"Your Total Daily Energy Expenditure (TDEE) after adjustment is: {tdee:.2f} calories/day.")
else:
    raise ValueError("Invalid activity level entered. Please enter 'sedentary', 'light', 'moderate', 'very', or 'super'.")

# Step 6: Display caloric recommendations
print("\nBased on your TDEE, you can adjust your diet for specific goals:")
print(" - To lose weight, aim for a calorie intake slightly below your TDEE.")
print(" - To maintain weight, aim to match your TDEE.")
print(" - To gain weight, increase your calorie intake above your TDEE.")


Welcome to the Daily Caloric Requirement Calculator!
Please answer the following questions to help determine your caloric needs.
Please enter your gender (Male/Female): male
Enter your weight in kg: 72
Enter your height in cm: 172
Enter your age in years: 21
Enter your activity level (choose from 'sedentary', 'light', 'moderate', 'very', 'super'): very

Your Basal Metabolic Rate (BMR) is: 1695.00 calories/day.
Your Body Mass Index (BMI) is: 24.34
BMI Category: Normal Weight
You are within the normal BMI range, but your BMI (24.34) is on the higher side.
We recommend aiming for a BMI of 22.5, which corresponds to a weight of 66.56 kg.
Adjusting TDEE for healthy weight loss...
Your Total Daily Energy Expenditure (TDEE) after adjustment is: 2631.49 calories/day.

Based on your TDEE, you can adjust your diet for specific goals:
 - To lose weight, aim for a calorie intake slightly below your TDEE.
 - To maintain weight, aim to match your TDEE.
 - To gain weight, increase your calorie intake

# ***`KNNs Implementation`***

In [32]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsRegressor

# Step 1: Load and preprocess dataset
def load_and_preprocess_data(file_path):
    # Load dataset
    data = pd.read_csv(file_path)

    # Select relevant features
    features = ["calories_per_gram", "protein_ratio", "carbs_ratio", "total_fat_ratio"]
    target = "serving_size_grams"

    # Normalize features
    scaler = MinMaxScaler()
    X = data[features]
    y = data[target]
    X_scaled = scaler.fit_transform(X)

    return data, X_scaled, y, scaler, features, target

# Step 2: Train KNN Regressor
def train_knn_model(X_scaled, y):
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
    knn = KNeighborsRegressor(n_neighbors=5)
    knn.fit(X_train, y_train)
    return knn

# Step 3: Predict and Adjust Serving Sizes
def predict_diet(knn_model, scaler, food_data, selected_items, tdee_meal, features):
    # Filter data for selected items
    selected_data = food_data[food_data["name"].isin(selected_items)]

    # Extract and scale input features
    X_selected = selected_data[features]
    X_scaled_selected = scaler.transform(X_selected)

    # Predict scaled serving sizes
    predicted_serving_sizes = knn_model.predict(X_scaled_selected)

    # Scale calorie and macronutrient contributions
    selected_data["predicted_serving_size"] = predicted_serving_sizes
    selected_data["calorie_contribution"] = (
        selected_data["calories"] * predicted_serving_sizes / 100
    )
    selected_data["protein_contribution"] = (
        selected_data["protein_ratio"] * predicted_serving_sizes
    )
    selected_data["carbs_contribution"] = (
        selected_data["carbs_ratio"] * predicted_serving_sizes
    )
    selected_data["fat_contribution"] = (
        selected_data["total_fat_ratio"] * predicted_serving_sizes
    )

    # Scale serving sizes to meet TDEE target
    total_calories = selected_data["calorie_contribution"].sum()
    scaling_factor = tdee_meal / total_calories
    selected_data["scaled_serving_size"] = selected_data["predicted_serving_size"] * scaling_factor

    # Recalculate contributions based on scaled serving sizes
    selected_data["scaled_calorie_contribution"] = (
        selected_data["calories"] * selected_data["scaled_serving_size"] / 100
    )
    selected_data["scaled_protein_contribution"] = (
        selected_data["protein_ratio"] * selected_data["scaled_serving_size"]
    )
    selected_data["scaled_carbs_contribution"] = (
        selected_data["carbs_ratio"] * selected_data["scaled_serving_size"]
    )
    selected_data["scaled_fat_contribution"] = (
        selected_data["total_fat_ratio"] * selected_data["scaled_serving_size"]
    )

    return selected_data[[
        "name", "scaled_serving_size", "scaled_calorie_contribution",
        "scaled_protein_contribution", "scaled_carbs_contribution", "scaled_fat_contribution"
    ]]




In [33]:
def diet_recommendation_interactive_pipeline(file_path, tdee):
    # Step 1: Load and preprocess data
    data, X_scaled, y, scaler, features, target = load_and_preprocess_data(file_path)

    # Step 2: Train the KNN model
    knn_model = train_knn_model(X_scaled, y)

    # Step 3: User interaction for meals and items
    num_meals = int(input("Enter the number of meals per day: "))
    meal_calories = tdee / num_meals  # Divide TDEE into equal meals

    print(f"Your TDEE is {tdee} calories. Each meal should have approximately {meal_calories:.2f} calories.")
    print("Available items are indexed from 1 to", len(data), "in the dataset.")
    print(data[["name"]].reset_index().rename(columns={"index": "ID"}).head())  # Display sample items

    all_meals_results = []

    for meal_num in range(1, num_meals + 1):
        print(f"\nMeal {meal_num}:")
        selected_indices = input("Enter the indices of selected food items for this meal (comma-separated): ")
        selected_indices = [int(idx.strip()) - 1 for idx in selected_indices.split(",")]

        # Get selected items
        selected_items = data.iloc[selected_indices]["name"].tolist()

        # Predict serving sizes and breakdowns
        meal_results = predict_diet(knn_model, scaler, data, selected_items, meal_calories, features)
        all_meals_results.append((meal_num, meal_results))

        print(f"\nMeal {meal_num} Results:")
        print(meal_results)

    # Combine results for all meals
    return all_meals_results

# Example usage
file_path = '/content/nutrition_normalized.csv'
tdee = tdee  # Total TDEE in calories

# Run the interactive pipeline
all_meals_results = diet_recommendation_interactive_pipeline(file_path, tdee)


Enter the number of meals per day: 4
Your TDEE is 2631.4875 calories. Each meal should have approximately 657.87 calories.
Available items are indexed from 1 to 8789 in the dataset.
   ID             name
0   0       Cornstarch
1   1     Nuts, pecans
2   2    Eggplant, raw
3   3   Teff, uncooked
4   4  Sherbet, orange

Meal 1:
Enter the indices of selected food items for this meal (comma-separated): 2,4,5


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selected_data["predicted_serving_size"] = predicted_serving_sizes
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selected_data["calorie_contribution"] = (
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selected_data["protein_contribution"] = (
A value is trying to be set on a copy of a slice from a


Meal 1 Results:
              name  scaled_serving_size  scaled_calorie_contribution  \
1     Nuts, pecans            54.731437                   378.194231   
3   Teff, uncooked            54.731437                   200.864374   
4  Sherbet, orange            54.731437                    78.813270   

   scaled_protein_contribution  scaled_carbs_contribution  \
1                     5.682601                   7.585777   
3                     8.241940                  40.025100   
4                     0.681664                  16.638357   

   scaled_fat_contribution  
1                39.406635  
3                 1.313554  
4                 1.094629  

Meal 2:
Enter the indices of selected food items for this meal (comma-separated): 55,666,7777


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selected_data["predicted_serving_size"] = predicted_serving_sizes
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selected_data["calorie_contribution"] = (
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selected_data["protein_contribution"] = (
A value is trying to be set on a copy of a slice from a


Meal 2 Results:
                                                   name  scaled_serving_size  \
54                                Tomatoes, raw, orange           353.694556   
665                                            Yam, raw           353.694556   
7776  Cereals, without salt, microwaved, cooked with...           353.694556   

      scaled_calorie_contribution  scaled_protein_contribution  \
54                      56.591129                     4.645445   
665                    417.359577                     6.127182   
7776                   183.921169                     7.528824   

      scaled_carbs_contribution  scaled_fat_contribution  
54                    11.247487                 0.707389  
665                   98.610042                 0.707389  
7776                  35.723150                 1.414778  

Meal 3:
Enter the indices of selected food items for this meal (comma-separated): 3


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selected_data["predicted_serving_size"] = predicted_serving_sizes
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selected_data["calorie_contribution"] = (
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selected_data["protein_contribution"] = (
A value is trying to be set on a copy of a slice from a


Meal 3 Results:
            name  scaled_serving_size  scaled_calorie_contribution  \
2  Eggplant, raw            2631.4875                   657.871875   

   scaled_protein_contribution  scaled_carbs_contribution  \
2                    29.199023                 154.731465   

   scaled_fat_contribution  
2                 5.262975  

Meal 4:
Enter the indices of selected food items for this meal (comma-separated): 888

Meal 4 Results:
                             name  scaled_serving_size  \
887  Snack, original bar, BALANCE           158.523343   

     scaled_calorie_contribution  scaled_protein_contribution  \
887                   657.871875                    50.256495   

     scaled_carbs_contribution  scaled_fat_contribution  
887                  77.248425                19.022801  


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selected_data["predicted_serving_size"] = predicted_serving_sizes
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selected_data["calorie_contribution"] = (
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selected_data["protein_contribution"] = (
A value is trying to be set on a copy of a slice from a