In [1]:
import pandas as pd
import numpy as np
import yaml

try:
    with open("../config.yaml", "r") as file:
        config = yaml.safe_load(file)
except:
    print("Yaml configuration file not found!")

df = pd.read_csv(config['output_data']['file'])

In [2]:
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import OneHotEncoder, StandardScaler
import pandas as pd

# --- Step 1: Encode categorical and numeric data ---
ohe = OneHotEncoder(sparse_output=False, drop="first", handle_unknown="ignore")

categorical_columns = df.select_dtypes(include="object").columns.tolist()
numerical_columns = df.select_dtypes(include="number").columns.tolist()

# Fit encoder
ohe.fit(df[categorical_columns])

categorical_trans_np = ohe.transform(df[categorical_columns])
categorical_trans_df = pd.DataFrame(
    categorical_trans_np,
    columns=ohe.get_feature_names_out(),
    index=df.index
)

df_numerical = df[numerical_columns]
df_trans = pd.concat([categorical_trans_df, df_numerical], axis=1)

# Fit scaler
scaler = StandardScaler()
scaler.fit(df_trans)

df_trans_final = pd.DataFrame(
    scaler.transform(df_trans),
    columns=df_trans.columns,
    index=df_trans.index
)

# Fit nearest neighbors
nn = NearestNeighbors(n_neighbors=1)
nn.fit(df_trans_final)

# --- Step 2: Collect user inputs ---
print("Please enter your details below:\n")

age = float(input("Age (years): "))
gender = input("Gender (Male/Female): ").strip().title()
weight_kg = float(input("Weight (kg): "))
height_m = float(input("Height (m): "))
max_bpm = float(input("Max BPM: "))
avg_bpm = float(input("Average BPM: "))
resting_bpm = float(input("Resting BPM: "))
session_duration_hours = float(input("Workout duration (hours): "))
calories_burned = float(input("Calories burned: "))
water_intake_liters = float(input("Water intake (liters): "))
carbs = float(input("Carbohydrates (g): "))
proteins = float(input("Proteins (g): "))
fats = float(input("Fats (g): "))
calories = float(input("Total calories consumed: "))
workout_type = input("Workout type (Cardio/Strength/Yoga/Hiit): ").strip().title()
meal_type = input("Meal type (Breakfast/Lunch/Dinner/Snack): ").strip().title()
diet_type = input("Diet type (Balanced/Keto/Low-Carb/Paleo): ").strip().title()


# --- Step 3: Derived metrics ---
gender_factor = 1 if gender == "Male" else 0
bmi = weight_kg / (height_m ** 2)
fat_percentage = (1.20 * bmi) + (0.23 * age) - (10.8 * gender_factor) - 5.4
lean_mass_kg = weight_kg * (1 - fat_percentage / 100)
total_calories = (carbs * 4) + (proteins * 4) + (fats * 9)
pct_carbs = (carbs * 4 / total_calories) * 100 if total_calories > 0 else 0
protein_per_kg = proteins / weight_kg if weight_kg > 0 else 0
cal_balance = calories - calories_burned

# --- Step 4: Build full user record ---
values = {
    "age": age,
    "gender": gender,
    "weight_kg": weight_kg,
    "height_m": height_m,
    "max_bpm": max_bpm,
    "avg_bpm": avg_bpm,
    "resting_bpm": resting_bpm,
    "session_duration_hours": session_duration_hours,
    "calories_burned": calories_burned,
    "workout_type": workout_type,
    "water_intake_liters": water_intake_liters,
    "carbs": carbs,
    "proteins": proteins,
    "fats": fats,
    "calories": calories,
    "meal_type": meal_type,
    "diet_type": diet_type,
    "bmi": bmi,
    "fat_percentage": fat_percentage,
    "lean_mass_kg": lean_mass_kg,
    "pct_carbs": pct_carbs,
    "protein_per_kg": protein_per_kg,
    "cal_balance": cal_balance
}

new_input_df = pd.DataFrame([values])

# --- Step 5: Apply same transformations ---
cat_input = pd.DataFrame(
    ohe.transform(new_input_df[categorical_columns]),
    columns=ohe.get_feature_names_out(),
    index=new_input_df.index
)
num_input = new_input_df[numerical_columns]
input_trans = pd.concat([cat_input, num_input], axis=1)
input_scaled = pd.DataFrame(
    scaler.transform(input_trans),
    columns=df_trans.columns,
    index=new_input_df.index
)

distance, index = nn.kneighbors(input_scaled)
nearest_row = df.iloc[index[0][0]]

# --- Step 7: Display results ---
print("\n Closest Match Found!")
print(f"Similarity Distance: {distance[0][0]:.4f}")
print("\nMost Similar User Profile:")
print(nearest_row)


Please enter your details below:



Age (years):  34
Gender (Male/Female):  Male
Weight (kg):  65
Height (m):  1.62
Max BPM:  188
Average BPM:  157
Resting BPM:  69
Workout duration (hours):  1
Calories burned:  1080
Water intake (liters):  1
Carbohydrates (g):  267
Proteins (g):  106
Fats (g):  71
Total calories consumed:  1806
Workout type (Cardio/Strength/Yoga/Hiit):  Strength
Meal type (Breakfast/Lunch/Dinner/Snack):  Lunch
Diet type (Balanced/Keto/Low-Carb/Paleo):  Vegan



 Closest Match Found!
Similarity Distance: 34085.2072

Most Similar User Profile:
age                              55
gender                         Male
weight_kg                        58
height_m                       1.86
max_bpm                       187.0
avg_bpm                      158.74
resting_bpm                   64.96
session_duration_hours         1.09
calories_burned              1440.0
workout_type                   Hiit
fat_percentage            16.170085
water_intake_liters               3
bmi                           16.89
carbs                        171.82
proteins                      67.01
fats                          44.21
calories                     1644.0
meal_type                 Breakfast
diet_type                  Low-Carb
pct_carbs                  0.507889
protein_per_kg             1.146842
cal_balance                   204.0
lean_mass_kg              48.981819
Name: 19896, dtype: object


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20000 entries, 0 to 19999
Data columns (total 23 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   age                     20000 non-null  int64  
 1   gender                  20000 non-null  object 
 2   weight_kg               20000 non-null  int64  
 3   height_m                20000 non-null  float64
 4   max_bpm                 20000 non-null  float64
 5   avg_bpm                 20000 non-null  float64
 6   resting_bpm             20000 non-null  float64
 7   session_duration_hours  20000 non-null  float64
 8   calories_burned         20000 non-null  float64
 9   workout_type            20000 non-null  object 
 10  fat_percentage          20000 non-null  float64
 11  water_intake_liters     20000 non-null  int64  
 12  bmi                     20000 non-null  float64
 13  carbs                   20000 non-null  float64
 14  proteins                20000 non-null