## Diet Recommendation System

In [35]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.cluster import KMeans
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

In [36]:
# Load the food nutrition dataset; the path is relative to the notebook's working directory.
df = pd.read_csv('static/Food__Nutrition__dataset/food.csv')

In [37]:
# Preview the first five rows of the raw table.
df.head(5)

Unnamed: 0,Category,Description,Nutrient Data Bank Number,Data.Alpha Carotene,Data.Ash,Data.Beta Carotene,Data.Beta Cryptoxanthin,Data.Carbohydrate,Data.Cholesterol,Data.Choline,...,Data.Major Minerals.Potassium,Data.Major Minerals.Sodium,Data.Major Minerals.Zinc,Data.Vitamins.Vitamin A - IU,Data.Vitamins.Vitamin A - RAE,Data.Vitamins.Vitamin B12,Data.Vitamins.Vitamin B6,Data.Vitamins.Vitamin C,Data.Vitamins.Vitamin E,Data.Vitamins.Vitamin K
0,BUTTER,"BUTTER,WITH SALT",1001,0,2.11,158,0,0.06,215,19,...,24,576,0.09,2499,684,0.17,0.003,0.0,2.32,7.0
1,BUTTER,"BUTTER,WHIPPED,WITH SALT",1002,0,2.11,158,0,0.06,219,19,...,26,827,0.05,2499,684,0.13,0.003,0.0,2.32,7.0
2,BUTTER OIL,"BUTTER OIL,ANHYDROUS",1003,0,0.0,193,0,0.0,256,22,...,5,2,0.01,3069,840,0.01,0.001,0.0,2.8,8.6
3,CHEESE,"CHEESE,BLUE",1004,0,5.11,74,0,2.34,75,15,...,256,1395,2.66,763,198,1.22,0.166,0.0,0.25,2.4
4,CHEESE,"CHEESE,BRICK",1005,0,3.18,76,0,2.79,94,15,...,136,560,2.6,1080,292,1.26,0.065,0.0,0.26,2.5


In [38]:
# Per-column non-null counts and dtypes of the raw table.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7413 entries, 0 to 7412
Data columns (total 48 columns):
 #   Column                                                   Non-Null Count  Dtype  
---  ------                                                   --------------  -----  
 0   Category                                                 7413 non-null   object 
 1   Description                                              7413 non-null   object 
 2   Nutrient Data Bank Number                                7413 non-null   int64  
 3   Data.Alpha Carotene                                      7413 non-null   int64  
 4   Data.Ash                                                 7413 non-null   float64
 5   Data.Beta Carotene                                       7413 non-null   int64  
 6   Data.Beta Cryptoxanthin                                  7413 non-null   int64  
 7   Data.Carbohydrate                                        7413 non-null   float64
 8   Data.Cholesterol            

In [39]:
# (rows, columns) of the raw table.
df.shape

(7413, 48)

In [40]:
# Column labels of the raw table.
df.columns

Index(['Category', 'Description', 'Nutrient Data Bank Number',
       'Data.Alpha Carotene', 'Data.Ash', 'Data.Beta Carotene',
       'Data.Beta Cryptoxanthin', 'Data.Carbohydrate', 'Data.Cholesterol',
       'Data.Choline', 'Data.Fiber', 'Data.Kilocalories',
       'Data.Lutein and Zeaxanthin', 'Data.Lycopene', 'Data.Manganese',
       'Data.Niacin', 'Data.Pantothenic Acid', 'Data.Protein',
       'Data.Refuse Percentage', 'Data.Retinol', 'Data.Riboflavin',
       'Data.Selenium', 'Data.Sugar Total', 'Data.Thiamin', 'Data.Water',
       'Data.Fat.Monosaturated Fat', 'Data.Fat.Polysaturated Fat',
       'Data.Fat.Saturated Fat', 'Data.Fat.Total Lipid',
       'Data.Household Weights.1st Household Weight',
       'Data.Household Weights.1st Household Weight Description',
       'Data.Household Weights.2nd Household Weight',
       'Data.Household Weights.2nd Household Weight Description',
       'Data.Major Minerals.Calcium', 'Data.Major Minerals.Copper',
       'Data.Major Minerals.Iro

## Data Preprocessing

In [41]:
# Count missing (null) values in each column of the raw table.
df.isnull().sum()

Category                                                      0
Description                                                   0
Nutrient Data Bank Number                                     0
Data.Alpha Carotene                                           0
Data.Ash                                                      0
Data.Beta Carotene                                            0
Data.Beta Cryptoxanthin                                       0
Data.Carbohydrate                                             0
Data.Cholesterol                                              0
Data.Choline                                                  0
Data.Fiber                                                    0
Data.Kilocalories                                             0
Data.Lutein and Zeaxanthin                                    0
Data.Lycopene                                                 0
Data.Manganese                                                0
Data.Niacin                             

In [42]:
# Columns with missing values: 'Data.Household Weights.1st Household Weight Description'
# and 'Data.Household Weights.2nd Household Weight Description'.

In [43]:
# Count rows that are exact duplicates of an earlier row.
df.duplicated().sum()

np.int64(0)

In [44]:
# Household-weight fields (values and their text descriptions) are excluded
# from the cleaned table.
household_weight_columns = [
    'Data.Household Weights.1st Household Weight Description',
    'Data.Household Weights.2nd Household Weight Description',
    'Data.Household Weights.1st Household Weight',
    'Data.Household Weights.2nd Household Weight',
]
# The data-bank number is excluded as well.
identifier_columns = ['Nutrient Data Bank Number']

columns_to_drop = household_weight_columns + identifier_columns

# Build the reduced frame without modifying `df`, then write it to a CSV
# (no index column) in the current working directory.
df_cleaned = df.drop(columns_to_drop, axis=1)
df_cleaned.to_csv(path_or_buf='df_cleaned.csv', index=False)

In [45]:
# Re-count missing values per column on the cleaned table.
df_cleaned.isnull().sum()

Category                          0
Description                       0
Data.Alpha Carotene               0
Data.Ash                          0
Data.Beta Carotene                0
Data.Beta Cryptoxanthin           0
Data.Carbohydrate                 0
Data.Cholesterol                  0
Data.Choline                      0
Data.Fiber                        0
Data.Kilocalories                 0
Data.Lutein and Zeaxanthin        0
Data.Lycopene                     0
Data.Manganese                    0
Data.Niacin                       0
Data.Pantothenic Acid             0
Data.Protein                      0
Data.Refuse Percentage            0
Data.Retinol                      0
Data.Riboflavin                   0
Data.Selenium                     0
Data.Sugar Total                  0
Data.Thiamin                      0
Data.Water                        0
Data.Fat.Monosaturated Fat        0
Data.Fat.Polysaturated Fat        0
Data.Fat.Saturated Fat            0
Data.Fat.Total Lipid        

## Getting Insights From Data

In [46]:
# Summary statistics (count, mean, std, min, quartiles, max) for every non-object column.
df_cleaned.describe(exclude='object')

Unnamed: 0,Data.Alpha Carotene,Data.Ash,Data.Beta Carotene,Data.Beta Cryptoxanthin,Data.Carbohydrate,Data.Cholesterol,Data.Choline,Data.Fiber,Data.Kilocalories,Data.Lutein and Zeaxanthin,...,Data.Major Minerals.Potassium,Data.Major Minerals.Sodium,Data.Major Minerals.Zinc,Data.Vitamins.Vitamin A - IU,Data.Vitamins.Vitamin A - RAE,Data.Vitamins.Vitamin B12,Data.Vitamins.Vitamin B6,Data.Vitamins.Vitamin C,Data.Vitamins.Vitamin E,Data.Vitamins.Vitamin K
count,7413.0,7413.0,7413.0,7413.0,7413.0,7413.0,7413.0,7413.0,7413.0,7413.0,...,7413.0,7413.0,7413.0,7413.0,7413.0,7413.0,7413.0,7413.0,7413.0,7413.0
mean,21.210711,1.852459,159.043437,8.776744,21.785381,37.162822,20.673546,1.993147,219.655875,127.406178,...,268.348172,331.590719,1.875125,767.568191,99.43707,1.172903,0.269547,9.075651,0.842837,9.448604
std,269.714183,2.993228,1126.285026,154.18486,27.123491,119.738438,45.48199,4.292873,171.668713,1029.635386,...,404.91622,977.046544,4.193682,3871.307652,761.653061,4.512816,0.565116,63.443284,4.169756,66.067619
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.83,0.0,0.0,0.49,0.0,0.0,0.0,82.0,0.0,...,103.0,31.0,0.19,0.0,0.0,0.0,0.03,0.0,0.0,0.0
50%,0.0,1.24,0.0,0.0,9.29,2.0,0.0,0.3,181.0,0.0,...,210.0,86.0,0.77,33.0,0.0,0.01,0.11,0.0,0.05,0.0
75%,0.0,2.2,1.0,0.0,30.59,60.0,20.0,2.3,331.0,0.0,...,328.0,428.0,2.46,280.0,24.0,0.83,0.33,3.5,0.39,1.7
max,14251.0,99.8,42891.0,7923.0,100.0,3100.0,1388.0,79.0,902.0,39550.0,...,16500.0,38758.0,181.61,100000.0,30000.0,98.89,12.0,2400.0,149.4,1714.5


In [47]:
# Inspect the last 100 rows of the cleaned table (the rendered output abbreviates the middle rows).
df_cleaned.tail(100)

Unnamed: 0,Category,Description,Data.Alpha Carotene,Data.Ash,Data.Beta Carotene,Data.Beta Cryptoxanthin,Data.Carbohydrate,Data.Cholesterol,Data.Choline,Data.Fiber,...,Data.Major Minerals.Potassium,Data.Major Minerals.Sodium,Data.Major Minerals.Zinc,Data.Vitamins.Vitamin A - IU,Data.Vitamins.Vitamin A - RAE,Data.Vitamins.Vitamin B12,Data.Vitamins.Vitamin B6,Data.Vitamins.Vitamin C,Data.Vitamins.Vitamin E,Data.Vitamins.Vitamin K
7313,QUAIL,"QUAIL,COOKED,TOTAL EDIBLE",0,0.80,0,0,0.0,86,66,0.0,...,216,52,3.10,234,70,0.36,0.62,2.3,0.70,4.2
7314,PHEASANT,"PHEASANT,CKD,TOTAL EDIBLE",0,1.30,0,0,0.0,89,86,0.0,...,271,43,1.37,190,57,0.72,0.75,2.3,0.27,4.9
7315,EGGS,"EGGS,SCRMBLD,FRZ MIXTURE",0,1.10,246,0,7.5,65,193,0.0,...,147,162,0.14,410,21,0.17,0.01,0.0,0.84,1.8
7316,DOVE,"DOVE,CKD (INCL SQUAB)",0,1.10,0,0,0.0,116,64,0.0,...,256,57,3.83,95,28,0.41,0.57,2.9,0.06,4.0
7317,PORK,"PORK,ORIENTAL STYLE,DEHYD",0,2.60,0,0,1.4,67,0,0.0,...,139,685,1.15,0,0,0.30,0.20,0.0,0.36,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7408,FROG LEGS,"FROG LEGS,RAW",0,1.40,0,0,0.0,50,65,0.0,...,285,58,1.00,50,15,0.40,0.12,0.0,1.00,0.1
7409,MACKEREL,"MACKEREL,SALTED",0,13.40,0,0,0.0,95,95,0.0,...,520,4450,1.10,157,47,12.00,0.41,0.0,2.38,7.8
7410,SCALLOP,"SCALLOP,(BAY&SEA),CKD,STMD",0,3.06,0,0,0.0,53,81,0.0,...,476,265,3.00,100,30,1.30,0.10,0.0,1.50,0.2
7411,SNAIL,"SNAIL,RAW",0,1.30,0,0,2.0,50,65,0.0,...,382,70,1.00,100,30,0.50,0.13,0.0,5.00,0.1


In [48]:
# Column labels of the cleaned table.
df_cleaned.columns

Index(['Category', 'Description', 'Data.Alpha Carotene', 'Data.Ash',
       'Data.Beta Carotene', 'Data.Beta Cryptoxanthin', 'Data.Carbohydrate',
       'Data.Cholesterol', 'Data.Choline', 'Data.Fiber', 'Data.Kilocalories',
       'Data.Lutein and Zeaxanthin', 'Data.Lycopene', 'Data.Manganese',
       'Data.Niacin', 'Data.Pantothenic Acid', 'Data.Protein',
       'Data.Refuse Percentage', 'Data.Retinol', 'Data.Riboflavin',
       'Data.Selenium', 'Data.Sugar Total', 'Data.Thiamin', 'Data.Water',
       'Data.Fat.Monosaturated Fat', 'Data.Fat.Polysaturated Fat',
       'Data.Fat.Saturated Fat', 'Data.Fat.Total Lipid',
       'Data.Major Minerals.Calcium', 'Data.Major Minerals.Copper',
       'Data.Major Minerals.Iron', 'Data.Major Minerals.Magnesium',
       'Data.Major Minerals.Phosphorus', 'Data.Major Minerals.Potassium',
       'Data.Major Minerals.Sodium', 'Data.Major Minerals.Zinc',
       'Data.Vitamins.Vitamin A - IU', 'Data.Vitamins.Vitamin A - RAE',
       'Data.Vitamins.Vitami

In [49]:
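# NOTE: disabled t-SNE visualisation. It uses X_scaled and df_filtered, which are
# only created in the clustering cell further down, so this cell must be moved
# below that cell before it is re-enabled.
# from sklearn.manifold import TSNE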

# # Step 3: Dimensionality Reduction using t-SNE
# tsne = TSNE(n_components=2, random_state=90)
# X_tsne = tsne.fit_transform(X_scaled)

# # Step 4: Create a DataFrame for plotting
# df_tsne = pd.DataFrame(data=X_tsne, columns=['TSNE1', 'TSNE2'])
# df_tsne['Cluster'] = df_filtered['Cluster']

# # Step 5: Visualize the clusters using t-SNE
# plt.figure(figsize=(10, 8))
# sns.scatterplot(data=df_tsne, x='TSNE1', y='TSNE2', hue='Cluster', palette='viridis', s=100, alpha=0.7)
# plt.title('Visualising the Clusters for recommendation ')
# plt.xlabel('t-SNE Component 1')
# plt.ylabel('t-SNE Component 2')
# plt.legend(title='cluster_mapping')
# plt.grid(True)
# plt.show()


In [50]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import numpy as np

# Work on a copy so df_cleaned itself is left untouched
df_filtered = df_cleaned.copy()

# Step 1: pick the int64/float64 columns; these are the features to scale
nutritional_columns = df_filtered.select_dtypes(include=['int64', 'float64']).columns

# Step 2: fit the scaler on the selected columns, then apply it to them
scaler = StandardScaler()
scaler.fit(df_filtered[nutritional_columns])
X_scaled = scaler.transform(df_filtered[nutritional_columns])

# Step 3: fit K-Means (10 clusters, fixed seed) and store each row's cluster label
kmeans = KMeans(n_clusters=10, random_state=90)
kmeans.fit(X_scaled)
df_filtered['Cluster'] = kmeans.labels_

# Step 4: Define mappings for user goals
# Each goal is paired with a cluster id purely by its position in this tuple
# (first entry -> 0, last entry -> 9).
# NOTE(review): the ids are whatever kmeans.fit_predict assigned; nothing in
# this notebook checks which nutritional profile each id represents - confirm
# that this pairing is intended.
GOALS_BY_CLUSTER_ID = (
    "maintain weight",
    "muscle gain",
    "fat burn with lean muscle retention",
    "enhance athletic performance",
    "improve immunity",
    "bone and joint health",
    "gut health",
    "mental wellness",
    "energy boost",
    "recovery from illness/surgery",
)
goal_mapping = {
    goal_name: cluster_id
    for cluster_id, goal_name in enumerate(GOALS_BY_CLUSTER_ID)
}

# Function to recommend foods based on user input
def recommend_diet(user_goal, user_activity, n_recommendations=5, random_state=1):
    """Print a sample of foods from the cluster mapped to ``user_goal``.

    Reads the module-level ``goal_mapping`` (goal name -> cluster id) and
    ``df_filtered`` (food table with a ``'Cluster'`` column).

    Parameters
    ----------
    user_goal : str
        Goal name. Surrounding whitespace is stripped and the text is
        lower-cased before it is looked up in ``goal_mapping``.
    user_activity : str
        Accepted for interface compatibility; not used by the selection logic.
    n_recommendations : int, default 5
        Maximum number of foods to show; fewer are shown when the cluster
        holds fewer rows. Expected to be a positive integer.
    random_state : int or None, default 1
        Seed forwarded to ``DataFrame.sample``. The default keeps the output
        identical between calls; pass ``None`` for a fresh draw each time.

    Returns
    -------
    None
        All results and error messages are printed, not returned.
    """
    # Step 5: Validate the goal.
    # A non-string goal (e.g. None) has no .strip(); report it like any other
    # unknown goal instead of failing with AttributeError.
    if not isinstance(user_goal, str):
        print("Invalid goal specified.")
        return

    user_goal = user_goal.strip().lower()
    if user_goal not in goal_mapping:
        print("Invalid goal specified.")
        return

    # Step 6: Keep only the foods whose cluster label matches the goal
    goal_cluster = goal_mapping[user_goal]
    recommended_foods = df_filtered[df_filtered['Cluster'] == goal_cluster]

    if recommended_foods.empty:
        print("No recommendations available based on your preferences.")
        return

    # Step 7: Draw at most n_recommendations rows (never more than exist)
    random_recommendations = recommended_foods.sample(
        n=min(n_recommendations, len(recommended_foods)),
        random_state=random_state,
    )

    # Step 8: Display the identifying columns plus protein and calories
    print("Recommended Foods based on your inputs:")
    print(random_recommendations[['Category', 'Description', 'Data.Protein', 'Data.Kilocalories']])

# Example inputs for a demo run
user_goal = "enhance athletic performance"
user_activity = "Moderately Active"  # passed through, but recommend_diet does not use it

# Run the recommender with the example inputs
recommend_diet(user_goal=user_goal, user_activity=user_activity)


Recommended Foods based on your inputs:
     Category                                        Description  \
5094     LAMB                               LAMB,GROUND,CKD,BRLD   
7078   WALRUS                    WALRUS,MEAT,RAW (ALASKA NATIVE)   
4448     CLAM                      CLAM,MXD SP,CKD,BREADED&FRIED   
2755     PORK  PORK,CURED,HAM W/ NAT JUICES,WHL,BNLESS,LN & F...   
799   CHICKEN   CHICKEN,BROILERS OR FRYERS,LEG,MEAT&SKN,CKD,STWD   

      Data.Protein  Data.Kilocalories  
5094         24.75                283  
7078         19.20                199  
4448         14.24                202  
2755         19.38                112  
799          24.17                220  


In [51]:
import pickle

# Persist the fitted K-Means model and the fitted scaler, one pickle file each,
# in the current working directory.
for artifact_path, fitted_object in (
    ('kmeans_model.pkl', kmeans),
    ('scaler.pkl', scaler),
):
    with open(artifact_path, 'wb') as f:
        pickle.dump(fitted_object, f)
