## Diet Recommendation System

In [32]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler

In [33]:
# Load the food/nutrition CSV into a DataFrame. The path is relative, so it is
# resolved against the current working directory of the kernel.
df = pd.read_csv(
    filepath_or_buffer='static/Food__Nutrition__dataset/food.csv',
)

In [34]:
# Preview the first five rows to sanity-check the load.
df.head(n=5)

Unnamed: 0,Category,Description,Nutrient Data Bank Number,Data.Alpha Carotene,Data.Ash,Data.Beta Carotene,Data.Beta Cryptoxanthin,Data.Carbohydrate,Data.Cholesterol,Data.Choline,...,Data.Major Minerals.Potassium,Data.Major Minerals.Sodium,Data.Major Minerals.Zinc,Data.Vitamins.Vitamin A - IU,Data.Vitamins.Vitamin A - RAE,Data.Vitamins.Vitamin B12,Data.Vitamins.Vitamin B6,Data.Vitamins.Vitamin C,Data.Vitamins.Vitamin E,Data.Vitamins.Vitamin K
0,BUTTER,"BUTTER,WITH SALT",1001,0,2.11,158,0,0.06,215,19,...,24,576,0.09,2499,684,0.17,0.003,0.0,2.32,7.0
1,BUTTER,"BUTTER,WHIPPED,WITH SALT",1002,0,2.11,158,0,0.06,219,19,...,26,827,0.05,2499,684,0.13,0.003,0.0,2.32,7.0
2,BUTTER OIL,"BUTTER OIL,ANHYDROUS",1003,0,0.0,193,0,0.0,256,22,...,5,2,0.01,3069,840,0.01,0.001,0.0,2.8,8.6
3,CHEESE,"CHEESE,BLUE",1004,0,5.11,74,0,2.34,75,15,...,256,1395,2.66,763,198,1.22,0.166,0.0,0.25,2.4
4,CHEESE,"CHEESE,BRICK",1005,0,3.18,76,0,2.79,94,15,...,136,560,2.6,1080,292,1.26,0.065,0.0,0.26,2.5


In [35]:
# Print a per-column summary of the raw frame: column names, non-null counts
# and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7413 entries, 0 to 7412
Data columns (total 48 columns):
 #   Column                                                   Non-Null Count  Dtype  
---  ------                                                   --------------  -----  
 0   Category                                                 7413 non-null   object 
 1   Description                                              7413 non-null   object 
 2   Nutrient Data Bank Number                                7413 non-null   int64  
 3   Data.Alpha Carotene                                      7413 non-null   int64  
 4   Data.Ash                                                 7413 non-null   float64
 5   Data.Beta Carotene                                       7413 non-null   int64  
 6   Data.Beta Cryptoxanthin                                  7413 non-null   int64  
 7   Data.Carbohydrate                                        7413 non-null   float64
 8   Data.Cholesterol            

In [36]:
# (rows, columns) of the raw frame.
df.shape

(7413, 48)

In [37]:
# Full list of column labels in the raw frame.
df.columns

Index(['Category', 'Description', 'Nutrient Data Bank Number',
       'Data.Alpha Carotene', 'Data.Ash', 'Data.Beta Carotene',
       'Data.Beta Cryptoxanthin', 'Data.Carbohydrate', 'Data.Cholesterol',
       'Data.Choline', 'Data.Fiber', 'Data.Kilocalories',
       'Data.Lutein and Zeaxanthin', 'Data.Lycopene', 'Data.Manganese',
       'Data.Niacin', 'Data.Pantothenic Acid', 'Data.Protein',
       'Data.Refuse Percentage', 'Data.Retinol', 'Data.Riboflavin',
       'Data.Selenium', 'Data.Sugar Total', 'Data.Thiamin', 'Data.Water',
       'Data.Fat.Monosaturated Fat', 'Data.Fat.Polysaturated Fat',
       'Data.Fat.Saturated Fat', 'Data.Fat.Total Lipid',
       'Data.Household Weights.1st Household Weight',
       'Data.Household Weights.1st Household Weight Description',
       'Data.Household Weights.2nd Household Weight',
       'Data.Household Weights.2nd Household Weight Description',
       'Data.Major Minerals.Calcium', 'Data.Major Minerals.Copper',
       'Data.Major Minerals.Iro

## Data Preprocessing

In [38]:
# Count missing (NaN/None) entries per column of the raw frame.
df.isna().sum()

Category                                                      0
Description                                                   0
Nutrient Data Bank Number                                     0
Data.Alpha Carotene                                           0
Data.Ash                                                      0
Data.Beta Carotene                                            0
Data.Beta Cryptoxanthin                                       0
Data.Carbohydrate                                             0
Data.Cholesterol                                              0
Data.Choline                                                  0
Data.Fiber                                                    0
Data.Kilocalories                                             0
Data.Lutein and Zeaxanthin                                    0
Data.Lycopene                                                 0
Data.Manganese                                                0
Data.Niacin                             

In [39]:
# Columns with missing values: 'Data.Household Weights.1st Household Weight Description' and 'Data.Household Weights.2nd Household Weight Description'.

In [40]:
# Count rows that exactly repeat an earlier row across all columns
# (the first occurrence is not counted as a duplicate).
df.duplicated(keep='first').sum()

np.int64(0)

In [41]:
# Household-weight fields to remove: the two weight values and their two
# free-text descriptions.
columns_to_drop = [
    'Data.Household Weights.1st Household Weight Description',
    'Data.Household Weights.2nd Household Weight Description',
    'Data.Household Weights.1st Household Weight',
    'Data.Household Weights.2nd Household Weight',
]

# drop() returns a new frame, so `df` keeps the original columns and the
# reduced frame is bound to `data`.
data = df.drop(labels=columns_to_drop, axis='columns')


In [42]:
# Re-check per-column missing-value counts on the reduced frame.
data.isna().sum()

Category                          0
Description                       0
Nutrient Data Bank Number         0
Data.Alpha Carotene               0
Data.Ash                          0
Data.Beta Carotene                0
Data.Beta Cryptoxanthin           0
Data.Carbohydrate                 0
Data.Cholesterol                  0
Data.Choline                      0
Data.Fiber                        0
Data.Kilocalories                 0
Data.Lutein and Zeaxanthin        0
Data.Lycopene                     0
Data.Manganese                    0
Data.Niacin                       0
Data.Pantothenic Acid             0
Data.Protein                      0
Data.Refuse Percentage            0
Data.Retinol                      0
Data.Riboflavin                   0
Data.Selenium                     0
Data.Sugar Total                  0
Data.Thiamin                      0
Data.Water                        0
Data.Fat.Monosaturated Fat        0
Data.Fat.Polysaturated Fat        0
Data.Fat.Saturated Fat      

## Getting Insights From Data

In [43]:
# Summary statistics (count, mean, std, min, quartiles, max) for every
# column that is not of object dtype.
# NOTE(review): 'Nutrient Data Bank Number' is summarised here as well; it looks
# like an identifier rather than a measurement — confirm whether its statistics
# should be read at all.
data.describe(exclude=['object'])

Unnamed: 0,Nutrient Data Bank Number,Data.Alpha Carotene,Data.Ash,Data.Beta Carotene,Data.Beta Cryptoxanthin,Data.Carbohydrate,Data.Cholesterol,Data.Choline,Data.Fiber,Data.Kilocalories,...,Data.Major Minerals.Potassium,Data.Major Minerals.Sodium,Data.Major Minerals.Zinc,Data.Vitamins.Vitamin A - IU,Data.Vitamins.Vitamin A - RAE,Data.Vitamins.Vitamin B12,Data.Vitamins.Vitamin B6,Data.Vitamins.Vitamin C,Data.Vitamins.Vitamin E,Data.Vitamins.Vitamin K
count,7413.0,7413.0,7413.0,7413.0,7413.0,7413.0,7413.0,7413.0,7413.0,7413.0,...,7413.0,7413.0,7413.0,7413.0,7413.0,7413.0,7413.0,7413.0,7413.0,7413.0
mean,14116.44368,21.210711,1.852459,159.043437,8.776744,21.785381,37.162822,20.673546,1.993147,219.655875,...,268.348172,331.590719,1.875125,767.568191,99.43707,1.172903,0.269547,9.075651,0.842837,9.448604
std,8767.416214,269.714183,2.993228,1126.285026,154.18486,27.123491,119.738438,45.48199,4.292873,171.668713,...,404.91622,977.046544,4.193682,3871.307652,761.653061,4.512816,0.565116,63.443284,4.169756,66.067619
min,1001.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,8121.0,0.0,0.83,0.0,0.0,0.49,0.0,0.0,0.0,82.0,...,103.0,31.0,0.19,0.0,0.0,0.0,0.03,0.0,0.0,0.0
50%,12539.0,0.0,1.24,0.0,0.0,9.29,2.0,0.0,0.3,181.0,...,210.0,86.0,0.77,33.0,0.0,0.01,0.11,0.0,0.05,0.0
75%,18424.0,0.0,2.2,1.0,0.0,30.59,60.0,20.0,2.3,331.0,...,328.0,428.0,2.46,280.0,24.0,0.83,0.33,3.5,0.39,1.7
max,93600.0,14251.0,99.8,42891.0,7923.0,100.0,3100.0,1388.0,79.0,902.0,...,16500.0,38758.0,181.61,100000.0,30000.0,98.89,12.0,2400.0,149.4,1714.5


In [44]:
# Summary of the object-dtype (text) columns: count, number of unique values,
# most frequent value and its frequency.
data.describe(include=['object'])

Unnamed: 0,Category,Description
count,7413,7413
unique,1183,7409
top,BEEF,"BABYFOOD,MEAT,BF,STR"
freq,457,2


   Nutrient Data Bank Number  Data.Alpha Carotene  Data.Ash  \
0                       1001                    0      2.11   
1                       1002                    0      2.11   
2                       1003                    0      0.00   
3                       1004                    0      5.11   
4                       1005                    0      3.18   

   Data.Beta Carotene  Data.Beta Cryptoxanthin  Data.Carbohydrate  \
0                 158                        0               0.06   
1                 158                        0               0.06   
2                 193                        0               0.00   
3                  74                        0               2.34   
4                  76                        0               2.79   

   Data.Cholesterol  Data.Choline  Data.Fiber  Data.Kilocalories  ...  \
0               215            19         0.0                717  ...   
1               219            19         0.0               