## Food Recommender

#### Importing Libraies

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

## Importing Datasets

#### 1st Dataset

In [2]:
nutritions = pd.read_csv('nutritions.csv', usecols=['Food Name','Category Name', 'Zinc', 'Vitamin B1', 'Vitamin B12',
       'Vitamin B2', 'Vitamin B3', 'Vitamin B5', 'Vitamin B6', 'Vitamin A (IU)', 'Vitamin D','Vitamin A RAE', 'Iron', 'Vitamin C'])
nutritions.head(3)

Unnamed: 0,Food Name,Category Name,Iron,Vitamin A (IU),Vitamin A RAE,Vitamin B1,Vitamin B12,Vitamin B2,Vitamin B3,Vitamin B5,Vitamin B6,Vitamin C,Zinc,Vitamin D
0,Acerola,Fruits,0.0002,767.0,3.8e-05,2e-05,0.0,6e-05,0.0004,0.00031,1e-05,1.678,0.0001,
1,Apple,Fruits,0.00012,54.0,3e-06,2e-05,0.0,3e-05,9e-05,6e-05,4e-05,0.0046,4e-05,0.0
2,Apricot,Fruits,0.00039,1279.0,9.6e-05,3e-05,0.0,4e-05,0.0006,0.00024,5e-05,0.01,0.0002,0.0


In [3]:
nutritions['Vitamin A'] = nutritions['Vitamin A (IU)'] + nutritions['Vitamin A RAE']

In [4]:
nutritions['Vitamin B'] = nutritions['Vitamin B1']+nutritions['Vitamin B2']+nutritions['Vitamin B3']+nutritions['Vitamin B12']+nutritions['Vitamin B5']+nutritions['Vitamin B6']

### Dataset1 Cleaning

#### Duplicates Removal

In [5]:
nutritions.duplicated().sum()

0

#### Drop Unwanted Cols

In [6]:
nutritions.drop(columns=['Vitamin A (IU)', 'Vitamin A RAE', 'Vitamin B1', 'Vitamin B12', 'Vitamin B2', 'Vitamin B3', 'Vitamin B5', 'Vitamin B6'], inplace=True)

#### Columns Information

In [7]:
nutritions.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1174 entries, 0 to 1173
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Food Name      1174 non-null   object 
 1   Category Name  1174 non-null   object 
 2   Iron           1153 non-null   float64
 3   Vitamin C      1124 non-null   float64
 4   Zinc           1108 non-null   float64
 5   Vitamin D      829 non-null    float64
 6   Vitamin A      1055 non-null   float64
 7   Vitamin B      949 non-null    float64
dtypes: float64(6), object(2)
memory usage: 73.5+ KB


### Dataset1 Statistics

In [8]:
nutritions.describe()

Unnamed: 0,Iron,Vitamin C,Zinc,Vitamin D,Vitamin A,Vitamin B
count,1153.0,1124.0,1108.0,829.0,1055.0,949.0
mean,0.00287,0.010601,0.0015,7.752714e-07,990.181178,0.004572
std,0.007132,0.055625,0.003209,8.890438e-06,4620.797616,0.013204
min,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.00038,0.0,0.00022,0.0,0.0,0.0008
50%,0.001,0.0005,0.00064,0.0,50.000015,0.00191
75%,0.0025,0.0063,0.0016,0.0,360.000048,0.004832
max,0.124,1.678,0.079,0.00025,100000.03,0.16197


In [9]:
nutritions[['Iron', 'Vitamin A','Vitamin B', 'Vitamin C', 'Zinc', 'Vitamin D']].corr()

Unnamed: 0,Iron,Vitamin A,Vitamin B,Vitamin C,Zinc,Vitamin D
Iron,1.0,0.133164,0.201854,0.047527,0.325492,-0.000573
Vitamin A,0.133164,1.0,0.067172,0.046547,0.031041,0.656369
Vitamin B,0.201854,0.067172,1.0,-0.014468,0.150544,0.114969
Vitamin C,0.047527,0.046547,-0.014468,1.0,-0.021086,-0.022014
Zinc,0.325492,0.031041,0.150544,-0.021086,1.0,0.007069
Vitamin D,-0.000573,0.656369,0.114969,-0.022014,0.007069,1.0


## Data Preprocessing

In [10]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.compose import ColumnTransformer

ct = ColumnTransformer(transformers=[('scaler',MinMaxScaler(),['Iron', 'Vitamin A', 'Vitamin B', 'Vitamin C', 'Zinc', 'Vitamin D'])],
                       remainder='passthrough',sparse_threshold=0)

nutritions = pd.DataFrame(ct.fit_transform(nutritions))
nutritions.rename(columns={0:'Iron', 1:'Vitamin A', 2:'Vitamin B', 3:'Vitamin C', 4:'Zinc', 5:'Vitamin D', 6:'Food', 7:'Category'}, inplace=True)

In [11]:
nutritions.sort_values(by='Iron',ascending=False,ignore_index=True).loc[:9,['Food','Category']]

Unnamed: 0,Food,Category
0,Dried thyme,Spices
1,Marjoram,Spices
2,Cumin,Spices
3,Turmeric,Spices
4,Dried dill weed,Spices
5,Baby food,Baby Foods
6,Bay leaf,Spices
7,Star anise,Spices
8,Oregano,Spices
9,Poultry seasoning,Spices


In [12]:
nutritions.sort_values(by='Vitamin A',ascending=False,ignore_index=True).loc[:9,['Food','Category']]

Unnamed: 0,Food,Category
0,Cod liver oil,Seafood
1,Paprika,Spices
2,Cayenne pepper,Spices
3,Pasilla peppers,Vegetables
4,Beef Liver,Meat
5,Chili powder spice,Spices
6,Grape leaves,Greens
7,Sweet potato chips,"Meals, Entrees, and Side Dishes"
8,Poblano,Vegetables
9,Sweet potato,Vegetables


In [13]:
nutritions.sort_values(by='Vitamin B',ascending=False,ignore_index=True).loc[:9,['Food','Category']]

Unnamed: 0,Food,Category
0,Candy bar,Sweets
1,Mortadella,Meat
2,Pea,Vegetables
3,Powerade,Beverages
4,Pineapple cake,Baked Products
5,Blueberry pie,Baked Products
6,Yeast,Baked Products
7,Baby food,Baby Foods
8,Kellogg's Frosted Flakes Cereal,"Meals, Entrees, and Side Dishes"
9,Bran cereal,"Meals, Entrees, and Side Dishes"


In [14]:
nutritions.sort_values(by='Vitamin C',ascending=False,ignore_index=True).loc[:9,['Food','Category']]

Unnamed: 0,Food,Category
0,Acerola,Fruits
1,Rose hip,Fruits
2,Chili Pepper,Vegetables
3,Guava,Fruits
4,Thyme,Greens
5,Cloudberry,Fruits
6,Chili pepper,Vegetables
7,Drumstick tree,Greens
8,Parsley,Greens
9,Dried parsely,Spices


In [15]:
nutritions.sort_values(by='Vitamin D',ascending=False,ignore_index=True).loc[:9,['Food','Category']]

Unnamed: 0,Food,Category
0,Cod liver oil,Seafood
1,Maitake mushrooms,Mushrooms
2,Trout,Seafood
3,Swordfish,Seafood
4,Sockeye salmon,Seafood
5,Smoked salmon,Seafood
6,Steelhead trout,Seafood
7,Sturgeon,Seafood
8,Salmon,Seafood
9,Fish oil,Oils and Sauces


In [16]:
nutritions.sort_values(by='Zinc',ascending=False,ignore_index=True).loc[:9,['Food','Category']]

Unnamed: 0,Food,Category
0,Oysters,Seafood
1,Lucky Charms Cereal,"Meals, Entrees, and Side Dishes"
2,General Mills Cinnamon Toast Crunch,"Meals, Entrees, and Side Dishes"
3,Kix Cereal,"Meals, Entrees, and Side Dishes"
4,General Mills Cheerios,"Meals, Entrees, and Side Dishes"
5,Cap'n Crunch Cereal,"Meals, Entrees, and Side Dishes"
6,General Mills Cocoa Puffs,"Meals, Entrees, and Side Dishes"
7,General Mills Count Chocula,"Meals, Entrees, and Side Dishes"
8,General Mills Trix Cereal,"Meals, Entrees, and Side Dishes"
9,Cookie Crisp Cereal,"Meals, Entrees, and Side Dishes"


#### 2nd Dataset

In [17]:
deficiency = pd.read_csv('deficiency.csv')
deficiency.head()

Unnamed: 0,Age,Gender,Diet Type,Living Environment,Night Blindness,Dry Eyes,Bleeding Gums,Fatigue,Tingling Sensation,Low Sun Exposure,Reduced Memory Capacity,Shortness of Breath,Loss of Appetite,Fast Heart Rate,Brittle Nails,Weight Loss,Reduced Wound Healing Capacity,Skin Condition,Predicted Deficiency
0,62,Male,Non-Vegetarian,Rural,1,0,0,0,1,1,0,0,0,0,0,1,1,Normal,Zinc
1,52,Male,Vegetarian,Urban,0,0,0,0,0,1,0,0,0,0,0,1,1,Normal,Vitamin B12
2,53,Female,Vegetarian,Rural,0,0,0,1,0,0,0,0,0,0,0,0,0,Normal,Vitamin A
3,40,Male,Non-Vegetarian,Rural,0,0,0,0,0,0,1,1,0,1,0,0,0,Pale/Yellow Skin,No Deficiency
4,17,Female,Non-Vegetarian,Urban,0,0,0,1,1,0,0,0,1,1,0,0,0,Normal,Vitamin D


## Dataset Cleaning

#### Duplicates Removal

In [18]:
deficiency.duplicated().sum()

1

In [19]:
deficiency.drop_duplicates(inplace=True)

#### Drop Unwanted Cols

In [20]:
deficiency.reset_index(inplace=True)
deficiency.drop(columns=['index'], inplace=True)

## Column Information

In [21]:
deficiency.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 999 entries, 0 to 998
Data columns (total 19 columns):
 #   Column                          Non-Null Count  Dtype 
---  ------                          --------------  ----- 
 0   Age                             999 non-null    int64 
 1   Gender                          999 non-null    object
 2   Diet Type                       999 non-null    object
 3   Living Environment              999 non-null    object
 4   Night Blindness                 999 non-null    int64 
 5   Dry Eyes                        999 non-null    int64 
 6   Bleeding Gums                   999 non-null    int64 
 7   Fatigue                         999 non-null    int64 
 8   Tingling Sensation              999 non-null    int64 
 9   Low Sun Exposure                999 non-null    int64 
 10  Reduced Memory Capacity         999 non-null    int64 
 11  Shortness of Breath             999 non-null    int64 
 12  Loss of Appetite                999 non-null    in

#### Drop Duplicates

In [22]:
# deficiency.duplicated().sum()

In [23]:
# deficiency.drop_duplicates(inplace=True)
# deficiency.reset_index(inplace=True)

## Statistics

In [24]:
deficiency.describe() 

Unnamed: 0,Age,Night Blindness,Dry Eyes,Bleeding Gums,Fatigue,Tingling Sensation,Low Sun Exposure,Reduced Memory Capacity,Shortness of Breath,Loss of Appetite,Fast Heart Rate,Brittle Nails,Weight Loss,Reduced Wound Healing Capacity
count,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0
mean,39.692693,0.1001,0.16016,0.127127,0.419419,0.155155,0.31031,0.151151,0.176176,0.146146,0.184184,0.142142,0.108108,0.324324
std,16.969874,0.300284,0.366938,0.333282,0.493711,0.362234,0.462852,0.358376,0.381161,0.35343,0.387828,0.349371,0.310672,0.468356
min,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,25.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,39.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,54.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
max,69.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [25]:
deficiency[['Age','Night Blindness','Dry Eyes', 'Bleeding Gums', 'Fatigue', 'Tingling Sensation','Low Sun Exposure',
        'Reduced Memory Capacity', 'Shortness of Breath', 'Loss of Appetite', 'Fast Heart Rate', 'Brittle Nails', 'Weight Loss',
       'Reduced Wound Healing Capacity']].corr()

Unnamed: 0,Age,Night Blindness,Dry Eyes,Bleeding Gums,Fatigue,Tingling Sensation,Low Sun Exposure,Reduced Memory Capacity,Shortness of Breath,Loss of Appetite,Fast Heart Rate,Brittle Nails,Weight Loss,Reduced Wound Healing Capacity
Age,1.0,-0.019716,-0.032478,-0.036491,-0.016652,0.036942,-0.005962,-0.034039,0.02325,0.013343,0.016526,-0.006484,-0.0184,0.000324
Night Blindness,-0.019716,1.0,-0.018333,-0.017148,-0.019884,0.022887,-0.04348,-0.029005,0.003348,-0.043568,-0.020808,-0.021148,0.055736,-0.045829
Dry Eyes,-0.032478,-0.018333,1.0,-0.060142,0.016001,0.023936,-0.033332,-0.024263,-0.037169,0.020217,0.010777,-0.013621,-0.020193,0.012291
Bleeding Gums,-0.036491,-0.017148,-0.060142,1.0,-0.050338,0.019051,0.003836,0.023522,-0.042391,-0.004768,-0.018538,0.008157,0.051002,-0.03973
Fatigue,-0.016652,-0.019884,0.016001,-0.050338,1.0,0.022355,0.048145,-0.030198,0.043567,0.0733,0.025259,-0.072948,-0.034606,-0.025531
Tingling Sensation,0.036942,0.022887,0.023936,0.019051,0.022355,1.0,-0.024492,-0.0419,-0.00223,-0.005108,0.010352,-0.016089,-0.006738,0.039747
Low Sun Exposure,-0.005962,-0.04348,-0.033332,0.003836,0.048145,-0.024492,1.0,0.025028,0.041946,0.028756,-0.011706,-0.006593,0.038231,-0.030232
Reduced Memory Capacity,-0.034039,-0.029005,-0.024263,0.023522,-0.030198,-0.0419,0.025028,1.0,0.076269,0.007372,0.008566,0.0203,-0.029918,-0.011778
Shortness of Breath,0.02325,0.003348,-0.037169,-0.042391,0.043567,-0.00223,0.041946,0.076269,1.0,-0.042558,0.003956,-0.03775,-0.00869,-0.000455
Loss of Appetite,0.013343,-0.043568,0.020217,-0.004768,0.0733,-0.005108,0.028756,0.007372,-0.042558,1.0,-0.013823,-0.022338,-0.016278,-0.02634


## Data Preprocessing

In [26]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.preprocessing import StandardScaler

ct2 = ColumnTransformer(transformers=[('onehot', OneHotEncoder(drop='first',sparse_output=False), ['Gender','Diet Type','Living Environment','Skin Condition'])], remainder='passthrough', sparse_threshold=0)

ct2

## Data Splitting

In [27]:
from sklearn.model_selection import train_test_split

X = deficiency[['Age','Gender','Diet Type','Living Environment','Night Blindness','Dry Eyes','Bleeding Gums', 'Fatigue', 'Tingling Sensation',
                'Low Sun Exposure','Reduced Memory Capacity','Shortness of Breath','Loss of Appetite', 'Fast Heart Rate','Brittle Nails',
                'Weight Loss','Reduced Wound Healing Capacity','Skin Condition']]

le = LabelEncoder()
Y = le.fit_transform(deficiency['Predicted Deficiency'])

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=789)

X_train.shape, y_train.shape

((799, 18), (799,))

In [28]:
X_test.shape, y_test.shape

((200, 18), (200,))

## Model Pipeline

In [29]:
from sklearn.pipeline import Pipeline
from sklearn.ensemble import HistGradientBoostingRegressor
pipe = Pipeline([('ct', ct2), ('model', HistGradientBoostingRegressor(max_iter=1098, max_leaf_nodes=27, min_samples_leaf=1, l2_regularization=0.27))])

## Pipeline Training

In [30]:
pipe.fit(X_train,y_train)

## Pipeline Evaluation

In [31]:
from sklearn.metrics import r2_score, mean_squared_error as mse
y = pipe.predict(X)

print(f'Score: {pipe.score(X,Y)}')
print(f'MSE: {mse(Y, np.round(y,0))}')

Score: 0.8328392769278261
MSE: 0.6796796796796797


## Food Recommender

In [40]:
deficit = le.classes_
def food_recommender(obj):
    x = pd.DataFrame(obj)
    y = pipe.predict(x)
    deficiency = deficit[np.int64(np.round(y,0)[0])]
    print(f'Your deficiency: {deficiency}')
    if deficiency != 'No Deficiency':
        foods = nutritions.sort_values(by=deficiency,ascending=False,ignore_index=True).loc[:9,['Food','Category']]
        print(f'Foods Recommended: ')
        count = 1
        for i in foods.values:
            print(f'{count}. {i[0]} ({i[1]})')
            count += 1

#### Example - 1

In [41]:
obj = {'Age':[25],'Gender':['Male'],'Diet Type':['Non-Vegetarian'],'Living Environment':['Rural'],
       'Night Blindness':[1],'Dry Eyes':[0],'Bleeding Gums':[0],'Fatigue':[0],'Tingling Sensation':[1],
       'Low Sun Exposure':[1],'Reduced Memory Capacity':[1],'Shortness of Breath':[0],'Loss of Appetite':[0],
       'Fast Heart Rate':[0],'Brittle Nails':[0],'Weight Loss':[1],'Reduced Wound Healing Capacity':[1],'Skin Condition':['Normal']
      }

food_recommender(obj)

Your deficiency: Vitamin A
Foods Recommended: 
1. Cod liver oil (Seafood)
2. Paprika (Spices)
3. Cayenne pepper (Spices)
4. Pasilla peppers (Vegetables)
5. Beef Liver (Meat)
6. Chili powder spice (Spices)
7. Grape leaves (Greens)
8. Sweet potato chips (Meals, Entrees, and Side Dishes)
9. Poblano (Vegetables)
10. Sweet potato (Vegetables)


#### Example - 2

In [42]:
obj = {'Age':[60],'Gender':['Female'],'Diet Type':['Non-Vegetarian'],'Living Environment':['Rural'],
       'Night Blindness':[0],'Dry Eyes':[0],'Bleeding Gums':[0],'Fatigue':[0],'Tingling Sensation':[1],
       'Low Sun Exposure':[1],'Reduced Memory Capacity':[1],'Shortness of Breath':[0],'Loss of Appetite':[0],
       'Fast Heart Rate':[0],'Brittle Nails':[0],'Weight Loss':[0],'Reduced Wound Healing Capacity':[1],'Skin Condition':['Normal']
      }
food_recommender(obj)

Your deficiency: Vitamin D
Foods Recommended: 
1. Cod liver oil (Seafood)
2. Maitake mushrooms (Mushrooms)
3. Trout (Seafood)
4. Swordfish (Seafood)
5. Sockeye salmon (Seafood)
6. Smoked salmon (Seafood)
7. Steelhead trout (Seafood)
8. Sturgeon (Seafood)
9. Salmon (Seafood)
10. Fish oil (Oils and Sauces)


#### Ezample - 3

In [43]:
obj = {'Age':[45],'Gender':['Female'],'Diet Type':['Vegetarian'],'Living Environment':['Urban'],
       'Night Blindness':[0],'Dry Eyes':[0],'Bleeding Gums':[0],'Fatigue':[0],'Tingling Sensation':[1],
       'Low Sun Exposure':[1],'Reduced Memory Capacity':[0],'Shortness of Breath':[0],'Loss of Appetite':[1],
       'Fast Heart Rate':[0],'Brittle Nails':[0],'Weight Loss':[0],'Reduced Wound Healing Capacity':[1],'Skin Condition':['Normal']
      }
food_recommender(obj)

Your deficiency: Vitamin C
Foods Recommended: 
1. Acerola (Fruits)
2. Rose hip (Fruits)
3. Chili Pepper (Vegetables)
4. Guava (Fruits)
5. Thyme (Greens)
6. Cloudberry (Fruits)
7. Chili pepper (Vegetables)
8. Drumstick tree (Greens)
9. Parsley (Greens)
10. Dried parsely (Spices)
