In [3]:
import os
import cv2
import numpy as np
import pandas as pd
import seaborn as sn
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import PIL
from PIL import Image
from pathlib import Path
import tensorflow as tf
import keras as kr

In [4]:
filePath = Path('../input/food41/images')

In [5]:
imagePaths=list(filePath.glob(r'**/*.jpg'))
labels=[os.path.split(os.path.split(x)[0])[1] for x in imagePaths ]

In [6]:
len(labels)

101000

In [7]:
labels=pd.Series(labels,name="Label")

In [8]:
imagePaths=pd.Series(imagePaths,name='FilePath').astype('str')

In [9]:
imagePaths

0             ../input/food41/images/macarons/2428554.jpg
1             ../input/food41/images/macarons/3842133.jpg
2             ../input/food41/images/macarons/1963752.jpg
3                ../input/food41/images/macarons/1075.jpg
4             ../input/food41/images/macarons/2094416.jpg
                               ...                       
100995    ../input/food41/images/french_fries/3663095.jpg
100996    ../input/food41/images/french_fries/2580653.jpg
100997     ../input/food41/images/french_fries/338259.jpg
100998    ../input/food41/images/french_fries/1327248.jpg
100999    ../input/food41/images/french_fries/1347228.jpg
Name: FilePath, Length: 101000, dtype: object

In [10]:
data=pd.concat([imagePaths,labels],axis=1)

In [11]:
data['FilePath'][101]

'../input/food41/images/macarons/2976100.jpg'

In [None]:
Image.open(data['FilePath'][1])

In [13]:
cv2.imread('../input/food41/images/macarons/2976100.jpg').shape

(512, 512, 3)

In [14]:
data['Label'].value_counts()

Label
french_fries         1000
macarons             1000
french_toast         1000
lobster_bisque       1000
prime_rib            1000
                     ... 
beef_carpaccio       1000
poutine              1000
hot_and_sour_soup    1000
seaweed_salad        1000
foie_gras            1000
Name: count, Length: 101, dtype: int64

In [15]:
category_samples=[]

for category in data['Label'].unique():
    category_slice = data.query("Label == @category")
    category_samples.append(category_slice.sample(1000, random_state=1))
image_df = pd.concat(category_samples, axis=0).sample(frac=1.0, random_state=1).reset_index(drop=True)

In [16]:
image_df['Label'].value_counts()

Label
club_sandwich     1000
bruschetta        1000
french_toast      1000
bread_pudding     1000
beef_carpaccio    1000
                  ... 
gnocchi           1000
samosa            1000
tacos             1000
gyoza             1000
beignets          1000
Name: count, Length: 101, dtype: int64

In [17]:
from sklearn.model_selection import train_test_split

train_df, test_df = train_test_split(image_df, train_size=0.8, shuffle=True, random_state=1)

In [18]:
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout

# Load the EfficientNetB0 model with pre-trained weights from ImageNet
base_model = EfficientNetB0(weights="imagenet", include_top=False, input_shape=(224, 224, 3))

# Freeze the base model layers to retain pre-trained weights
base_model.trainable = False

# Add custom layers on top of the base model
x = base_model.output
x = GlobalAveragePooling2D()(x)  # Reduce (7, 7, 1280) to (1280)
x = Dense(512, activation="relu")(x)  # Fully connected layer with 512 neurons
x = Dropout(0.5)(x)  # Dropout to prevent overfitting
x = Dense(256, activation="relu")(x)  # Fully connected layer with 256 neurons
#x = Dropout(0.5)(x)  # Dropout to prevent overfitting
output = Dense(101, activation="softmax")(x)  # Output layer for 101 classes

# Create the final model
model = Model(inputs=base_model.input, outputs=output)

# Compile the model
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"]
)

# Print the model summary
model.summary()

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5
[1m16705208/16705208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [24]:
# Define EarlyStopping callback
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss",  # Monitor validation loss
    patience=5,  # Stop after 5 epochs of no improvement
    restore_best_weights=True  # Restore the best weights
)

# Train the model
history = model.fit(
    train_images,
    validation_data=val_images,
    epochs=100,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=5,
            restore_best_weights=True
        )
    ]
)

Epoch 1/100
[1m2020/2020[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m365s[0m 179ms/step - accuracy: 0.6435 - loss: 1.3114 - val_accuracy: 0.6327 - val_loss: 1.4459
Epoch 2/100
[1m2020/2020[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m266s[0m 131ms/step - accuracy: 0.6492 - loss: 1.2856 - val_accuracy: 0.6298 - val_loss: 1.4593
Epoch 3/100
[1m2020/2020[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m269s[0m 133ms/step - accuracy: 0.6534 - loss: 1.2736 - val_accuracy: 0.6351 - val_loss: 1.4376
Epoch 4/100
[1m2020/2020[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m251s[0m 124ms/step - accuracy: 0.6614 - loss: 1.2448 - val_accuracy: 0.6300 - val_loss: 1.4609
Epoch 5/100
[1m2020/2020[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m242s[0m 120ms/step - accuracy: 0.6661 - loss: 1.2193 - val_accuracy: 0.6328 - val_loss: 1.4457
Epoch 6/100
[1m2020/2020[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m248s[0m 123ms/step - accuracy: 0.6654 - loss: 1.2237 - val_accuracy: 0.6332 - val

In [25]:
results = model.evaluate(test_images, verbose=0)
print("Test Accuracy: {:.2f}%".format(results[1] * 100))

Test Accuracy: 62.91%


In [26]:
model.save('NutriModel2.h5')

In [29]:
from sklearn.metrics import confusion_matrix,classification_report

In [30]:
predictions = np.argmax(model.predict(test_images), axis=1)

cm = confusion_matrix(test_images.labels, predictions)
clr = classification_report(test_images.labels, predictions, target_names=test_images.class_indices, zero_division=0)

[1m632/632[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 99ms/step


array([[ 78,   1,  11, ...,   5,   0,   6],
       [  1, 114,   0, ...,   0,   0,   0],
       [  9,   0, 118, ...,   0,   3,   1],
       ...,
       [  3,   1,   0, ..., 117,   0,   2],
       [  0,   1,   0, ...,   1,  49,   1],
       [  4,   0,   1, ...,   3,   0, 137]])

In [32]:
# Predict the Class

In [41]:
test_df['FilePath']

68572          ../input/food41/images/gnocchi/3373171.jpg
1866     ../input/food41/images/bread_pudding/2689011.jpg
42554     ../input/food41/images/french_fries/1926251.jpg
8445         ../input/food41/images/miso_soup/2267034.jpg
15292           ../input/food41/images/ravioli/325437.jpg
                               ...                       
50191     ../input/food41/images/clam_chowder/3316401.jpg
1039      ../input/food41/images/creme_brulee/2853514.jpg
1128     ../input/food41/images/caprese_salad/3358534.jpg
21100     ../input/food41/images/filet_mignon/3269932.jpg
61053      ../input/food41/images/caesar_salad/450238.jpg
Name: FilePath, Length: 20200, dtype: object

In [13]:
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications.efficientnet import preprocess_input

# Load the trained model
model = load_model(r"/kaggle/input/nutrimodel/tensorflow2/default/1/NutriModel2.h5")  

In [53]:
train_images.class_indices

{'apple_pie': 0,
 'baby_back_ribs': 1,
 'baklava': 2,
 'beef_carpaccio': 3,
 'beef_tartare': 4,
 'beet_salad': 5,
 'beignets': 6,
 'bibimbap': 7,
 'bread_pudding': 8,
 'breakfast_burrito': 9,
 'bruschetta': 10,
 'caesar_salad': 11,
 'cannoli': 12,
 'caprese_salad': 13,
 'carrot_cake': 14,
 'ceviche': 15,
 'cheese_plate': 16,
 'cheesecake': 17,
 'chicken_curry': 18,
 'chicken_quesadilla': 19,
 'chicken_wings': 20,
 'chocolate_cake': 21,
 'chocolate_mousse': 22,
 'churros': 23,
 'clam_chowder': 24,
 'club_sandwich': 25,
 'crab_cakes': 26,
 'creme_brulee': 27,
 'croque_madame': 28,
 'cup_cakes': 29,
 'deviled_eggs': 30,
 'donuts': 31,
 'dumplings': 32,
 'edamame': 33,
 'eggs_benedict': 34,
 'escargots': 35,
 'falafel': 36,
 'filet_mignon': 37,
 'fish_and_chips': 38,
 'foie_gras': 39,
 'french_fries': 40,
 'french_onion_soup': 41,
 'french_toast': 42,
 'fried_calamari': 43,
 'fried_rice': 44,
 'frozen_yogurt': 45,
 'garlic_bread': 46,
 'gnocchi': 47,
 'greek_salad': 48,
 'grilled_cheese_sa

In [54]:
classLabels=list(train_images.class_indices.keys())

In [56]:
classLabels[40]

'french_fries'

In [61]:

image_path = "../input/food41/images/gnocchi/3373171.jpg"


img = load_img(image_path, target_size=(224, 224)) 
img_array = img_to_array(img)
img_array = np.expand_dims(img_array, axis=0)
img_array = preprocess_input(img_array)


predictions = model.predict(img_array)


predicted_class_index = np.argmax(predictions, axis=1)[0]

predicted_class_label = classLabels[predicted_class_index]

print(f"Predicted class label: {predicted_class_label}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
Predicted class label: ravioli


In [62]:
np.argmax(predictions, axis=1)[0]

82

In [22]:
# Nutrition Dataset
import pandas as pd
import numpy as np

In [23]:
nutri_data=pd.read_csv(r"/kaggle/input/food-101-nutritional-information/nutrition.csv")

In [31]:
nutri_data

Unnamed: 0,label,weight,calories,protein,carbohydrates,fats,fiber,sugars,sodium
0,apple_pie,80,240,2,36,10,2,16,120
1,apple_pie,100,300,3,45,12,2,20,150
2,apple_pie,120,360,4,54,14,3,24,180
3,apple_pie,150,450,5,68,18,3,30,225
4,apple_pie,200,600,6,90,24,4,40,300
...,...,...,...,...,...,...,...,...,...
500,waffles,100,300,6,40,12,2,10,300
501,waffles,150,450,9,60,18,3,15,450
502,waffles,200,600,12,80,24,4,20,600
503,waffles,250,750,15,100,30,5,25,750


In [35]:
nutri_info=nutri_data[(nutri_data['label'] == 'waffles') & (nutri_data['weight'] == 100)]

In [36]:
nutri_info['weight']

500    100
Name: weight, dtype: int64

In [27]:

desired_weight = 200

# Scale the nutritional values proportionally
scaled_df = nutri_info.copy()
scale_factor = desired_weight / nutri_info["weight"].values[0]# Ratio of desired weight to original weight
# Scale all relevant columns
columns_to_scale = ["calories", "protein", "carbohydrates", "fats", "fiber", "sugars", "sodium"]
scaled_df[columns_to_scale] = np.ceil(nutri_info[columns_to_scale] * scale_factor)
scaled_df["weight"] = desired_weight  # Update the weight column to the desired weight

# Print the scaled DataFrame
print(scaled_df)

       label  weight  calories  protein  carbohydrates  fats  fiber  sugars  \
1  apple_pie     200     600.0      6.0           90.0  24.0    4.0    40.0   

   sodium  
1   300.0  


# FineTuning Model B0

In [16]:
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint

In [17]:
# Load the saved model
model = load_model("/kaggle/input/modelcheckpoint/keras/default/1/model_checkpoint (2).keras")

# Print the model summary to inspect its structure
model.summary()

In [18]:
train_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=tf.keras.applications.efficientnet.preprocess_input,
    validation_split=0.2
)

test_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=tf.keras.applications.efficientnet.preprocess_input
)

train_images = train_generator.flow_from_dataframe(
    dataframe=train_df,
    x_col='FilePath',
    y_col='Label',
    target_size=(224, 224),
    color_mode='rgb',
    class_mode='categorical',
    batch_size=32,
    shuffle=True,
    seed=42,
    subset='training'
)

val_images = train_generator.flow_from_dataframe(
    dataframe=train_df,
    x_col='FilePath',
    y_col='Label',
    target_size=(224, 224),
    color_mode='rgb',
    class_mode='categorical',
    batch_size=32,
    shuffle=True,
    seed=42,
    subset='validation'
)

test_images = test_generator.flow_from_dataframe(
    dataframe=test_df,
    x_col='FilePath',
    y_col='Label',
    target_size=(224, 224),
    color_mode='rgb',
    class_mode='categorical',
    batch_size=32,
    shuffle=False
)

Found 64640 validated image filenames belonging to 101 classes.
Found 16160 validated image filenames belonging to 101 classes.
Found 20200 validated image filenames belonging to 101 classes.


In [19]:
# Unfreeze layers starting from block6
set_trainable = False
for layer in model.layers:
    if "block4" in layer.name:  # Start unfreezing from block6
        set_trainable = True
    layer.trainable = set_trainable

In [20]:
# Compile the model with a lower learning rate
model.compile(
    optimizer='adamW',  # Lower learning rate for fine-tuning
    loss="categorical_crossentropy",
    metrics=["accuracy"]
)

In [21]:
from tensorflow.keras.callbacks import ReduceLROnPlateau

# Define the ReduceLROnPlateau callback
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',       # Monitor validation loss
    factor=0.1,               # Reduce learning rate by a factor of 0.1
    patience=3,               # Wait for 3 epochs with no improvement
    verbose=1,                # Print messages when learning rate is reduced
    min_lr=1e-6               # Minimum learning rate
)

In [22]:
# Save the model during training
checkpoint = ModelCheckpoint(
    filepath=r'/kaggle/working/model_checkpoint.keras',  # Save weights and optimizer state
    save_weights_only=False,  # Save the entire model (architecture + weights + optimizer state)
    save_best_only=False,  # Save at every epoch
    verbose=1
)
# Define EarlyStopping callback
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss",  # Monitor validation loss
    patience=5,  # Stop after 5 epochs of no improvement
    restore_best_weights=True  # Restore the best weights
)

# Train the model
history = model.fit(
    train_images,
    validation_data=val_images,
    epochs=100,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=5,
            restore_best_weights=True
        ),
        checkpoint,
        reduce_lr
    ]
)

Epoch 1/100


  self._warn_if_super_not_called()


[1m2020/2020[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 208ms/step - accuracy: 0.7925 - loss: 0.7563
Epoch 1: saving model to /kaggle/working/model_checkpoint.keras
[1m2020/2020[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m596s[0m 266ms/step - accuracy: 0.7925 - loss: 0.7563 - val_accuracy: 0.6996 - val_loss: 1.2192 - learning_rate: 0.0010
Epoch 2/100
[1m2020/2020[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 119ms/step - accuracy: 0.7824 - loss: 0.7931
Epoch 2: saving model to /kaggle/working/model_checkpoint.keras
[1m2020/2020[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m304s[0m 150ms/step - accuracy: 0.7824 - loss: 0.7931 - val_accuracy: 0.7092 - val_loss: 1.2055 - learning_rate: 0.0010
Epoch 3/100
[1m2020/2020[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 118ms/step - accuracy: 0.7954 - loss: 0.7466
Epoch 3: saving model to /kaggle/working/model_checkpoint.keras
[1m2020/2020[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m297s[0m 147ms/step - 

In [23]:
model.save("efficientnetb0_transfer.keras")

In [24]:
results = model.evaluate(test_images, verbose=0)
print("Test Accuracy: {:.2f}%".format(results[1] * 100))

Test Accuracy: 76.39%


# Load and Test Model

In [1]:
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint

In [19]:
model = load_model("/kaggle/input/final_model/keras/default/1/efficientnetb0_transfer (1).keras")

In [20]:
results = model.evaluate(test_images, verbose=0)
print("Test Accuracy: {:.2f}%".format(results[1] * 100))

  self._warn_if_super_not_called()


Test Accuracy: 76.39%


# Nutritional Information -- Add Additional Rows

In [21]:
import pandas as pd
import numpy as np

In [22]:
df=pd.read_csv(r"/kaggle/input/food-101-nutritional-information/nutrition.csv")

In [23]:
df.head(10)

Unnamed: 0,label,weight,calories,protein,carbohydrates,fats,fiber,sugars,sodium
0,apple_pie,80,240,2,36,10,2,16,120
1,apple_pie,100,300,3,45,12,2,20,150
2,apple_pie,120,360,4,54,14,3,24,180
3,apple_pie,150,450,5,68,18,3,30,225
4,apple_pie,200,600,6,90,24,4,40,300
5,baby_back_ribs,150,540,38,15,33,1,12,720
6,baby_back_ribs,200,720,50,20,44,2,16,960
7,baby_back_ribs,250,900,63,25,55,2,20,1200
8,baby_back_ribs,300,1080,75,30,66,3,24,1440
9,baby_back_ribs,350,1260,88,35,77,3,28,1680


# Addition rows wrt weight 

In [24]:

data = df.drop_duplicates(subset=['label'], keep='first')

weight_range = range(50, 501, 20)

def scale_nutrition(row, new_weight):
    factor = new_weight / row['weight']  # Scaling factor
    return {
        "label": row['label'],
        "weight": new_weight,
        "calories": np.ceil(row['calories'] * factor),
        "protein": np.ceil(row['protein'] * factor),
        "carbohydrates": np.ceil(row['carbohydrates'] * factor),
        "fats": np.ceil(row['fats'] * factor),
        "fiber": np.ceil(row['fiber'] * factor),
        "sugars": np.ceil(row['sugars'] * factor),
        "sodium": np.ceil(row['sodium'] * factor)
    }

scaled_data = []
for _, row in data.iterrows():
    for new_weight in weight_range:
        scaled_row = scale_nutrition(row, new_weight)
        scaled_data.append(scaled_row)

scaled_df = pd.DataFrame(scaled_data)

scaled_df.to_csv('scaled_nutrition_dataset_unique_labels.csv', index=False)

print(scaled_df.head())

       label  weight  calories  protein  carbohydrates  fats  fiber  sugars  \
0  apple_pie      50     150.0      2.0           23.0   7.0    2.0    10.0   
1  apple_pie      70     210.0      2.0           32.0   9.0    2.0    14.0   
2  apple_pie      90     270.0      3.0           41.0  12.0    3.0    18.0   
3  apple_pie     110     330.0      3.0           50.0  14.0    3.0    22.0   
4  apple_pie     130     390.0      4.0           59.0  17.0    4.0    26.0   

   sodium  
0    75.0  
1   105.0  
2   135.0  
3   165.0  
4   195.0  


In [25]:
scaled_df[scaled_df['label']=='waffles']

Unnamed: 0,label,weight,calories,protein,carbohydrates,fats,fiber,sugars,sodium
2300,waffles,50,150.0,3.0,20.0,6.0,1.0,5.0,150.0
2301,waffles,70,210.0,5.0,28.0,9.0,2.0,7.0,210.0
2302,waffles,90,270.0,6.0,36.0,11.0,2.0,9.0,270.0
2303,waffles,110,330.0,7.0,44.0,14.0,3.0,11.0,330.0
2304,waffles,130,390.0,8.0,52.0,16.0,3.0,13.0,390.0
2305,waffles,150,450.0,9.0,60.0,18.0,3.0,15.0,450.0
2306,waffles,170,510.0,11.0,68.0,21.0,4.0,17.0,510.0
2307,waffles,190,570.0,12.0,76.0,23.0,4.0,19.0,570.0
2308,waffles,210,630.0,13.0,84.0,26.0,5.0,21.0,630.0
2309,waffles,230,690.0,14.0,92.0,28.0,5.0,23.0,690.0


In [26]:
scaled_df

Unnamed: 0,label,weight,calories,protein,carbohydrates,fats,fiber,sugars,sodium
0,apple_pie,50,150.0,2.0,23.0,7.0,2.0,10.0,75.0
1,apple_pie,70,210.0,2.0,32.0,9.0,2.0,14.0,105.0
2,apple_pie,90,270.0,3.0,41.0,12.0,3.0,18.0,135.0
3,apple_pie,110,330.0,3.0,50.0,14.0,3.0,22.0,165.0
4,apple_pie,130,390.0,4.0,59.0,17.0,4.0,26.0,195.0
...,...,...,...,...,...,...,...,...,...
2318,waffles,410,1230.0,25.0,164.0,50.0,9.0,41.0,1230.0
2319,waffles,430,1290.0,26.0,172.0,52.0,9.0,43.0,1290.0
2320,waffles,450,1350.0,27.0,180.0,54.0,9.0,45.0,1350.0
2321,waffles,470,1410.0,29.0,188.0,57.0,10.0,47.0,1410.0


In [29]:
import pandas as pd


# Define labeling functions
def label_diabetes(row):
    if row["sugars"] >= 10 or row["carbohydrates"] >= 30:
        return 0  # Not Suitable
    else:
        return 1  # Suitable

def label_hypertension(row):
    if row["sodium"] >= 400 or row["fats"] >= 15:
        return 0  # Not Suitable
    else:
        return 1  # Suitable

def label_heart_disease(row):
    if (
        row["sodium"] > 500 or
        row["fats"] > 10 or
        row["carbohydrates"] > 50
    ):
        return 0  # Not Suitable
    else:
        return 1  # Suitable
 

def label_kidney_disease(row):
    if row["sodium"] >= 300 or row["protein"] >= 20:
        return 0  # Not Suitable
    else:
        return 1  # Suitable

# Apply the labeling functions
scaled_df["diabetes_label"] = scaled_df.apply(label_diabetes, axis=1)
scaled_df["hypertension_label"] = scaled_df.apply(label_hypertension, axis=1)
scaled_df["heart_disease_label"] = scaled_df.apply(label_heart_disease, axis=1)
scaled_df["kidney_disease_label"] = scaled_df.apply(label_kidney_disease, axis=1)

# Display the labeled dataset
print(scaled_df[["label", "weight", "diabetes_label", "hypertension_label", "heart_disease_label", "kidney_disease_label"]])

          label  weight  diabetes_label  hypertension_label  \
0     apple_pie      50               0                   1   
1     apple_pie      70               0                   1   
2     apple_pie      90               0                   1   
3     apple_pie     110               0                   1   
4     apple_pie     130               0                   0   
...         ...     ...             ...                 ...   
2318    waffles     410               0                   0   
2319    waffles     430               0                   0   
2320    waffles     450               0                   0   
2321    waffles     470               0                   0   
2322    waffles     490               0                   0   

      heart_disease_label  kidney_disease_label  
0                       1                     1  
1                       1                     1  
2                       0                     1  
3                       0                  

In [30]:
scaled_df.head(30)

Unnamed: 0,label,weight,calories,protein,carbohydrates,fats,fiber,sugars,sodium,diabetes_label,hypertension_label,heart_disease_label,kidney_disease_label
0,apple_pie,50,150.0,2.0,23.0,7.0,2.0,10.0,75.0,0,1,1,1
1,apple_pie,70,210.0,2.0,32.0,9.0,2.0,14.0,105.0,0,1,1,1
2,apple_pie,90,270.0,3.0,41.0,12.0,3.0,18.0,135.0,0,1,0,1
3,apple_pie,110,330.0,3.0,50.0,14.0,3.0,22.0,165.0,0,1,0,1
4,apple_pie,130,390.0,4.0,59.0,17.0,4.0,26.0,195.0,0,0,0,1
5,apple_pie,150,450.0,4.0,68.0,19.0,4.0,30.0,225.0,0,0,0,1
6,apple_pie,170,510.0,5.0,77.0,22.0,5.0,34.0,255.0,0,0,0,1
7,apple_pie,190,570.0,5.0,86.0,24.0,5.0,38.0,285.0,0,0,0,1
8,apple_pie,210,630.0,6.0,95.0,27.0,6.0,42.0,315.0,0,0,0,0
9,apple_pie,230,690.0,6.0,104.0,29.0,6.0,46.0,345.0,0,0,0,0


In [31]:
from sklearn.preprocessing import LabelEncoder

# Initialize the label encoder
label_encoder = LabelEncoder()

# Encode the 'label' column
scaled_df["label_encoded"] = label_encoder.fit_transform(scaled_df["label"])

print(scaled_df[["label", "label_encoded"]])

          label  label_encoded
0     apple_pie              7
1     apple_pie              7
2     apple_pie              7
3     apple_pie              7
4     apple_pie              7
...         ...            ...
2318    waffles            100
2319    waffles            100
2320    waffles            100
2321    waffles            100
2322    waffles            100

[2323 rows x 2 columns]


# Train a Machine Learning model

In [32]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.multioutput import MultiOutputClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score

In [33]:

features = ["weight", "calories", "protein", "carbohydrates", "fats", "fiber", "sugars", "sodium", "label_encoded"]


targets = ["diabetes_label", "hypertension_label", "heart_disease_label", "kidney_disease_label"]


X = scaled_df[features]
y = scaled_df[targets]  
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [34]:

base_model = RandomForestClassifier(random_state=42)


multi_output_model = MultiOutputClassifier(base_model)


multi_output_model.fit(X_train, y_train)

In [35]:

y_pred = multi_output_model.predict(X_test)


y_pred_df = pd.DataFrame(y_pred, columns=targets)


for i, target in enumerate(targets):
    print(f"Results for {target}:")
    print(f"Accuracy: {accuracy_score(y_test.iloc[:, i], y_pred_df.iloc[:, i])}")
    print(classification_report(y_test.iloc[:, i], y_pred_df.iloc[:, i]))
    print("-" * 50)

Results for diabetes_label:
Accuracy: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       289
           1       1.00      1.00      1.00       176

    accuracy                           1.00       465
   macro avg       1.00      1.00      1.00       465
weighted avg       1.00      1.00      1.00       465

--------------------------------------------------
Results for hypertension_label:
Accuracy: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       364
           1       1.00      1.00      1.00       101

    accuracy                           1.00       465
   macro avg       1.00      1.00      1.00       465
weighted avg       1.00      1.00      1.00       465

--------------------------------------------------
Results for heart_disease_label:
Accuracy: 0.9978494623655914
              precision    recall  f1-score   support

           0       1.00      1.00      

In [36]:
# Display feature importance for each target variable
for i, target in enumerate(targets):
    print(f"Feature Importance for {target}:")
    feature_importances = pd.DataFrame({
        "Feature": features,
        "Importance": multi_output_model.estimators_[i].feature_importances_
    }).sort_values(by="Importance", ascending=False)
    print(feature_importances)
    print("-" * 50)

Feature Importance for diabetes_label:
         Feature  Importance
3  carbohydrates    0.551822
6         sugars    0.226195
5          fiber    0.082086
1       calories    0.067424
2        protein    0.022078
7         sodium    0.015801
4           fats    0.014709
0         weight    0.013192
8  label_encoded    0.006693
--------------------------------------------------
Feature Importance for hypertension_label:
         Feature  Importance
1       calories    0.329762
0         weight    0.217403
4           fats    0.192378
7         sodium    0.172978
2        protein    0.028883
3  carbohydrates    0.022305
5          fiber    0.016716
6         sugars    0.012478
8  label_encoded    0.007098
--------------------------------------------------
Feature Importance for heart_disease_label:
         Feature  Importance
4           fats    0.363479
1       calories    0.291915
0         weight    0.140905
7         sodium    0.104017
2        protein    0.029854
3  carbohydrates  

In [37]:
# New data for prediction
new_data = pd.DataFrame({
    "label": ["apple_pie"],  # Categorical column
    "weight": [700],
    "calories": [2100],
    "protein": [17.5],
    "carbohydrates": [315.0],
    "fats": [87.5],
    "fiber": [17.5],
    "sugars": [140.0],
    "sodium": [1050.0]
})

In [38]:
scaled_df.head(3)

Unnamed: 0,label,weight,calories,protein,carbohydrates,fats,fiber,sugars,sodium,diabetes_label,hypertension_label,heart_disease_label,kidney_disease_label,label_encoded
0,apple_pie,50,150.0,2.0,23.0,7.0,2.0,10.0,75.0,0,1,1,1,7
1,apple_pie,70,210.0,2.0,32.0,9.0,2.0,14.0,105.0,0,1,1,1,7
2,apple_pie,90,270.0,3.0,41.0,12.0,3.0,18.0,135.0,0,1,0,1,7


In [39]:
new_data

Unnamed: 0,label,weight,calories,protein,carbohydrates,fats,fiber,sugars,sodium
0,apple_pie,700,2100,17.5,315.0,87.5,17.5,140.0,1050.0


In [40]:
# Encode the 'label' column using the same LabelEncoder
new_data["label_encoded"] = label_encoder.transform(new_data["label"])

In [41]:
features = ["weight", "calories", "protein", "carbohydrates", "fats", "fiber", "sugars", "sodium", "label_encoded"]
X_new = new_data[features]

In [42]:
# Predict the target variables for the new data
y_pred = multi_output_model.predict(X_new)

# Convert predictions to a DataFrame for better readability
predictions = pd.DataFrame(y_pred, columns=["diabetes_label", "hypertension_label", "heart_disease_label", "kidney_disease_label"])

print(predictions)

   diabetes_label  hypertension_label  heart_disease_label  \
0               0                   0                    0   

   kidney_disease_label  
0                     0  


In [44]:
import pickle
with open("/kaggle/working/random_forest_model.pkl", "wb") as file:
    pickle.dump(multi_output_model, file)

In [45]:
with open(r"/kaggle/working/label_encoder.pkl", "wb") as file:
    pickle.dump(label_encoder, file)

In [50]:
first_occurrence = df.groupby("label").first().reset_index()
print("First Occurrence of Each Label:")
print(first_occurrence)

First Occurrence of Each Label:
                 label  weight  calories  protein  carbohydrates  fats  fiber  \
0             Cupcakes      50       200        3             25    10      1   
1       Fish and Chips     200       500       20             40    25      3   
2       Grilled Cheese     150       400       15             30    20      2   
3    Hot and Sour Soup     200       150        8             15     5      2   
4         Lobster Roll     200       400       20             30    20      2   
..                 ...     ...       ...      ...            ...   ...    ...   
96               tacos     100       250       10             20    12      3   
97            takoyaki     100       200        8             25     8      1   
98            tiramisu     100       300        6             30    15      1   
99        tuna_tartare     100       150       20              2     5      0   
100            waffles     100       300        6             40    12      2

In [51]:

first_occurrence = df.groupby("label").first().reset_index()

with open("/kaggle/working/unique_nutri_data.pkl", "wb") as file:
    pickle.dump(first_occurrence, file)


In [3]:
from openai import OpenAI

client = OpenAI(
    base_url="https://models.inference.ai.azure.com",
    api_key='',
)

response = client.chat.completions.create(
    messages=[
        {
            "role": "system",
            "content": "",
        },
        {
            "role": "user",
            "content": "label	weight	calories	protein	carbohydrates	fats	fiber	sugars	sodium	apple_pie	700	2100	17.5	315.0	87.5	17.5	140.0	1050.0 can a patient with pre existing diseases like diabetes, hypertension,heart disease and kidney disease can eat this food item ,what can be the implications. summarize it",
        }
    ],
    model="gpt-4o",
    temperature=1,
    max_tokens=4096,
    top_p=1
)

print(response.choices[0].message.content)

The food item "apple pie" with the given nutritional information is high in calories (2100 kcal), carbohydrates (315 g), sugars (140 g), fats (87.5 g), and sodium (1050 mg). This makes it generally unsuitable for individuals with pre-existing conditions like diabetes, hypertension, heart disease, and kidney disease. Here are the potential implications for each condition:

1. **Diabetes**: The high sugar and carbohydrate content can cause significant blood sugar spikes, worsening blood glucose control.

2. **Hypertension**: The high sodium content (1050 mg) can increase blood pressure, putting strain on the cardiovascular system.

3. **Heart Disease**: The high fat content (87.5 g), possibly including unhealthy saturated and trans fats, can contribute to worsening cholesterol levels and cardiovascular risk.

4. **Kidney Disease**: The elevated sodium level can strain the kidneys and worsen fluid retention, blood pressure, and overall kidney function.

### Summary:
For patients with diab