In [41]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Concatenate, Dense, Dropout

# 1- Data Preparation

In [42]:
# Load data

file_path_users = 'C:/Users/DELL/Desktop/backend-nutrition-count/recommendation_system_dataset/user_Profiles.csv'
user_profiles = pd.read_csv(file_path_users )
print(user_profiles.head())
file_path_items = 'C:/Users/DELL/Desktop/backend-nutrition-count/recommendation_system_dataset/dataset.csv'
items = pd.read_csv(file_path_items)
print(items.head())
file_path_iteractions ='C:/Users/DELL/Desktop/backend-nutrition-count/recommendation_system_dataset/recent_activity.csv'
interactions = pd.read_csv(file_path_iteractions)
print(interactions.head())

  User_Id  Veg_Non   Nutrient  \
0  User_1  non-veg   chloride   
1  User_2      veg   chloride   
2  User_3      veg  magnesium   
3  User_4      veg  vitamin_e   
4  User_5  non-veg  vitamin_c   

                                             Disease  \
0     anemia kidney_disease goitre rickets pregnancy   
1                                             goitre   
2    cancer hypertension goitre heart_disease scurvy   
3   cancer kidney_disease obesity anemia heart_di...   
4     anemia kidney_disease goitre rickets pregnancy   

                                                Diet  
0   type_a_diet high_protien_diet low_carb_diet l...  
1                         high_fiber_diet vegan_diet  
2   high_fiber_diet ketogenic_diet high_protien_diet  
3   high_fiber_diet ketogenic_diet gluten_free_di...  
4   type_a_diet high_protien_diet low_carb_diet l...  
    Meal_Id                        Name catagory  \
0  meal_id1         summer squash salad    salad   
1  meal_id2        chicken min

In [43]:

# Encode categorical features
user_encoder = LabelEncoder()
item_encoder = LabelEncoder()
gender_encoder = LabelEncoder()
category_encoder = LabelEncoder()
disease_encoder = LabelEncoder()
veg_encoder = LabelEncoder()
nutrient_encoder = LabelEncoder()
price_encoder = LabelEncoder()
# Ensure both columns are of the same type (e.g., strings)
interactions['User_Id'] = interactions['User_Id'].astype(str)
user_profiles['User_Id'] = user_profiles['User_Id'].astype(str)
interactions['Meal_Id'] = interactions['Meal_Id'].astype(str)
items['Meal_Id'] = items['Meal_Id'].astype(str)
# Combine unique user IDs from both DataFrames
combined_user_ids = pd.concat([interactions['User_Id'], user_profiles['User_Id']]).unique()
user_encoder.fit(combined_user_ids)
combined_meal_ids = pd.concat([items['Meal_Id'] ,interactions['Meal_Id']]).unique()
item_encoder.fit(combined_meal_ids)
# encoding
interactions['User_Id'] = user_encoder.transform(interactions['User_Id'])
interactions['Meal_Id'] = item_encoder.transform(interactions['Meal_Id'])
user_profiles['User_Id'] = user_encoder.transform(user_profiles['User_Id'])
user_profiles['Disease'] = disease_encoder.fit_transform(user_profiles['Disease'])
user_profiles['Veg_Non'] = veg_encoder.fit_transform(user_profiles['Veg_Non'])
user_profiles['Nutrient'] = nutrient_encoder.fit_transform(user_profiles['Nutrient'])
items['Meal_Id'] = item_encoder.transform(items['Meal_Id'])
items['Price'] = price_encoder.fit_transform(items['Price'])
items['catagory'] = category_encoder.fit_transform(items['catagory'])

# Split data
train, test = train_test_split(interactions, test_size=0.2, random_state=42)

# 2- Build Neural Collaborative Filtering (NCF) Model

In [44]:

# Parameters
num_users = len(user_encoder.classes_)
num_items = len(item_encoder.classes_)
embedding_dim = 50

# Inputs
user_input = Input(shape=(1,), name='user_input')
item_input = Input(shape=(1,), name='item_input')

# Embeddings
user_embedding = Embedding(input_dim=num_users, output_dim=embedding_dim, name='user_embedding')(user_input)
item_embedding = Embedding(input_dim=num_items, output_dim=embedding_dim, name='item_embedding')(item_input)

# Flatten
user_vector = Flatten()(user_embedding)
item_vector = Flatten()(item_embedding)

# Concatenate
concat = Concatenate()([user_vector, item_vector])

# Fully connected layers
dense = Dense(128, activation='relu')(concat)
output = Dense(1)(dense)

# Model
ncf_model = Model(inputs=[user_input, item_input], outputs=output)
ncf_model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
ncf_model.fit([train['User_Id'], train['Meal_Id']], train['Rated'], epochs=5, batch_size=32, validation_split=0.2)


Epoch 1/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step - loss: 0.2730 - val_loss: 0.3748
Epoch 2/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step - loss: 0.2587 - val_loss: 0.3675
Epoch 3/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step - loss: 0.2455 - val_loss: 0.3605
Epoch 4/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step - loss: 0.2333 - val_loss: 0.3538
Epoch 5/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step - loss: 0.2218 - val_loss: 0.3475


<keras.src.callbacks.history.History at 0x1cf0c714970>

#  Build Content-Based Model

In [45]:
# Inputs
user_input = Input(shape=(1,), name='user_input')
item_input = Input(shape=(1,), name='item_input')
veg_input = Input(shape=(1,), name='veg_input')
nutrient_input = Input(shape=(1,), name='nutrient_input')
category_input = Input(shape=(1,), name='category_input')
price_input = Input(shape=(1,), name='price_input')
disease_input = Input(shape=(1,), name='disease_input')

# Embeddings
user_embedding = Embedding(input_dim=num_users, output_dim=embedding_dim, name='user_embedding')(user_input)
item_embedding = Embedding(input_dim=num_items, output_dim=embedding_dim, name='item_embedding')(item_input)
nutrient_embedding = Embedding(input_dim=len(nutrient_encoder.classes_), output_dim=10, name='nutrient_embedding')(nutrient_input)
category_embedding = Embedding(input_dim=len(category_encoder.classes_), output_dim=10, name='category_embedding')(category_input)
veg_embedding = Embedding(input_dim=len(veg_encoder.classes_), output_dim=10, name='veg_embedding')(veg_input)
price_embedding = Embedding(input_dim=len(price_encoder.classes_), output_dim=10, name='price_embedding')(price_input)
disease_embedding = Embedding(input_dim=len(disease_encoder.classes_), output_dim=10, name='disease_embedding')(disease_input)

# Flatten
user_vector = Flatten()(user_embedding)
item_vector = Flatten()(item_embedding)
nutrient_vector = Flatten()(nutrient_embedding)
category_vector = Flatten()(category_embedding)
veg_vector = Flatten()(veg_embedding)
price_vector = Flatten()(price_embedding)
disease_vector = Flatten()(disease_embedding)

# Concatenate all features
concat = Concatenate()([user_vector, item_vector, nutrient_vector, category_vector, price_vector, veg_vector, disease_vector])

# Fully connected layers
dense = Dense(128, activation='relu')(concat)
dropout = Dropout(0.5)(dense)
output = Dense(1)(dropout)

# Model
content_model = Model(inputs=[user_input, item_input, nutrient_input, category_input, price_input, veg_input, disease_input], outputs=output)
content_model.compile(optimizer='adam', loss='mean_squared_error')

# Prepare input data
train_user_profiles = user_profiles.set_index('User_Id').loc[train['User_Id']].reset_index()
train_item_descriptions = items.set_index('Meal_Id').loc[train['Meal_Id']].reset_index()

# Train the model
content_model.fit(
    [train['User_Id'], train['Meal_Id'], train_user_profiles['Nutrient'], train_user_profiles['Veg_Non'], train_user_profiles['Disease'],
     train_item_descriptions['catagory'], train_item_descriptions['Price']],
    train['Liked'], epochs=5, batch_size=32, validation_split=0.2)


Epoch 1/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - loss: 0.3027 - val_loss: 0.3904
Epoch 2/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step - loss: 0.2881 - val_loss: 0.3831
Epoch 3/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step - loss: 0.2712 - val_loss: 0.3763
Epoch 4/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step - loss: 0.2561 - val_loss: 0.3699
Epoch 5/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step - loss: 0.2484 - val_loss: 0.3642


<keras.src.callbacks.history.History at 0x1cf11b190a0>

# Hybrid Integration

In [46]:
# Inputs
user_input = Input(shape=(1,), name='user_input')
item_input = Input(shape=(1,), name='item_input')
veg_input = Input(shape=(1,), name='veg_input')
nutrient_input = Input(shape=(1,), name='nutrient_input')
category_input = Input(shape=(1,), name='category_input')
price_input = Input(shape=(1,), name='price_input')
disease_input = Input(shape=(1,), name='disease_input')



# Embeddings
user_embedding_ncf = Embedding(input_dim=num_users, output_dim=embedding_dim, name='user_embedding_ncf')(user_input)
item_embedding_ncf = Embedding(input_dim=num_items, output_dim=embedding_dim, name='item_embedding_ncf')(item_input)
user_vector_ncf = Flatten()(user_embedding_ncf)
item_vector_ncf = Flatten()(item_embedding_ncf)
concat_ncf = Concatenate()([user_vector_ncf, item_vector_ncf])
dense_ncf = Dense(128, activation='relu')(concat_ncf)




# Content-based embeddings

user_embedding = Embedding(input_dim=num_users, output_dim=embedding_dim, name='user_embedding')(user_input)
item_embedding = Embedding(input_dim=num_items, output_dim=embedding_dim, name='item_embedding')(item_input)
nutrient_embedding = Embedding(input_dim=len(nutrient_encoder.classes_), output_dim=10, name='nutrient_embedding')(nutrient_input)
category_embedding = Embedding(input_dim=len(category_encoder.classes_), output_dim=10, name='category_embedding')(category_input)
veg_embedding = Embedding(input_dim=len(veg_encoder.classes_), output_dim=10, name='veg_embedding')(veg_input)
price_embedding = Embedding(input_dim=len(price_encoder.classes_), output_dim=10, name='price_embedding')(price_input)
disease_embedding = Embedding(input_dim=len(disease_encoder.classes_), output_dim=10, name='disease_embedding')(disease_input)
user_vector = Flatten()(user_embedding)
item_vector = Flatten()(item_embedding)
nutrient_vector = Flatten()(nutrient_embedding)
category_vector = Flatten()(category_embedding)
veg_vector = Flatten()(veg_embedding)
price_vector = Flatten()(price_embedding)
disease_vector = Flatten()(disease_embedding)
concat_content = Concatenate()([user_vector, item_vector, nutrient_vector, category_vector, price_vector, veg_vector, disease_vector])
dense_content = Dense(128, activation='relu')(concat_content)

# Combine NCF and content-based models
combined = Concatenate()([dense_ncf, dense_content])
output = Dense(1)(combined)

# Hybrid model
hybrid_model =  Model(inputs=[user_input, item_input, nutrient_input, category_input, price_input, veg_input, disease_input], outputs=output)
hybrid_model.compile(optimizer='adam', loss='mean_squared_error')

# Train the hybrid model
hybrid_model.fit(
    [train['User_Id'], train['Meal_Id'], train_user_profiles['Nutrient'], train_user_profiles['Veg_Non'], train_user_profiles['Disease'],
     train_item_descriptions['catagory'], train_item_descriptions['Price']],
    train['Liked'], epochs=5, batch_size=32, validation_split=0.2)


Epoch 1/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - loss: 0.3004 - val_loss: 0.3735
Epoch 2/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step - loss: 0.2714 - val_loss: 0.3626
Epoch 3/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step - loss: 0.2450 - val_loss: 0.3524
Epoch 4/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step - loss: 0.2210 - val_loss: 0.3430
Epoch 5/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step - loss: 0.1987 - val_loss: 0.3343


<keras.src.callbacks.history.History at 0x1cf14115bb0>

# Evaluation

In [47]:
# Prepare test data
train_user_profiles = user_profiles.set_index('User_Id').loc[train['User_Id']].reset_index()
train_item_descriptions = items.set_index('Meal_Id').loc[train['Meal_Id']].reset_index()
# Evaluate the hybrid model
test_loss = hybrid_model.fit(
    [train['User_Id'], train['Meal_Id'], train_user_profiles['Nutrient'], train_user_profiles['Veg_Non'], train_user_profiles['Disease'],
     train_item_descriptions['catagory'], train_item_descriptions['Price']],
    train['Liked'], epochs=5, batch_size=32, validation_split=0.2)
print(f'Test Loss: {test_loss}')

Epoch 1/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 95ms/step - loss: 0.1780 - val_loss: 0.3265
Epoch 2/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step - loss: 0.1586 - val_loss: 0.3194
Epoch 3/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step - loss: 0.1404 - val_loss: 0.3129
Epoch 4/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step - loss: 0.1232 - val_loss: 0.3072
Epoch 5/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step - loss: 0.1070 - val_loss: 0.3022
Test Loss: <keras.src.callbacks.history.History object at 0x000001CF14105070>


In [73]:
# Example single data point from unseen data
single_example = {
    'User_Id': 'User_4',  # Replace with actual user ID
    'Meal_Id': 'Meal_2',  # Replace with actual meal ID
    'Nutrient': 'Protein',  # Replace with actual nutrient info
    'Veg_Non': 'Non-Veg',  # Replace with actual veg/non-veg info
    'Disease': 'Diabetes',  # Replace with actual disease info
    'catagory': 'Main Course',  # Replace with actual category
    'Price': 10.99  # Replace with actual price
}

def encode_if_exists(encoder, value):
    if value in encoder.classes_:
        return encoder.transform([value])
    else:
        return np.array([len(encoder.classes_)])  # Handle unknown with a special index

# Encode the single data point
user_id_encoded = encode_if_exists(user_encoder, single_example['User_Id'])
meal_id_encoded = encode_if_exists(item_encoder, single_example['Meal_Id'])
nutrient_encoded = encode_if_exists(nutrient_encoder, single_example['Nutrient'])
veg_non_encoded = encode_if_exists(veg_encoder, single_example['Veg_Non'])
disease_encoded = encode_if_exists(disease_encoder, single_example['Disease'])
category_encoded = encode_if_exists(category_encoder, single_example['catagory'])
price_encoded = np.array([single_example['Price']])

# Predict using the model
single_prediction = content_model.predict([
    user_id_encoded,
    meal_id_encoded,
    nutrient_encoded,
    veg_non_encoded,
    disease_encoded,
    category_encoded,
    price_encoded
])

#print(f"Predicted like for the example: {single_prediction[0][0]}")

# If predicted rating is greater than 0.5, find liked meals
if single_prediction[0][0] < 0.5:
    # Assuming meal_Liked is a list of Meal_Ids that are liked
    meal_Liked = interactions[interactions['Liked'] == 1]['Meal_Id'].tolist()

    print("Meals liked by the user:")
    for meal_id in meal_Liked:
        description = items.loc[items['Meal_Id'] == meal_id, 'description'].values
        if description.size > 0:
            print(f"Meal ID: {meal_id}, Description: {description[0]}")



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
Meals liked by the user:
Meal ID: 276, Description: almonds, eggs, granulated sugar, bittersweet chocolate, unsalted butter, flour, baking powder, castor sugar, icing sugar
Meal ID: 123, Description: apples, milk, sugar, green cardamoms, almonds (blanched), pistachios (blanched)
Meal ID: 200, Description: raw papaya, carrot, french bean diamond, cherry tomato, garlic, crush mix chilli, somtam dressing, peanuts (crushed), peanuts
Meal ID: 256, Description: unsalted butter, brown sugar, chocolate, chocolate chips, eggs, flour, cocoa powder, baking powder
Meal ID: 89, Description: khoya, sweetener (optional), almonds (crushed)
Meal ID: 100, Description: whole wheat flour (atta), refined flour (maida), garlic cloves (crushed), salt, red chilli powder, chaat masala, cumin powder, tomato puree, ghee
Meal ID: 262, Description: जामुन, चीनी, मिर्च, लहसुन की कलियां, जीरा पाउडर, फिश फिलेट, सूजी, नमक, पालक
Meal ID: 236, Descri