<a href="https://colab.research.google.com/github/lekh-ch/Training-models/blob/main/NCF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
pip install pandas tensorflow numpy scikit-learn




In [3]:
import pandas as pd

# Load dataset from CSV
df = pd.read_csv("/content/nutrition.csv")

# Display the first few rows
print(df.head())


   Unnamed: 0             name serving_size  calories total_fat saturated_fat  \
0           0       Cornstarch        100 g       381      0.1g           NaN   
1           1     Nuts, pecans        100 g       691       72g          6.2g   
2           2    Eggplant, raw        100 g        25      0.2g           NaN   
3           3   Teff, uncooked        100 g       367      2.4g          0.4g   
4           4  Sherbet, orange        100 g       144        2g          1.2g   

  cholesterol    sodium  choline     folate  ...      fat  \
0           0   9.00 mg   0.4 mg   0.00 mcg  ...   0.05 g   
1           0   0.00 mg  40.5 mg  22.00 mcg  ...  71.97 g   
2           0   2.00 mg   6.9 mg  22.00 mcg  ...   0.18 g   
3           0  12.00 mg  13.1 mg          0  ...   2.38 g   
4         1mg  46.00 mg   7.7 mg   4.00 mcg  ...   2.00 g   

  saturated_fatty_acids monounsaturated_fatty_acids  \
0               0.009 g                     0.016 g   
1               6.180 g             

In [4]:
import numpy as np
from sklearn.preprocessing import LabelEncoder

# Ensure necessary columns exist
required_columns = ['name', 'serving_size', 'calories']
assert all(col in df.columns for col in required_columns), "Dataset is missing required columns!"

# Encode food names as numerical IDs
food_encoder = LabelEncoder()
df['food_id'] = food_encoder.fit_transform(df['name'])

# Simulated User IDs (Random for now, replace with actual data if available)
df['user_id'] = np.random.randint(0, 1000, df.shape[0])  # Assuming 1000 users

# Implicit feedback (Assume 1 if consumed, change if explicit ratings are available)
df['rating'] = 1


In [5]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, Flatten, Concatenate, Dense
from tensorflow.keras.models import Model

num_users = df['user_id'].nunique()
num_food_items = df['food_id'].nunique()

# Define Input Layers
user_input = Input(shape=(1,))
food_input = Input(shape=(1,))

# Embedding Layers
user_embedding = Embedding(input_dim=num_users, output_dim=32)(user_input)
food_embedding = Embedding(input_dim=num_food_items, output_dim=32)(food_input)

# Flatten Layers
user_vec = Flatten()(user_embedding)
food_vec = Flatten()(food_embedding)

# Concatenate User & Food Embeddings
concat = Concatenate()([user_vec, food_vec])

# Fully Connected Layers
fc1 = Dense(128, activation='relu')(concat)
fc2 = Dense(64, activation='relu')(fc1)
fc3 = Dense(32, activation='relu')(fc2)
output = Dense(1, activation='sigmoid')(fc3)  # Predict whether food is heart-healthy

# Compile Model
model = Model(inputs=[user_input, food_input], outputs=output)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

model.summary()


In [6]:
# Prepare Data for Training
user_ids = df['user_id'].values
food_ids = df['food_id'].values
labels = df['rating'].values  # All 1s (Implicit feedback)

# Train Model
model.fit([user_ids, food_ids], labels, epochs=10, batch_size=32)


Epoch 1/10
[1m275/275[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 5ms/step - accuracy: 0.9607 - loss: 0.2436
Epoch 2/10
[1m275/275[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 1.0000 - loss: 8.0968e-05
Epoch 3/10
[1m275/275[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 1.0000 - loss: 2.6367e-05
Epoch 4/10
[1m275/275[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 1.0000 - loss: 1.2591e-05
Epoch 5/10
[1m275/275[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 1.0000 - loss: 7.1611e-06
Epoch 6/10
[1m275/275[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 1.0000 - loss: 4.5941e-06
Epoch 7/10
[1m275/275[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 1.0000 - loss: 3.1023e-06
Epoch 8/10
[1m275/275[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - accuracy: 1.0000 - loss: 2.2316e-06
Epoch 9/10


<keras.src.callbacks.history.History at 0x7a8299cc4b50>

In [7]:
def recommend_foods(user_id, top_n=5):
    food_ids = np.array(range(num_food_items))
    scores = model.predict([np.array([user_id] * num_food_items), food_ids])

    # Sort food items by predicted score
    food_scores = list(zip(food_ids, scores.flatten()))
    food_scores.sort(key=lambda x: x[1], reverse=True)

    # Get top N recommended foods
    recommended_foods = [food_encoder.inverse_transform([food_id])[0] for food_id, _ in food_scores[:top_n]]
    return recommended_foods

# Get recommendations for a sample user
print(recommend_foods(user_id=10))


[1m275/275[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
['Fish, dry heat, cooked, tilapia', 'Cereals ready-to-eat, HONEY GRAHAM OH!S, QUAKER', 'Babyfood, strained, bananas and pineapple with tapioca, fruit', 'Beverages, with aspartame, low calorie, dry powdered mix, fruit-flavored drink', 'Fish, dry heat, cooked, mixed species, yellowtail']


In [14]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, Flatten, Concatenate, Dense
from tensorflow.keras.models import Model
from sklearn.preprocessing import LabelEncoder

# Load dataset
df = pd.read_csv("/content/nutrition.csv")

# Ensure necessary columns exist
required_columns = ['name', 'serving_size', 'calories']
assert all(col in df.columns for col in required_columns), "Dataset is missing required columns!"

# Step 1: Filter Heart-Healthy Foods
def is_heart_healthy(food_name):
    """Returns True if the food is heart-healthy, False otherwise."""
    unhealthy_keywords = [
        'alcohol', 'beer', 'wine', 'whiskey', 'vodka', 'tequila',  # Alcoholic drinks
        'beef', 'pork', 'bacon', 'sausage', 'fried', 'burger',      # Red/processed meat
        'candy', 'cookies', 'cake', 'soda', 'syrup', 'sugar',       # Sugary foods
        'butter', 'cheese', 'cream', 'mayonnaise',                  # High-fat dairy
        'chips', 'crackers', 'fast food', 'processed'               # Processed/junk food
    ]

    return not any(keyword in food_name.lower() for keyword in unhealthy_keywords)

# Apply filter
df = df[df['name'].apply(is_heart_healthy)]

# Remove missing values
df = df.dropna()

# Encode food names as numerical IDs (starting from 0)
food_encoder = LabelEncoder()
df['food_id'] = food_encoder.fit_transform(df['name'])

# Ensure User IDs start from 0
df['user_id'] = np.random.randint(0, 1000, df.shape[0])  # Assign random user IDs
df['user_id'] -= df['user_id'].min()  # Normalize user IDs to start from 0

# Assign implicit rating (1 = liked/healthy)
df['rating'] = 1

# Ensure Data Types Are Correct
df['user_id'] = df['user_id'].astype(int)
df['food_id'] = df['food_id'].astype(int)
df['rating'] = df['rating'].astype(float)

# Define User & Food Counts (Adding +1 to prevent indexing errors)
num_users = df['user_id'].max() + 1  # Max ID + 1 ensures all IDs fit within the range
num_food_items = df['food_id'].max() + 1  # Max ID + 1 ensures all IDs fit within range

# Step 2: Check Shapes Before Training
print(f"Total Unique Users: {num_users}")
print(f"Total Unique Foods: {num_food_items}")
print(f"Dataset Size: {df.shape}")

# Build Neural Collaborative Filtering Model
user_input = Input(shape=(1,))
food_input = Input(shape=(1,))

user_embedding = Embedding(input_dim=num_users, output_dim=32)(user_input)
food_embedding = Embedding(input_dim=num_food_items, output_dim=32)(food_input)

user_vec = Flatten()(user_embedding)
food_vec = Flatten()(food_embedding)

concat = Concatenate()([user_vec, food_vec])

fc1 = Dense(128, activation='relu')(concat)
fc2 = Dense(64, activation='relu')(fc1)
fc3 = Dense(32, activation='relu')(fc2)
output = Dense(1, activation='sigmoid')(fc3)  # Predict whether the food is heart-healthy

# Compile Model
model = Model(inputs=[user_input, food_input], outputs=output)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Step 3: Prepare Data for Training
user_ids = df['user_id'].values
food_ids = df['food_id'].values
labels = df['rating'].values.reshape(-1, 1)  # Ensure labels are correctly shaped

# Final Check Before Training
print(f"Final User IDs Shape: {user_ids.shape}")
print(f"Final Food IDs Shape: {food_ids.shape}")
print(f"Final Labels Shape: {labels.shape}")

# Train Model (Batch Size Adjusted)
model.fit([user_ids, food_ids], labels, epochs=10, batch_size=16)

# -------------------------------
# 🍽️ Generate 7-Day Meal Plan (Only Heart-Healthy Foods)
# -------------------------------
def recommend_foods(user_id, top_n=28):
    """Generates top heart-healthy food recommendations."""
    food_ids = np.array(range(num_food_items))
    scores = model.predict([np.array([user_id] * num_food_items), food_ids])

    # Sort foods by predicted score
    food_scores = list(zip(food_ids, scores.flatten()))
    food_scores.sort(key=lambda x: x[1], reverse=True)

    # Get top recommended foods
    recommended_foods = [food_encoder.inverse_transform([food_id])[0] for food_id, _ in food_scores[:top_n]]
    return recommended_foods

def generate_meal_plan(user_id):
    """Creates a 7-day meal plan with breakfast, lunch, snack, and dinner."""
    foods = recommend_foods(user_id, top_n=28)  # 7 days * 4 meals/day

    # Split into meal categories
    meal_plan = {
        "Day 1": {"Breakfast": foods[0], "Lunch": foods[1], "Snack": foods[2], "Dinner": foods[3]},
        "Day 2": {"Breakfast": foods[4], "Lunch": foods[5], "Snack": foods[6], "Dinner": foods[7]},
        "Day 3": {"Breakfast": foods[8], "Lunch": foods[9], "Snack": foods[10], "Dinner": foods[11]},
        "Day 4": {"Breakfast": foods[12], "Lunch": foods[13], "Snack": foods[14], "Dinner": foods[15]},
        "Day 5": {"Breakfast": foods[16], "Lunch": foods[17], "Snack": foods[18], "Dinner": foods[19]},
        "Day 6": {"Breakfast": foods[20], "Lunch": foods[21], "Snack": foods[22], "Dinner": foods[23]},
        "Day 7": {"Breakfast": foods[24], "Lunch": foods[25], "Snack": foods[26], "Dinner": foods[27]},
    }

    return meal_plan

# Example: Get meal plan for a sample user
user_id = 10  # Change as needed
meal_plan = generate_meal_plan(user_id)

# Print Meal Plan
for day, meals in meal_plan.items():
    print(f"\n{day}")
    for meal_type, food in meals.items():
        print(f"  {meal_type}: {food}")


Total Unique Users: 1000
Total Unique Foods: 4163
Dataset Size: (4163, 80)
Final User IDs Shape: (4163,)
Final Food IDs Shape: (4163,)
Final Labels Shape: (4163, 1)
Epoch 1/10
[1m261/261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.9843 - loss: 0.2109
Epoch 2/10
[1m261/261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 1.0000 - loss: 7.9962e-05
Epoch 3/10
[1m261/261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 1.0000 - loss: 2.8166e-05
Epoch 4/10
[1m261/261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 1.0000 - loss: 1.4118e-05
Epoch 5/10
[1m261/261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 1.0000 - loss: 8.2831e-06
Epoch 6/10
[1m261/261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 1.0000 - loss: 5.4708e-06
Epoch 7/10
[1m261/261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accurac