<a href="https://colab.research.google.com/github/rishimae/ml_kusinaiready/blob/main/ml_kusinaiready.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Data Cleaning

## Dish Dataset

In [2]:
import pandas as pd


# Load the dataset from a GitHub raw URL
file_url = 'https://raw.githubusercontent.com/rishimae/ml_kusinaiready/refs/heads/main/dishes_dataset.csv'  # Replace with your actual raw URL
df = pd.read_csv(file_url)


# Clean column names
df.columns = df.columns.str.strip().str.lower().str.replace(' ', '_')


# Check for missing values
print("Missing values:\n", df.isnull().sum())


# Clean 'dishID': Ensure it's an integer
df['dishid'] = pd.to_numeric(df['dishid'], errors='coerce').astype('Int64')


# Clean 'dishname': Strip whitespace and standardize case
df['dishname'] = df['dishname'].str.strip().str.title()


# Clean 'prep_time': Convert to numeric
df['prep_time'] = pd.to_numeric(df['prep_time'], errors='coerce')


# Clean 'ingre_list': Strip whitespace, sort ingredients, and join
df['ingre_list'] = df['ingre_list'].apply(lambda x: ', '.join(sorted([ingredient.strip() for ingredient in x.split(',')])))


# Clean 'num_servings': Extract lower and upper bounds
df['num_servings'] = df['num_servings'].apply(lambda x: 1 if '2-3' in x else 0)


# Clean 'nutri_guide': Ensure consistent formatting (optional)
df['nutri_guide'] = df['nutri_guide'].str.strip()


# Clean 'skills_needed': Strip whitespace and standardize case
df['skills_needed'] = df['skills_needed'].str.strip().str.title()


# Define unique age groups
unique_age_groups = ['Kids', 'Teens', 'Adults', 'Elders']


# Create binary columns for each age group
for age_group in unique_age_groups:
  df[f'age_{age_group.lower()}'] = df['age_range'].apply(lambda x: 1 if age_group in x else 0)


# Define a list of all possible meal types
all_meal_types = ['Appetizer', 'Soup', 'Vegetable Dishes', 'Vegetable with Seafood', 'Vegetable with Meat', 'Dessert']


# Create binary columns for each meal type
for meal in all_meal_types:
  df[f'meal_{meal.lower().replace(" ", "_")}'] = df['meal_type'].apply(lambda x: 1 if meal in x else 0)


# Drop the original meal_type column if you no longer need it
df.drop(columns=['meal_type'], inplace=True)


# Optionally drop the original 'age_range' and 'meal_type' columns if not needed
df.drop(columns=['age_range'], inplace=True)

# Display the cleaned dataset
print("\nCleaned dataset:\n", df)


# Save the cleaned dataset to a new CSV file
cleaned_file_path = 'cleaned_dishes.csv'  # Specify the desired output file name
df.to_csv(cleaned_file_path, index=False)





Missing values:
 dishid           0
dishname         0
prep_time        0
ingre_list       0
num_servings     0
nutri_guide      0
skills_needed    0
age_range        0
meal_type        0
dtype: int64

Cleaned dataset:
     dishid               dishname  prep_time  \
0      101  Grilled Chicken Salad         30   
1      102    Spaghetti Bolognese         45   
2      103    Vegetarian Stir Fry         20   
3      104             Beef Tacos         25   
4      105      Pancake Breakfast         15   
..     ...                    ...        ...   
57     158         Beef Caldereta         90   
58     159               Salpicao         40   
59     160       Camaron Rebosado         30   
60     161                  Suman         60   
61     162            Arroz Caldo         60   

                                           ingre_list  num_servings  \
0    Chicken breast, Lettuce, Olive oil, Pepper, Salt             0   
1   Garlic, Ground beef, Onion, Spaghetti, Tomato ...        

## User Dataset

In [9]:
# Load the dataset from a GitHub raw URL
file_url = 'https://raw.githubusercontent.com/rishimae/ml_kusinaiready/refs/heads/main/users_dataset.csv'  # Replace with your actual raw URL
df = pd.read_csv(file_url)

# Clean column names
df.columns = df.columns.str.strip().str.lower().str.replace(' ', '_')

# Check for missing values
print("Missing values:\n", df.isnull().sum())

# Clean 'userID': Ensure it's an integer
df['userid'] = pd.to_numeric(df['userid'], errors='coerce').astype('Int64')

# Convert 'family_size' to binary (1 for 2-3, 0 otherwise)
df['family_size'] = df['family_size'].apply(lambda x: 1 if '2-3' in x else 0)

# Clean 'cooking_skills': Strip whitespace and standardize case
df['cooking_skills'] = df['cooking_skills'].str.strip().str.lower().str.title()

# Define unique age groups for creating binary columns
unique_age_groups = ['Kids', 'Teens', 'Adults', 'Elders']

# Create binary columns for each age group
for age_group in unique_age_groups:
    df[f'age_{age_group.lower()}'] = df['age_range'].apply(lambda x: 1 if age_group in x else 0)

# Define a list of all possible meal preferences
all_meal_types = ['Appetizer', 'Soup', 'Vegetable Dishes', 'Vegetable with Seafood', 'Vegetable with Meat', 'Dessert']

# Create binary columns for each meal type preference
for meal in all_meal_types:
    df[f'preference_{meal.lower().replace(" ", "_")}'] = df['meal_preferences'].apply(lambda x: 1 if meal in x else 0)

# Clean 'allergies': Normalize case and remove whitespace
df['allergies'] = df['allergies'].str.strip().str.lower().str.title()

# Drop the original 'age_range' and 'meal_preferences' columns if not needed
df.drop(columns=['age_range', 'meal_preferences'], inplace=True)

# Display the cleaned dataset
print("\nCleaned dataset:\n", df)

# Save the cleaned dataset to a new CSV file
cleaned_file_path = 'cleaned_users_dataset.csv'  # Specify the desired output file name
df.to_csv(cleaned_file_path, index=False)

# Output the path of the cleaned file
print(f"Cleaned data saved to: {cleaned_file_path}")

Missing values:
 userid              0
family_size         0
cooking_skills      0
age_range           0
meal_preferences    0
allergies           2
dtype: int64

Cleaned dataset:
     userid  family_size cooking_skills                     allergies  \
0        1            1       Beginner               Peanut, Chicken   
1        2            1       Advanced              Egg, Soy, Peanut   
2        3            0   Intermediate  Shellfish, Chicken, Soy, Egg   
3        4            1       Beginner       Meat, Chicken, Soy, Egg   
4        5            0   Intermediate                           NaN   
5        6            1       Beginner               Peanut, Chicken   
6        7            0   Intermediate               Shellfish, Milk   
7        8            1       Advanced                           NaN   
8        9            0   Intermediate              Dairy, Egg, Milk   
9       10            1       Beginner                        Gluten   
10      11            0    

# Data Preparation

## with Content-based Filtering

In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics.pairwise import cosine_similarity

# Load cleaned user and dish datasets
users_df = pd.read_csv('cleaned_users_dataset.csv')
dishes_df = pd.read_csv('cleaned_dishes.csv')

# Step 1: Encode categorical variables
label_encoder = LabelEncoder()
users_df['family_size_encoded'] = label_encoder.fit_transform(users_df['family_size'])
users_df['cooking_skills_encoded'] = label_encoder.fit_transform(users_df['cooking_skills'])

# Drop original non-numeric columns
user_features = users_df.drop(columns=['userid', 'cooking_skills', 'allergies', 'family_size'])

# Drop non-numeric columns from dish features
dish_features = dishes_df.drop(columns=['dishid', 'dishname', 'ingre_list', 'nutri_guide'])

# Convert categorical features in dish_features to numeric using one-hot encoding
dish_features = pd.get_dummies(dish_features, drop_first=True)

# Step 2: Align Columns in Both Feature Matrices
# Print columns to identify mismatches
print("User Feature Columns:\n", user_features.columns)
print("\nDish Feature Columns:\n", dish_features.columns)

# Get the complete list of columns in both matrices
all_columns = set(user_features.columns).union(set(dish_features.columns))

# Add missing columns to both matrices and fill with zeros
for col in all_columns:
    if col not in user_features.columns:
        user_features[col] = 0
    if col not in dish_features.columns:
        dish_features[col] = 0

# Ensure the same column order in both matrices
user_features = user_features[sorted(all_columns)]
dish_features = dish_features[sorted(all_columns)]

# Step 3: Compute similarity between users and dishes
similarity_matrix = cosine_similarity(user_features, dish_features)

# Step 4: Get top N recommendations per user based on similarity
def get_top_n_recommendations(user_id, top_n=5):
    user_index = users_df[users_df['userid'] == user_id].index[0]
    top_n_indices = similarity_matrix[user_index].argsort()[-top_n:][::-1]
    return dishes_df.iloc[top_n_indices]

# Example: Get recommendations for user 1
print("Top 5 Recommendations for User 1:\n", get_top_n_recommendations(user_id=1))


In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics.pairwise import cosine_similarity

# Load cleaned user and dish datasets
users_df = pd.read_csv('cleaned_users_dataset.csv')
dishes_df = pd.read_csv('cleaned_dishes.csv')

# Step 1: Encode categorical variables
label_encoder = LabelEncoder()
users_df['family_size_encoded'] = label_encoder.fit_transform(users_df['family_size'])
users_df['cooking_skills_encoded'] = label_encoder.fit_transform(users_df['cooking_skills'])

# Drop original non-numeric columns
user_features = users_df.drop(columns=['userid', 'cooking_skills', 'allergies', 'family_size'])

# Drop non-numeric columns from dish features
dish_features = dishes_df.drop(columns=['dishid', 'dishname', 'ingre_list', 'nutri_guide'])

# Convert categorical features in dish_features to numeric using one-hot encoding
dish_features = pd.get_dummies(dish_features, drop_first=True)

# Step 2: Align Columns in Both Feature Matrices
# Get the complete list of columns in both matrices
all_columns = set(user_features.columns).union(set(dish_features.columns))

# Add missing columns to both matrices and fill with zeros
for col in all_columns:
    if col not in user_features.columns:
        user_features[col] = 0
    if col not in dish_features.columns:
        dish_features[col] = 0

# Ensure the same column order in both matrices
user_features = user_features[sorted(all_columns)]
dish_features = dish_features[sorted(all_columns)]

# Step 3: Compute similarity between users and dishes
similarity_matrix = cosine_similarity(user_features, dish_features)

# Step 4: Get top N recommendations per user based on similarity
def get_top_n_recommendations(user_id, top_n=5):
    try:
        user_index = users_df[users_df['userid'] == user_id].index[0]
        top_n_indices = similarity_matrix[user_index].argsort()[-top_n:][::-1]
        return dishes_df.iloc[top_n_indices]
    except IndexError:
        return f"User ID {user_id} not found."

# Example: Get recommendations for a specific user
user_id_to_test = 3  # Change this ID to test with different users
print(f"Top {5} Recommendations for User {user_id_to_test}:\n", get_top_n_recommendations(user_id=user_id_to_test))




## with Constraint-based Filtering

In [29]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics.pairwise import cosine_similarity

# Load cleaned user and dish datasets
users_df = pd.read_csv('cleaned_users_dataset.csv')
dishes_df = pd.read_csv('cleaned_dishes.csv')

# Step 1: Encode categorical variables as necessary
label_encoder = LabelEncoder()
users_df['family_size_encoded'] = label_encoder.fit_transform(users_df['family_size'])
users_df['cooking_skills_encoded'] = label_encoder.fit_transform(users_df['cooking_skills'])

# Drop original non-numeric columns from user features
user_features = users_df.drop(columns=['userid', 'cooking_skills', 'allergies', 'family_size'])

# Drop non-numeric columns from dish features
dish_features = dishes_df.drop(columns=['dishid', 'dishname', 'ingre_list', 'nutri_guide'])

# Convert categorical features in dish_features to numeric using one-hot encoding
dish_features = pd.get_dummies(dish_features, drop_first=False)

# Step 2: Align Columns in Both Feature Matrices
# Get the complete list of columns in both matrices
all_columns = set(user_features.columns).union(set(dish_features.columns))

# Add missing columns to both matrices and fill with zeros
for col in all_columns:
    if col not in user_features.columns:
        user_features[col] = 0
    if col not in dish_features.columns:
        dish_features[col] = 0

# Ensure the same column order in both matrices
user_features = user_features[sorted(all_columns)]
dish_features = dish_features[sorted(all_columns)]

# Step 3: Compute Similarity Between Users and Dishes
similarity_matrix = cosine_similarity(user_features, dish_features)

# Step 4: Define Constraint-Based Filtering Function
def filter_by_constraints(user, recommendations):
    """
    Filter recommendations based on the user's cooking skill level and allergies.
    """
    # Filter based on cooking skills
    recommendations = recommendations[recommendations['skills_needed'].str.lower() <= user['cooking_skills'].lower()]

    # Filter out dishes containing ingredients the user is allergic to
    if pd.notna(user['allergies']):
        allergies = user['allergies'].lower().split(',')  # Convert allergies to a list
        recommendations = recommendations[~recommendations['ingre_list'].str.lower().apply(lambda x: any(allergy in x for allergy in allergies))]

    return recommendations

# Step 5: Get Top N Recommendations per User Based on Similarity
def get_top_n_recommendations(user_id, top_n=10):
    """Get top N recommendations for a given user based on cosine similarity."""
    try:
        # Find the index of the user in the dataframe
        user_index = users_df[users_df['userid'] == user_id].index[0]

        # Compute top N similar dishes
        top_n_indices = similarity_matrix[user_index].argsort()[-top_n:][::-1]

        # Get the recommended dishes
        recommendations = dishes_df.iloc[top_n_indices]

        # Apply constraint-based filtering
        user = users_df.iloc[user_index]  # Get the user's details
        filtered_recommendations = filter_by_constraints(user, recommendations)

        return filtered_recommendations
    except IndexError:
        return f"User ID {user_id} not found."

# Step 6: Example: Get recommendations for a specific user
user_id_to_test = 12  # Change this ID to test with different users
print(f"Top {5} Recommendations for User {user_id_to_test} (after constraints applied):\n", get_top_n_recommendations(user_id=user_id_to_test))


Top 5 Recommendations for User 12 (after constraints applied):
     dishid             dishname  prep_time  \
31     132        Labanos Salad         15   
14     115  Chicken Caesar Wrap         20   
6      107         Quinoa Salad         20   
38     139     Sautéed Ampalaya         20   
40     141                Turon         20   
3      104           Beef Tacos         25   
13     114    Tomato Basil Soup         25   
55     156         Pritong Isda         30   
35     136           Longganisa         30   

                                           ingre_list  num_servings  \
31           Daikon radish, Onions, Tomatoes, Vinegar             0   
14  Caesar dressing, Chicken breast, Romaine lettu...             1   
6       Cucumbers, Lemon, Olive oil, Quinoa, Tomatoes             1   
38                Bitter melon, Eggs, Onion, Tomatoes             1   
40         Bananas, Brown sugar, Spring roll wrappers             1   
3   Cheese, Ground beef, Lettuce, Taco shells, To

## content + constraint

In [19]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics.pairwise import cosine_similarity

# Load cleaned user and dish datasets
users_df = pd.read_csv('cleaned_users_dataset.csv')
dishes_df = pd.read_csv('cleaned_dishes.csv')

# Step 1: Encode categorical variables as necessary
label_encoder = LabelEncoder()
users_df['family_size_encoded'] = label_encoder.fit_transform(users_df['family_size'])
users_df['cooking_skills_encoded'] = label_encoder.fit_transform(users_df['cooking_skills'])

# Step 2: Ensure consistent data types for filtering and comparison
# Convert key columns to appropriate numeric types if necessary
columns_to_convert = ['family_size_encoded', 'cooking_skills_encoded', 'num_servings', 'skills_needed']

# Convert and handle missing values in users_df and dishes_df
for col in columns_to_convert:
    if col in users_df.columns:
        users_df[col] = pd.to_numeric(users_df[col], errors='coerce')
    if col in dishes_df.columns:
        dishes_df[col] = pd.to_numeric(dishes_df[col], errors='coerce')

# Replace NaN values with 0 after conversion
users_df = users_df.fillna(0)
dishes_df = dishes_df.fillna(0)

# Define meal columns used in the preferences
meal_columns = [
    'preference_appetizer', 'preference_soup', 'preference_vegetable_dishes',
    'preference_vegetable_with_seafood', 'preference_vegetable_with_meat'
]

# Step 3: Create default preference columns if missing in users_df
for col in meal_columns:
    if col not in users_df.columns:
        users_df[col] = 0  # Set default value to 0 (no preference)

# Step 4: Drop original non-numeric columns from user features
user_features = users_df.drop(columns=['userid', 'cooking_skills', 'allergies', 'family_size'])

# Step 5: Drop non-numeric columns from dish features
dish_features = dishes_df.drop(columns=['dishid', 'dishname', 'ingre_list', 'nutri_guide'])

# Step 6: Convert categorical features in dish_features to numeric using one-hot encoding
dish_features = pd.get_dummies(dish_features, drop_first=False)

# Step 7: Align Columns in Both Feature Matrices
# Get the complete list of columns in both matrices
all_columns = set(user_features.columns).union(set(dish_features.columns))

# Add missing columns to both matrices and fill with zeros
for col in all_columns:
    if col not in user_features.columns:
        user_features[col] = 0
    if col not in dish_features.columns:
        dish_features[col] = 0

# Ensure the same column order in both matrices
user_features = user_features[sorted(all_columns)]
dish_features = dish_features[sorted(all_columns)]

# Step 8: Compute Similarity Between Users and Dishes
similarity_matrix = cosine_similarity(user_features, dish_features)

# Step 9: Define Multi-Step Constraint-Based Filtering Functions

def filter_by_allergies(user, recommendations):
    """Filter out dishes containing ingredients the user is allergic to."""
    if pd.notna(user['allergies']):
        allergies = user['allergies'].lower().split(',')  # Convert allergies to a list
        return recommendations[~recommendations['ingre_list'].str.lower().apply(lambda x: any(allergy in x for allergy in allergies))]
    return recommendations

def filter_by_servings(user, recommendations):
    """Filter dishes based on the user's family size."""
    family_size = user['family_size_encoded']
    return recommendations[(recommendations['num_servings'] <= family_size) & (recommendations['num_servings'] >= family_size)]

def filter_by_age_range(user, recommendations):
    """Filter dishes suitable for the user's age group."""
    age_columns = ['age_kids', 'age_teens', 'age_adults', 'age_elders']
    user_age_group = user[age_columns].idxmax()  # Get the primary age group (column) for the user
    return recommendations[recommendations[user_age_group] == 1]

def filter_by_cooking_skills(user, recommendations):
    """Filter dishes that match or are below the user's cooking skill level."""
    user_skill_level = user['cooking_skills_encoded']
    return recommendations[recommendations['skills_needed'] <= user_skill_level]

# Integrated Constraint Filtering Function
def filter_by_constraints(user, recommendations):
    """Apply all constraint-based filters in sequence."""
    recommendations = filter_by_allergies(user, recommendations)
    recommendations = filter_by_servings(user, recommendations)
    recommendations = filter_by_age_range(user, recommendations)
    recommendations = filter_by_cooking_skills(user, recommendations)
    return recommendations

# Step 10: Get Top N Recommendations per User Based on Similarity
def get_top_n_recommendations(user_id, top_n=10):
    """Get top N recommendations for a given user based on cosine similarity."""
    try:
        # Find the index of the user in the dataframe
        user_index = users_df[users_df['userid'] == user_id].index[0]

        # Compute top N similar dishes
        top_n_indices = similarity_matrix[user_index].argsort()[-top_n:][::-1]

        # Get the recommended dishes
        recommendations = dishes_df.iloc[top_n_indices]

        # Apply multi-step constraint-based filtering
        user = users_df.iloc[user_index]  # Get the user's details
        filtered_recommendations = filter_by_constraints(user, recommendations)

        return filtered_recommendations
    except IndexError:
        return f"User ID {user_id} not found."

# Step 11: Example: Get recommendations for a specific user
user_id_to_test = 23  # Change this ID to test with different users
print(f"Top {10} Recommendations for User {user_id_to_test} (after multi-step constraints applied):\n", get_top_n_recommendations(user_id=user_id_to_test))


Top 10 Recommendations for User 23 (after multi-step constraints applied):
     dishid           dishname  prep_time  \
13     114  Tomato Basil Soup         25   
35     136         Longganisa         30   
9      110           Omelette         15   
58     159           Salpicao         40   

                                           ingre_list  num_servings  \
13  Basil, Garlic, Olive oil, Tomatoes, Vegetable ...             0   
35        Garlic, Ground pork, Spices, Sugar, Vinegar             0   
9                       Cheese, Eggs, Onions, Peppers             0   
58      Beef, Garlic, Olive oil, Worcestershire sauce             0   

                                          nutri_guide  skills_needed  \
13   Calories: 180, Protein: 8g, Carbs: 30g, Fats: 6g            0.0   
35  Calories: 400, Protein: 25g, Carbs: 10g, Fats:...            0.0   
9   Calories: 200, Protein: 15g, Carbs: 5g, Fats: 10g            0.0   
58  Calories: 400, Protein: 30g, Carbs: 5g, Fats: 30g      

## how it works (content-based & constraint)

In [25]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics.pairwise import cosine_similarity

# Load cleaned user and dish datasets
users_df = pd.read_csv('cleaned_users_dataset.csv')
dishes_df = pd.read_csv('cleaned_dishes.csv')

# Step 1: Encode categorical variables as necessary
label_encoder = LabelEncoder()
users_df['family_size_encoded'] = label_encoder.fit_transform(users_df['family_size'])
users_df['cooking_skills_encoded'] = label_encoder.fit_transform(users_df['cooking_skills'])

# Step 2: Ensure consistent data types for filtering and comparison
# Convert key columns to appropriate numeric types if necessary
columns_to_convert = ['family_size_encoded', 'cooking_skills_encoded', 'num_servings', 'skills_needed']

# Convert and handle missing values in users_df and dishes_df
for col in columns_to_convert:
    if col in users_df.columns:
        users_df[col] = pd.to_numeric(users_df[col], errors='coerce')
    if col in dishes_df.columns:
        dishes_df[col] = pd.to_numeric(dishes_df[col], errors='coerce')

# Replace NaN values with 0 after conversion
users_df = users_df.fillna(0)
dishes_df = dishes_df.fillna(0)

# Define meal columns used in the preferences
meal_columns = [
    'preference_appetizer', 'preference_soup', 'preference_vegetable_dishes',
    'preference_vegetable_with_seafood', 'preference_vegetable_with_meat'
]

# Step 3: Create default preference columns if missing in users_df
for col in meal_columns:
    if col not in users_df.columns:
        users_df[col] = 0  # Set default value to 0 (no preference)

# Step 4: Drop original non-numeric columns from user features
user_features = users_df.drop(columns=['userid', 'cooking_skills', 'allergies', 'family_size'])

# Step 5: Drop non-numeric columns from dish features
dish_features = dishes_df.drop(columns=['dishid', 'dishname', 'ingre_list', 'nutri_guide'])

# Step 6: Convert categorical features in dish_features to numeric using one-hot encoding
dish_features = pd.get_dummies(dish_features, drop_first=False)

# Step 7: Align Columns in Both Feature Matrices
# Get the complete list of columns in both matrices
all_columns = set(user_features.columns).union(set(dish_features.columns))

# Add missing columns to both matrices and fill with zeros
for col in all_columns:
    if col not in user_features.columns:
        user_features[col] = 0
    if col not in dish_features.columns:
        dish_features[col] = 0

# Ensure the same column order in both matrices
user_features = user_features[sorted(all_columns)]
dish_features = dish_features[sorted(all_columns)]

# Step 8: Compute Similarity Between Users and Dishes
similarity_matrix = cosine_similarity(user_features, dish_features)

# Step 9: Define Multi-Step Constraint-Based Filtering Functions

def filter_by_allergies(user, recommendations):
    """Filter out dishes containing ingredients the user is allergic to."""
    if pd.notna(user['allergies']):
        allergies = user['allergies'].lower().split(',')  # Convert allergies to a list
        return recommendations[~recommendations['ingre_list'].str.lower().apply(lambda x: any(allergy in x for allergy in allergies))]
    return recommendations

def filter_by_servings(user, recommendations):
    """Filter dishes based on the user's family size."""
    family_size = user['family_size_encoded']
    return recommendations[(recommendations['num_servings'] <= family_size) & (recommendations['num_servings'] >= family_size)]

def filter_by_age_range(user, recommendations):
    """Filter dishes suitable for the user's age group."""
    age_columns = ['age_kids', 'age_teens', 'age_adults', 'age_elders']
    user_age_group = user[age_columns].idxmax()  # Get the primary age group (column) for the user
    return recommendations[recommendations[user_age_group] == 1]

def filter_by_cooking_skills(user, recommendations):
    """Filter dishes that match or are below the user's cooking skill level."""
    user_skill_level = user['cooking_skills_encoded']
    return recommendations[recommendations['skills_needed'] <= user_skill_level]

# Integrated Constraint Filtering Function
def filter_by_constraints(user, recommendations):
    """Apply all constraint-based filters in sequence."""
    recommendations = filter_by_allergies(user, recommendations)
    recommendations = filter_by_servings(user, recommendations)
    recommendations = filter_by_age_range(user, recommendations)
    recommendations = filter_by_cooking_skills(user, recommendations)
    return recommendations

# Step 10: Get Top N Recommendations per User Based on Similarity (with dish-to-user comparisons)
def get_top_n_recommendations(user_id, top_n=10):
    """Get top N recommendations for a given user based on cosine similarity."""
    try:
        # Find the index of the user in the dataframe
        user_index = users_df[users_df['userid'] == user_id].index[0]

        # Get the user's feature vector
        user_vector = user_features.iloc[user_index].values.reshape(1, -1)

        # Calculate similarity scores for all dishes against the user's feature vector
        all_similarity_scores = cosine_similarity(user_vector, dish_features).flatten()

        # Create a DataFrame to hold dish IDs and their similarity scores
        similarity_df = pd.DataFrame({
            'dishid': dishes_df['dishid'],
            'similarity_score': all_similarity_scores
        })

        # Print all dishes with their similarity scores
        print("Dishes considered for recommendations with their similarity scores:")
        print(similarity_df.sort_values(by='similarity_score', ascending=False).head(20))  # Display top 20 for clarity

        # Get the indices of the top N similar dishes
        top_n_indices = all_similarity_scores.argsort()[-top_n:][::-1]

        # Get the recommended dishes
        recommendations = dishes_df.iloc[top_n_indices].copy()

        # Add similarity scores to recommendations
        recommendations['similarity_score'] = all_similarity_scores[top_n_indices]

        # Display initial recommendations before filtering
        print("\nInitial Recommendations (before filtering):")
        print(recommendations[['dishid', 'dishname', 'similarity_score']])

        # Apply multi-step constraint-based filtering and display each step
        user = users_df.iloc[user_index]  # Get the user's details

        # Step-by-step filtering
        filtered_recommendations = recommendations.copy()

        # Filter by allergies
        filtered_recommendations = filter_by_allergies(user, filtered_recommendations)
        print("\nAfter Filtering by Allergies:")
        print(filtered_recommendations[['dishid', 'dishname', 'similarity_score']])

        # Filter by servings
        filtered_recommendations = filter_by_servings(user, filtered_recommendations)
        print("\nAfter Filtering by Servings:")
        print(filtered_recommendations[['dishid', 'dishname', 'similarity_score']])

        # Filter by age range
        filtered_recommendations = filter_by_age_range(user, filtered_recommendations)
        print("\nAfter Filtering by Age Range:")
        print(filtered_recommendations[['dishid', 'dishname', 'similarity_score']])

        # Filter by cooking skills
        filtered_recommendations = filter_by_cooking_skills(user, filtered_recommendations)
        print("\nAfter Filtering by Cooking Skills:")
        print(filtered_recommendations[['dishid', 'dishname', 'similarity_score']])

        return filtered_recommendations[['dishid', 'dishname', 'similarity_score']]
    except IndexError:
        return f"User ID {user_id} not found."

# Step 11: Example: Get recommendations for a specific user
user_id_to_test = 23  # Change this ID to test with different users
recommendations = get_top_n_recommendations(user_id=user_id_to_test)

# Display final recommendations
print(f"\nFinal Recommendations for User {user_id_to_test} (after all constraints applied):\n", recommendations)


Dishes considered for recommendations with their similarity scores:
    dishid  similarity_score
38     139          0.057448
10     111          0.056614
13     114          0.046078
35     136          0.038447
29     130          0.038426
28     129          0.038405
31     132          0.038236
9      110          0.038236
43     144          0.038152
58     159          0.028849
54     155          0.028840
41     142          0.028840
5      106          0.028831
50     151          0.028831
14     115          0.028760
6      107          0.028724
20     121          0.028724
40     141          0.028724
30     131          0.028724
17     118          0.025635

Initial Recommendations (before filtering):
    dishid              dishname  similarity_score
38     139      Sautéed Ampalaya          0.057448
10     111  Fruit Yogurt Parfait          0.056614
13     114     Tomato Basil Soup          0.046078
35     136            Longganisa          0.038447
29     130             