In [1]:
from pathlib import Path
from tqdm.auto import tqdm

import pandas as pd
import numpy as np
import ast
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
DATA_DIR = Path(r"D:\UKW_work\code\recipe_recommender_system\data\food_com_GeniusKitchen")
RAW_recepies_path = DATA_DIR / 'RAW_recipes.csv'
RAW_interactions_path =  DATA_DIR / 'RAW_interactions.csv'


In [3]:
#########################################
# 1. Load the Data
#########################################
# Adjust file paths as needed.
recipes_df = pd.read_csv(RAW_recepies_path)
interactions_df = pd.read_csv(RAW_interactions_path)

print("RAW_recipes.csv columns:", recipes_df.columns.tolist())
print("RAW_recipes.csv shape:", recipes_df.shape)
print("RAW_interactions.csv columns:", interactions_df.columns.tolist())
print("RAW_interactions.csv shape:", interactions_df.shape)


RAW_recipes.csv columns: ['name', 'id', 'minutes', 'contributor_id', 'submitted', 'tags', 'nutrition', 'n_steps', 'steps', 'description', 'ingredients', 'n_ingredients']
RAW_recipes.csv shape: (231637, 12)
RAW_interactions.csv columns: ['user_id', 'recipe_id', 'date', 'rating', 'review']
RAW_interactions.csv shape: (1132367, 5)


In [4]:
#########################################
# 2. Compute Bayesian Average Ratings for Recipes
#########################################
# Group interactions by recipe_id to get:
#   - v: number of ratings for the recipe
#   - R: mean rating for the recipe
agg = interactions_df.groupby('recipe_id').agg(
    v=('rating', 'count'),
    R=('rating', 'mean')
).reset_index()

# Global average rating C
C = (agg['v'] * agg['R']).sum() / agg['v'].sum()

# Choose a smoothing parameter, m.
# This value can be tuned – for example, m might be the median number of ratings.
m = agg['v'].median()

# Compute the Bayesian average rating for each recipe:
# Bayesian_Average = (v/(v+m)) * R + (m/(v+m)) * C
agg['bayesian_avg'] = (agg['v']/(agg['v']+m))*agg['R'] + (m/(agg['v']+m))*C

# Merge the Bayesian average ratings into the recipes dataframe.
recipes_df = recipes_df.merge(agg[['recipe_id', 'bayesian_avg']], left_on='id', right_on='recipe_id', how='left')
# For recipes with no ratings, fill with global average.
recipes_df['bayesian_avg'] = recipes_df['bayesian_avg'].fillna(C)

print("\nSample recipes with Bayesian average rating:")
print(recipes_df[['id', 'bayesian_avg']].head())


Sample recipes with Bayesian average rating:
       id  bayesian_avg
0  137739      4.764406
1   31490      3.803672
2  112140      4.274011
3   59389      4.455508
4   44061      4.607344


In [5]:
#########################################
# 3. Process Recipe Ingredients
#########################################
# The 'ingredients' column is assumed to be a string representation of a list.
# Convert it to an actual list and then join the ingredients into one string.
def process_ingredients(ing_str):
    try:
        ing_list = ast.literal_eval(ing_str)
        # Join the ingredients into one document.
        return " ".join(ing_list)
    except Exception as e:
        return ""

recipes_df['ingredients_doc'] = recipes_df['ingredients'].apply(process_ingredients)


In [9]:
#########################################
# 4. Build a TF-IDF Representation of Recipes
#########################################
vectorizer = TfidfVectorizer(stop_words='english', max_features=10000)
# Fit and transform the ingredients documents.
tfidf_matrix = vectorizer.fit_transform(recipes_df['ingredients_doc'])
print("\nTF-IDF matrix shape:", tfidf_matrix.shape)


TF-IDF matrix shape: (231637, 4158)


In [10]:
#########################################
# 5. Build User Profiles from Interaction Data
#########################################
# We'll assume that a high rating (e.g., rating >= 4) indicates a user "likes" that recipe.
rating_threshold = 4
# For each user, get the list of recipe IDs they rated >= threshold.
user_profiles = {}
# Create a mapping from recipe id to its TF-IDF vector (as a row in tfidf_matrix).
# Note: recipes_df index corresponds to the row in tfidf_matrix.
recipe_id_to_index = {rid: idx for idx, rid in enumerate(recipes_df['id'])}

In [11]:
# Group interactions by user.
for user, group in interactions_df.groupby('user_id'):
    liked_recipe_ids = group[group['rating'] >= rating_threshold]['recipe_id'].tolist()
    # Get the TF-IDF vectors for these recipes (if available).
    vectors = []
    for rid in liked_recipe_ids:
        if rid in recipe_id_to_index:
            idx = recipe_id_to_index[rid]
            vectors.append(tfidf_matrix[idx])
    if vectors:
        # Average the vectors (using mean of sparse vectors).
        user_profile = np.mean([vec.toarray() for vec in vectors], axis=0)
        user_profiles[user] = user_profile

# Convert user_profiles values to 2D numpy arrays.
for u in tqdm(user_profiles):
    user_profiles[u] = np.array(user_profiles[u]).reshape(1, -1)

100%|██████████| 180310/180310 [1:21:03<00:00, 37.08it/s] 


In [None]:
user_profiles

{1533: array([[0.00255534, 0.        , 0.        , ..., 0.        , 0.00901785,
         0.        ]]),
 1535: array([[0.00093452, 0.00063368, 0.        , ..., 0.        , 0.00204889,
         0.        ]]),
 1581: array([[0., 0., 0., ..., 0., 0., 0.]]),
 1634: array([[0., 0., 0., ..., 0., 0., 0.]]),
 1676: array([[0., 0., 0., ..., 0., 0., 0.]]),
 1755: array([[0., 0., 0., ..., 0., 0., 0.]]),
 1773: array([[0., 0., 0., ..., 0., 0., 0.]]),
 1792: array([[0., 0., 0., ..., 0., 0., 0.]]),
 1891: array([[0., 0., 0., ..., 0., 0., 0.]]),
 1962: array([[0., 0., 0., ..., 0., 0., 0.]]),
 1986: array([[0., 0., 0., ..., 0., 0., 0.]]),
 2008: array([[0., 0., 0., ..., 0., 0., 0.]]),
 2033: array([[0., 0., 0., ..., 0., 0., 0.]]),
 2046: array([[0., 0., 0., ..., 0., 0., 0.]]),
 2054: array([[0., 0., 0., ..., 0., 0., 0.]]),
 2059: array([[0., 0., 0., ..., 0., 0., 0.]]),
 2070: array([[0., 0., 0., ..., 0., 0., 0.]]),
 2073: array([[0., 0., 0., ..., 0., 0., 0.]]),
 2095: array([[0., 0., 0., ..., 0., 0., 

In [16]:
import pickle

TEMP_DIR = Path(r'D:\UKW_work\code\recipe_recommender_system\temp')
RECIPE_EMBD_DIR = TEMP_DIR / 'recipes_embeddings'
USER_PROFILE_EMBD_DIR = TEMP_DIR / 'user_profile_embeddings'

In [None]:
user_profiles_save_path = USER_PROFILE_EMBD_DIR / 'user_profiles_embeddings_TfidfVectorizer_ingredients.pt'
# Save the dictionary containing recipe embeddings

with open(user_profiles_save_path,'wb') as file:
    pickle.dump(user_profiles, file)


In [None]:
#########################################
# 6. Recommendation Function: Content-Based + Bayesian Rating
#########################################
def recommend_recipes(user_id, top_k=10, alpha=0.5):
    """
    For a given user, recommend recipes by combining content similarity and Bayesian average.
    
    - Content similarity: cosine similarity between the user's profile (TF-IDF based) and each recipe.
    - Bayesian average: the adjusted quality score.
    
    We combine these by a weighted sum: final_score = alpha * (normalized similarity) + (1-alpha) * (normalized bayesian rating)
    
    Normalization is done over all recipes.
    """
    if user_id not in user_profiles:
        print("User has no high-rated recipes; returning top recipes by Bayesian average.")
        # If no profile available, simply return top recipes by Bayesian average.
        top = recipes_df.sort_values(by='bayesian_avg', ascending=False).head(top_k)
        return top[['id', 'bayesian_avg']]
    
    user_profile = user_profiles[user_id]
    # Compute cosine similarity between user profile and all recipes (TF-IDF vectors)
    similarities = cosine_similarity(user_profile, tfidf_matrix).flatten()  # shape: (n_recipes,)
    
    # Normalize similarity scores between 0 and 1.
    if similarities.max() > 0:
        norm_sim = (similarities - similarities.min()) / (similarities.max() - similarities.min())
    else:
        norm_sim = similarities
    
    # Get Bayesian ratings from the recipes dataframe.
    bayes_scores = recipes_df['bayesian_avg'].values
    # Normalize Bayesian scores between 0 and 1.
    if bayes_scores.max() > 0:
        norm_bayes = (bayes_scores - bayes_scores.min()) / (bayes_scores.max() - bayes_scores.min())
    else:
        norm_bayes = bayes_scores
    
    # Combine the two signals:
    final_scores = alpha * norm_sim + (1 - alpha) * norm_bayes
    
    # Create a DataFrame with recipe id and final score.
    rec_df = pd.DataFrame({
        'id': recipes_df['id'],
        'final_score': final_scores
    })
    # Exclude recipes the user has already interacted with.
    already_seen = set(interactions_df[interactions_df['user_id'] == user_id]['recipe_id'])
    rec_df = rec_df[~rec_df['id'].isin(already_seen)]
    rec_df = rec_df.sort_values(by='final_score', ascending=False).head(top_k)
    # Merge back additional info if desired.
    rec_df = rec_df.merge(recipes_df[['id', 'name', 'ingredients', 'bayesian_avg']], on='id', how='left')
    return rec_df

In [None]:
#########################################
# 7. Example: Generate Recommendations for a Sample User
#########################################
sample_user = interactions_df['user_id'].iloc[0]
print(f"\n--- Recommendations for User {sample_user} ---")
recommendations = recommend_recipes(sample_user, top_k=10, alpha=0.5)
# print(recommendations)
recommendations

# conbined text: name, ingridents, steps

In [None]:
def combine_text(row):
    # Use the 'name' column directly (assumed to be plain text)
    name_text = row['name'] if pd.notnull(row['name']) else ""  
    # Process the 'ingredients' column.
    # It is expected to be a string representation of a Python list.
    try:
        ingredients_list = ast.literal_eval(row['ingredients'])
        ingredients_text = " ".join(ingredients_list)
    except Exception as e:
        ingredients_text = row['ingredients'] if pd.notnull(row['ingredients']) else ""
    # Process the 'steps' column similarly.
    try:
        steps_list = ast.literal_eval(row['steps'])
        steps_text = " ".join(steps_list)
    except Exception as e:
        steps_text = row['steps'] if pd.notnull(row['steps']) else ""    
    # Combine the three parts into one document.
    combined = name_text + " " + ingredients_text + " " + steps_text
    return combined
 

# Create a new column 'ingredients_doc' with the combined text.
recipes_df['combined_text'] = recipes_df.apply(combine_text, axis=1)
# Display a sample of the combined document.
print("\nSample combined recipe text:")
print(recipes_df[['id', 'combined_text']].head())


Sample combined recipe text:
       id                                      combined_text
0  137739  arriba   baked winter squash mexican style win...
1   31490  a bit different  breakfast pizza prepared pizz...
2  112140  all in the kitchen  chili ground beef yellow o...
3   59389  alouette  potatoes spreadable cheese with garl...
4   44061  amish  tomato ketchup  for canning tomato juic...


In [19]:

def combine_text(row, columns):
    """
    Combines text from specified columns into a single structured string.
    
    Special handling for 'nutrition':
      - If the value is a scalar, simply convert it to a string.
      - If it is a list:
          * If it has exactly 7 numbers, format as:
             "Nutrition: ([a, b, c, d, e, f, g]) -> a calories, b total fat, c sugar, d sodium, e protein, f saturated fat, g carbs"
          * Otherwise, map over the available values using nutrient_labels (up to the number available).
    For all other columns, if the value is a string representation of a list,
    it is converted into an actual list and then joined.
    
    Returns:
      A formatted string.
    """
    combined_parts = []
    nutrient_labels = ["calories", "total fat", "sugar", "sodium", "protein", "saturated fat", "carbs"]
    
    for col in columns:
        if col in row and pd.notnull(row[col]):
            try:
                if col.lower() == "nutrition":
                    # For nutrition: if it's a string, try to parse it.
                    if isinstance(row[col], str):
                        value = ast.literal_eval(row[col])
                    else:
                        value = row[col]
                    # If value is numeric, output directly.
                    if isinstance(value, (int, float)):
                        formatted_text = f"{col.capitalize()}: ({value})"
                    elif isinstance(value, list):
                        n = min(len(value), len(nutrient_labels))
                        nutrient_list_str = ", ".join(map(str, value))
                        detailed = ", ".join([f"{value[i]} {nutrient_labels[i]}" for i in range(n)])
                        formatted_text = f"{col.capitalize()}: ([{nutrient_list_str}]) -> {detailed}"
                    else:
                        formatted_text = f"{col.capitalize()}: ({value})"
                else:
                    # For other columns, try to convert string representation of lists into actual lists.
                    if isinstance(row[col], str):
                        try:
                            value = ast.literal_eval(row[col])
                            if isinstance(value, list):
                                value = ", ".join(map(str, value))
                        except Exception:
                            value = row[col]
                    else:
                        value = row[col]
                    formatted_text = f"{col.replace('_', ' ').capitalize()}: ({value})"
            except Exception:
                formatted_text = f"{col.replace('_', ' ').capitalize()}: ({row[col]})"
            combined_parts.append(formatted_text)
    return " | ".join(combined_parts)


columns_to_combine = ['name', 'ingredients', 'nutrition']

recipes_df['combined_addtext'] = recipes_df.apply(lambda row: combine_text(row, columns_to_combine), axis=1)
print(recipes_df['combined_addtext'].dtype)
# Display a sample of the combined document.
print("\nSample combined recipe text:")
print(recipes_df[['id', 'combined_addtext']].head())

object

Sample combined recipe text:
       id                                   combined_addtext
0  137739  Name: (arriba   baked winter squash mexican st...
1   31490  Name: (a bit different  breakfast pizza) | Ing...
2  112140  Name: (all in the kitchen  chili) | Ingredient...
3   59389  Name: (alouette  potatoes) | Ingredients: (spr...
4   44061  Name: (amish  tomato ketchup  for canning) | I...


In [7]:
# We'll assume that a high rating (e.g., rating >= 4) indicates a user "likes" that recipe.
rating_threshold = 4
# For each user, get the list of recipe IDs they rated >= threshold.
user_profiles_combined_text = {}
# Create a mapping from recipe id to its TF-IDF vector (as a row in tfidf_matrix).
# Note: recipes_df index corresponds to the row in tfidf_matrix.
recipe_id_to_index = {rid: idx for idx, rid in enumerate(recipes_df['id'])}

In [8]:
#########################################
# 4. Build a TF-IDF Representation of Recipes
#########################################
vectorizer_combined_text = TfidfVectorizer(stop_words='english', max_features=10000)
# Fit and transform the ingredients documents.
tfidf_matrix_combined_text = vectorizer_combined_text.fit_transform(recipes_df['combined_text'])
print("\nTF-IDF matrix shape:", tfidf_matrix_combined_text.shape)


TF-IDF matrix shape: (231637, 10000)


In [11]:
import torch 

# We'll compute each user's profile from the sparse matrix without converting the whole matrix to dense.
user_profiles_combined_text = {}

# Loop over each user in interactions_df
unique_users = interactions_df['user_id'].unique()
for user, group in tqdm(interactions_df.groupby('user_id'), total=len(unique_users), desc="Building user profiles"):
    # Get the recipe IDs this user rated highly (>= rating_threshold)
    liked_recipe_ids = group[group['rating'] >= rating_threshold]['recipe_id'].tolist()
    
    # Map these recipe IDs to their corresponding row indices in the TF-IDF matrix.
    indices = [recipe_id_to_index[rid] for rid in liked_recipe_ids if rid in recipe_id_to_index]
    
    if indices:
        # Extract only the liked recipe rows from the sparse TF-IDF matrix.
        # This returns a sparse matrix with shape (num_liked, n_features)
        liked_sparse = tfidf_matrix_combined_text[indices, :]
        
        # Compute the mean vector for this user.
        # Sum over axis=0 gives a (1, n_features) sparse matrix.
        user_profile_sparse = liked_sparse.sum(axis=0) / len(indices)
        
        # Convert this 1xF matrix to a dense array.
        user_profile_dense = np.asarray(user_profile_sparse).flatten()
        
        # Convert to a PyTorch tensor (and reshape to 1 x n_features)
        user_profiles_combined_text[user] = torch.tensor(user_profile_dense, dtype=torch.float32).reshape(1, -1)

print("\nComputed user profiles for", len(user_profiles_combined_text), "users.")

Building user profiles: 100%|██████████| 226570/226570 [03:15<00:00, 1157.20it/s]


Computed user profiles for 180310 users.





In [14]:
#########################################
# 6. Recommendation Function: Content-Based + Bayesian Rating
#########################################
def recommend_recipes_combined_text(user_id, top_k=10, alpha=0.5):
    """
    For a given user, recommend recipes by combining:
      - Content similarity: cosine similarity between the user's profile (TF-IDF based) and each recipe.
      - Bayesian average: the adjusted quality score.
    
    We compute:
       final_score = alpha * (normalized similarity) + (1 - alpha) * (normalized bayesian rating)
    
    Normalization is done over all recipes. Recipes the user has already interacted with are excluded.
    
    Parameters:
      user_id (int): The ID of the user.
      top_k (int): Number of recipes to return.
      alpha (float): Weight for the content similarity score.
    
    Returns:
      DataFrame: Recommended recipes with columns: id, name, ingredients, bayesian_avg, final_score.
    """
    # If the user doesn't have a computed profile, return top recipes by Bayesian average.
    if user_id not in user_profiles_combined_text:
        print("User has no high-rated recipes; returning top recipes by Bayesian average.")
        top = recipes_df.sort_values(by='bayesian_avg', ascending=False).head(top_k)
        return top[['id', 'bayesian_avg']]
    
    # Get user profile (stored as a PyTorch tensor) and convert it to NumPy.
    user_profile = user_profiles_combined_text[user_id]  # shape: (1, feature_dim)
    user_profile_np = user_profile.cpu().numpy()
    
    # Compute cosine similarity between the user profile and all recipes in the TF-IDF matrix.
    # Note: tfidf_matrix_combined_text remains in sparse format.
    similarities = cosine_similarity(user_profile_np, tfidf_matrix_combined_text).flatten()  # shape: (n_recipes,)
    
    # Normalize similarity scores between 0 and 1.
    if similarities.max() > similarities.min():
        norm_sim = (similarities - similarities.min()) / (similarities.max() - similarities.min())
    else:
        norm_sim = similarities
    
    # Extract Bayesian average scores from recipes_df.
    bayes_scores = recipes_df['bayesian_avg'].values.astype(np.float32)
    if bayes_scores.max() > bayes_scores.min():
        norm_bayes = (bayes_scores - bayes_scores.min()) / (bayes_scores.max() - bayes_scores.min())
    else:
        norm_bayes = bayes_scores
    
    # Combine the two signals via weighted sum.
    final_scores = alpha * norm_sim + (1 - alpha) * norm_bayes
    
    # Build a DataFrame with recipe ids and the computed final score.
    rec_df = pd.DataFrame({
        'id': recipes_df['id'],
        'final_score': final_scores
    })
    
    # Exclude recipes already seen by the user.
    already_seen = set(interactions_df[interactions_df['user_id'] == user_id]['recipe_id'])
    rec_df = rec_df[~rec_df['id'].isin(already_seen)]
    
    # Sort the recipes by final_score (highest first) and select the top_k.
    rec_df = rec_df.sort_values(by='final_score', ascending=False).head(top_k)
    
    # Merge additional recipe details for display.
    rec_df = rec_df.merge(recipes_df[['id', 'name', 'ingredients', 'bayesian_avg']], on='id', how='left')
    return rec_df

In [16]:
#########################################
# 7. Example: Generate Recommendations for a Sample User
#########################################
sample_user = interactions_df['user_id'].iloc[10]
recommendations = recommend_recipes_combined_text(sample_user, top_k=10, alpha=0.5)
print("\n--- Recommended Recipes for User", sample_user, "---")
# print(recommendations)
recommendations


--- Recommended Recipes for User 353911 ---


Unnamed: 0,id,final_score,name,ingredients,bayesian_avg
0,81375,0.841998,honey barbecue chicken strips,"['boneless skinless chicken', 'flour', 'salt',...",4.491102
1,256223,0.831355,pan fried fish with a rich lemon butter sauce,"['dry white wine', 'lemon juice', 'garlic', 's...",4.600787
2,307723,0.830257,sour cream fish fillets,"['fish fillet', 'fresh mushrooms', 'onion', 'b...",4.854802
3,139740,0.829309,creamy fish and mushroom pie,"['mushrooms', 'cod', 'milk', 'floury potatoes'...",4.764406
4,445985,0.823904,kentucky fried chicken honey bbq chicken strips,"['chicken strips', 'flour', 'pepper', 'butterm...",4.313559
5,467604,0.822947,lemon cod with mushrooms,"['cod fish fillet', 'white flour', 'italian br...",4.607344
6,325893,0.81994,fish meuniere with capers,"['all-purpose flour', 'sole fillets', 'salt', ...",4.831719
7,165856,0.816551,chicken fried fish fingers,"['orange roughy', 'buttermilk', 'egg', 'self-r...",4.705508
8,138647,0.812323,beer battered fish with tartar sauce,"['mayonnaise', 'dijon mustard', 'scallion', 'l...",4.912881
9,14174,0.809187,crispy fish in chili sauce,"['fish fillets', 'plain flour', 'cornflour', '...",4.852754


In [19]:
#########################################
# 4. Build a TF-IDF Representation of Recipes
#########################################

vectorizer = TfidfVectorizer(stop_words='english', max_features=10000)

# Convert the 'ingredients_doc' column to a list.
documents = recipes_df['ingredients_doc'].tolist()
# Wrap the documents with tqdm to show progress.
documents_with_progress = tqdm(documents, desc="Processing documents", total=len(documents))
# Fit and transform the ingredients documents.
tfidf_matrix = vectorizer.fit_transform(documents_with_progress)
print("\nTF-IDF matrix shape:", tfidf_matrix.shape)

Processing documents: 100%|██████████| 231637/231637 [00:22<00:00, 10486.24it/s]



TF-IDF matrix shape: (231637, 4158)


In [None]:
#########################################
# New Function: Recommend Recipes by User-Provided Ingredients
#########################################
def recommend_recipes_by_ingredients(user_ingredients, top_k=10, alpha=0.5):
    """
    Given a list of ingredients from a user, return recommended recipes.
    
    The function works by:
      1. Converting the list of input ingredients into a query string.
      2. Transforming this query using the TF-IDF vectorizer.
      3. Computing cosine similarity between the query and all recipe ingredients.
      4. Combining the normalized similarity score with the normalized Bayesian average rating
         (which reflects other users' experiences).
    
    Parameters:
      - user_ingredients: list of ingredient strings (e.g., ["tomato", "basil", "garlic"])
      - top_k: number of recipes to return
      - alpha: weight for the content similarity (with 1 - alpha for Bayesian rating)
    
    Returns:
      - DataFrame with recommended recipes and relevant fields.
    """
    # Step 1: Combine the input ingredients into one string.
    # Optionally, you can remove spaces from within ingredient names if needed.
    query_str = " ".join([ingredient.strip() for ingredient in user_ingredients])
    
    # Step 2: Transform the query using the same TF-IDF vectorizer.
    query_vector = vectorizer.transform([query_str])
    
    # Step 3: Compute cosine similarity between the query vector and the TF-IDF matrix.
    similarity_scores = cosine_similarity(query_vector, tfidf_matrix).flatten()
    
    # Store the raw similarity scores in the DataFrame (optional).
    recipes_df['similarity'] = similarity_scores
    
    # Step 4: Normalize similarity scores (min-max normalization).
    sim_min, sim_max = similarity_scores.min(), similarity_scores.max()
    if sim_max > sim_min:
        norm_sim = (similarity_scores - sim_min) / (sim_max - sim_min)
    else:
        norm_sim = similarity_scores
    
    # Get the Bayesian average ratings from recipes_df.
    bayes_scores = recipes_df['bayesian_avg'].values
    bayes_min, bayes_max = bayes_scores.min(), bayes_scores.max()
    if bayes_max > bayes_min:
        norm_bayes = (bayes_scores - bayes_min) / (bayes_max - bayes_min)
    else:
        norm_bayes = bayes_scores
    
    # Step 5: Combine the two signals into a final score.
    # final_score = alpha * (normalized content similarity) + (1 - alpha) * (normalized Bayesian rating)
    final_scores = alpha * norm_sim + (1 - alpha) * norm_bayes
    recipes_df['final_score'] = final_scores
    
    # Step 6: Get the top_k recipes (optionally, you may filter out recipes with low similarity).
    recommended_df = recipes_df.sort_values(by='final_score', ascending=False).head(top_k)
    
    # Return a subset of columns that are useful for display.
    return recommended_df[['id', 'name', 'minutes', 'tags', 'steps', 'bayesian_avg', 'similarity', 'final_score']]



In [21]:
#########################################
# 8. Example: Generate Recommendations Based on User Ingredients
#########################################
# For instance, suppose a user inputs the following ingredients:
user_input_ingredients = ["tomato", "basil", "garlic", "olive oil"]

recommended_recipes = recommend_recipes_by_ingredients(user_input_ingredients, top_k=10, alpha=0.5)
print("\n--- Recommended Recipes Based on Input Ingredients ---")
print(recommended_recipes)


--- Recommended Recipes Based on Input Ingredients ---
            id                                      name  minutes  \
112561  111113            italian dipping oil  for bread       10   
143723  279055  nicole s easy meatless mushroom marinara       45   
82806    92923                         favorite croutons       40   
18012   192320                barefoot contessa s pistou       10   
193423  120482                      spaghetti meat sauce       75   
143697   72283       nick stellino s no cook pizza sauce        2   
187184  434393               simple roasted garlic pesto       40   
77079   394646  easy marinara dipping sauce for the lazy       10   
90693   466483                garlic wine marinara sauce       30   
30642    62359                            bruschetta  16       10   

                                                     tags  \
112561  ['15-minutes-or-less', 'time-to-make', 'course...   
143723  ['60-minutes-or-less', 'time-to-make', 'course...   
8