## Loading Data and Libraries

In [3]:
import numpy as np
import pickle
import pandas as pd
from sklearn.decomposition import IncrementalPCA
from sklearn.metrics.pairwise import cosine_similarity

# Load Data
flavor_embedded_path = "flavor_embedded.pkl"
menu_embedded_path = "menu_embedded.pkl"
user_dataset_path = "user_dataset.csv"

with open(flavor_embedded_path, "rb") as f:
    flavor_embedded = pickle.load(f)

with open(menu_embedded_path, "rb") as f:
    menu_embedded = pickle.load(f)

# Defining the column
embedding_col = "Embeddings"

# Convert "Element" column to lowercase for consistent matching
flavor_embedded["Element"] = flavor_embedded["Element"].str.lower().str.strip()

## (Main 1) Applying IPCA

In [4]:
# Function to apply IPCA and ensure 1 × 300D output
def aggregate_embeddings_ipca(embedding_list):
    """
    Aggregates multiple ingredient embeddings into a single vector using IPCA.
    Ensures output is always 1 × 300D.
    """
    if isinstance(embedding_list, list) and len(embedding_list) > 0:
        embedding_array = np.array(embedding_list)
        
        # Ensure embeddings are 2D and contain valid data
        if embedding_array.ndim == 2 and embedding_array.shape[0] > 1:
            ipca = IncrementalPCA(n_components=1, batch_size=min(embedding_array.shape[0], 50))
            ipca_embedding = ipca.fit_transform(embedding_array.T).flatten()  # Ensure 1 × 300D
            
            # Fix cases where IPCA might return fewer than 300D
            if len(ipca_embedding) < 300:
                ipca_embedding = np.pad(ipca_embedding, (0, 300 - len(ipca_embedding)), mode='constant')
            
            return ipca_embedding
        elif embedding_array.ndim == 2 and embedding_array.shape[0] == 1:
            return embedding_array.flatten()  # Use single embedding if only one available
    
    return np.zeros(300)  # Default zero vector for empty cases

# Apply the fixed IPCA aggregation to both datasets
menu_embedded["Aggregated_Embeddings"] = menu_embedded[embedding_col].apply(aggregate_embeddings_ipca)
flavor_embedded["Aggregated_Embeddings"] = flavor_embedded[embedding_col].apply(aggregate_embeddings_ipca)

## Loading User Dataset

In [5]:
import ast

user_dataset = pd.read_csv(user_dataset_path)


def safe_convert_embedding(x):
    if isinstance(x, str):  # Check if the input is a string (stored as text in CSV)
        try:
            # Ensure commas exist before evaluating
            formatted_x = x.replace(" ", ",") if " " in x and "," not in x else x
            return np.array(ast.literal_eval(formatted_x))
        except (SyntaxError, ValueError):  # Handle cases where parsing fails
            print(f"Error parsing embedding: {x}")
            return np.zeros(300)  # Default to zero vector if parsing fails
    return x  # If already a NumPy array, return as is

## (Main 2) Using Cosine Similarity to match dishes

In [6]:
# Retrieve Matching Menus Using Cosine Similarity
def retrieve_matching_menus(flavor_inputs, user_id=None, top_n=5):
    """
    Finds the top N menu items that best match the given flavor inputs.
    Adjusts ranking based on user preferences if user_id is provided.
    """
    # Convert input flavors to lowercase
    flavor_inputs = [flavor.lower().strip() for flavor in flavor_inputs]
    
    # Retrieve the corresponding aggregated embeddings for all flavor inputs
    flavor_rows = flavor_embedded[flavor_embedded["Element"].isin(flavor_inputs)]
    if flavor_rows.empty:
        return pd.DataFrame({"Error": [f"No matching flavors found for {flavor_inputs}."]})

    # Compute mean of all selected flavor embeddings to form a single input vector
    flavor_vectors = np.stack(flavor_rows["Aggregated_Embeddings"].values)
    combined_flavor_vector = np.mean(flavor_vectors, axis=0).reshape(1, -1)

    # Compute cosine similarity with all menu items
    menu_vectors = np.stack(menu_embedded["Aggregated_Embeddings"].dropna().values)  # Stack menu embeddings
    similarities = cosine_similarity(combined_flavor_vector, menu_vectors)[0]

    # Adjust ranking based on user preferences
    if user_id is not None:
        user_embedding = get_user_embedding(user_id)
        similarities += cosine_similarity(user_embedding.reshape(1, -1), menu_vectors)[0] * 0.1  # Small weight adjustment

    # Get top menu matches
    top_indices = similarities.argsort()[-top_n:][::-1]  # Get top indices sorted by similarity
    top_menus = menu_embedded.iloc[top_indices][["menu_item", "description", "ingredients_mapped", "dish_id"]]
    top_menus["Similarity"] = similarities[top_indices]

    return top_menus

## (Main 3) Learn/ Update User Interaction Using BPR

In [7]:
# Get user embedding (default to neutral if new user)
def get_user_embedding(user_id):
    if user_id in user_dataset["user_id"].values:  # Just reading user_dataset
        return np.array(user_dataset.loc[user_dataset["user_id"] == user_id, "user_embedding"].values[0])
    
    return np.zeros(300)  # Default to neutral vector

# Update User Preferences Using BPR
def update_user_preference(user_id, dish_id, feedback):
    """
    This function remember or add new user to learn weight for each interaction
    """
    global user_dataset

    # Check if user exists in dataset
    if user_id not in user_dataset["user_id"].values:
        user_vector = np.zeros(300)
        new_user = pd.DataFrame([[user_id, user_vector.tolist(), [], []]], 
                                columns=["user_id", "user_embedding", "liked_dishes", "disliked_dishes"])
        user_dataset = pd.concat([user_dataset, new_user], ignore_index=True)

    # Retrieve user embedding and update it dynamically
    user_idx = user_dataset[user_dataset["user_id"] == user_id].index[0]
    user_embedding = np.array(user_dataset.at[user_idx, "user_embedding"])
    
    # Get dish embedding
    dish_embedding = menu_embedded.loc[menu_embedded["dish_id"] == dish_id, "Aggregated_Embeddings"]
    
    if dish_embedding.empty:
        print(f"Dish ID {dish_id} not found in dataset.")
        return
    
    dish_embedding = np.array(dish_embedding.values[0])

    # Bayesian Personalized Ranking (BPR) update
    if feedback == "positive":
        user_embedding += 0.05 * (dish_embedding - user_embedding)  # Move user vector closer to liked dish
        user_dataset.at[user_idx, "liked_dishes"] = user_dataset.at[user_idx, "liked_dishes"] + [dish_id]
    else:
        user_embedding -= 0.05 * (dish_embedding - user_embedding)  # Move user vector away from disliked dish
        user_dataset.at[user_idx, "disliked_dishes"] = user_dataset.at[user_idx, "disliked_dishes"] + [dish_id]
    
    # Save updated user embedding
    user_dataset.at[user_idx, "user_embedding"] = user_embedding.tolist()
    
    # Save back to CSV
    user_dataset.to_csv(user_dataset_path, index=False)
    print(f"User {user_id} preference updated based on {feedback} feedback for Dish {dish_id}.")

## Running the system

In [8]:
# --- Simulating the System ---
def interactive_session():
    user_id = input("Enter your User ID: ")
    
    flavor_choice = input("Enter a flavor (e.g., 'sweet', 'sour'): ").strip().lower()
    recommended_dishes = retrieve_matching_menus([flavor_choice], user_id=user_id)
    
    if recommended_dishes.empty:
        print("No recommendations found.")
        return
    
    print("\nRecommended Dishes:")
    print(recommended_dishes)

    selected_dish_id = int(input("Enter the Dish_ID of the dish you want to pick: "))
    feedback = input("Did you like the dish? (positive/negative): ").strip().lower()

    if feedback in ["positive", "negative"]:
        update_user_preference(user_id, selected_dish_id, feedback)
    else:
        print("Invalid input. Please enter 'positive' or 'negative'.")

# Run interactive system
interactive_session()

Enter your User ID:  1
Enter a flavor (e.g., 'sweet', 'sour'):  sour



Recommended Dishes:
                 menu_item                                        description  \
26          Makdous Fatteh  Ground beef yogurt tahini pomegranate molasses...   
290  Shashlik Paneer Tikka  Soft and creamy cubes of cottage cheese marina...   
291  Gilafi Chicken Kebabs  Minced Chicken marinated with rosemary and cho...   
479      Chana Chaat Papri  A street food classic, made with chickpeas cra...   
292     Pepper Corn Prawns  Tiger prawns, marinated with lemon yogurt and ...   

                                    ingredients_mapped  dish_id  Similarity  
26   ground_beef, yogurt, tahini, pomegranate_molas...       27    0.721379  
290  panir, bell_pepper, pineapple, onion, yogurt, ...      291    0.710954  
291  minced_chicken, rosemary, bell_pepper, yogurt,...      292    0.698447  
479  chickpea, paprika, frozen_vegetable, yogurt, t...      480    0.694142  
292  king_prawn, lemon_juice, yogurt, garlic, ginge...      293    0.682456  


Enter the Dish_ID of the dish you want to pick:  26
Did you like the dish? (positive/negative):  positive


User 1 preference updated based on positive feedback for Dish 26.
