In [1]:
!python3 /kaggle/input/bundle/generate_dataset.py

Found sheets: ['orders', 'inventory']
→ Using Orders sheet:   'orders'
→ Using Inventory sheet:'inventory'

📊 ORDERS DATA QUALITY SUMMARY:
   Total records: 165,319
   Date range: 2024-05-21 09:41:56.330000 to 2025-05-21 09:32:37.943000
   Unique orders: 81,775
   Unique SKUs: 67,907
   Records with discounts: 118,349
✅ Saved transformed orders → orders_data.csv (165,319×23)
   Quantity range: 0 to 167

📦 INVENTORY DATA QUALITY SUMMARY:
   Total SKUs: 6,825
   SKUs in stock: 6,049
   SKUs out of stock: 776
   Total inventory units: 23,032
   Stock level distribution:
     Critical_Low: 5,623
     Out_of_Stock: 776
     Low: 417
     Medium: 9

   Examples: ['9-5277002118|EB5690|5A', '9-3367010721|EE0018|9-12', '2-8738011201', '3RE2643|LP40|TU', '9-4768000763|EE0019|32']
✅ Saved transformed inventory → inventory_data.csv (6,825×4)

🎯 DATA PREPROCESSING COMPLETE!
   Files ready for bundling analysis:
   • orders_data.csv
   • inventory_data.csv


In [2]:
!pip install faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.8 kB)
Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl (31.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.3/31.3 MB[0m [31m55.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.11.0


In [3]:
import pandas as pd
import numpy as np
# SentenceTransformer is imported here because the pre-computation utility is now in this file
# It will ONLY be used if the embedding files need to be created for the first time.
from sentence_transformers import SentenceTransformer
import faiss # Using FAISS as requested
import warnings
import os
from itertools import combinations # For generating 3-item bundle combinations
import random # For picking users

warnings.filterwarnings('ignore')

# Define Kaggle paths globally
# Cache files for the title embeddings: the .npy holds the embedding matrix and the
# .json holds the SKU list saved alongside it by create_and_save_embeddings_utility.
KAGGLE_WORKING_DIR = "/kaggle/working/"
EMBEDDINGS_PATH = os.path.join(KAGGLE_WORKING_DIR, 'product_embeddings.npy')
SKUS_JSON_PATH = os.path.join(KAGGLE_WORKING_DIR, 'product_skus.json')

# Input file paths
# orders_data.csv / inventory_data.csv are the files reported as saved by the
# generate_dataset.py step in the first cell; the suggestions CSV supplies the
# per-SKU 'similar_items' lists used as bundle candidates.
ORDERS_FILE_PATH = '/kaggle/working/orders_data.csv'
INVENTORY_FILE_PATH = '/kaggle/working/inventory_data.csv'
SUGGESTIONS_FILE_PATH = '/kaggle/input/bundle/final_dataset_with_suggestions2.csv'


# Main categories (the text before the first '/' of a Category value) that are treated
# as high margin; bundles containing them receive a smaller discount.
# "Προϊόντα Ομορφιάς" is Greek for "Beauty Products".
HIGH_MARGIN_CATEGORIES = {"Προϊόντα Ομορφιάς", "BEAUTY"}

# --- 0. UTILITY FUNCTION FOR ONE-TIME EMBEDDING PRE-COMPUTATION ---
def create_and_save_embeddings_utility(
    orders_filepath=ORDERS_FILE_PATH,
    output_embeddings_path=EMBEDDINGS_PATH,
    output_skus_path=SKUS_JSON_PATH
    ):
    """Encode every unique product title once and cache the result on disk.

    Reads the orders CSV, keeps the first 'Item title' seen for each SKU, encodes the
    titles with the 'all-MiniLM-L6-v2' SentenceTransformer model and writes two files:
    the embedding matrix (``np.save``) and the SKU list in the same row order (JSON).

    Args:
        orders_filepath: CSV containing at least 'SKU' and 'Item title' columns.
        output_embeddings_path: Destination of the embedding matrix.
        output_skus_path: Destination of the JSON SKU list.

    Returns:
        True when both files were written, False when the orders file is missing or
        contains no products.
    """
    print("🚀 Starting one-time embedding generation for FAISS...")
    try:
        df_orders = pd.read_csv(orders_filepath)
    except FileNotFoundError:
        print(f"❌ Error: The file '{orders_filepath}' was not found for pre-computation.")
        return False

    products_to_embed = (
        df_orders[['SKU', 'Item title']]
        .drop_duplicates(subset=['SKU'])
        .set_index('SKU')
    )
    if products_to_embed.empty:
        print(f"❌ No products found in '{orders_filepath}' to embed.")
        return False

    # Create the directories of the paths actually requested (not a hard-coded one),
    # and do it before the expensive encoding so an unwritable target fails fast.
    for out_path in (output_embeddings_path, output_skus_path):
        parent_dir = os.path.dirname(out_path)
        if parent_dir:  # a bare file name has no directory component to create
            os.makedirs(parent_dir, exist_ok=True)

    print("  > Loading SentenceTransformer model (all-MiniLM-L6-v2) for pre-computation...")
    model = SentenceTransformer('all-MiniLM-L6-v2')
    print(f"  > Encoding {len(products_to_embed)} product titles...")
    # Missing titles are encoded as the empty string so every SKU still gets a row.
    titles = products_to_embed['Item title'].fillna('').tolist()
    embeddings_array = model.encode(titles, show_progress_bar=True)

    np.save(output_embeddings_path, embeddings_array)
    products_to_embed.index.to_series().to_json(output_skus_path, orient='values')
    print(f"✔ Embeddings saved to '{output_embeddings_path}' & SKU list to '{output_skus_path}'")
    return True

# --- UTILITY FUNCTIONS (for vector math & Jaccard) ---
def cosine_similarity_numpy(vec1, vec2):
    """Return the cosine similarity of two vectors, or 0.0 when it is undefined.

    The similarity is treated as undefined (0.0) when either input is None, is a
    scalar (a missing embedding shows up as a scalar NaN after a pandas ``map``),
    contains NaN/inf, or has zero length.

    Args:
        vec1: Array-like vector, or None / NaN for "no vector".
        vec2: Array-like vector, or None / NaN for "no vector".

    Returns:
        dot(vec1, vec2) / (|vec1| * |vec2|) computed in float32, or 0.0.
    """
    if vec1 is None or vec2 is None:
        return 0.0
    vec1 = np.asarray(vec1, dtype=np.float32)
    vec2 = np.asarray(vec2, dtype=np.float32)
    # A 0-d input is a placeholder (typically NaN), not a vector; without this guard
    # np.dot would broadcast it and hand back an array of NaN instead of a score.
    if vec1.ndim == 0 or vec2.ndim == 0:
        return 0.0
    # Non-finite components would turn the result (and every score built on it) into NaN.
    if not (np.isfinite(vec1).all() and np.isfinite(vec2).all()):
        return 0.0
    norm_vec1 = np.linalg.norm(vec1)
    norm_vec2 = np.linalg.norm(vec2)
    if norm_vec1 == 0 or norm_vec2 == 0:
        return 0.0
    return np.dot(vec1, vec2) / (norm_vec1 * norm_vec2)

def jaccard_similarity_sets(set1, set2):
    """Jaccard index |A ∩ B| / |A ∪ B| of two sets.

    Returns 0.0 when either argument is not a ``set`` instance or when either set
    is empty.
    """
    for operand in (set1, set2):
        if not isinstance(operand, set):
            return 0.0
    if len(set1) == 0 or len(set2) == 0:
        return 0.0
    shared_count = len(set1 & set2)
    combined_count = len(set1 | set2)
    if combined_count > 0:
        return shared_count / combined_count
    return 0.0

# --- 1. SETUP: LOAD ALL DATA & PRE-COMPUTED EMBEDDINGS ---
def load_and_engineer_data(
    orders_filepath, inventory_filepath, suggestions_filepath,
    embeddings_filepath, skus_json_filepath):
    """Load all inputs and build the master product table plus per-user profiles.

    The product table is indexed by SKU (one row per SKU) and carries:
    ItemTitle, Category, AvgUnitPrice (from the orders), similar_items (from the
    suggestions file), NameEmbedding (from the cached embeddings), SalesVelocity
    (units per day over the last 90 days of orders), Quantity (inventory units,
    summed per SKU), StockToSalesRatio, StockLevelScore (percentile rank of that
    ratio) and PurchaseVector (set of UserIDs that bought the SKU).

    Args:
        orders_filepath: Orders CSV ('SKU', 'Item title', 'Category',
            'FinalUnitPrice', 'CreatedDate', 'Quantity', 'UserID' are read).
        inventory_filepath: Inventory CSV ('SKU', 'Quantity' are read).
        suggestions_filepath: CSV with 'SKU' and 'similar_items' columns.
        embeddings_filepath: ``.npy`` matrix with one embedding row per cached SKU.
        skus_json_filepath: JSON list of SKUs in the row order of the matrix.

    Returns:
        ``(products, user_profiles)``; ``user_profiles`` maps UserID to
        ``{'profile_vector': mean embedding or None, 'avg_price': float}``.
        ``(None, None)`` is returned when a file is missing, when the embedding cache
        is inconsistent, or when no embedding could be mapped to any product.
    """
    print("🚀 Loading all data and pre-computed embeddings...")
    try:
        orders_df = pd.read_csv(orders_filepath, parse_dates=['CreatedDate'])
        inventory_df = pd.read_csv(inventory_filepath)
        suggestions_df = pd.read_csv(suggestions_filepath)
        precomputed_embeddings_array = np.load(embeddings_filepath)
        precomputed_skus = pd.read_json(skus_json_filepath, typ='series')
    except FileNotFoundError as e: # Error printed in main
        print(f"Initial data loading error: {e}")
        return None, None

    # A SKU list and matrix of different lengths mean the two cache files do not
    # belong together; report it instead of failing inside the Series constructor.
    if len(precomputed_skus) != len(precomputed_embeddings_array):
        print(
            f"Critical error: {len(precomputed_skus)} cached SKUs but "
            f"{len(precomputed_embeddings_array)} cached embeddings."
        )
        return None, None

    print("  > Engineering master product DataFrame...")
    # groupby already yields a frame indexed by (and named) 'SKU'.
    products = orders_df.groupby('SKU').agg(
        ItemTitle=('Item title', 'first'),
        Category=('Category', 'first'),
        AvgUnitPrice=('FinalUnitPrice', 'mean'),
    )

    if 'SKU' in suggestions_df.columns and 'similar_items' in suggestions_df.columns:
        if products.index.dtype == 'object':
            # Align key types so numeric-looking SKUs still match the string index.
            suggestions_df = suggestions_df.assign(SKU=suggestions_df['SKU'].astype(str))
        similar_items_by_sku = (
            suggestions_df.drop_duplicates(subset=['SKU']).set_index('SKU')[['similar_items']]
        )
    else:
        if 'SKU' not in suggestions_df.columns:
            print("Warning: 'SKU' column not found in suggestions_df. Cannot merge 'similar_items'.")
        else:
            print("Warning: 'similar_items' column not found in suggestions_df. Cannot merge 'similar_items'.")
        similar_items_by_sku = pd.DataFrame(index=products.index)
        similar_items_by_sku['similar_items'] = np.nan
    products = products.join(similar_items_by_sku, how='left')

    embedding_map = pd.Series(list(precomputed_embeddings_array), index=precomputed_skus)
    products['NameEmbedding'] = products.index.to_series().map(embedding_map)
    if products['NameEmbedding'].isnull().all(): # Error printed in main
        print("Critical error: No embeddings mapped.")
        return None, None
    print(f"    Successfully mapped {products['NameEmbedding'].notna().sum()} pre-computed embeddings.")

    print("  > Calculating stock scores...")
    velocity_window_days = 90
    max_date = orders_df['CreatedDate'].max()
    recent_date_threshold = (
        max_date - pd.Timedelta(days=velocity_window_days) if pd.notna(max_date) else pd.Timestamp.min
    )
    recent_sales = orders_df[orders_df['CreatedDate'] >= recent_date_threshold]
    sales_velocity = (
        recent_sales.groupby('SKU')['Quantity'].sum() / velocity_window_days
    ).rename('SalesVelocity')
    # Sum inventory per SKU: several inventory rows for one SKU would otherwise
    # duplicate the product row in the left join and make the SKU index non-unique,
    # which breaks every later products.loc[sku] lookup.
    stock_on_hand = inventory_df.groupby('SKU')['Quantity'].sum()
    products = products.join(sales_velocity, how='left').join(stock_on_hand, how='left')
    products = products.fillna({'SalesVelocity': 0, 'Quantity': 0})
    # The epsilon keeps never-sold items finite; they end up with the highest ratios.
    products['StockToSalesRatio'] = products['Quantity'] / (products['SalesVelocity'] + 1e-6)
    products['StockLevelScore'] = products['StockToSalesRatio'].rank(pct=True).fillna(0)

    print("  > Engineering purchase vectors...")
    user_item_interactions = orders_df.groupby('SKU')['UserID'].apply(set).to_dict()
    products['PurchaseVector'] = [user_item_interactions.get(sku, set()) for sku in products.index]
    print("✔ Master product features complete.")

    print("🚀 Creating user profile vectors...")
    user_profiles = {}
    sku_to_embedding = products['NameEmbedding'].dropna().to_dict()
    user_item_groups = orders_df.dropna(subset=['UserID', 'SKU', 'FinalUnitPrice']).groupby('UserID')
    for user_id, group in user_item_groups:
        # One embedding per purchased order line, so repeat purchases weigh more.
        item_embeddings = [sku_to_embedding[sku] for sku in group['SKU'] if sku in sku_to_embedding]
        profile_vec = np.mean(item_embeddings, axis=0) if item_embeddings else None
        unit_prices = group['FinalUnitPrice']
        user_profiles[user_id] = {
            'profile_vector': profile_vec,
            'avg_price': unit_prices.mean() if not unit_prices.empty else 0,
        }
    print(f"✔ Created {len(user_profiles)} user profiles.")
    return products, user_profiles

def build_faiss_index_from_loaded_embeddings(products_df_with_embeddings):
    """Build a flat L2 FAISS index over the L2-normalised product embeddings.

    Rows whose 'NameEmbedding' is missing are skipped.

    Returns:
        ``(index, skus)`` where ``skus[i]`` is the SKU of the i-th indexed vector,
        or ``(None, None)`` when there is nothing usable to index.
    """
    print("🚀 Building FAISS index from loaded embeddings...")
    usable_embeddings = products_df_with_embeddings['NameEmbedding'].dropna()
    if len(usable_embeddings) == 0:
        return None, None

    embedding_matrix = np.array(usable_embeddings.tolist()).astype('float32')
    indexed_skus = usable_embeddings.index.tolist()
    # Anything that is not a non-empty (n_items, dim) matrix cannot be indexed.
    if embedding_matrix.ndim != 2 or embedding_matrix.shape[0] == 0:
        return None, None

    # Normalised in place before being added to the index.
    faiss.normalize_L2(embedding_matrix)
    flat_index = faiss.IndexFlatL2(embedding_matrix.shape[1])
    flat_index.add(embedding_matrix)
    print(f"✔ FAISS index built with {flat_index.ntotal} items.")
    return flat_index, indexed_skus

# --- 2. REFACTOR: CALCULATE SCORE FOR A SINGLE CANDIDATE ITEM ---
def calculate_single_candidate_score(
    candidate_sku: str, target_sku_for_context: str, products_df: pd.DataFrame,
    weights: dict, user_id: str, user_profiles: dict
    ):
    """Score one candidate item against a target item for a given user.

    Seven signals are combined as a weighted sum and divided by the sum of the
    positive weights of those signals:
    'purchase' (Jaccard overlap of buyers), 'price_context' (price closeness to the
    target), 'name' (embedding similarity to the target), 'category' (exact category
    match), 'user_taste' (embedding similarity to the user's profile vector),
    'price_affinity' (price closeness to the user's average price) and
    'stock_level' (the candidate's StockLevelScore).

    Returns 0.0 when either SKU is absent from ``products_df`` or when no signal has
    a positive weight.
    """
    known_skus = products_df.index
    if not (candidate_sku in known_skus and target_sku_for_context in known_skus):
        return 0.0

    candidate = products_df.loc[candidate_sku]
    target = products_df.loc[target_sku_for_context]
    profile = user_profiles.get(user_id, {})
    taste_vector = profile.get('profile_vector')
    typical_price = profile.get('avg_price', 0)

    # (weight key, signal value) pairs, kept in the order they are summed.
    signals = (
        ('purchase', jaccard_similarity_sets(target.get('PurchaseVector'), candidate.get('PurchaseVector'))),
        ('price_context', np.exp(-10 * (abs(target.AvgUnitPrice - candidate.AvgUnitPrice) / (target.AvgUnitPrice + 1e-6)))),
        ('name', cosine_similarity_numpy(target.NameEmbedding, candidate.NameEmbedding)),
        ('category', 1.0 if target.Category == candidate.Category else 0.0),
        ('user_taste', cosine_similarity_numpy(candidate.NameEmbedding, taste_vector)),
        ('price_affinity', np.exp(-5 * (abs(candidate.AvgUnitPrice - typical_price) / (typical_price + 1e-6))) if typical_price > 0 else 0.0),
        ('stock_level', candidate.StockLevelScore),
    )

    weighted_terms = [value * weights.get(key, 0) for key, value in signals]
    weighted_total = weighted_terms[0]
    for term in weighted_terms[1:]:
        weighted_total = weighted_total + term

    signal_keys = [key for key, _ in signals]
    active_weight = sum(
        weight for key, weight in weights.items() if key in signal_keys and weight > 0
    )
    if active_weight > 0:
        return weighted_total / active_weight
    return 0.0

# --- 3. NEW MAIN FUNCTION FOR SMART BUNDLING ---
def suggest_smart_bundle_for_user(
    target_item_name: str, user_id: str, products_df: pd.DataFrame,
    user_profiles: dict, weights: dict,
    bundle_score_bonus_3_item: float = 1.1, 
    min_quality_threshold: float = 0.3, 
    prefer_3_item_score_ratio: float = 1.0 
    ):
    """Suggest a personalised 2- or 3-item bundle built around a target product.

    The target is looked up by case-insensitive title (first match wins). Its
    '|'-separated 'similar_items' titles are resolved to SKUs, each alternative is
    scored once with ``calculate_single_candidate_score``, and the best
    target+1 and target+2 combinations are compared. A 3-item bundle scores the mean
    of its two alternatives times ``bundle_score_bonus_3_item``.

    Args:
        target_item_name: Title of the product the bundle is built around.
        user_id: User whose profile personalises the scores.
        products_df: Master product table indexed by SKU.
        user_profiles: Mapping of user id to profile dict.
        weights: Signal weights forwarded to the scorer.
        bundle_score_bonus_3_item: Multiplier applied to 3-item bundle scores.
        min_quality_threshold: Minimum score for a bundle to be offered.
        prefer_3_item_score_ratio: The 3-item bundle wins when its score is at least
            this ratio of the best 2-item score.

    Returns:
        A dict with either an ``"error"`` key (target or alternatives not
        resolvable), a ``"message"`` key (no bundle reached the threshold), or the
        bundle description: ``bundle_type``, ``bundle_skus``, ``bundle_item_titles``,
        ``personalized_score``, ``sum_individual_prices``, ``bundle_price``,
        ``total_discount_applied``, ``savings``, ``target_sku_for_reference``.
    """
    title_matches = products_df[products_df['ItemTitle'].str.lower() == target_item_name.lower()]
    if title_matches.empty:
        return {"error": f"Item '{target_item_name}' not found."}
    target_item_row = title_matches.iloc[0]
    target_sku = target_item_row.name

    similar_items_str = target_item_row.get('similar_items', '')
    # NaN, None, numbers and '' all mean "no usable suggestion list"; checking the
    # type first also keeps a non-string cell from crashing on .split below.
    if not isinstance(similar_items_str, str) or not similar_items_str:
        return {"error": f"Item '{target_item_name}' has no pre-defined 'similar_items' to create bundles from."}

    # Drop empty names produced by stray separators such as "a||b" or a trailing '|'.
    candidate_names = [name.strip() for name in similar_items_str.split('|') if name.strip()]
    candidate_rows = products_df[products_df['ItemTitle'].isin(candidate_names)]
    alternative_skus = sorted(set(sku for sku in candidate_rows.index.tolist() if sku != target_sku))

    if not alternative_skus:
        return {"error": "No valid alternative items (excluding the target itself) found from 'similar_items' list to create bundles."}

    # A candidate's score depends only on (candidate, target, user), so compute it
    # once and reuse it for every pair instead of rescoring inside the pair loop.
    alt_scores = {
        alt_sku: calculate_single_candidate_score(alt_sku, target_sku, products_df, weights, user_id, user_profiles)
        for alt_sku in alternative_skus
    }

    best_2_item_bundle_details = None
    max_2_item_score = -1.0
    for alt_sku in alternative_skus:
        score = alt_scores[alt_sku]
        if score > max_2_item_score:
            max_2_item_score = score
            best_2_item_bundle_details = {'skus': tuple(sorted((target_sku, alt_sku))), 'score': score}

    best_3_item_bundle_details = None
    max_3_item_score = -1.0
    # combinations() yields nothing for fewer than two alternatives.
    for alt_sku_A, alt_sku_B in combinations(alternative_skus, 2):
        current_3_item_score = ((alt_scores[alt_sku_A] + alt_scores[alt_sku_B]) / 2) * bundle_score_bonus_3_item
        if current_3_item_score > max_3_item_score:
            max_3_item_score = current_3_item_score
            best_3_item_bundle_details = {'skus': tuple(sorted((target_sku, alt_sku_A, alt_sku_B))), 'score': current_3_item_score}

    three_item_viable = (
        best_3_item_bundle_details is not None
        and best_3_item_bundle_details['score'] >= min_quality_threshold
    )
    two_item_viable = (
        best_2_item_bundle_details is not None
        and best_2_item_bundle_details['score'] >= min_quality_threshold
    )

    if three_item_viable and two_item_viable:
        prefer_three = (
            best_3_item_bundle_details['score']
            >= best_2_item_bundle_details['score'] * prefer_3_item_score_ratio
        )
    else:
        prefer_three = three_item_viable

    if prefer_three:
        chosen_bundle, chosen_bundle_type = best_3_item_bundle_details, "3-Item Bundle"
    elif two_item_viable:
        chosen_bundle, chosen_bundle_type = best_2_item_bundle_details, "2-Item Bundle"
    else:
        return {"message": f"No suitable bundle found meeting quality thresholds for '{target_item_name}'. Best 2-item score: {max_2_item_score:.2f}, Best 3-item score: {max_3_item_score:.2f}."}

    chosen_bundle_skus = chosen_bundle['skus']
    final_bundle_score = chosen_bundle['score']

    bundle_item_details_list = [products_df.loc[sku] for sku in chosen_bundle_skus]
    sum_individual_prices = sum(item.AvgUnitPrice for item in bundle_item_details_list)

    def get_main_category(cat_string):
        """Return the part of a category path before the first '/', or 'Unknown'."""
        if pd.isna(cat_string) or not isinstance(cat_string, str): return "Unknown"
        return cat_string.split('/')[0].strip()

    # Share of bundle items that belong to a high-margin main category (0.0 - 1.0).
    item_margin_flags = [
        1 if get_main_category(item_detail.get('Category', "Unknown")) in HIGH_MARGIN_CATEGORIES else 0
        for item_detail in bundle_item_details_list
    ]
    avg_bundle_margin_flag = np.mean(item_margin_flags) if item_margin_flags else 0

    # Base discount: 15% for three items, 10% for two, cut by up to 40% of itself
    # in proportion to the high-margin share.
    base_discount_rate = 0.15 if len(chosen_bundle_skus) == 3 else 0.10
    margin_based_discount_reduction_factor = avg_bundle_margin_flag * 0.40
    effective_base_discount_rate = base_discount_rate * (1 - margin_based_discount_reduction_factor)

    # Well-stocked bundles get an extra 5 points, halved when mostly high-margin.
    avg_stock_score = np.mean([item.StockLevelScore for item in bundle_item_details_list])
    stock_discount_boost = 0.05 if avg_stock_score > 0.7 else 0.0
    if avg_bundle_margin_flag > 0.5 and stock_discount_boost > 0:
        stock_discount_boost *= 0.5

    # Final rate is clamped to the 5% - 40% range.
    total_discount_rate = effective_base_discount_rate + stock_discount_boost
    total_discount_rate = max(0.05, min(total_discount_rate, 0.40))

    bundle_price = sum_individual_prices * (1 - total_discount_rate)
    savings = sum_individual_prices - bundle_price

    return {
        "bundle_type": chosen_bundle_type,
        "bundle_skus": chosen_bundle_skus,
        "bundle_item_titles": [item.ItemTitle for item in bundle_item_details_list],
        "personalized_score": final_bundle_score,
        "sum_individual_prices": sum_individual_prices,
        "bundle_price": bundle_price,
        "total_discount_applied": total_discount_rate,
        "savings": savings,
        "target_sku_for_reference": target_sku 
    }

# --- 4. EXAMPLE USAGE (EXPANDED DEMONSTRATION) ---
if __name__ == '__main__':
    # Failure paths use `raise SystemExit(...)` instead of the interactive exit()
    # helper: that helper is intended for the interactive shell, not for programs,
    # and under a notebook kernel it may not stop the cell at the call site, letting
    # the code below run with missing data.
    print("Starting Smart Bundling Engine...")
    if not (os.path.exists(EMBEDDINGS_PATH) and os.path.exists(SKUS_JSON_PATH)):
        print("⚠️ Pre-computed embeddings not found. Running one-time pre-computation...")
        if create_and_save_embeddings_utility():
            print("✔ Embedding pre-computation successful.")
        else:
            print("❌ Embedding pre-computation FAILED. Exiting.")
            raise SystemExit(1)
    else:
        print("✔ Pre-computed embeddings found. Loading them.")

    master_products_df, user_profiles = load_and_engineer_data(
        ORDERS_FILE_PATH, INVENTORY_FILE_PATH, SUGGESTIONS_FILE_PATH,
        EMBEDDINGS_PATH, SKUS_JSON_PATH
    )

    if master_products_df is None or user_profiles is None:
        print("❌ Master feature engineering or user profile creation failed. Exiting.")
        raise SystemExit(1)
    if master_products_df.empty or not user_profiles:
        print("❌ Product data or user profiles are empty after loading/engineering. Exiting.")
        raise SystemExit(1)

    # The index is built here but not consulted by the demo below.
    faiss_index, _ = build_faiss_index_from_loaded_embeddings(master_products_df)

    similarity_weights = {
            'user_taste': 0.4, 'price_affinity': 0.2, 'purchase': 0.15,
            'stock_level': 0.1, 'price_context': 0.05,
            'category': 0.05, 'name': 0.05,
        }

    def _has_profile_vector(uid):
        """True when the user has a profile with a usable taste vector."""
        return uid in user_profiles and user_profiles[uid].get('profile_vector') is not None

    # --- Prepare lists for expanded demonstration ---
    try:
        orders_df_for_demo = pd.read_csv(ORDERS_FILE_PATH)
        user_counts = orders_df_for_demo['UserID'].value_counts()
        # value_counts counts rows, so this keeps users with at least 3 order lines
        # (most active first) that also have a profile vector.
        active_user_list = [
            uid for uid in user_counts[user_counts >= 3].index.tolist() if _has_profile_vector(uid)
        ]

        if len(active_user_list) < 2:
            print("❌ Need at least 2 active users with profiles for the comparative demo. Trying with any available users.")
            active_user_list = [uid for uid in user_counts.index.tolist() if _has_profile_vector(uid)]
            if len(active_user_list) < 2:
                print("❌ Still not enough users with profiles for comparative demo. Exiting.")
                raise SystemExit(1)

        print(f"\nFound {len(active_user_list)} active users with profiles for demo.")
        # Select two distinct users for the comparative demo
        user_A_id = active_user_list[0]
        user_B_id = active_user_list[1]
        print(f"Comparative Demo Users: User A ({user_A_id}), User B ({user_B_id})")

    except FileNotFoundError:
        raise SystemExit(f"Error loading '{ORDERS_FILE_PATH}' for demo user selection.")
    except Exception as e:
        raise SystemExit(f"Error preparing active user list: {e}")

    try:
        suggestions_df_for_demo = pd.read_csv(SUGGESTIONS_FILE_PATH)
        if 'Item title' not in suggestions_df_for_demo.columns or 'similar_items' not in suggestions_df_for_demo.columns:
            raise SystemExit(f"Critical columns missing in '{SUGGESTIONS_FILE_PATH}'.")

        target_items_with_suggestions = suggestions_df_for_demo.dropna(subset=['similar_items', 'Item title'])
        target_items_with_suggestions = target_items_with_suggestions[target_items_with_suggestions['similar_items'].str.strip() != '']

        # Set membership avoids rescanning the whole title column for every name.
        known_titles = set(master_products_df['ItemTitle'].dropna())
        target_item_names_for_demo_list = [
            name for name in target_items_with_suggestions['Item title'].unique()
            if name in known_titles # Ensure item exists in master_products_df
        ]
        if not target_item_names_for_demo_list:
            raise SystemExit(f"No items from '{SUGGESTIONS_FILE_PATH}' with 'similar_items' also exist in master product list for demo.")
        print(f"Found {len(target_item_names_for_demo_list)} target items with suggestions for comparative demo.")
        # Shuffle for variety, with a fixed seed so a re-run shows the same products.
        demo_shuffle_seed = 42
        random.Random(demo_shuffle_seed).shuffle(target_item_names_for_demo_list)
    except FileNotFoundError:
        raise SystemExit(f"Error loading '{SUGGESTIONS_FILE_PATH}' for demo items.")
    except Exception as e:
        raise SystemExit(f"Error preparing target item list: {e}")

    # --- Comparative Personalization Demo Loop (capped at max_demo_products) ---
    max_demo_products = 1000
    num_comparative_examples = min(max_demo_products, len(target_item_names_for_demo_list))
    print(f"\n\n{'='*70}\n✨ COMPARATIVE PERSONALIZATION DEMO (for {num_comparative_examples} products) ✨\n{'='*70}")

    # (label, user id, separator printed after that user's recommendation)
    demo_users = (("A", user_A_id, "-" * 30), ("B", user_B_id, "=" * 70))

    for i in range(num_comparative_examples):
        target_item_name = target_item_names_for_demo_list[i]
        print(f"\n--- DEMO PRODUCT {i+1} of {num_comparative_examples} ---")
        print(f"🎯 Target Item: '{target_item_name}'")
        print("-" * 50)

        for user_label, demo_user_id, separator in demo_users:
            print(f"  👤 For User {user_label} ({demo_user_id}):")
            bundle = suggest_smart_bundle_for_user(
                target_item_name=target_item_name, user_id=demo_user_id,
                products_df=master_products_df, user_profiles=user_profiles, weights=similarity_weights
            )
            if "error" in bundle:
                print(f"    Error: {bundle['error']}")
            elif "message" in bundle:
                print(f"    Message: {bundle['message']}")
            elif "bundle_type" in bundle:
                print(f"    Recommends: {bundle['bundle_type']}")
                # Everything in the bundle except the target item itself.
                additional_titles = [
                    title for sku, title in zip(bundle['bundle_skus'], bundle['bundle_item_titles'])
                    if sku != bundle.get('target_sku_for_reference')
                ]
                if additional_titles:
                    print(f"    With: {', '.join(additional_titles)}")
                else:
                    print("    (Primarily the target item itself)")
                print(f"    Score: {bundle['personalized_score']:.3f}, Price: ${bundle['bundle_price']:.2f} (Save ${bundle['savings']:.2f})")
            else:
                print(f"    Unexpected result for User {user_label}.")
            print(separator)

2025-05-25 08:04:27.079949: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1748160267.360784      13 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1748160267.439598      13 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Starting Smart Bundling Engine...
⚠️ Pre-computed embeddings not found. Running one-time pre-computation...
🚀 Starting one-time embedding generation for FAISS...
  > Loading SentenceTransformer model (all-MiniLM-L6-v2) for pre-computation...


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

  > Encoding 67907 product titles...


Batches:   0%|          | 0/2123 [00:00<?, ?it/s]

✔ Embeddings saved to '/kaggle/working/product_embeddings.npy' & SKU list to '/kaggle/working/product_skus.json'
✔ Embedding pre-computation successful.
🚀 Loading all data and pre-computed embeddings...
  > Engineering master product DataFrame...
    Successfully mapped 67907 pre-computed embeddings.
  > Calculating stock scores...
  > Engineering purchase vectors...
✔ Master product features complete.
🚀 Creating user profile vectors...
✔ Created 12942 user profiles.
🚀 Building FAISS index from loaded embeddings...
✔ FAISS index built with 67907 items.

Found 6499 active users with profiles for demo.
Comparative Demo Users: User A (44140.0), User B (49464.0)
Found 6786 target items with suggestions for comparative demo.


✨ COMPARATIVE PERSONALIZATION DEMO (for 1000 products) ✨

--- DEMO PRODUCT 1 of 1000 ---
🎯 Target Item: 'Chiara Ferragni Lip Plumper - Volume Kiss 01 Honey'
--------------------------------------------------
  👤 For User A (44140.0):
    Recommends: 2-Item Bundle
    