<a href="https://colab.research.google.com/github/nancy-kataria/NexTrade/blob/main/product_matching.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

=== Imports ===

In [1]:
import kagglehub
import pandas as pd
import os
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split

=== 1. Dataset Download ===

In [2]:
# Download the latest version of the Superstore dataset through kagglehub and
# build the path of the CSV file inside the downloaded directory.
print("Downloading dataset...")
path = kagglehub.dataset_download("vivek468/superstore-dataset-final")
print(f"Dataset downloaded to: {path}")
csv_file_path = os.path.join(path, "Sample - Superstore.csv")
print(f"Reading data from: {csv_file_path}")

Dowlaod Dataset...
Dataset downloaded to: /kaggle/input/superstore-dataset-final
Reading data from: /kaggle/input/superstore-dataset-final/Sample - Superstore.csv


=== 2. Load & Clean Data ===

In [3]:
# Read the raw CSV with ISO-8859-1 decoding (presumably the file is not valid
# UTF-8 -- confirm against the dataset).
try:
    superstore_data = pd.read_csv(csv_file_path, encoding='ISO-8859-1')
except FileNotFoundError:
    print(f"ERROR: File not found at {csv_file_path}.")
    # Re-raise instead of calling exit(): the notebook still stops here, but the
    # traceback is kept and the kernel session is not torn down.
    raise
else:
    print("Data loaded successfully.")

Data loaded successfully.


In [4]:
# Keep only the columns used by the recommenders.
columns_to_keep = ['Order ID', 'Order Date', 'Ship Date', 'Customer ID', 'Country', 'City', 'State', 'Postal Code', 'Product ID', 'Product Name', 'Sales', 'Quantity', 'Category', 'Sub-Category']
# .copy() makes the subset an independent frame, so the column assignments in
# later cells do not operate on a slice of the raw data (SettingWithCopyWarning).
superstore_data = superstore_data[columns_to_keep].copy()

In [None]:
# Quick sanity check: show the first five rows of the trimmed frame.
print("First 5 rows of data:", superstore_data.head(), sep="\n")

In [5]:
# Parse both date columns into datetime values so they can be sorted and compared.
for date_column in ('Order Date', 'Ship Date'):
    superstore_data[date_column] = pd.to_datetime(superstore_data[date_column])

In [6]:
# Drop rows that are missing a value in any of the required columns.
# Rebinding the name (instead of inplace=True) keeps the cell free of in-place
# mutation and yields a fresh frame.
superstore_data = superstore_data.dropna(subset=columns_to_keep)

In [12]:
print("\n--- Finding Customers with Most Transactions ---")

# Every row is one transaction line item, so the group size is the number of
# entries recorded for that customer; rank customers from most to fewest.
customer_transaction_counts = superstore_data.groupby('Customer ID').size()
customer_transaction_counts_sorted = customer_transaction_counts.sort_values(ascending=False)

print("Top 10 Customers by Number of Transaction Entries:")
print(customer_transaction_counts_sorted.head(10))

# The first entry of the descending ranking is the most active customer.
if customer_transaction_counts_sorted.empty:
    top_customer_id = None  # nothing to rank when the data is empty
    print("\nCould not determine top customer.")
else:
    top_customer_id = customer_transaction_counts_sorted.index[0]
    top_customer_count = customer_transaction_counts_sorted.iloc[0]
    print(f"\nCustomer with the most transaction entries: '{top_customer_id}' ({top_customer_count} entries)")

print("-" * 40)


--- Finding Customers with Most Transactions ---
Top 10 Customers by Number of Transaction Entries:
Customer ID
WB-21850    37
MA-17560    34
JL-15835    34
PP-18955    34
EH-13765    32
JD-15895    32
SV-20365    32
CK-12205    32
AP-10915    31
EP-13915    31
dtype: int64

Customer with the most transaction entries: 'WB-21850' (37 entries)
----------------------------------------


=== (TEST STAGE) 2b. Evaluation Split (Time-Based) ===

In [None]:
print("\n--- Splitting Data for Evaluation ---")
# Order the transactions chronologically and renumber the rows from 0.
superstore_data_sorted = superstore_data.sort_values('Order Date').reset_index(drop=True)

# Row-count split after sorting: the earliest 80% of rows train, the rest test.
# (A fixed cut-off date would be an alternative split rule.)
split_index = int(len(superstore_data_sorted) * 0.8)
train_df = superstore_data_sorted.iloc[:split_index].copy()
test_df = superstore_data_sorted.iloc[split_index:].copy()

for label, frame in (("Training", train_df), ("Testing", test_df)):
    print(f"{label} data shape: {frame.shape}")
for label, frame in (("Training", train_df), ("Testing", test_df)):
    if not frame.empty:
        print(f"{label} data period: {frame['Order Date'].min()} to {frame['Order Date'].max()}")

# Customers that appear in the held-out period are the ones to evaluate on.
test_users = test_df['Customer ID'].unique()
print(f"Number of unique users in test set: {len(test_users)}")

=== 3. Precomputation  ===

In [7]:
# 1. Product popularity: one row per Product ID with total units and revenue.
product_popularity = superstore_data.groupby('Product ID').agg({
    'Product Name': 'first',
    'Category': 'first',
    'Sub-Category': 'first',
    'Quantity': 'sum',
    'Sales': 'sum'
}).reset_index()

# Scale total quantity by the maximum so the best-selling product scores 1.0.
product_popularity['popularity_score'] = product_popularity['Quantity'] / product_popularity['Quantity'].max()

# 2. Content-based text: name, category and sub-category joined into one string.
superstore_data['product_info'] = (
    superstore_data['Product Name'].astype(str) + ' ' +
    superstore_data['Category'].astype(str) + ' ' +
    superstore_data['Sub-Category'].astype(str)
)

# One row per product. The index is reset so that each product's index label is
# also its row position in `tfidf_matrix` / `cosine_sim`. Without the reset the
# labels are the original transaction row numbers, and using them to index the
# similarity matrix selects the wrong row or runs past its end.
products = superstore_data.drop_duplicates(subset='Product ID')[
    ['Product ID', 'Product Name', 'Category', 'Sub-Category', 'product_info']
].reset_index(drop=True)

# 3. TF-IDF matrix (English stop words removed) and pairwise cosine similarity.
vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = vectorizer.fit_transform(products['product_info'])
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# 4. Product ID -> row position in `products` / `cosine_sim`.
product_indices = pd.Series(products.index, index=products['Product ID'])

# 5. Customer x product quantity matrix and item-item similarity for
#    collaborative filtering (missing customer/product pairs count as 0).
user_product_matrix = superstore_data.pivot_table(
    index='Customer ID',
    columns='Product ID',
    values='Quantity',
    aggfunc='sum'
).fillna(0)

product_similarity = cosine_similarity(user_product_matrix.T)
product_similarity_df = pd.DataFrame(
    product_similarity,
    index=user_product_matrix.columns,
    columns=user_product_matrix.columns
)

=== 4. Recommendation Functions ===

In [15]:
# === Helper Functions ===
def get_customer_data(customer_id, df):
    """Return one customer's transactions and the distinct products they bought.

    Args:
        customer_id (str): ID of the customer to look up.
        df (pd.DataFrame): Transaction data with at least the 'Customer ID'
            and 'Product ID' columns.

    Returns:
        tuple: ``(customer_data, purchased_ids)`` where

            - customer_data (pd.DataFrame) is an independent copy of the rows
              of ``df`` belonging to ``customer_id`` (empty if there are none);
            - purchased_ids holds the unique 'Product ID' values of those rows
              (empty if there are none).
    """
    is_customer = df['Customer ID'] == customer_id
    # Copy so callers can modify the result without touching `df`
    # (and without triggering SettingWithCopyWarning).
    customer_data = df.loc[is_customer].copy()
    return customer_data, customer_data['Product ID'].unique()

def get_unseen_products(customer_id, df, product_df):
    """Split a product list into what the customer has not bought yet.

    Args:
        customer_id (str): ID of the target customer.
        df (pd.DataFrame): Full transaction data (e.g., superstore_data) used
            to look up the customer's purchase history.
        product_df (pd.DataFrame): Candidate products (e.g., product_popularity);
            must contain a 'Product ID' column.

    Returns:
        tuple: ``(unseen, purchased_ids)`` where ``unseen`` is ``product_df``
        restricted to products the customer has not purchased, and
        ``purchased_ids`` holds the unique Product IDs the customer has
        purchased (used by the fallback logic).
    """
    purchased_ids = get_customer_data(customer_id, df)[1]
    already_bought = product_df['Product ID'].isin(purchased_ids)
    return product_df[~already_bought], purchased_ids

def add_fallback_if_needed(recommendations, purchased, product_df, n, by):
    """Top up a short recommendation list with globally popular products.

    When fewer than ``n`` recommendations are available, the gap is filled with
    the most popular rows of ``product_df`` (ranked by ``by``) that the customer
    has not purchased and that are not already recommended. The fallback is
    drawn from the whole of ``product_df`` rather than from a pre-cut top-n
    list, so removing purchased items does not leave the result short while
    eligible products remain.

    Args:
        recommendations (pd.DataFrame): Ranked, unseen products; must contain
            a 'Product ID' column.
        purchased: Iterable of Product IDs the customer already bought.
        product_df (pd.DataFrame): Global product list (e.g., product_popularity)
            containing 'Product ID' and the ``by`` column.
        n (int): Number of products wanted.
        by (str): Popularity metric, 'Quantity' or 'Sales'.

    Returns:
        pd.DataFrame: ``recommendations`` unchanged when it already has at
        least ``n`` rows; otherwise ``recommendations`` followed by the
        fallback rows. May still hold fewer than ``n`` rows if ``product_df``
        has too few eligible products.

    Raises:
        ValueError: If a fallback is needed and ``by`` is not 'Quantity' or 'Sales'.
    """
    if len(recommendations) >= n:
        return recommendations

    if by not in ('Quantity', 'Sales'):
        raise ValueError("Parameter 'by' must be either 'Quantity' or 'Sales'")

    print(f"Customer has only {len(recommendations)} new products available. Showing global popular items instead.")

    # Exclude what the customer owns and what is already recommended, then
    # take just enough of the most popular remaining products.
    excluded_ids = list(purchased) + list(recommendations['Product ID'])
    eligible = product_df[~product_df['Product ID'].isin(excluded_ids)]
    fallback = eligible.sort_values(by=by, ascending=False).head(n - len(recommendations))

    return pd.concat([recommendations, fallback]).drop_duplicates('Product ID')

def get_customer_preferences(customer_id, df):
    """Rank the categories and sub-categories a customer buys from.

    Counts how many of the customer's transaction rows fall into each
    'Category' and each 'Sub-Category' and orders both by that count.
    Every category/sub-category the customer has bought from is included,
    not just the leading few.

    Args:
        customer_id (str): ID of the target customer.
        df (pd.DataFrame): Transaction data with 'Customer ID', 'Product ID',
            'Category' and 'Sub-Category' columns.

    Returns:
        tuple[list[str], list[str]]: ``(categories, sub_categories)``, each
        sorted from most to least frequent. Both lists are empty when the
        customer has no rows in ``df``.
    """
    history = get_customer_data(customer_id, df)[0]
    if history.empty:
        return [], []

    ranked = [
        history[column].value_counts().index.tolist()
        for column in ('Category', 'Sub-Category')
    ]
    return ranked[0], ranked[1]



# === Calculation  Functions ===
def get_global_popular_products(n=10, by='Quantity'):
    """Return the top-N products overall, ignoring any customer history.

    Ranks the precomputed ``product_popularity`` table by the chosen metric.

    Args:
        n (int, optional): Number of products to return. Defaults to 10.
        by (str, optional): Ranking metric, 'Quantity' or 'Sales'.
            Defaults to 'Quantity'.

    Returns:
        pd.DataFrame: The top ``n`` rows with columns
        ['Product ID', 'Product Name', 'Category', 'Sub-Category', <by>].

    Raises:
        ValueError: If 'by' is not 'Quantity' or 'Sales'.
    """
    allowed_metrics = ('Quantity', 'Sales')
    if by not in allowed_metrics:
        raise ValueError("Parameter 'by' must be either 'Quantity' or 'Sales'")

    output_columns = ['Product ID', 'Product Name', 'Category', 'Sub-Category', by]
    ranked = product_popularity.sort_values(by=by, ascending=False)
    return ranked.head(n)[output_columns]

def get_content_similar_items(product_id, top_n=5):
    """Recommend products whose text description is closest to a given product.

    Uses the precomputed TF-IDF cosine similarity matrix (``cosine_sim``) built
    from product name, category and sub-category.

    Args:
        product_id (str): ID of the product to find similar items for.
        top_n (int, optional): Number of similar products to return.
            Defaults to 5.

    Returns:
        pd.DataFrame: Up to ``top_n`` most similar products, best first, with
        columns ['Product Name', 'Category', 'Sub-Category']. The queried
        product itself is never included. An empty DataFrame is returned if
        ``product_id`` is not in ``product_indices``.
    """
    if product_id not in product_indices.index:
        print(f"Product ID '{product_id}' not found in product indices.")
        return pd.DataFrame()

    # `product_indices` stores index *labels* of `products`, while `cosine_sim`
    # is addressed by row *position*. Translate the label to a position so the
    # lookup is correct whether or not `products` has a 0..n-1 index.
    position = products.index.get_loc(product_indices[product_id])

    # Rank by descending similarity (stable, so ties keep catalogue order) and
    # remove the queried product explicitly: other products can tie with it at
    # the top, so it is not guaranteed to be the first entry.
    ranked = np.argsort(-cosine_sim[position], kind='stable')
    ranked = ranked[ranked != position][:top_n]

    return products.iloc[ranked][['Product Name', 'Category', 'Sub-Category']]

def get_collaborative_similar_items(product_id, top_n=5):
    """Recommend products bought by the same customers as a given product.

    Item-item collaborative filtering on the precomputed
    ``product_similarity_df`` (cosine similarity of products' per-customer
    purchase quantities).

    Args:
        product_id (str): ID of the product to find similar items for.
        top_n (int, optional): Number of similar products to return.
            Defaults to 5.

    Returns:
        pd.DataFrame: Up to ``top_n`` rows, most similar first, with columns
        ['Product ID', 'Similarity Score', 'Product Name', 'Category',
        'Sub-Category']. The queried product itself is never included. An empty
        DataFrame is returned if ``product_id`` is not in the similarity matrix.
    """
    if product_id not in product_similarity_df.columns:
        print(f"Product {product_id} not found in dataset.")
        return pd.DataFrame()

    # Drop the product itself by label before ranking. Skipping "the first
    # entry" is unreliable because other products can tie with it at the top,
    # so it is not guaranteed to sort first.
    similar_scores = (
        product_similarity_df[product_id]
        .drop(labels=product_id, errors='ignore')
        .sort_values(ascending=False)
        .head(top_n)
    )

    recommended = similar_scores.reset_index()
    recommended.columns = ['Product ID', 'Similarity Score']
    # Attach descriptive columns; a left merge keeps the similarity ordering.
    return recommended.merge(
        product_popularity[['Product ID', 'Product Name', 'Category', 'Sub-Category']],
        on='Product ID', how='left'
    )



# === Main Recommendation Functions ===
def recommend_popular(customer_id=None, personalized=False, n=10, by='Quantity'):
    """Recommend popular products, optionally tailored to one customer.

    Modes:
    1. Global: ``customer_id`` is None -> globally popular products.
    2. Unseen for customer: ``customer_id`` given, ``personalized=False`` ->
       globally popular products the customer has not purchased.
    3. Personalized popular: ``customer_id`` given, ``personalized=True`` ->
       popular unpurchased products whose category or sub-category is one the
       customer has bought from before.

    If fewer than ``n`` candidates remain, the list is topped up through
    ``add_fallback_if_needed``.

    Args:
        customer_id (str, optional): ID of the customer. Defaults to None.
        personalized (bool, optional): Restrict candidates to the customer's
            categories/sub-categories. Ignored when ``customer_id`` is None.
            Defaults to False.
        n (int, optional): Number of products to recommend. Defaults to 10.
        by (str, optional): Popularity metric, 'Quantity' or 'Sales'.
            Defaults to 'Quantity'.

    Returns:
        pd.DataFrame: Recommended products with columns
        ['Product ID', 'Product Name', 'Category', 'Sub-Category', <by>].
        Falls back to the global list when ``personalized`` is set but the
        customer has no purchase history. May contain fewer than ``n`` rows
        if not enough products are available even after the fallback.

    Raises:
        ValueError: If 'by' is not 'Quantity' or 'Sales'.
    """
    if by not in ('Quantity', 'Sales'):
        raise ValueError("Parameter 'by' must be either 'Quantity' or 'Sales'")

    # Mode 1: no customer, nothing to filter.
    if customer_id is None:
        return get_global_popular_products(n, by)

    candidates = product_popularity

    if personalized:
        top_cats, top_subcats = get_customer_preferences(customer_id, superstore_data)
        if not (top_cats and top_subcats):
            print(f"No purchase history for customer '{customer_id}'.")
            return get_global_popular_products(n, by)
        in_known_category = candidates['Category'].isin(top_cats)
        in_known_subcategory = candidates['Sub-Category'].isin(top_subcats)
        candidates = candidates[in_known_category | in_known_subcategory]

    # Remove what the customer already owns, rank the rest, top up if short.
    unseen, purchased = get_unseen_products(customer_id, superstore_data, candidates)
    ranked_unseen = unseen.sort_values(by=by, ascending=False)
    final = add_fallback_if_needed(ranked_unseen, purchased, product_popularity, n, by)

    output_columns = ['Product ID', 'Product Name', 'Category', 'Sub-Category', by]
    return final.head(n)[output_columns]

def recommend_content_based(customer_id, top_n=5):
    """Recommend products similar in content to a customer's latest purchase.

    Picks the customer's most recent transaction row (by 'Order Date') and
    delegates to ``get_content_similar_items`` for that product.

    Args:
        customer_id (str): ID of the customer.
        top_n (int, optional): Number of similar products to recommend.
            Defaults to 5.

    Returns:
        pd.DataFrame: The result of ``get_content_similar_items`` for the last
        purchased product, or an empty DataFrame if the customer has no
        purchase history.
    """
    is_customer = superstore_data['Customer ID'] == customer_id
    history = superstore_data[is_customer]
    if history.empty:
        print(f"No purchase history for customer '{customer_id}'.")
        return pd.DataFrame()

    # Newest transaction first; its product seeds the similarity search.
    newest_first = history.sort_values('Order Date', ascending=False)
    last_purchase = newest_first.iloc[0]
    last_product_id, last_product_name = last_purchase['Product ID'], last_purchase['Product Name']

    print(f"Based on last product purchased (ID: {last_product_id}): {last_product_name}")
    return get_content_similar_items(last_product_id, top_n)

def recommend_collaborative(customer_id, top_n=5):
    """Recommend products to a customer with item-item collaborative filtering.

    Averages the ``product_similarity_df`` columns of every product the
    customer has purchased, removes products already purchased, and returns
    the highest-scoring remaining products.

    Args:
        customer_id (str): ID of the customer.
        top_n (int, optional): Number of products to recommend. Defaults to 5.

    Returns:
        pd.DataFrame: Up to ``top_n`` products, best match first, with columns
        ['Product ID', 'Product Name', 'Category', 'Sub-Category']. An empty
        DataFrame is returned if the customer has no purchase history or none
        of their products appear in the similarity matrix.
    """
    customer_data, purchased_ids = get_customer_data(customer_id, superstore_data)
    if customer_data.empty:
        print(f"No purchase history for customer '{customer_id}'.")
        return pd.DataFrame()

    # Only products present in the similarity matrix can contribute a score.
    known_ids = [pid for pid in purchased_ids if pid in product_similarity_df.columns]
    if not known_ids:
        print(f"No valid products found for similarity for customer '{customer_id}'.")
        return pd.DataFrame()

    # Average over the products that actually contributed (the same rule
    # recommend_hybrid applies), then discard what the customer already owns.
    sim_scores = product_similarity_df[known_ids].mean(axis=1)
    sim_scores = sim_scores.drop(labels=purchased_ids, errors='ignore')

    top_ids = sim_scores.sort_values(ascending=False).head(top_n).index

    # A plain isin() filter would return the rows in catalogue order and lose
    # the ranking, so reorder the matched rows by their similarity rank.
    rank_of = pd.Series(np.arange(len(top_ids)), index=top_ids)
    matches = product_popularity[product_popularity['Product ID'].isin(top_ids)]
    ordered = matches.iloc[matches['Product ID'].map(rank_of).to_numpy().argsort()]

    return ordered[['Product ID', 'Product Name', 'Category', 'Sub-Category']]

def recommend_hybrid(customer_id, top_n=5, w_content=0.4, w_collab=0.4, w_pop=0.2):
    """Recommend products with a weighted blend of three signals.

    For every product not yet purchased by the customer the final score is
    ``w_content * content_score + w_collab * collab_score + w_pop * popularity_score``
    where

    - content_score is the mean TF-IDF cosine similarity (``cosine_sim``) to the
      customer's purchased products,
    - collab_score is the mean item-item similarity (``product_similarity_df``)
      to the customer's purchased products,
    - popularity_score comes from ``product_popularity``.

    A signal that cannot be computed for the customer contributes 0.

    Args:
        customer_id (str): ID of the customer.
        top_n (int, optional): Number of products to recommend. Defaults to 5.
        w_content (float, optional): Weight of the content score. Defaults to 0.4.
        w_collab (float, optional): Weight of the collaborative score. Defaults to 0.4.
        w_pop (float, optional): Weight of the popularity score. Defaults to 0.2.

    Returns:
        pd.DataFrame: Up to ``top_n`` products, highest score first, with
        columns ['Product ID', 'Product Name', 'Category', 'Sub-Category',
        'final_score']. An empty DataFrame is returned if the customer has no
        purchase history.
    """
    customer_data, purchased_ids = get_customer_data(customer_id, superstore_data)
    if customer_data.empty:
        print(f"No purchase history found for customer '{customer_id}'.")
        return pd.DataFrame()

    # --- 1. Average content similarity to the purchase history ---
    # `product_indices` stores index *labels* of `products`, while `cosine_sim`
    # is addressed by row *position*. Translate labels to positions so the
    # right rows are averaged whether or not `products` has a 0..n-1 index.
    purchased_labels = [product_indices[pid] for pid in purchased_ids if pid in product_indices.index]
    positions = products.index.get_indexer(purchased_labels)
    positions = positions[positions >= 0]  # -1 marks a label missing from `products`
    if len(positions) == 0:
        print(f"No purchased products for customer '{customer_id}' found in content product index.")
        avg_content_sim_scores = np.zeros(len(products))
    else:
        avg_content_sim_scores = cosine_sim[positions].mean(axis=0)

    content_df = pd.DataFrame({
        'Product ID': products['Product ID'].to_numpy(),
        'content_score': avg_content_sim_scores
    })

    # --- 2. Average collaborative similarity to the purchase history ---
    collab_ids = [pid for pid in purchased_ids if pid in product_similarity_df.columns]
    if not collab_ids:
        print(f"No valid products found for collaborative similarity for customer '{customer_id}'.")
        collab_df = pd.DataFrame({'Product ID': product_similarity_df.columns, 'collab_score': 0.0})
    else:
        collab_df = product_similarity_df[collab_ids].mean(axis=1).reset_index()
        collab_df.columns = ['Product ID', 'collab_score']

    # --- 3. Combine the signals ---
    # Start from every product and its popularity, then attach both similarity
    # scores; products missing from either table score 0 for that signal.
    combined_df = product_popularity[['Product ID', 'Product Name', 'Category', 'Sub-Category', 'popularity_score']].copy()
    combined_df = pd.merge(combined_df, content_df, on='Product ID', how='left')
    combined_df['content_score'] = combined_df['content_score'].fillna(0)
    combined_df = pd.merge(combined_df, collab_df, on='Product ID', how='left')
    combined_df['collab_score'] = combined_df['collab_score'].fillna(0)

    # Never recommend something the customer already bought. The copy keeps the
    # column assignment below from acting on a slice of the merged frame.
    combined_df = combined_df[~combined_df['Product ID'].isin(purchased_ids)].copy()

    combined_df['final_score'] = (
        w_content * combined_df['content_score'] +
        w_collab * combined_df['collab_score'] +
        w_pop * combined_df['popularity_score']
    )

    final_recommendations = combined_df.sort_values(by='final_score', ascending=False).head(top_n)

    return final_recommendations[['Product ID', 'Product Name', 'Category', 'Sub-Category', 'final_score']]

=== 5. Example Usage ===

In [17]:
# Use the most active customer found earlier; otherwise fall back to a fixed ID.
if not top_customer_id:
    print("WARN: Using default customer ID as top customer wasn't found.")
    customer_example_id = 'CG-12520'
else:
    customer_example_id = top_customer_id

num_recommendations = 5

customer_history_context, purchased_ids_context = get_customer_data(customer_example_id, superstore_data)

if not customer_history_context.empty:
    print("\nCustomer Purchase Context (from full history):")
    print(f"  Total Unique Products Purchased: {len(purchased_ids_context)}")

    # --- Last purchase ---
    # Newest transaction first; recommend_content_based picks the same row.
    customer_history_context_sorted = customer_history_context.sort_values('Order Date', ascending=False)
    last_purchase = customer_history_context_sorted.iloc[0]
    print(f"  Last Purchase (on {last_purchase['Order Date'].date()}):")
    print(f"    - Product: '{last_purchase['Product Name']}' (ID: {last_purchase['Product ID']})")

    # --- Most frequent purchases (top 3 by number of transaction rows) ---
    print("  Most Frequent Purchases:")
    freq_counts = customer_history_context['Product ID'].value_counts()
    top_freq_ids = freq_counts.head(3).index.tolist()
    # product_popularity has one row per Product ID, so it serves as the name lookup.
    top_freq_products = product_popularity.loc[
        product_popularity['Product ID'].isin(top_freq_ids), ['Product ID', 'Product Name']
    ]
    for pid in top_freq_ids:
        name = top_freq_products.loc[top_freq_products['Product ID'] == pid, 'Product Name'].iloc[0]
        count = freq_counts[pid]
        print(f"    - '{name}' ({count} times)")

    # --- Top categories / sub-categories (first three of each ranking) ---
    top_cats, top_subcats = get_customer_preferences(customer_example_id, superstore_data)
    print(f"  Top Categories (by purchase frequency): {top_cats[:3]}")
    print(f"  Top Sub-Categories (by purchase frequency): {top_subcats[:3]}")
else:
    # Without history there is no context to show; the recommenders below
    # print their own message for this case.
    print(f"\nWARN: No purchase history found for example customer '{customer_example_id}'. Cannot show context.")


Customer Purchase Context (from full history):
  Total Unique Products Purchased: 36
  Last Purchase (on 2017-12-10):
    - Product: 'Contract Clock, 14", Brown' (ID: FUR-FU-10001475)
  Most Frequent Purchases:
    - 'Fellowes 8 Outlet Superior Workstation Surge Protector' (2 times)
    - 'Fellowes PB200 Plastic Comb Binding Machine' (1 times)
    - 'Motorla HX550 Universal Bluetooth Headset' (1 times)
  Top Categories (by purchase frequency): ['Office Supplies', 'Technology', 'Furniture']
  Top Sub-Categories (by purchase frequency): ['Binders', 'Phones', 'Furnishings']


In [18]:
# --- Display Recommendations ---
print("\n--- Generating Recommendations ---")

print(f"\nTop {num_recommendations} Popular Products (Unseen by Customer):")
print(recommend_popular(customer_id=customer_example_id, personalized=False, n=num_recommendations))

print(f"\nTop {num_recommendations} Content-Based Recommendations (Based on Last Purchase):")
print(recommend_content_based(customer_example_id, top_n=num_recommendations))

print(f"\nTop {num_recommendations} Collaborative Recommendations (Based on Customer History):")
print(recommend_collaborative(customer_example_id, top_n=num_recommendations))

print(f"\nTop {num_recommendations} Advanced Hybrid Recommendations (Content+Collab+Pop):")
print(recommend_hybrid(customer_example_id, top_n=num_recommendations))


--- Generating Recommendations ---

Top 5 Popular Products (Unseen by Customer):
           Product ID                                       Product Name  \
1569  TEC-AC-10003832                 Logitech P710e Mobile Speakerphone   
1144  OFF-PA-10001970                                         Xerox 1881   
694   OFF-BI-10001524  GBC Premium Transparent Covers with Diagonal L...   
721   OFF-BI-10002026                            Avery Arch Ring Binders   
93    FUR-CH-10002647         Situations Contoured Folding Chairs, 4/Set   

             Category Sub-Category  Quantity  
1569       Technology  Accessories        75  
1144  Office Supplies        Paper        70  
694   Office Supplies      Binders        67  
721   Office Supplies      Binders        64  
93          Furniture       Chairs        64  

Top 5 Content-Based Recommendations (Based on Last Purchase):
Based on last product purchased (ID: FUR-FU-10001475): Contract Clock, 14", Brown
                  Product Name    

=== 6. Evaluation ===