In [10]:
import re
import string
from Levenshtein import ratio
import numpy as np

# Normalize function
def normalize(text):
    """Return a canonical form of ``text`` for fuzzy string comparison.

    The text is lower-cased, every character listed in ``string.punctuation``
    is removed, and each run of whitespace is collapsed to a single space
    with leading/trailing whitespace stripped.

    Note that ``string.punctuation`` only covers ASCII punctuation, so
    characters such as an en dash are left in place.

    Args:
        text: The string to normalize.

    Returns:
        The normalized string.
    """
    text = text.lower()
    # Punctuation is deleted outright (not replaced by a space), so
    # "better'n" becomes "bettern".
    text = text.translate(str.maketrans('', '', string.punctuation))
    # Raw string: '\s' in a plain literal is an invalid escape sequence and
    # triggers a warning on recent Python versions.
    text = re.sub(r'\s+', ' ', text).strip()
    return text

In [11]:
import pickle

def get_test_item_by_userid(pkl_file_path, user_id):
    """Look up the test product name(s) recorded for one user.

    The pickle at ``pkl_file_path`` must hold a dict with the keys
    ``'umap'`` (user id -> user index), ``'test'`` (user index -> test item
    id, or a collection of item ids) and ``'meta'`` (item id -> value
    returned as the product name).

    Args:
        pkl_file_path: Path to the pickled dataset.
        user_id: Raw user id to look up in ``umap``.

    Returns:
        A list with the ``meta`` entry of every test item that has a truthy
        entry (possibly empty if none has), or ``None`` when the user is
        unknown or has no test items. A message is printed for every
        failed lookup.
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only point this at dataset files you created or otherwise trust.
    with open(pkl_file_path, 'rb') as f:
        retrieved_data = pickle.load(f)

    # Extract the relevant dictionaries
    test = retrieved_data['test']
    meta = retrieved_data['meta']
    umap = retrieved_data['umap']

    # Check if the user_id exists in umap
    if user_id not in umap:
        print(f"User ID {user_id} not found in umap.")
        return None

    # Get the user index
    user_index = umap[user_id]

    # Get the test item ID(s) for the user
    test_item_ids = test.get(user_index)
    if test_item_ids is None:
        print(f"No test items found for user index {user_index}.")
        return None

    # Accept a single item id as well as a collection of ids. A bare string
    # is one id, not a sequence of one-character ids.
    if isinstance(test_item_ids, (str, bytes)) or not hasattr(test_item_ids, '__iter__'):
        test_item_ids = [test_item_ids]
    else:
        test_item_ids = list(test_item_ids)

    if not test_item_ids:
        print(f"No test items found for user index {user_index}.")
        return None

    # Get the product name(s) from meta
    product_names = []
    for test_item_id in test_item_ids:
        product_name = meta.get(test_item_id)
        if product_name:
            product_names.append(product_name)
        else:
            print(f"Product ID {test_item_id} not found in meta.")

    return product_names


In [12]:
# Location of the pickled dataset read by get_test_item_by_userid
pkl_file_path = 'data/dataset.pkl'  # Update this path

# Raw user ID whose test item(s) should be inspected
user_id = 'AFSKPY37N3C43SOI5IEXEK5JSIYA'  # Replace with your user ID

# Resolve the user's test item(s) to product names
test_product_names = get_test_item_by_userid(pkl_file_path, user_id)

# Report the outcome: a fallback message when nothing came back,
# otherwise one product name per line.
if not test_product_names:
    print("Test product not found.")
else:
    print(f"Test product(s) for user {user_id}:")
    for product_name in test_product_names:
        print(product_name)


Test product(s) for user AFSKPY37N3C43SOI5IEXEK5JSIYA:
NIRA Skincare Laser & Serum Bundle - Includes Anti-Aging Laser & Hyaluronic Acid Serum - Reduces Appearance of Fine Lines & Wrinkles - FDA Cleared


In [20]:
# Look the test product name(s) up again; the evaluation further down
# cannot proceed without at least one, so fail loudly when none is found.
test_product_names = get_test_item_by_userid(pkl_file_path, user_id)

if not test_product_names:
    raise ValueError(f"Test product not found for user {user_id}")

# Bare expression so the notebook displays the list
test_product_names


['NIRA Skincare Laser & Serum Bundle - Includes Anti-Aging Laser & Hyaluronic Acid Serum - Reduces Appearance of Fine Lines & Wrinkles - FDA Cleared']

In [15]:
# Normalize function (as defined previously)
def normalize(text):
    """Return a canonical form of ``text`` for fuzzy string comparison.

    The text is lower-cased, every character listed in ``string.punctuation``
    is removed, and each run of whitespace is collapsed to a single space
    with leading/trailing whitespace stripped.

    Note that ``string.punctuation`` only covers ASCII punctuation, so
    characters such as an en dash are left in place.

    Args:
        text: The string to normalize.

    Returns:
        The normalized string.
    """
    text = text.lower()
    # Punctuation is deleted outright (not replaced by a space).
    text = text.translate(str.maketrans('', '', string.punctuation))
    # Raw string: '\s' in a plain literal is an invalid escape sequence and
    # triggers a warning on recent Python versions.
    text = re.sub(r'\s+', ' ', text).strip()
    return text

# Canonicalize every test product name so it can be fuzzy-matched
normalized_test_products = list(map(normalize, test_product_names))

# Similarity function
def compute_similarity(str1, str2):
    """Score how alike two strings are, expressed as a percentage.

    Delegates to ``ratio`` (imported from ``Levenshtein``) and multiplies
    its result by 100.

    Args:
        str1: First string to compare.
        str2: Second string to compare.

    Returns:
        ``ratio(str1, str2)`` scaled by 100.
    """
    fraction = ratio(str1, str2)
    return fraction * 100


In [16]:
# Canonicalize every test product name so it can be fuzzy-matched
normalized_test_products = list(map(normalize, test_product_names))

In [23]:
# Recommended product titles in ranked order (position 1 first)
recommended_products = [
    "Fifth & Skin Better'n Ur Skin Mineral Bronzer (TOUCH OF SUN) - Natural Face Bronzer - Gluten Free Tan - Cruelty Free Makeup - Vegan - Paraben Free",
    'Keratin Secrets Keratin Infused Shampoo and Conditioner Duo 12 Ounce',
    'Bath and Body Works 2 Pack Aromatherapy Stress Relief Sage and Cedarwood Body Wash & Foam Bath 10 Oz.',
    'artnaturals Natural Face Makeup Primer - 1 Fl Oz Hydrating Facial Foundation - Long Lasting for Wrinkles, Oily Skin, and Pore Size - Rejuvenating Formula - Aloe Vera, Coconut Oil and Safflower Oil',
    'Keratin Cure Brazilian Bio Chocolate Professional Complex Coconut Argan Oil Aminos Blow Out Hair Treatment Formaldehyde Free (160ml/ 5 fl oz) tratamiento keratina',
    'Bath and Body Works - Aromatherapy - Promotes Stress Relief- Cedarwood & Sage- Body Wash & Foam Bath & Body Cream – Bundle',
    'Organic Self Tanner for Face and Body - Streak Free Sunless Tanner - Self Tanning Lotion with Manuka Honey and Aloe Vera - Fake Tan for Fair to Medium Skin - Indoor Tanning Lotion with Bronzer (8oz)',
    'Keragen - Volumizing Shampoo and Conditioner for Fine Hair with Keratin and Collagen, Sulfate Free',
    'Flower Power Wellness Bath, Organic Bath Tea, Aromatherapy Bath with Essential Oils, Relaxing Bathing Salts',
    'Ownest Matte Oil Control Concealer Foundation Cream, Broad Spectrum SPF 25, Long Lasting Waterproof Flawless Matte Sunscreen Liquid Foundation-104A Natural Beige',
]

In [26]:
# Canonicalize the recommended titles the same way as the test titles
normalized_recommended_products = list(map(normalize, recommended_products))
normalized_recommended_products

['fifth skin bettern ur skin mineral bronzer touch of sun natural face bronzer gluten free tan cruelty free makeup vegan paraben free',
 'keratin secrets keratin infused shampoo and conditioner duo 12 ounce',
 'bath and body works 2 pack aromatherapy stress relief sage and cedarwood body wash foam bath 10 oz',
 'artnaturals natural face makeup primer 1 fl oz hydrating facial foundation long lasting for wrinkles oily skin and pore size rejuvenating formula aloe vera coconut oil and safflower oil',
 'keratin cure brazilian bio chocolate professional complex coconut argan oil aminos blow out hair treatment formaldehyde free 160ml 5 fl oz tratamiento keratina',
 'bath and body works aromatherapy promotes stress relief cedarwood sage body wash foam bath body cream – bundle',
 'organic self tanner for face and body streak free sunless tanner self tanning lotion with manuka honey and aloe vera fake tan for fair to medium skin indoor tanning lotion with bronzer 8oz',
 'keragen volumizing shamp

In [27]:
# A recommendation counts as a hit when its best similarity reaches this percentage
SIMILARITY_THRESHOLD = 90.0

# Best similarity of each recommended title against any test title
similarity_scores = [
    max(
        compute_similarity(rec_product, test_product)
        for test_product in normalized_test_products
    )
    for rec_product in normalized_recommended_products
]

# Hit / miss flag per recommendation, in ranked order
matches = [best_score >= SIMILARITY_THRESHOLD for best_score in similarity_scores]

# Evaluate Recall@K and NDCG@K
def recall_at_k(matches, k, total_relevant=1):
    """Compute Recall@K from a ranked list of hit/miss flags.

    Args:
        matches: Ranked sequence of truthy/falsy flags, one per
            recommendation (best rank first).
        k: Only the first ``k`` flags are considered.
        total_relevant: Number of relevant items for the user. Defaults to 1
            (one relevant item per user).

    Returns:
        The fraction of relevant items found in the top ``k``, as a float in
        ``[0.0, 1.0]``. Returns ``0.0`` when ``total_relevant`` is not
        positive.
    """
    if total_relevant <= 0:
        return 0.0
    relevant_in_top_k = sum(1 for match in matches[:k] if match)
    # Several recommendations can match the same relevant item (the matching
    # is fuzzy), so cap the hit count to keep recall from exceeding 1.0.
    return min(relevant_in_top_k, total_relevant) / total_relevant

def ndcg_at_k(matches, k):
    """Compute NDCG@K for a ranked list of hit/miss flags.

    Each of the first ``k`` flags is mapped to a binary relevance (1 for a
    truthy flag, 0 otherwise). The gain ``2**rel - 1`` at each position is
    divided by ``log2(position + 1)`` (positions counted from 1) and summed
    to give the DCG. The ideal DCG is obtained from the same relevance
    values sorted in descending order.

    Args:
        matches: Ranked sequence of truthy/falsy flags (best rank first).
        k: Only the first ``k`` flags are considered.

    Returns:
        ``DCG / IDCG``, or ``0.0`` when the ideal DCG is not positive
        (no hit in the top ``k``).
    """
    top_k_flags = matches[:k]
    relevance = np.array([1 if flag else 0 for flag in top_k_flags])

    # Positions are 1-based, so the discount for position p is log2(p + 1)
    positions = np.arange(1, relevance.size + 1)
    discounts = np.log2(positions + 1)

    def _discounted_gain(rel):
        # Sum of (2**rel - 1) / discount over all positions
        return np.sum((2 ** rel - 1) / discounts)

    dcg = _discounted_gain(relevance)
    # Best possible ordering of the same relevance values
    idcg = _discounted_gain(np.sort(relevance)[::-1])

    if idcg > 0:
        return dcg / idcg
    return 0.0


# Cutoff rank shared by both metrics
K = 10
recall, ndcg = recall_at_k(matches, K), ndcg_at_k(matches, K)

print(f"Recall@{K}: {recall}")
print(f"NDCG@{K}: {ndcg}")

# Per-recommendation breakdown: similarity score and whether it counted as a hit
for i, pair in enumerate(zip(similarity_scores, matches)):
    score, match = pair
    print(f"Product {i+1}: Similarity Score = {score:.2f}% - {'Match' if match else 'No Match'}")

Recall@10: 0.0
NDCG@10: 0.0
Product 1: Similarity Score = 42.42% - No Match
Product 2: Similarity Score = 37.81% - No Match
Product 3: Similarity Score = 34.63% - No Match
Product 4: Similarity Score = 41.51% - No Match
Product 5: Similarity Score = 40.41% - No Match
Product 6: Similarity Score = 34.57% - No Match
Product 7: Similarity Score = 41.12% - No Match
Product 8: Similarity Score = 41.23% - No Match
Product 9: Similarity Score = 37.97% - No Match
Product 10: Similarity Score = 39.18% - No Match
