https://amazon-reviews-2023.github.io/

In [1]:
import os
import sys
import itertools

import scipy.sparse as sp
import numpy as np
import pandas as pd

import cornac
from cornac.eval_methods import RatioSplit, BaseMethod, StratifiedSplit
from cornac.metrics import FMeasure, RMSE, NCRR, NDCG, Precision, Recall
from cornac.models import BaselineOnly, MF, NMF, BPR, WMF, EASE

import optuna

from datetime import datetime
import random

import matplotlib.pyplot as plt
import pickle
import builtins

print(f"System version: {sys.version}")
print(f"Cornac version: {cornac.__version__}")

SEED = 42
VERBOSE = False
TOP_X = 50

  from .autonotebook import tqdm as notebook_tqdm


System version: 3.12.11 (main, Aug 12 2025, 22:46:48) [GCC 14.2.0]
Cornac version: 2.3.3


In [3]:
raw_data = pd.read_parquet('data/5_category_final_table_(767,684 rows).parquet')

In [4]:
full_df = raw_data.copy()

In [5]:
print(len(full_df['parent_asin'].unique()))

584


# Data Analysis

In [6]:
full_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 767684 entries, 0 to 767683
Data columns (total 21 columns):
 #   Column                  Non-Null Count   Dtype              
---  ------                  --------------   -----              
 0   rating                  767684 non-null  int32              
 1   title_review            767623 non-null  object             
 2   text                    767277 non-null  object             
 3   parent_asin             767684 non-null  object             
 4   user_id                 767684 non-null  object             
 5   helpful_vote            767684 non-null  int64              
 6   title_meta              767684 non-null  object             
 7   price                   675350 non-null  float64            
 8   store                   760314 non-null  object             
 9   features_clean          767684 non-null  object             
 10  description_clean       767684 non-null  object             
 11  timestamp_utc           76

In [7]:
full_df.head()

Unnamed: 0,rating,title_review,text,parent_asin,user_id,helpful_vote,title_meta,price,store,features_clean,...,timestamp_utc,category_depth,verified_purchase_flag,cat_1,cat_2,cat_3,cat_4,price_log,helpful_vote_clipped,avg_rating_parent
0,1,only 1 worked correctly,i ordered this set right before my camping tri...,B09LW2KHPM,AGO25X54UAIXSKHOAH4SG2VUDUNA,1,"vont led camping lantern, led lanterns, suitab...",14.38,vont,bright & lasting - equipped with 30 crazy brig...,...,2020-11-11 07:10:55.679000+00:00,6,1,outdoor recreation,camping & hiking,lights & lanterns,lanterns,2.733068,1,4.7
1,5,lightweight and quiet,this bike is nice and simple. easy to build an...,B0BTDLFXSL,AHGGRLIBAWVH44XAY2QHQOE7S6OQ,0,ativafit exercise bike foldable fitness indoor...,139.99,ativafit,digital monitor shows your data - monitor your...,...,2020-05-17 22:40:46.636000+00:00,4,1,exercise & fitness,cardio training,exercise bikes,exercise bikes,4.948689,0,4.3
2,5,great little tent,"i love the alps tent. it is light, super easy...",B09W4W9JB5,AGOSUQAEYTC5A6YDIJ7X5ZFAOSIQ,0,alps mountaineering lynx 1-person backpacking ...,109.99,alps mountaineering,100% polyester imported great backpacking tent,...,2019-09-01 01:46:16.061000+00:00,5,0,outdoor recreation,camping & hiking,tents & shelters,tents,4.70944,0,4.7
3,5,very cute,"bought this for my son’s birthday, super cute ...",B07Z6Y87XH,AEGEQO4R6APZMUYEYT2VGJ3BKKAQ,0,raskullz mohawk toddler 3+ and child 5+ helmets,35.49,raskullz,design - kids love the red bendable 3d rubber ...,...,2020-11-16 19:07:19.645000+00:00,5,1,sports,cycling,kids' bikes & accessories,kids' helmets,3.597038,0,4.7
4,5,my pooches love it!,we purchased this two person tent for our dogs...,B09HRDWXCK,AEDAZ44DKSCBHMY4VOCU6Z7DWM7A,3,2-person camping tent – includes rain fly and ...,45.94,wakeman,100% polyester imported 2 person tent – this l...,...,2016-02-24 17:58:45+00:00,5,1,outdoor recreation,camping & hiking,tents & shelters,tents,3.84887,3,3.6


In [8]:
print(f"Count of unique user id in train: {len(full_df['user_id'].unique())}")
print(f"Count of unique item id in train: {len(full_df['parent_asin'].unique())}")

Count of unique user id in train: 649151
Count of unique item id in train: 584


In [9]:
pd.crosstab(full_df['rating'], full_df['verified_purchase_flag'])

verified_purchase_flag,0,1
rating,Unnamed: 1_level_1,Unnamed: 2_level_1
1,5748,50862
2,3097,29952
3,3376,45165
4,6175,94818
5,27083,501408


In [10]:
pd.crosstab(full_df['rating'], full_df['helpful_vote_clipped'])

helpful_vote_clipped,0,1,2,3,4,5
rating,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,30523,11730,4751,2402,1480,5724
2,23067,5813,1682,732,440,1315
3,35453,7370,2197,1025,568,1928
4,79757,11895,3417,1504,886,3534
5,449449,47833,11926,4990,2493,11800


In [11]:
for t in [5.0, 4.0, 3.0, 2.0, 1.0]:
    pos_users = full_df[full_df['rating'] >= t]['user_id'].nunique()
    print(f"Threshold >= {t}: {pos_users} users with at least 1 relevant item")

Threshold >= 5.0: 462064 users with at least 1 relevant item
Threshold >= 4.0: 540231 users with at least 1 relevant item
Threshold >= 3.0: 577261 users with at least 1 relevant item
Threshold >= 2.0: 603297 users with at least 1 relevant item
Threshold >= 1.0: 649151 users with at least 1 relevant item


In [12]:
# Number of non-zero entries (observed interactions)
num_interactions = len(full_df['rating'])

# Total possible entries
total_entries = len(full_df['user_id'].unique()) * len(full_df['parent_asin'].unique())

# Sparsity
sparsity = 1 - (num_interactions / total_entries)

print(f"Sparsity: {sparsity:.4f} ({sparsity*100:.2f}%)")

Sparsity: 0.9980 (99.80%)


# Create Item Dataset

In [13]:
# Restore a Item dateset
items_df = full_df[['parent_asin', 'cat_2', 'cat_4']].drop_duplicates().reset_index(drop=True)
items_df.head()

Unnamed: 0,parent_asin,cat_2,cat_4
0,B09LW2KHPM,camping & hiking,lanterns
1,B0BTDLFXSL,cardio training,exercise bikes
2,B09W4W9JB5,camping & hiking,tents
3,B07Z6Y87XH,cycling,kids' helmets
4,B09HRDWXCK,camping & hiking,tents


# Create Interacted Dataset

In [14]:
# Create a Interacted Dataset
interacted_df = full_df[['user_id', 'parent_asin','cat_2', 'cat_4']]
interacted_df.head()

Unnamed: 0,user_id,parent_asin,cat_2,cat_4
0,AGO25X54UAIXSKHOAH4SG2VUDUNA,B09LW2KHPM,camping & hiking,lanterns
1,AHGGRLIBAWVH44XAY2QHQOE7S6OQ,B0BTDLFXSL,cardio training,exercise bikes
2,AGOSUQAEYTC5A6YDIJ7X5ZFAOSIQ,B09W4W9JB5,camping & hiking,tents
3,AEGEQO4R6APZMUYEYT2VGJ3BKKAQ,B07Z6Y87XH,cycling,kids' helmets
4,AEDAZ44DKSCBHMY4VOCU6Z7DWM7A,B09HRDWXCK,camping & hiking,tents


# Create UIR Dataset

In [15]:
uir_dataset = cornac.data.Dataset.from_uir(
    list(full_df[['user_id', 'parent_asin', 'rating']].itertuples(index=False, name=None))
)
R = uir_dataset.matrix.toarray()
R_mask = (R > 0).astype(float)
pd.DataFrame(
  data=R,
  index=[f"User {uir_dataset.user_ids[u]}" for u in np.arange(uir_dataset.num_users)],
  columns=[f"Item {uir_dataset.item_ids[i]}" for i in np.arange(uir_dataset.num_items)]
).replace(0, np.nan)



Unnamed: 0,Item B09LW2KHPM,Item B0BTDLFXSL,Item B09W4W9JB5,Item B07Z6Y87XH,Item B09HRDWXCK,Item B000KC009Y,Item B0C6H9K5NF,Item B00INQVYZ8,Item B078FHD1ZH,Item B0855B4QZR,...,Item B0C6KP8SRQ,Item B0B829WR28,Item B0C1BD44L9,Item B0864RK56W,Item B07RHPQ3FW,Item B0917VV12K,Item B0B4M6Z2GH,Item B00H90PFOK,Item B07BM1RK7K,Item B077KGFWJ9
User AGO25X54UAIXSKHOAH4SG2VUDUNA,1.0,,,,,,,,,,...,,,,,,,,,,
User AHGGRLIBAWVH44XAY2QHQOE7S6OQ,,5.0,,,,,,,,,...,,,,,,,,,,
User AGOSUQAEYTC5A6YDIJ7X5ZFAOSIQ,,,5.0,,,,,,,,...,,,,,,,,,,
User AEGEQO4R6APZMUYEYT2VGJ3BKKAQ,,,,5.0,,,,,,,...,,,,,,,,,,
User AEDAZ44DKSCBHMY4VOCU6Z7DWM7A,,,,,5.0,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
User AGJHIOTHTLUFIWCJUSNIW35RMYRQ,,,,,,,,,,,...,,,,,,,,,,
User AE4TJGRYAKCRX7BHLJGWUUP6ZQPQ,,,,,,,,,,,...,,,,,,,,,,
User AEYUKKCRDBX4SCDNEBVYQQ2TORRA,,,,,,,,,,,...,,,,,,,,,,
User AESGZS7KSDLXD6OXRMLICJWVE57Q,,,,,,,,,,,...,,,,,,,,,,


# Helper Functions

In [16]:
def convert_df_to_cornac_format(df, model=None):
    """
    Converts a Pandas DataFrame to Cornac-compatible (user, item, rating/confidence) triplets
    based on the model type.

    Parameters:
        df (pd.DataFrame): Must contain 'user_id', 'parent_asin', and relevant columns
        model (str): One of 'BPR', 'WMF', 'MF', or 'EASE'

    Returns:
        List of (user, item, score) triplets
    """
    df = df.copy()

    if model in {"BPR", "EASE"}:
        # Binary implicit feedback (1 if purchased or helped)
        df['interaction'] = ((df['verified_purchase_flag'] == 1) | (df['helpful_vote_clipped'] > 0)).astype(np.float32)
        r = 'interaction'

    elif model == "WMF":
        # Confidence score (clipped helpful votes)
        df['confidence'] = df['helpful_vote_clipped'].fillna(0).astype(np.float32)
        r = 'confidence'

    elif model == "MF":
        # Explicit feedback model
        if "rating" not in df.columns:
            raise ValueError("DataFrame must contain a 'rating' column for MF.")
        r = "rating"

    else:
        raise ValueError(f"Unsupported model type: {model}")

    return list(zip(
        df['user_id'].astype(str).values,
        df['parent_asin'].astype(str).values,
        df[r].astype(np.float32).values
    ))


In [17]:
# --- Shared setup ---
def run_experiment(model, model_name, full_data, rating_threshold=4.0):
    ev = RatioSplit(
    data=full_data,
    test_size=0.2,
    rating_threshold=rating_threshold,
    exclude_unknowns=True,
    seed=SEED,
    verbose=VERBOSE
    )

    metrics = [Recall(k=50), FMeasure(k=50), NCRR(k=50), NDCG(k=50)]
    exp = cornac.Experiment(eval_method=ev, models=[model], metrics=metrics, save_dir=None, verbose=False)

    # UTF-8 patch to fix UnicodeEncodeError (for EASE)
    def safe_utf8_open(*args, **kwargs):
        if len(args) >= 2 and isinstance(args[1], str) and 'b' in args[1]:
            # Binary mode
            return open_backup(*args, **kwargs)
        # Text mode – enforce UTF-8 encoding
        kwargs.setdefault('encoding', 'utf-8')
        return open_backup(*args, **kwargs)
            
    open_backup = builtins.open
    exp.run()
    builtins.open = safe_utf8_open  # Restore open()
    return exp.result[0].metric_avg_results, exp.result[0]

# --- MF tuning only ---
def objective_mf(trial):
    k = trial.suggest_int("k", 10, 200)
    learning_rate = trial.suggest_float("learning_rate", 0.001, 0.05, log=True)
    lambda_reg = trial.suggest_float("lambda_reg", 0.001, 0.1, log=True)
    max_iter = 100

    model = MF(
        k=k,
        max_iter=max_iter,
        learning_rate=learning_rate,
        lambda_reg=lambda_reg,
        use_bias=True,  # Optional: can set to False if you want unbiased MF
        seed=SEED,
        verbose=VERBOSE
    )

    metrics, result = run_experiment(
        model=model,
        model_name="MF",
        full_data=rating_data,            # assumed explicit ratings (e.g., 1–5)
        rating_threshold=4.0
    )

    trial.set_user_attr("result", result)
    return metrics["F1@50"]

# --- BPR tuning only ---
def objective_bpr(trial):
    k = trial.suggest_int("k", 100, 200)
    lr = trial.suggest_float("learning_rate", 0.001, 0.05, log=True)
    lambda_reg = trial.suggest_float("lambda_reg", 0.001, 0.1, log=True)
    max_iter = 500

    model = BPR(k=k, max_iter=max_iter, learning_rate=lr,
                lambda_reg=lambda_reg, seed=SEED, verbose=VERBOSE)
    metrics, result = run_experiment(model, "BPR", interaction_data, rating_threshold=0.5)

    trial.set_user_attr("result", result)
    return metrics["F1@50"]

# --- WMF tuning only ---
def objective_wmf(trial):
    k = trial.suggest_int("k", 50, 250)
    lr = trial.suggest_float("learning_rate", 0.001, 0.05, log=True)
    lambda_u = trial.suggest_float("lambda_u", 0.001, 0.1, log=True)
    lambda_v = trial.suggest_float("lambda_v", 0.001, 0.1, log=True)
    a = trial.suggest_float("a", 1.0, 10.0)
    b = trial.suggest_float("b", 0.0001, 0.01, log=True)
    max_iter = 50
    #max_iter = trial.suggest_int("max_iter", 100, 400)

    model = WMF(k=k, max_iter=max_iter, a=a, b=b, learning_rate=lr,
                lambda_u=lambda_u, lambda_v=lambda_v, seed=SEED, verbose=VERBOSE)
    metrics, result = run_experiment(model, "WMF", confidence_data, rating_threshold=0.5)
    trial.set_user_attr("result", result)
    return metrics["F1@50"]
    
# --- EASE tuning only ---
def objective_ease(trial):
    lamb = trial.suggest_float("lamb", 10.0, 1000.0, log=True)

    model = EASE(lamb=lamb, verbose=VERBOSE)
    metrics, result = run_experiment(
        model=model,
        model_name="EASE",
        full_data=interaction_data,
        rating_threshold=0.5
    )

    trial.set_user_attr("result", result)
    return metrics["F1@50"]

In [18]:
def generate_top_k_recommendations(model_class, dataset, study=None, k=10, model_path=None):
    """
    Generate top-k item recommendations for a sample of users using a trained Cornac model.
    
    Parameters:
        model_class: Cornac model class (e.g., BPR, WMF, MF, EASE)
        dataset: Cornac-format dataset
        study: Optuna study object (optional)
        k: number of top items per user
        model_path: optional path to load a pickled model

    Returns:
        pd.DataFrame: top-k item IDs per user
    """
    # === Extract best parameters ===
    best_params = study.best_params if study else {}

    # === Load model if path provided ===
    if model_path:
        with open(model_path, "rb") as f:
            model = pickle.load(f)
        print(f"Loaded model from: {model_path}")
    else:
        # === Generate model name for logging/debugging ===
        if study:
            if model_class.__name__ == "WMF":
                model_name = (
                    f"WMF(k={best_params['k']}, lr={best_params['learning_rate']:.4f}, "
                    f"lambda_u={best_params['lambda_u']:.4f}, lambda_v={best_params['lambda_v']:.4f}, "
                    f"a={best_params['a']}, b={best_params['b']:.4f}, it=50)"
                )
            elif model_class.__name__ == "BPR":
                model_name = (
                    f"BPR(k={best_params['k']}, lr={best_params['learning_rate']:.4f}, "
                    f"lambda_reg={best_params['lambda_reg']:.4f}, it=500)"
                )
            elif model_class.__name__ == "EASE":
                model_name = f"EASE(lamb={best_params['lamb']:.2f})"
            elif model_class.__name__ == "MF":
                model_name = (
                    f"MF(k={best_params['k']}, lr={best_params['learning_rate']:.4f}, "
                    f"lambda_reg={best_params['lambda_reg']:.4f}, use_bias={best_params.get('use_bias', True)})"
                )
            else:
                model_name = f"{model_class.__name__}_tuned"
        else:
            model_name = f"{model_class.__name__}_default"

        # === Instantiate and train model ===
        model = model_class(
            **best_params,
            seed=SEED,
            verbose=VERBOSE,
            name=model_name
        )
        model.fit(dataset)

    # === Extract mapping from train set ===
    train_set = model.train_set
    id2raw_iid = {v: k for k, v in train_set.iid_map.items()}
    id2raw_uid = {v: k for k, v in train_set.uid_map.items()}
    user_indices = sorted(range(train_set.num_users), key=lambda x: id2raw_uid[x])
    sampled_user_indices = random.sample(user_indices, k=100)

    # === Generate top-k items per user ===
    all_top_k = []
    for uid in sampled_user_indices:
        scores = np.asarray(model.score(user_idx=uid)).flatten()
        top_iids = np.argsort(-scores)[:k]
        raw_iids = [id2raw_iid[i] for i in top_iids]
        all_top_k.append(raw_iids)

    # === Return as DataFrame ===
    df_topk = pd.DataFrame(all_top_k, index=[id2raw_uid[uid] for uid in sampled_user_indices])
    df_topk.index.name = "user_id"

    return df_topk


In [19]:
def annotate_recommendations_with_category(df_topk, items_df):
    """
    Converts wide-format top-k recommendation DataFrame into long format and 
    joins with item metadata (cat_2, cat_4).

    Parameters:
        df_topk (pd.DataFrame): Top-k recommendations (users as index, items as columns).
        items_df (pd.DataFrame): Item metadata with columns ['parent_asin', 'cat_2', 'cat_4'].

    Returns:
        pd.DataFrame: Long-format DataFrame with user_id, rank (1-based), parent_asin, cat_2, cat_4.
    """
    # Flatten to long format
    df_long = df_topk.reset_index().melt(id_vars='user_id', var_name='rank', value_name='parent_asin')
    
    # Merge with metadata
    df_merged = df_long.merge(items_df[['parent_asin', 'cat_2', 'cat_4']], on='parent_asin', how='left')
    
    # Convert rank to 1-based integer
    df_merged['rank'] = df_merged['rank'].astype(int) + 1
    
    # Sort result
    df_merged = df_merged.sort_values(by=['user_id', 'rank'])

    return df_merged


In [20]:
import pandas as pd

def compute_cat2_diff_cat4_rate(interacted_df, recommendation_df):
    """
    Computes the rate of recommendations that match the user's rated `cat_2`
    but have different `cat_4`.

    Parameters:
        interacted_df (pd.DataFrame): User-item interactions with columns:
            ['user_id', 'parent_asin', 'cat_2', 'cat_4']
        recommendation_df (pd.DataFrame): Recommendations with:
            ['user_id', 'parent_asin', 'cat_2', 'cat_4']

    Returns:
        overall_rate (float): Fraction of recommended items that are in the same
                              `cat_2` but not in the same `cat_4` as the rated ones.
        labeled_recommendations (pd.DataFrame): recommendation_df with an added
                                                'cat_match' column.
        user_rate (pd.Series): Per-user rate of 'same_cat2_diff_cat4' items.
    """
    # Build per-user sets of rated categories
    rated_cat2 = interacted_df.groupby('user_id')['cat_2'].apply(set)
    rated_cat4 = interacted_df.groupby('user_id')['cat_4'].apply(set)

    # Helper to assign match label
    def label_category_match(row):
        user = row['user_id']
        c2 = row['cat_2']
        c4 = row['cat_4']
        cat2_set = rated_cat2.get(user, set())
        cat4_set = rated_cat4.get(user, set())
        if c4 in cat4_set:
            return 'same_cat4'
        elif c2 in cat2_set:
            return 'same_cat2_diff_cat4'
        else:
            return 'diff_cat2'

    # Apply labeling
    recommendation_df = recommendation_df.copy()
    recommendation_df['cat_match'] = recommendation_df.apply(label_category_match, axis=1)

    # Compute overall rate
    overall_rate = (recommendation_df['cat_match'] == 'same_cat2_diff_cat4').mean()

    # Per-user rate
    user_rate = (
        recommendation_df.groupby('user_id')['cat_match']
        .apply(lambda x: (x == 'same_cat2_diff_cat4').mean())
    )

    return overall_rate, recommendation_df, user_rate


# Create Interaction, Confidence Data ad Rating Data for Models

In [21]:
interaction_data = convert_df_to_cornac_format(full_df, model="BPR")
confidence_data = convert_df_to_cornac_format(full_df, model="WMF")
rating_data = convert_df_to_cornac_format(full_df, model="MF")


In [22]:
N = 15
now = datetime.now().strftime("%Y%m%d_%H%M%S")

# MF

In [23]:
# Run MF optimization
study_mf = optuna.create_study(direction='maximize')
study_mf.optimize(objective_mf, n_trials=N)

# Save best model
best_trial = study_mf.best_trial
if "result" in best_trial.user_attrs:
    best_model = best_trial.user_attrs["result"]

    # Ensure train_set is included
    if not hasattr(best_model, "train_set"):
        print("Warning: No train_set found in best_model.")
    else:
        best_model.train_set = best_model.train_set  # explicitly attach

    with open("mf_model_with_trainset.pkl", "wb") as f:
        pickle.dump(best_model, f)

    print("Best MF model saved to mf_model_with_trainset.pkl")

# Collect all trial results
mf_rows = []
for trial in study_mf.trials:
    if trial.value is None:
        continue
    trial_data = {
        "Trial": trial.number,
        "F1@50": trial.value,
        "Model": "MF",
        **trial.params
    }

    # Add additional metrics from trial's result (if present)
    if "result" in trial.user_attrs:
        for metric, value in trial.user_attrs["result"].metric_avg_results.items():
            trial_data[metric] = value

    mf_rows.append(trial_data)

# Convert to DataFrame and export
mf_results_df = pd.DataFrame(mf_rows)


[I 2025-09-01 05:06:54,566] A new study created in memory with name: no-name-f9dcd903-2b36-4a1d-b2bc-8f57cd9ac4e5



TEST:
...
   |  F1@50 | NCRR@50 | NDCG@50 | Recall@50 | Train (s) | Test (s)
-- + ------ + ------- + ------- + --------- + --------- + --------
MF | 0.0043 |  0.0154 |  0.0318 |    0.1010 |    8.9022 | 248.6463



[I 2025-09-01 05:11:15,029] Trial 0 finished with value: 0.00426875421451207 and parameters: {'k': 148, 'learning_rate': 0.005317387018458959, 'lambda_reg': 0.0014444281739516911}. Best is trial 0 with value: 0.00426875421451207.



TEST:
...
   |  F1@50 | NCRR@50 | NDCG@50 | Recall@50 | Train (s) | Test (s)
-- + ------ + ------- + ------- + --------- + --------- + --------
MF | 0.0039 |  0.0117 |  0.0268 |    0.0911 |    4.5025 | 258.7457



[I 2025-09-01 05:15:40,904] Trial 1 finished with value: 0.0038549419196019113 and parameters: {'k': 62, 'learning_rate': 0.03295472790861735, 'lambda_reg': 0.07826231853587734}. Best is trial 0 with value: 0.00426875421451207.



TEST:
...
   |  F1@50 | NCRR@50 | NDCG@50 | Recall@50 | Train (s) | Test (s)
-- + ------ + ------- + ------- + --------- + --------- + --------
MF | 0.0043 |  0.0107 |  0.0281 |    0.1024 |    1.3328 | 259.5199



[I 2025-09-01 05:20:04,562] Trial 2 finished with value: 0.004305434577118263 and parameters: {'k': 16, 'learning_rate': 0.03472144120387885, 'lambda_reg': 0.0019215357741660165}. Best is trial 2 with value: 0.004305434577118263.



TEST:
...
   |  F1@50 | NCRR@50 | NDCG@50 | Recall@50 | Train (s) | Test (s)
-- + ------ + ------- + ------- + --------- + --------- + --------
MF | 0.0037 |  0.0089 |  0.0238 |    0.0884 |    7.6075 | 258.2441



[I 2025-09-01 05:24:33,196] Trial 3 finished with value: 0.0037199721878105997 and parameters: {'k': 126, 'learning_rate': 0.001182954144120544, 'lambda_reg': 0.03371747652495995}. Best is trial 2 with value: 0.004305434577118263.



TEST:
...
   |  F1@50 | NCRR@50 | NDCG@50 | Recall@50 | Train (s) | Test (s)
-- + ------ + ------- + ------- + --------- + --------- + --------
MF | 0.0040 |  0.0094 |  0.0255 |    0.0948 |   11.4275 | 261.1962



[I 2025-09-01 05:29:08,551] Trial 4 finished with value: 0.003994499655891023 and parameters: {'k': 195, 'learning_rate': 0.0021968666206472545, 'lambda_reg': 0.02235466783183327}. Best is trial 2 with value: 0.004305434577118263.



TEST:
...
   |  F1@50 | NCRR@50 | NDCG@50 | Recall@50 | Train (s) | Test (s)
-- + ------ + ------- + ------- + --------- + --------- + --------
MF | 0.0044 |  0.0128 |  0.0301 |    0.1031 |    4.3080 | 260.6963



[I 2025-09-01 05:33:36,358] Trial 5 finished with value: 0.004356035290110689 and parameters: {'k': 60, 'learning_rate': 0.002059459771946506, 'lambda_reg': 0.002708211549013927}. Best is trial 5 with value: 0.004356035290110689.



TEST:
...
   |  F1@50 | NCRR@50 | NDCG@50 | Recall@50 | Train (s) | Test (s)
-- + ------ + ------- + ------- + --------- + --------- + --------
MF | 0.0043 |  0.0124 |  0.0297 |    0.1029 |    3.9062 | 269.4601



[I 2025-09-01 05:38:12,488] Trial 6 finished with value: 0.00434749093351853 and parameters: {'k': 52, 'learning_rate': 0.010190693881518253, 'lambda_reg': 0.008426101127360064}. Best is trial 5 with value: 0.004356035290110689.



TEST:
...
   |  F1@50 | NCRR@50 | NDCG@50 | Recall@50 | Train (s) | Test (s)
-- + ------ + ------- + ------- + --------- + --------- + --------
MF | 0.0042 |  0.0111 |  0.0281 |    0.1005 |    9.1384 | 259.9421



[I 2025-09-01 05:42:44,780] Trial 7 finished with value: 0.004228024257544062 and parameters: {'k': 138, 'learning_rate': 0.0035299185984066822, 'lambda_reg': 0.01208470545020436}. Best is trial 5 with value: 0.004356035290110689.



TEST:
...
   |  F1@50 | NCRR@50 | NDCG@50 | Recall@50 | Train (s) | Test (s)
-- + ------ + ------- + ------- + --------- + --------- + --------
MF | 0.0042 |  0.0119 |  0.0285 |    0.1001 |    5.7122 | 287.0088



[I 2025-09-01 05:47:40,295] Trial 8 finished with value: 0.004216421119734009 and parameters: {'k': 86, 'learning_rate': 0.023212431935699142, 'lambda_reg': 0.036552857513754554}. Best is trial 5 with value: 0.004356035290110689.



TEST:
...
   |  F1@50 | NCRR@50 | NDCG@50 | Recall@50 | Train (s) | Test (s)
-- + ------ + ------- + ------- + --------- + --------- + --------
MF | 0.0038 |  0.0113 |  0.0263 |    0.0910 |    3.8363 | 299.1894



[I 2025-09-01 05:52:46,503] Trial 9 finished with value: 0.003839715771250952 and parameters: {'k': 48, 'learning_rate': 0.010769674213284017, 'lambda_reg': 0.06141267544284003}. Best is trial 5 with value: 0.004356035290110689.



TEST:
...
   |  F1@50 | NCRR@50 | NDCG@50 | Recall@50 | Train (s) | Test (s)
-- + ------ + ------- + ------- + --------- + --------- + --------
MF | 0.0040 |  0.0102 |  0.0262 |    0.0941 |    5.8421 | 266.5706



[I 2025-09-01 05:57:22,002] Trial 10 finished with value: 0.003972695492411738 and parameters: {'k': 93, 'learning_rate': 0.00115315791721606, 'lambda_reg': 0.0035829007633120533}. Best is trial 5 with value: 0.004356035290110689.



TEST:
...
   |  F1@50 | NCRR@50 | NDCG@50 | Recall@50 | Train (s) | Test (s)
-- + ------ + ------- + ------- + --------- + --------- + --------
MF | 0.0042 |  0.0103 |  0.0271 |    0.0986 |    1.1698 | 268.4792



[I 2025-09-01 06:01:54,569] Trial 11 finished with value: 0.004174021581990648 and parameters: {'k': 10, 'learning_rate': 0.008257188028975548, 'lambda_reg': 0.005409351101594197}. Best is trial 5 with value: 0.004356035290110689.



TEST:
...
   |  F1@50 | NCRR@50 | NDCG@50 | Recall@50 | Train (s) | Test (s)
-- + ------ + ------- + ------- + --------- + --------- + --------
MF | 0.0044 |  0.0129 |  0.0303 |    0.1047 |    3.8644 | 266.9285



[I 2025-09-01 06:06:28,122] Trial 12 finished with value: 0.004404088770974109 and parameters: {'k': 51, 'learning_rate': 0.014276528069064843, 'lambda_reg': 0.007263002812603859}. Best is trial 12 with value: 0.004404088770974109.



TEST:
...
   |  F1@50 | NCRR@50 | NDCG@50 | Recall@50 | Train (s) | Test (s)
-- + ------ + ------- + ------- + --------- + --------- + --------
MF | 0.0044 |  0.0138 |  0.0311 |    0.1049 |    4.8473 | 267.1095



[I 2025-09-01 06:11:02,930] Trial 13 finished with value: 0.004423905007543019 and parameters: {'k': 70, 'learning_rate': 0.01682394225026944, 'lambda_reg': 0.002901308403754475}. Best is trial 13 with value: 0.004423905007543019.



TEST:
...
   |  F1@50 | NCRR@50 | NDCG@50 | Recall@50 | Train (s) | Test (s)
-- + ------ + ------- + ------- + --------- + --------- + --------
MF | 0.0044 |  0.0127 |  0.0300 |    0.1035 |    3.0426 | 261.3146



[I 2025-09-01 06:15:30,189] Trial 14 finished with value: 0.004382815207889754 and parameters: {'k': 34, 'learning_rate': 0.01672320821877955, 'lambda_reg': 0.005976209845509361}. Best is trial 13 with value: 0.004423905007543019.


Best MF model saved to mf_model_with_trainset.pkl


## Generate Top K Recommendations for Sampled 100 Users

In [24]:
mf_topk_df = generate_top_k_recommendations(MF, uir_dataset, study_mf, k=10)

## Final Recommendations

In [25]:
mf_recommendation_df = annotate_recommendations_with_category(mf_topk_df, items_df)

In [26]:
mf_recommendation_df.head()

Unnamed: 0,user_id,rank,parent_asin,cat_2,cat_4
29,AE2CA7ZNECJK5ECZRRRGMPGLZXKA,1,B09DX9KS81,camping & hiking,trekking poles
129,AE2CA7ZNECJK5ECZRRRGMPGLZXKA,2,B00OUGNUO8,cardio training,rowers
229,AE2CA7ZNECJK5ECZRRRGMPGLZXKA,3,B0C5WTV2XH,camping & hiking,headlamps
329,AE2CA7ZNECJK5ECZRRRGMPGLZXKA,4,B0BXHLWCR5,camping & hiking,hand & foot warmers
429,AE2CA7ZNECJK5ECZRRRGMPGLZXKA,5,B00F9F6OVK,camping & hiking,cots & hammocks


In [27]:
# Compute intra-category diversity rate for MF
mf_overall_rate, mf_labeled_recs, mf_user_rates = compute_cat2_diff_cat4_rate(interacted_df, mf_recommendation_df)

# Print overall result
print(f"Overall Same Main Cat but Diff Sub-cat Rate (MF): {mf_overall_rate:.2%}")


Overall Same Main Cat but Diff Sub-cat Rate (MF): 31.60%


In [28]:
mf_labeled_recs.head()

Unnamed: 0,user_id,rank,parent_asin,cat_2,cat_4,cat_match
29,AE2CA7ZNECJK5ECZRRRGMPGLZXKA,1,B09DX9KS81,camping & hiking,trekking poles,same_cat2_diff_cat4
129,AE2CA7ZNECJK5ECZRRRGMPGLZXKA,2,B00OUGNUO8,cardio training,rowers,diff_cat2
229,AE2CA7ZNECJK5ECZRRRGMPGLZXKA,3,B0C5WTV2XH,camping & hiking,headlamps,same_cat2_diff_cat4
329,AE2CA7ZNECJK5ECZRRRGMPGLZXKA,4,B0BXHLWCR5,camping & hiking,hand & foot warmers,same_cat2_diff_cat4
429,AE2CA7ZNECJK5ECZRRRGMPGLZXKA,5,B00F9F6OVK,camping & hiking,cots & hammocks,same_cat2_diff_cat4


# BPR

In [29]:
# Run BPR optimization
study_bpr = optuna.create_study(direction='maximize')
study_bpr.optimize(objective_bpr, n_trials=N)

# Save best model
best_trial = study_bpr.best_trial
if "result" in best_trial.user_attrs:
    best_model = best_trial.user_attrs["result"]

    # Ensure train_set is included
    if not hasattr(best_model, "train_set"):
        print("Warning: No train_set found in best_model.")
    else:
        best_model.train_set = best_model.train_set  # explicitly attach

    with open("bpr_model_with_trainset.pkl", "wb") as f:
        pickle.dump(best_model, f)

    print("Best BPR model saved to bpr_model_with_trainset.pkl")

# Collect all trial results
bpr_rows = []
for trial in study_bpr.trials:
    if trial.value is None:
        continue
    trial_data = {
        "Trial": trial.number,
        "F1@50": trial.value,
        "Model": "BPR",
        **trial.params
    }

    # Add other evaluation metrics from user_attrs["result"]
    if "result" in trial.user_attrs:
        for metric, value in trial.user_attrs["result"].metric_avg_results.items():
            trial_data[metric] = value

    bpr_rows.append(trial_data)

# Convert to DataFrame and export
bpr_results_df = pd.DataFrame(bpr_rows)


[I 2025-09-01 06:16:00,621] A new study created in memory with name: no-name-2b2e04db-d685-44d0-9b16-bf49081e0f37



TEST:
...
    |  F1@50 | NCRR@50 | NDCG@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------- + ------- + --------- + --------- + --------
BPR | 0.0117 |  0.0395 |  0.0852 |    0.2745 |  131.8775 | 299.8455



[I 2025-09-01 06:23:15,371] Trial 0 finished with value: 0.011711722194874404 and parameters: {'k': 200, 'learning_rate': 0.010305710118505595, 'lambda_reg': 0.06776984464662328}. Best is trial 0 with value: 0.011711722194874404.



TEST:
...
    |  F1@50 | NCRR@50 | NDCG@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------- + ------- + --------- + --------- + --------
BPR | 0.0134 |  0.0589 |  0.1090 |    0.3131 |  123.2717 | 302.8556



[I 2025-09-01 06:30:24,449] Trial 1 finished with value: 0.013382116046413872 and parameters: {'k': 165, 'learning_rate': 0.010287452509615066, 'lambda_reg': 0.042274902300089866}. Best is trial 1 with value: 0.013382116046413872.



TEST:
...
    |  F1@50 | NCRR@50 | NDCG@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------- + ------- + --------- + --------- + --------
BPR | 0.0138 |  0.0719 |  0.1215 |    0.3226 |  127.3270 | 294.0800



[I 2025-09-01 06:37:28,955] Trial 2 finished with value: 0.013770878387956449 and parameters: {'k': 186, 'learning_rate': 0.011694148200934947, 'lambda_reg': 0.0015032482679339168}. Best is trial 2 with value: 0.013770878387956449.



TEST:
...
    |  F1@50 | NCRR@50 | NDCG@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------- + ------- + --------- + --------- + --------
BPR | 0.0136 |  0.0654 |  0.1144 |    0.3178 |  123.4527 | 302.8880



[I 2025-09-01 06:44:38,415] Trial 3 finished with value: 0.013566753296212763 and parameters: {'k': 149, 'learning_rate': 0.004620346350366421, 'lambda_reg': 0.004549499335617619}. Best is trial 2 with value: 0.013770878387956449.



TEST:
...
    |  F1@50 | NCRR@50 | NDCG@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------- + ------- + --------- + --------- + --------
BPR | 0.0103 |  0.0360 |  0.0768 |    0.2434 |  518.1833 | 294.4508



[I 2025-09-01 06:58:14,181] Trial 4 finished with value: 0.010312921221109685 and parameters: {'k': 197, 'learning_rate': 0.0017320237012261484, 'lambda_reg': 0.01549400966375203}. Best is trial 2 with value: 0.013770878387956449.



TEST:
...
    |  F1@50 | NCRR@50 | NDCG@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------- + ------- + --------- + --------- + --------
BPR | 0.0106 |  0.0363 |  0.0780 |    0.2495 |  133.0873 | 293.7645



[I 2025-09-01 07:05:24,112] Trial 5 finished with value: 0.010585799576414845 and parameters: {'k': 197, 'learning_rate': 0.001437277713366045, 'lambda_reg': 0.00835045803342913}. Best is trial 2 with value: 0.013770878387956449.



TEST:
...
    |  F1@50 | NCRR@50 | NDCG@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------- + ------- + --------- + --------- + --------
BPR | 0.0146 |  0.0897 |  0.1398 |    0.3402 |  115.7051 | 291.6662



[I 2025-09-01 07:12:14,337] Trial 6 finished with value: 0.01455681514660474 and parameters: {'k': 148, 'learning_rate': 0.04533218830222551, 'lambda_reg': 0.004856216842410588}. Best is trial 6 with value: 0.01455681514660474.



TEST:
...
    |  F1@50 | NCRR@50 | NDCG@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------- + ------- + --------- + --------- + --------
BPR | 0.0104 |  0.0361 |  0.0769 |    0.2450 |  110.8562 | 318.3061



[I 2025-09-01 07:19:26,223] Trial 7 finished with value: 0.010377231651478759 and parameters: {'k': 120, 'learning_rate': 0.003885512403391974, 'lambda_reg': 0.09790370452166856}. Best is trial 6 with value: 0.01455681514660474.



TEST:
...
    |  F1@50 | NCRR@50 | NDCG@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------- + ------- + --------- + --------- + --------
BPR | 0.0161 |  0.0791 |  0.1371 |    0.3743 |  131.0122 | 290.1423



[I 2025-09-01 07:26:30,453] Trial 8 finished with value: 0.016060284293854 and parameters: {'k': 189, 'learning_rate': 0.018287561127409983, 'lambda_reg': 0.020985630631793123}. Best is trial 8 with value: 0.016060284293854.



TEST:
...
    |  F1@50 | NCRR@50 | NDCG@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------- + ------- + --------- + --------- + --------
BPR | 0.0134 |  0.0582 |  0.1085 |    0.3143 |  107.3957 | 286.9721



[I 2025-09-01 07:33:07,738] Trial 9 finished with value: 0.013419346845033892 and parameters: {'k': 104, 'learning_rate': 0.009858808083934849, 'lambda_reg': 0.039970827520593256}. Best is trial 8 with value: 0.016060284293854.



TEST:
...
    |  F1@50 | NCRR@50 | NDCG@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------- + ------- + --------- + --------- + --------
BPR | 0.0164 |  0.0904 |  0.1486 |    0.3818 |  124.6996 | 295.3942



[I 2025-09-01 07:40:10,779] Trial 10 finished with value: 0.016363253695420427 and parameters: {'k': 166, 'learning_rate': 0.03051020489394059, 'lambda_reg': 0.020522067527395577}. Best is trial 10 with value: 0.016363253695420427.



TEST:
...
    |  F1@50 | NCRR@50 | NDCG@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------- + ------- + --------- + --------- + --------
BPR | 0.0162 |  0.0939 |  0.1509 |    0.3788 |  127.3069 | 294.6568



[I 2025-09-01 07:47:15,712] Trial 11 finished with value: 0.016245471824585293 and parameters: {'k': 171, 'learning_rate': 0.03542885423459813, 'lambda_reg': 0.018628391565240737}. Best is trial 10 with value: 0.016363253695420427.



TEST:
...
    |  F1@50 | NCRR@50 | NDCG@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------- + ------- + --------- + --------- + --------
BPR | 0.0163 |  0.0967 |  0.1537 |    0.3803 |  123.1618 | 291.1568



[I 2025-09-01 07:54:13,031] Trial 12 finished with value: 0.016286749105513063 and parameters: {'k': 167, 'learning_rate': 0.04151644256337581, 'lambda_reg': 0.02033648379655572}. Best is trial 10 with value: 0.016363253695420427.



TEST:
...
    |  F1@50 | NCRR@50 | NDCG@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------- + ------- + --------- + --------- + --------
BPR | 0.0154 |  0.0859 |  0.1407 |    0.3602 |  125.7710 | 295.9960



[I 2025-09-01 08:01:17,889] Trial 13 finished with value: 0.015416954340352342 and parameters: {'k': 166, 'learning_rate': 0.024374193567667016, 'lambda_reg': 0.008360385197400524}. Best is trial 10 with value: 0.016363253695420427.



TEST:
...
    |  F1@50 | NCRR@50 | NDCG@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------- + ------- + --------- + --------- + --------
BPR | 0.0163 |  0.0848 |  0.1436 |    0.3805 |  113.8416 | 287.8544



[I 2025-09-01 08:08:02,592] Trial 14 finished with value: 0.01629919941691622 and parameters: {'k': 136, 'learning_rate': 0.026746461464024755, 'lambda_reg': 0.031230327017152643}. Best is trial 10 with value: 0.016363253695420427.


Best BPR model saved to bpr_model_with_trainset.pkl


## Generate Top K Recommendations for Sampled 100 Users

In [30]:
bpr_topk_df = generate_top_k_recommendations(BPR, uir_dataset, study_bpr, k=10)

In [31]:
bpr_topk_df.head()

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
AHSQXZIETCR5RZFI5PVEQGISVJOA,B00NWXLQD2,B09LW2KHPM,B09MJKJYLQ,B01L6RE7Z4,B07TDPP7LQ,B07BQRWTDJ,B094DXJZPN,B08GM3QY5N,B00NPLSZF8,B01A4W8AUK
AHWK7DFBUIQVZU6H2O64VFJR3RWA,B00NWXLQD2,B09LW2KHPM,B07YFXSG73,B09MJKJYLQ,B01L6RE7Z4,B094DXJZPN,B07BQRWTDJ,B07TDPP7LQ,B00NPLSZF8,B08GM3QY5N
AGQTTANUKMPEDBG4DVG45FTLQMCQ,B00NWXLQD2,B09LW2KHPM,B09MJKJYLQ,B07BQRWTDJ,B01L6RE7Z4,B07TDPP7LQ,B094DXJZPN,B08GM3QY5N,B00NPLSZF8,B01A4W8AUK
AEWX5IH5DIMGDSK66USSOGTUDZGQ,B00NWXLQD2,B07BJ9NLDJ,B09LW2KHPM,B09MJKJYLQ,B094DXJZPN,B07BQRWTDJ,B00NPLSZF8,B07TDPP7LQ,B0BX5QFWQN,B01L6RE7Z4
AE27CZZMARXNW7ONE4U73YWWCJKA,B00NWXLQD2,B09LW2KHPM,B09MJKJYLQ,B07TDPP7LQ,B07BQRWTDJ,B01L6RE7Z4,B094DXJZPN,B08GM3QY5N,B00NPLSZF8,B01A4W8AUK


## Final Recommendations

In [32]:
bpr_recommendation_df = annotate_recommendations_with_category(bpr_topk_df, items_df)

In [33]:
bpr_recommendation_df.head()

Unnamed: 0,user_id,rank,parent_asin,cat_2,cat_4
4,AE27CZZMARXNW7ONE4U73YWWCJKA,1,B00NWXLQD2,fitness technology,activity & fitness trackers
104,AE27CZZMARXNW7ONE4U73YWWCJKA,2,B09LW2KHPM,camping & hiking,lanterns
204,AE27CZZMARXNW7ONE4U73YWWCJKA,3,B09MJKJYLQ,strength training equipment,resistance bands
304,AE27CZZMARXNW7ONE4U73YWWCJKA,4,B07TDPP7LQ,fitness technology,heart rate monitors
404,AE27CZZMARXNW7ONE4U73YWWCJKA,5,B07BQRWTDJ,camping & hiking,air mattresses


In [34]:
bpr_overall_rate, bpr_labeled_recs, bpr_user_rates = compute_cat2_diff_cat4_rate(interacted_df, bpr_recommendation_df)

print(f"Overall Same Main Cat but Diff Sub-cat Rate: {bpr_overall_rate:.2%}")


Overall Same Main Cat but Diff Sub-cat Rate: 23.40%


In [35]:
bpr_labeled_recs.head()

Unnamed: 0,user_id,rank,parent_asin,cat_2,cat_4,cat_match
4,AE27CZZMARXNW7ONE4U73YWWCJKA,1,B00NWXLQD2,fitness technology,activity & fitness trackers,diff_cat2
104,AE27CZZMARXNW7ONE4U73YWWCJKA,2,B09LW2KHPM,camping & hiking,lanterns,same_cat2_diff_cat4
204,AE27CZZMARXNW7ONE4U73YWWCJKA,3,B09MJKJYLQ,strength training equipment,resistance bands,diff_cat2
304,AE27CZZMARXNW7ONE4U73YWWCJKA,4,B07TDPP7LQ,fitness technology,heart rate monitors,diff_cat2
404,AE27CZZMARXNW7ONE4U73YWWCJKA,5,B07BQRWTDJ,camping & hiking,air mattresses,same_cat2_diff_cat4


# EASE

In [36]:
# Run EASE optimization
study_ease = optuna.create_study(direction='maximize')
study_ease.optimize(objective_ease, n_trials=N)

# Save best model
best_trial = study_ease.best_trial

if "model" in best_trial.user_attrs:
    best_model = best_trial.user_attrs["result"]
    
    # Ensure train_set is included
    if not hasattr(best_model, "train_set"):
        print("Warning: No train_set found in best_model.")
    else:
        best_model.train_set = best_model.train_set  # explicitly attach

    with open("ease_model_with_trainset.pkl", "wb") as f:
        pickle.dump(best_model, f)

    print("Best EASE model saved to ease_model_with_trainset.pkl")

# Collect all trial results
ease_rows = []
for trial in study_ease.trials:
    if trial.value is None:
        continue
    trial_data = {
        "Trial": trial.number,
        "F1@50": trial.value,
        "Model": "EASE",
        **trial.params
    }

    # Add other evaluation metrics from user_attrs["result"]
    if "result" in trial.user_attrs:
        for metric, value in trial.user_attrs["result"].metric_avg_results.items():
            trial_data[metric] = value

    ease_rows.append(trial_data)

# Convert to DataFrame and export
ease_results_df = pd.DataFrame(ease_rows)


[I 2025-09-01 08:09:07,852] A new study created in memory with name: no-name-8334dd88-b1fe-4e2d-a748-b7bb730ef33a



TEST:
...
      |  F1@50 | NCRR@50 | NDCG@50 | Recall@50 | Train (s) | Test (s)
----- + ------ + ------- + ------- + --------- + --------- + --------
EASEᴿ | 0.0157 |  0.1004 |  0.1540 |    0.3638 |    0.5042 |   8.2121



[I 2025-09-01 08:09:19,344] Trial 0 finished with value: 0.015690012427293832 and parameters: {'lamb': 114.31447561628943}. Best is trial 0 with value: 0.015690012427293832.



TEST:
...
      |  F1@50 | NCRR@50 | NDCG@50 | Recall@50 | Train (s) | Test (s)
----- + ------ + ------- + ------- + --------- + --------- + --------
EASEᴿ | 0.0157 |  0.1003 |  0.1537 |    0.3632 |    0.6975 |   8.2093



[I 2025-09-01 08:09:31,214] Trial 1 finished with value: 0.01566231782171672 and parameters: {'lamb': 56.050480904107204}. Best is trial 0 with value: 0.015690012427293832.



TEST:
...
      |  F1@50 | NCRR@50 | NDCG@50 | Recall@50 | Train (s) | Test (s)
----- + ------ + ------- + ------- + --------- + --------- + --------
EASEᴿ | 0.0156 |  0.1002 |  0.1535 |    0.3623 |    0.9071 |   8.3714



[I 2025-09-01 08:09:43,491] Trial 2 finished with value: 0.015625756892837264 and parameters: {'lamb': 19.298682564307043}. Best is trial 0 with value: 0.015690012427293832.



TEST:
...
      |  F1@50 | NCRR@50 | NDCG@50 | Recall@50 | Train (s) | Test (s)
----- + ------ + ------- + ------- + --------- + --------- + --------
EASEᴿ | 0.0157 |  0.1004 |  0.1540 |    0.3639 |    0.7486 |   7.7764



[I 2025-09-01 08:09:54,964] Trial 3 finished with value: 0.01569127671532899 and parameters: {'lamb': 118.55139550414748}. Best is trial 3 with value: 0.01569127671532899.



TEST:
...
      |  F1@50 | NCRR@50 | NDCG@50 | Recall@50 | Train (s) | Test (s)
----- + ------ + ------- + ------- + --------- + --------- + --------
EASEᴿ | 0.0157 |  0.1003 |  0.1538 |    0.3634 |    0.6249 |   7.6912



[I 2025-09-01 08:10:06,078] Trial 4 finished with value: 0.015672432125997986 and parameters: {'lamb': 64.2658932563292}. Best is trial 3 with value: 0.01569127671532899.



TEST:
...
      |  F1@50 | NCRR@50 | NDCG@50 | Recall@50 | Train (s) | Test (s)
----- + ------ + ------- + ------- + --------- + --------- + --------
EASEᴿ | 0.0157 |  0.1006 |  0.1542 |    0.3648 |    0.5704 |   7.9060



[I 2025-09-01 08:10:17,420] Trial 5 finished with value: 0.01573151868135049 and parameters: {'lamb': 234.44031912805596}. Best is trial 5 with value: 0.01573151868135049.



TEST:
...
      |  F1@50 | NCRR@50 | NDCG@50 | Recall@50 | Train (s) | Test (s)
----- + ------ + ------- + ------- + --------- + --------- + --------
EASEᴿ | 0.0157 |  0.1005 |  0.1540 |    0.3640 |    0.9110 |   8.3294



[I 2025-09-01 08:10:29,729] Trial 6 finished with value: 0.015698766974603883 and parameters: {'lamb': 135.7431844662849}. Best is trial 5 with value: 0.01573151868135049.



TEST:
...
      |  F1@50 | NCRR@50 | NDCG@50 | Recall@50 | Train (s) | Test (s)
----- + ------ + ------- + ------- + --------- + --------- + --------
EASEᴿ | 0.0156 |  0.1002 |  0.1535 |    0.3621 |    1.2651 |   8.6784



[I 2025-09-01 08:10:42,589] Trial 7 finished with value: 0.015616952801583886 and parameters: {'lamb': 10.995864353578304}. Best is trial 5 with value: 0.01573151868135049.



TEST:
...
      |  F1@50 | NCRR@50 | NDCG@50 | Recall@50 | Train (s) | Test (s)
----- + ------ + ------- + ------- + --------- + --------- + --------
EASEᴿ | 0.0156 |  0.1003 |  0.1536 |    0.3628 |    0.5884 |   9.4227



[I 2025-09-01 08:10:55,570] Trial 8 finished with value: 0.015645955016954004 and parameters: {'lamb': 42.07031417391907}. Best is trial 5 with value: 0.01573151868135049.



TEST:
...
      |  F1@50 | NCRR@50 | NDCG@50 | Recall@50 | Train (s) | Test (s)
----- + ------ + ------- + ------- + --------- + --------- + --------
EASEᴿ | 0.0157 |  0.1003 |  0.1537 |    0.3632 |    0.8515 |   9.2237



[I 2025-09-01 08:11:08,969] Trial 9 finished with value: 0.01566231782171672 and parameters: {'lamb': 55.90944720225037}. Best is trial 5 with value: 0.01573151868135049.



TEST:
...
      |  F1@50 | NCRR@50 | NDCG@50 | Recall@50 | Train (s) | Test (s)
----- + ------ + ------- + ------- + --------- + --------- + --------
EASEᴿ | 0.0158 |  0.1006 |  0.1548 |    0.3672 |    1.8363 |   9.3912



[I 2025-09-01 08:11:23,633] Trial 10 finished with value: 0.015836093457776375 and parameters: {'lamb': 625.2588417834248}. Best is trial 10 with value: 0.015836093457776375.



TEST:
...
      |  F1@50 | NCRR@50 | NDCG@50 | Recall@50 | Train (s) | Test (s)
----- + ------ + ------- + ------- + --------- + --------- + --------
EASEᴿ | 0.0158 |  0.1007 |  0.1548 |    0.3672 |    0.7503 |   9.3793



[I 2025-09-01 08:11:37,168] Trial 11 finished with value: 0.015836046666274348 and parameters: {'lamb': 691.1875529205801}. Best is trial 10 with value: 0.015836093457776375.



TEST:
...
      |  F1@50 | NCRR@50 | NDCG@50 | Recall@50 | Train (s) | Test (s)
----- + ------ + ------- + ------- + --------- + --------- + --------
EASEᴿ | 0.0159 |  0.1006 |  0.1550 |    0.3683 |    1.2515 |   8.9281



[I 2025-09-01 08:11:50,564] Trial 12 finished with value: 0.01587774569316398 and parameters: {'lamb': 990.0146576356935}. Best is trial 12 with value: 0.01587774569316398.



TEST:
...
      |  F1@50 | NCRR@50 | NDCG@50 | Recall@50 | Train (s) | Test (s)
----- + ------ + ------- + ------- + --------- + --------- + --------
EASEᴿ | 0.0159 |  0.1006 |  0.1549 |    0.3679 |    0.7909 |   8.8410



[I 2025-09-01 08:12:03,504] Trial 13 finished with value: 0.01586262194572454 and parameters: {'lamb': 880.3256704075147}. Best is trial 12 with value: 0.01587774569316398.



TEST:
...
      |  F1@50 | NCRR@50 | NDCG@50 | Recall@50 | Train (s) | Test (s)
----- + ------ + ------- + ------- + --------- + --------- + --------
EASEᴿ | 0.0159 |  0.1006 |  0.1549 |    0.3680 |    0.6594 |   9.3470



[I 2025-09-01 08:12:16,701] Trial 14 finished with value: 0.015866439123061463 and parameters: {'lamb': 897.6641562748401}. Best is trial 12 with value: 0.01587774569316398.


## Generate Top K Recommendations for Sampled 100 Users

In [37]:
ease_topk_df = generate_top_k_recommendations(EASE, uir_dataset, study_ease, k=10)

## Final Recommendations

In [38]:
ease_recommendation_df = annotate_recommendations_with_category(ease_topk_df, items_df)

In [39]:
ease_recommendation_df.head()

Unnamed: 0,user_id,rank,parent_asin,cat_2,cat_4
14,AE5OEVKQ2R5GAUM5WKSPMZIZGDSQ,1,B09LW2KHPM,camping & hiking,lanterns
114,AE5OEVKQ2R5GAUM5WKSPMZIZGDSQ,2,B0BKFGDBBM,camping & hiking,camping cookware
214,AE5OEVKQ2R5GAUM5WKSPMZIZGDSQ,3,B0038A05X6,camping & hiking,shovels
314,AE5OEVKQ2R5GAUM5WKSPMZIZGDSQ,4,B01M4IGQ2I,camping & hiking,tents
414,AE5OEVKQ2R5GAUM5WKSPMZIZGDSQ,5,B00NPLSZF8,camping & hiking,lanterns


In [40]:
ease_overall_rate, ease_labeled_recs, ease_user_rates = compute_cat2_diff_cat4_rate(interacted_df, ease_recommendation_df)

print(f"Overall Same Main Cat but Diff Sub-cat Rate: {ease_overall_rate:.2%}")

Overall Same Main Cat but Diff Sub-cat Rate: 56.50%


In [41]:
ease_labeled_recs.head()

Unnamed: 0,user_id,rank,parent_asin,cat_2,cat_4,cat_match
14,AE5OEVKQ2R5GAUM5WKSPMZIZGDSQ,1,B09LW2KHPM,camping & hiking,lanterns,same_cat2_diff_cat4
114,AE5OEVKQ2R5GAUM5WKSPMZIZGDSQ,2,B0BKFGDBBM,camping & hiking,camping cookware,same_cat2_diff_cat4
214,AE5OEVKQ2R5GAUM5WKSPMZIZGDSQ,3,B0038A05X6,camping & hiking,shovels,same_cat2_diff_cat4
314,AE5OEVKQ2R5GAUM5WKSPMZIZGDSQ,4,B01M4IGQ2I,camping & hiking,tents,same_cat2_diff_cat4
414,AE5OEVKQ2R5GAUM5WKSPMZIZGDSQ,5,B00NPLSZF8,camping & hiking,lanterns,same_cat2_diff_cat4


# WMF

In [1]:
# Run WMF optimization
study_wmf = optuna.create_study(direction='maximize')
study_wmf.optimize(objective_wmf, n_trials=N)

# Save best model
best_trial = study_wmf.best_trial

if "result" in best_trial.user_attrs:
    best_model = best_trial.user_attrs["result"]

    # Ensure train_set is included
    if not hasattr(best_model, "train_set"):
        print("Warning: No train_set found in best_model.")
    else:
        best_model.train_set = best_model.train_set  # explicitly attach

    with open("wmf_model_with_trainset.pkl", "wb") as f:
        pickle.dump(best_model, f)

    print("Best WMF model saved to wmf_model_with_trainset.pkl")

# Collect all trial results
wmf_rows = []
for trial in study_wmf.trials:
    if trial.value is None:
        continue
    trial_data = {
        "Trial": trial.number,
        "F1@50": trial.value,
        "Model": "WMF",
        **trial.params
    }

    # Add other evaluation metrics from user_attrs["result"]
    if "result" in trial.user_attrs:
        for metric, value in trial.user_attrs["result"].metric_avg_results.items():
            trial_data[metric] = value

    wmf_rows.append(trial_data)

# Convert to DataFrame
wmf_results_df = pd.DataFrame(wmf_rows)


NameError: name 'optuna' is not defined

## Generate Top K Recommendations for Sampled 100 Users

In [None]:
wmf_topk_df = generate_top_k_recommendations(WMF, uir_dataset, study_wmf, k=10)

In [None]:
wmf_recommendation_df = annotate_recommendations_with_category(wmf_topk_df, items_df)

In [None]:
wmf_recommendation_df.head()

## Final Recommendations

In [None]:
wmf_overall_rate, wmf_labeled_recs, wmf_user_rates = compute_cat2_diff_cat4_rate(interacted_df, wmf_recommendation_df)

print(f"Overall Same Main Cat but Diff Sub-cat Rate (WMF): {wmf_overall_rate:.2%}")


In [None]:
wmf_labeled_recs.head()

# Summary

In [None]:
import pandas as pd

# Your existing summary dictionary
summary = {
    "EASE": ease_overall_rate,
    "BPR": bpr_overall_rate,
    "WMF": wmf_overall_rate,
    "MF": mf_overall_rate,
}

# Convert to DataFrame
summary_df = pd.DataFrame([
    {"Model": model, "Same Cat-2, Diff Cat-4 Rate": f"{rate:.2%}"}
    for model, rate in summary.items()
])

# Display using print
print("\n Intra-category Diversity Summary (Same Cat-2, Different Cat-4):")
print(summary_df.to_string(index=False))


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Model names and metric values
models = ['WMF', 'EASEʳ', 'BPR', 'MF']
f1     = [0.0040, 0.0159, 0.0162, 0.0045]
ndcg   = [0.0307, 0.1549, 0.1423, 0.0321]
recall = [0.0976, 0.3679, 0.3772, 0.1068]
ncrr   = [0.0153, 0.1006, 0.0840, 0.0144]  # NCRR@50 for each model

x = np.arange(len(models))
width = 0.2

fig, ax = plt.subplots(figsize=(8, 4))

# Plot grouped bars
bars_f1    = ax.bar(x - 1.5*width, f1,   width, label='F1@10',   color='#1f77b4')
bars_ndcg  = ax.bar(x - 0.5*width, ndcg, width, label='NDCG@10', color='#ff7f0e')
bars_recall= ax.bar(x + 0.5*width, recall,width, label='Recall@10', color='#2ca02c')
bars_ncrr  = ax.bar(x + 1.5*width, ncrr,  width, label='NCRR@10', color='#d62728')

# Labels and format
ax.set_xticks(x)
ax.set_xticklabels(models)
ax.set_ylabel('Score')
ax.set_title('Performance Comparison of Recommender Models')
ax.legend()

# Add value labels
for bars in [bars_f1, bars_ndcg, bars_recall, bars_ncrr]:
    ax.bar_label(bars, fmt='%.3f', padding=2, fontsize=8)

plt.ylim(0, max(recall + ncrr) + 0.05)
plt.tight_layout()
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt

models = ['EASE', 'BPR', 'WMF', 'MF']
diversity = [0.5770, 0.2490, 0.3270, 0.3990]  # proportions

# Convert to percentages
div_pct = [d * 100 for d in diversity]

fig, ax = plt.subplots(figsize=(8, 5))
bars = ax.bar(models, div_pct, color='#1f77b4')

ax.set_ylim(0, max(div_pct) + 10)
ax.set_ylabel('Same Main Cat but Different Sub Cat (%)')
ax.set_title('Intra‑Category Diversity Across Models')

# Add value annotation on each bar
for bar in bars:
    height = bar.get_height()
    ax.annotate(f'{height:.1f}%', xy=(bar.get_x() + bar.get_width() / 2, height),
                xytext=(0, 3), textcoords='offset points',
                ha='center', va='bottom', fontsize=10)

plt.tight_layout()
plt.show()
