In [14]:
# %%
import pandas as pd
import numpy as np
import random
import math
from river import optim
from river.reco import BiasedMF
import tqdm

# Load the preprocessed DataFrame and sort by TransactionDate
final_df = pd.read_parquet("final_df.parquet")
print("Shape:", final_df.shape)
display(final_df.head())
display(final_df.dtypes)
final_df = final_df.sort_values("TransactionDate").reset_index(drop=True)

# %%
all_items = final_df["ProductID"].unique().tolist()

def sample_negative_items(user_item_pairs, n_neg=3):
    results = []
    for (u, i, feats) in user_item_pairs:
        results.append((u, i, feats, 1.0))
        neg_samples = 0
        tries = 0
        while neg_samples < n_neg and tries < 100:
            candidate = random.choice(all_items)
            if candidate != i:
                neg_feats = feats.copy()
                results.append((u, candidate, neg_feats, 0.0))
                neg_samples += 1
            tries += 1
    return results

# %%
class AverageRank:
    def __init__(self):
        self.sum_of_ranks = 0.0
        self.count = 0
    def update(self, rank):
        self.sum_of_ranks += rank
        self.count += 1
    def get(self):
        return self.sum_of_ranks / self.count if self.count > 0 else None
    def __repr__(self):
        val = self.get()
        return f"AverageRank={val:.3f}" if val is not None else "AverageRank=None"

def get_rank(model, user_id, item_id, all_items_list, features):
    scores = {it: model.predict_one({"user": user_id, "item": it, **features})
              for it in all_items_list}
    ranked_items = sorted(scores, key=scores.get, reverse=True)
    return ranked_items.index(item_id) + 1

# %%
# Define a custom BPR loss (pointwise logistic loss as a surrogate)
class BPRLoss:
    def __call__(self, y_true, y_pred):
        sigmoid = 1 / (1 + math.exp(-y_pred))
        if y_true == 1.0:
            return -math.log(sigmoid + 1e-15)
        else:
            return -math.log(1 - sigmoid + 1e-15)

# Initialize the BiasedMF model with our custom BPRLoss.
model = BiasedMF(
    n_factors=10,
    bias_lr=0.01,
    user_lr=0.01,
    item_lr=0.01,
    loss=BPRLoss(),
    seed=42
)

# %%
avg_rank_metric = AverageRank()
MAX_ROWS = 5000  
processed = 0
log_step = 1000

for idx, row in final_df.iterrows():
    if processed >= MAX_ROWS:
        break
    processed += 1
    
    user_id = str(row["ClientID"])
    item_id = str(row["ProductID"])
    
    side_feats = {
        "ClientGender": str(row["ClientGender"]) if pd.notnull(row["ClientGender"]) else "Unknown",
        "ClientSegment": str(row["ClientSegment"]) if pd.notnull(row["ClientSegment"]) else "Unknown",
        "Brand": str(row["Brand"]) if pd.notnull(row["Brand"]) else "Unknown",
        "Category": str(row["Category"]) if pd.notnull(row["Category"]) else "Unknown",
        "Universe": str(row["Universe"]) if pd.notnull(row["Universe"]) else "Unknown",
        "Age": row["Age"] if pd.notnull(row["Age"]) else 0,
        "CumulativeSpent": row["CumulativeSpent"],
        "DayOfWeek": str(row["DayOfWeek"]),
        "Season": str(row["Season"]),
        "Frequency_30": row["Frequency_30"],
        "Recency_30": row["Recency_30"]
    }
    
    if processed > 100:
        rank = get_rank(model, user_id, item_id, all_items, side_feats)
        avg_rank_metric.update(rank)
    
    x_pos = {"user": user_id, "item": item_id}
    x_pos.update(side_feats)
    
    negatives = set()
    n_neg = 3
    tries = 0
    while len(negatives) < n_neg and tries < 50:
        candidate = random.choice(all_items)
        if candidate != item_id:
            negatives.add(candidate)
        tries += 1
    
    model = model.learn_one(x_pos, 1.0)
    for neg_item in negatives:
        x_neg = {"user": user_id, "item": neg_item}
        x_neg.update(side_feats)
        model = model.learn_one(x_neg, 0.0)
    
    if processed % log_step == 0:
        print(f"Processed {processed} transactions, current {avg_rank_metric}")

print(f"Final Average Rank on the last {processed - 100} transactions: {avg_rank_metric}")


Shape: (896963, 58)


Unnamed: 0,ClientID,ProductID,TransactionDate,StoreID,StoreCountry,Category,FamilyLevel1,FamilyLevel2,Universe,ClientSegment,...,Rolling90Pct_Cricket,Rolling90Pct_Beach,Rolling90Pct_Basketball,Rolling90Pct_Rugby,Rolling90Pct_Golf,Rolling90Pct_Softball,Rolling90Pct_Cycling,Rolling90Pct_Volleyball,Rolling90Pct_Running,Rolling90Pct_Skiing
0,4388436561084682799,3260004767786243986,2023-01-01,1913433680162801979,USA,Basketball,Ball,Wilson Evolution Basketball,Women,LOYAL,...,,,1.0,0.0,,,,,,
1,5475934562856106533,4081002095016762501,2023-01-01,7704230050291051317,GBR,Baseball,Ball,Wilson A1030,Men,LOYAL,...,,,0.0,,,,,,,
2,7571493122530801912,6392464777854173474,2023-01-01,6490882301505443815,USA,Football,Ball,Puma Final 1,Women,LOYAL,...,,,,,,,,,,
3,9054036776577299596,4007202900314609762,2023-01-01,8616445013777361432,USA,Cycling,Bike,Specialized S-Works Roubaix,Men,LOYAL,...,,,,0.0,,,1.0,,,
4,7828763863563966653,1064014581685647413,2023-01-01,7215875119995113089,AUS,Tennis,Racket,Wilson Pro Staff RF97,Men,LOYAL,...,,0.0,,,,0.0,0.0,,,


ClientID                                     int64
ProductID                                    int64
TransactionDate                     datetime64[ns]
StoreID                                      int64
StoreCountry                                object
Category                                    object
FamilyLevel1                                object
FamilyLevel2                                object
Universe                                    object
ClientSegment                               object
ClientGender                                object
Age                                        float64
ClientCountry                               object
ClientOptINEmail                             int64
ClientOptINPhone                             int64
Quantity_sold                                int64
SalesNetAmountEuro                         float64
product_avg_price_order                    float64
avg_price                                  float64
Weekday                        

TypeError: BiasedMF.__init__() got an unexpected keyword argument 'bias_lr'