# Baseline Model

In [1]:
%load_ext autoreload
%autoreload 2

import math
import time
import statistics

import numpy as np
import pandas as pd
from itertools import permutations
from scipy.sparse import dok_matrix, csr_matrix
from tqdm import tqdm
import multiprocessing

from get_data import get_data
from baseline_recommender import MostPopularRecommender, MostPopularForUserRecommender, ItemCoCountRecommender
from utils import convert_size, load_data, get_product_key_conversion, product_key_to_meta, product_key_to_name

## Load Data

In [24]:
# Dataset-size switch: the empty string selects the full dataset; any non-empty
# value makes the loading cell pass small_data=True to load_data.
small = ""
# small = "small_"

In [25]:
# Request the reduced dataset only when `small` is a non-empty string.
data_loader = load_data(small_data=(small != ""))

In [26]:
# Pull the three transaction splits and the item metadata out of the loader result.
train_data, validation_data, test_data, item_metadata = (
    data_loader[split_name]
    for split_name in ("train", "validation", "test", "metadata")
)

In [27]:
# Collect the distinct product keys seen in the training transactions; only
# entries whose string starts with "product" are kept.
train_data_products = {
    entry
    for transaction in train_data
    for entry in transaction
    if entry.startswith("product")
}
n_items = len(train_data_products)
print(f"Data for {n_items} products.")

Data for 49641 products.


## User Item Interactions

In [48]:
def generate_user_item_interactions(train_data, n_items, item_key_mapping):
    """Count item purchases per user and over all users.

    Args:
        train_data: iterable of transactions; element 0 of each transaction is
            the user id and the remaining elements are the purchased item keys.
        n_items: length of the overall frequency vector.
        item_key_mapping: maps an item key to its position in that vector.

    Returns:
        A pair ``(user_item_frequency, item_frequency)``: a dict
        ``user id -> {item key -> count}`` and a NumPy array of length
        ``n_items`` holding the total count at each mapped item position.
    """
    per_user_counts = {}
    overall_counts = np.zeros(n_items)

    for transaction in train_data:
        # user id is always first in list, then all the purchased items
        user_id, purchased = transaction[0], transaction[1:]
        counts_for_user = per_user_counts.setdefault(user_id, {})

        for item_key in purchased:
            counts_for_user[item_key] = counts_for_user.get(item_key, 0) + 1
            overall_counts[item_key_mapping[item_key]] += 1

    return per_user_counts, overall_counts

In [49]:
# Assign matrix indices in sorted key order. `train_data_products` is a set of
# strings, whose iteration order can change between kernel sessions (string
# hashing is randomised per process), so enumerating it directly would give a
# different key -> index assignment on every run.
mapping = {item_key: index for index, item_key in enumerate(sorted(train_data_products))}
# Inverse lookup: matrix index -> product key.
mapping_back = {index: item_key for item_key, index in mapping.items()}

In [50]:
# Build the per-user purchase counts and the overall item-frequency vector
# from the training transactions.
user_item_frequency, item_frequency = generate_user_item_interactions(
    train_data=train_data,
    n_items=n_items,
    item_key_mapping=mapping,
)

## Item Item Interactions

In [31]:
def add_item_pairs_to_matrix(data, item_key_mapping):
    """Count, for every ordered item pair, the transactions containing both items.

    Args:
        data: iterable of transactions; element 0 of each transaction is the
            user id and the remaining elements are the purchased item keys.
        item_key_mapping: maps an item key to its integer index.

    Returns:
        dict mapping ``(item_index, paired_item_index)`` to the number of
        transactions in which both items occurred. Both orderings of a pair
        are present, and an item is never paired with itself.

    Raises:
        KeyError: if a purchased item key is missing from ``item_key_mapping``.
    """
    item_interactions_dict = {}

    for transaction in data:
        # The user id in position 0 is not needed here. The set removes repeated
        # items so one transaction contributes at most 1 to each pair.
        basket_indices = {item_key_mapping[item_key] for item_key in transaction[1:]}

        # permutations() over a set never pairs an element with itself, so no
        # explicit self-pair guard is required.
        for pair in permutations(basket_indices, r=2):
            item_interactions_dict[pair] = item_interactions_dict.get(pair, 0) + 1

    return item_interactions_dict

In [32]:
def generate_item_item_interactions(train_data, item_key_mapping, item_frequency, n_items):
    """Build the normalised item-item co-occurrence matrix in CSR format.

    Entry ``(i, j)`` is the number of transactions containing both items,
    as counted by ``add_item_pairs_to_matrix``, divided by ``item_frequency[i]``.

    Args:
        train_data: transactions passed on to ``add_item_pairs_to_matrix``.
        item_key_mapping: maps an item key to its matrix index.
        item_frequency: indexable by matrix index; supplies the divisor for
            every pair whose first element is that index.
        n_items: number of rows and columns of the resulting square matrix.

    Returns:
        The ``n_items x n_items`` float32 sparse matrix in CSR format.
    """
    # Create dictionary of item-item pairs and the number of times they appeared together in a basket
    pair_scores = add_item_pairs_to_matrix(data=train_data, item_key_mapping=item_key_mapping)

    # divide each item pair (item1, item2) by the total number of appearances of item1
    for pair in pair_scores:
        first_item = pair[0]
        pair_scores[pair] = pair_scores[pair] / item_frequency[first_item]

    print("Generating the item, item dok matrix...")
    dok = dok_matrix((n_items, n_items), dtype=np.float32)
    for pair, score in pair_scores.items():
        dok[pair] = score

    # The dictionary is no longer needed once its values are in the matrix.
    del pair_scores

    print("Turning the item, item dok matrix into a csr matrix...")
    csr = dok.tocsr()
    # Report the size of the three arrays backing the CSR matrix.
    csr_bytes = csr.data.nbytes + csr.indptr.nbytes + csr.indices.nbytes
    print(convert_size(csr_bytes))

    del dok

    return csr

In [33]:
# mapped_item_frequency = {}
# for k, v in item_frequency.items():
#      mapped_item_frequency[mapping[k]] = v

In [34]:
# Build the normalised item-item co-occurrence matrix from the training transactions.
sparse_item_item_matrix = generate_item_item_interactions(
    train_data=train_data,
    item_key_mapping=mapping,
    item_frequency=item_frequency,
    n_items=n_items,
)

Generating the item, item dok matrix...
Turning the item, item dok matrix into a csr matrix...
580.17 MB


## Old Evaluation (HitRate@K, NDCG@K)
(moved function to recommender.py)

In [None]:
# Test evaluate function
test_transaction = [1, 889, 887, 990, 998]

user_id = test_transaction[0]
items = test_transaction[1:]
test_item = items.pop()

print(user_id, items, test_item)

def test_evaluate(predicted_items, k=5):
    hit_rate_at_k = 0.0
    ndcg_at_k = 0.0
    if test_item in predicted_items:
        # Hit Rate @ k
        hit_rate_at_k += 1
        # NDCG @ k
        index_match = (np.where(test_item == np.array(predicted_items)))[0][0]
        # In our case only one item in the retrieved list can be relevant,
        # so in particular the ideal ndcg is 1 and ndcg_at_k = 1/log_2(1+j)
        # where j is the position of the relevant item in the list.
        ndcg_at_k += 1/np.log2(np.arange(2, k+2))[index_match]

    print(hit_rate_at_k)
    print(ndcg_at_k)
    
    return hit_rate_at_k, ndcg_at_k
    
# test item 998 in the third spot
predicted_items1 = [889, 887, 998, 881, 882]
hr_1, ndcg_1 = test_evaluate(predicted_items1, k=5)
assert hr_1 == 1.0 and ndcg_1 == 0.5

# test item 998 in first spot
predicted_items2 = [998, 887, 995, 881, 882]
hr_2, ndcg_2 = test_evaluate(predicted_items2, k=5)
assert hr_2 == 1.0 and ndcg_2 == 1.0

# test item 998 not in the list
predicted_items3 = [889, 887, 880, 881, 882]
hr_3, ndcg_3 = test_evaluate(predicted_items3, k=5)
assert hr_3 == 0.0 and ndcg_3 == 0.0

## Most Popular

In [35]:
# Baseline built from the overall item frequencies, the per-user counts and
# the key -> index mapping.
most_popular_baseline = MostPopularRecommender(
    item_key_mapping=mapping,
    item_frequency=item_frequency,
    user_item_frequency=user_item_frequency,
)

In [None]:
# Test Most Popular Baseline
k = 10

# item_frequency is a NumPy vector indexed by mapped item position (it has no
# .items()), so rank the positions by descending count and translate them back
# to product keys. A stable sort on the negated counts keeps ties in ascending
# position order.
top_k_positions = np.argsort(-item_frequency, kind="stable")[:k]
test_indices = [mapping_back[position] for position in top_k_positions.tolist()]

predicted_indices = most_popular_baseline.predict_items(user_id=None, given_items=None, predict_k=k)

print(f"Test: {test_indices}")
print(f"Predicted: {predicted_indices}")

# NOTE(review): assumes predict_items returns product keys and breaks ties the
# same way as the ranking above; confirm against baseline_recommender.
assert list(predicted_indices) == test_indices

In [36]:
# Most Popular baseline, within-basket setting (within_basket=True), k=10.
# Evaluated on the validation split first, then on the test split; each call
# returns four values, unpacked here as AUC, NDCG, recall and precision.
val_auc, val_ndcg, val_recall, val_precision = most_popular_baseline.evaluate(
    validation_data, k=10, within_basket=True
)

test_auc, test_ndcg, test_recall, test_precision = most_popular_baseline.evaluate(
    test_data, k=10, within_basket=True
)

206209 transactions to evaluate.
Evaluated 195288 transactions.
Took 8.662 minutes.
AUC:              0.922
NDCG:             0.1333
Recall at 10:     0.0546
Precision at 10:  0.0253
206209 transactions to evaluate.
Evaluated 195558 transactions.
Took 8.505 minutes.
AUC:              0.9216
NDCG:             0.133
Recall at 10:     0.0543
Precision at 10:  0.0258


In [37]:
# Most Popular baseline, next-basket setting (within_basket=False), k=10.
# Evaluated on the validation split first, then on the test split.
val_auc, val_ndcg, val_recall, val_precision = most_popular_baseline.evaluate(
    validation_data, k=10, within_basket=False
)

test_auc, test_ndcg, test_recall, test_precision = most_popular_baseline.evaluate(
    test_data, k=10, within_basket=False
)

206209 transactions to evaluate.
Evaluated 195288 transactions.
Took 11.446 minutes.
AUC:              0.9281
NDCG:             0.145
Recall at 10:     0.0706
Precision at 10:  0.0742
206209 transactions to evaluate.
Evaluated 195558 transactions.
Took 10.732 minutes.
AUC:              0.9276
NDCG:             0.1449
Recall at 10:     0.0706
Precision at 10:  0.0757


In [None]:
# Most Popular - Within Basket Recommendations
# Baseline (Triple2Vec Paper)
# AUC  0.918
# NDCG 0.145

# My Scores
# Validation
# AUC:              0.922
# NDCG:             0.1333
# Recall at 10:     0.0546
# Precision at 10:  0.0253

# Test
# AUC:              0.9216
# NDCG:             0.133
# Recall at 10:     0.0543
# Precision at 10:  0.0258

# Most Popular - Next Basket Recommendations
# Baseline (Triple2Vec Paper)
# AUC  0.918
# NDCG 0.145

# My Scores
# Validation
# AUC:              0.9281
# NDCG:             0.145
# Recall at 10:     0.0706
# Precision at 10:  0.0742

# Test
# AUC:              0.9276
# NDCG:             0.1449
# Recall at 10:     0.0706
# Precision at 10:  0.0757

## Most Popular for User

In [51]:
# Baseline built from the per-user purchase counts and the key -> index mapping.
most_popular_for_user_baseline = MostPopularForUserRecommender(
    item_key_mapping=mapping,
    user_item_frequency=user_item_frequency,
)

In [None]:
# Test Most Popular for User Baseline
k = 10

# test for user with id: take the first user in the frequency table and rank
# that user's own items by descending purchase count
user_id = next(iter(user_item_frequency))
user_item_frequency_test_list = sorted(user_item_frequency[user_id].items(), key=lambda t: t[1], reverse=True)
test_indices = [index for (index, frequency) in user_item_frequency_test_list[:k]]

predicted_indices = most_popular_for_user_baseline.predict_items(user_id=user_id, given_items=None, predict_k=k)

print(f"Test: {test_indices}")
print(f"Predicted: {predicted_indices}")

assert list(predicted_indices) == test_indices

# test for non-existing user, should return most popular
user_id = -1
# item_frequency is a NumPy vector indexed by mapped item position (it has no
# .items()), so rank positions by descending count and map them back to
# product keys. The stable sort keeps ties in ascending position order.
top_k_positions = np.argsort(-item_frequency, kind="stable")[:k]
test_indices = [mapping_back[position] for position in top_k_positions.tolist()]

predicted_indices = most_popular_for_user_baseline.predict_items(user_id=user_id, given_items=None, predict_k=k)

print(f"Test: {test_indices}")
print(f"Predicted: {predicted_indices}")

# NOTE(review): assumes predict_items returns product keys and breaks ties the
# same way as the ranking above; confirm against baseline_recommender.
assert list(predicted_indices) == test_indices

In [52]:
# Most Popular for User baseline, within-basket setting (within_basket=True), k=10.
# Evaluated on the validation split first, then on the test split; each call
# returns four values, unpacked here as AUC, NDCG, recall and precision.
val_auc, val_ndcg, val_recall, val_precision = most_popular_for_user_baseline.evaluate(
    validation_data, k=10, within_basket=True
)

test_auc, test_ndcg, test_recall, test_precision = most_popular_for_user_baseline.evaluate(
    test_data, k=10, within_basket=True
)

206209 transactions to evaluate.
Evaluated 195288 transactions.
Took 9.066 minutes.
AUC:              0.7473
NDCG:             0.213
Recall at 10:     0.2192
Precision at 10:  0.0782
206209 transactions to evaluate.
Evaluated 195558 transactions.
Took 9.071 minutes.
AUC:              0.7271
NDCG:             0.1987
Recall at 10:     0.1978
Precision at 10:  0.0731


In [53]:
# Most Popular for User baseline, next-basket setting (within_basket=False), k=10.
# The fourth validation result is bound to `val_precision` (not a misspelt
# name), so the variable does not keep the value from the within-basket run.
val_auc, val_ndcg, val_recall, val_precision = most_popular_for_user_baseline.evaluate(validation_data,
                                                                                       k=10,
                                                                                       within_basket=False)

test_auc, test_ndcg, test_recall, test_precision = most_popular_for_user_baseline.evaluate(test_data,
                                                                                           k=10,
                                                                                           within_basket=False)

206209 transactions to evaluate.
Evaluated 195288 transactions.
Took 10.883 minutes.
AUC:              0.79
NDCG:             0.3
Recall at 10:     0.3082
Precision at 10:  0.2756
206209 transactions to evaluate.
Evaluated 195558 transactions.
Took 10.587 minutes.
AUC:              0.769
NDCG:             0.2791
Recall at 10:     0.283
Precision at 10:  0.2585


In [None]:
# Most Popular for User - Within Basket Recommendations
# Baseline (Triple2Vec Paper)
# AUC  0.773
# NDCG 0.272 

# My Scores
# Validation
# AUC:              0.7473
# NDCG:             0.213
# Recall at 10:     0.2192
# Precision at 10:  0.0782

# Test
# AUC:              0.7271
# NDCG:             0.1987
# Recall at 10:     0.1978
# Precision at 10:  0.0731

# Most Popular for User - Next Basket Recommendations
# Baseline (Triple2Vec Paper)
# AUC  0.773
# NDCG 0.273 

# My Scores
# Validation
# AUC:              0.79
# NDCG:             0.3
# Recall at 10:     0.3082
# Precision at 10:  0.2756

# Test
# AUC:              0.769
# NDCG:             0.2791
# Recall at 10:     0.283
# Precision at 10:  0.2585

## Co-Count 

In [54]:
# Baseline built from the normalised item-item co-occurrence matrix, the
# per-user counts and the key -> index mapping.
item_co_count_baseline = ItemCoCountRecommender(
    item_key_mapping=mapping,
    item_co_count_matrix=sparse_item_item_matrix,
    user_item_frequency=user_item_frequency,
)

In [None]:
# Test co-count Baseline
k = 10
item_ids = ['product_30252']

# Use the notebook's own lookup tables: `mapping` (product key -> matrix index)
# and `mapping_back` (matrix index -> product key).
co_count_for_item = np.asarray(sparse_item_item_matrix.getrow(mapping[item_ids[0]]).todense())[0]
# Highest co-count scores first.
candidate_indices = np.argsort(co_count_for_item)[::-1][:k]
test_indices = [mapping_back[candidate_index] for candidate_index in candidate_indices.tolist()]

# Pass predict_k explicitly so both sides of the comparison use the same cut-off.
predicted_indices = item_co_count_baseline.predict_items(user_id=None, given_items=item_ids, predict_k=k)

print(f"Test: {test_indices}")
print(f"Predicted: {predicted_indices}")

# NOTE(review): assumes predict_items returns product keys and orders equal
# scores the same way as the argsort above; confirm against baseline_recommender.
assert list(predicted_indices) == test_indices

In [55]:
# Item co-count baseline, within-basket setting (within_basket=True), k=10.
# Evaluated on the validation split first, then on the test split; each call
# returns four values, unpacked here as AUC, NDCG, recall and precision.
val_auc, val_ndcg, val_recall, val_precision = item_co_count_baseline.evaluate(
    validation_data, k=10, within_basket=True
)

test_auc, test_ndcg, test_recall, test_precision = item_co_count_baseline.evaluate(
    test_data, k=10, within_basket=True
)

206209 transactions to evaluate.
Evaluated 195288 transactions.
Took 11.835 minutes.
AUC:              0.8932
NDCG:             0.1533
Recall at 10:     0.0798
Precision at 10:  0.0332
206209 transactions to evaluate.
Evaluated 195558 transactions.
Took 11.936 minutes.
AUC:              0.8917
NDCG:             0.1527
Recall at 10:     0.0791
Precision at 10:  0.0335


In [56]:
# Item co-count baseline, next-basket setting (within_basket=False), k=10.
# Evaluated on the validation split first, then on the test split.
val_auc, val_ndcg, val_recall, val_precision = item_co_count_baseline.evaluate(
    validation_data, k=10, within_basket=False
)

test_auc, test_ndcg, test_recall, test_precision = item_co_count_baseline.evaluate(
    test_data, k=10, within_basket=False
)

206209 transactions to evaluate.
Evaluated 195288 transactions.
Took 49.457 minutes.
AUC:              0.9575
NDCG:             0.1678
Recall at 10:     0.0896
Precision at 10:  0.0891
206209 transactions to evaluate.
Evaluated 195558 transactions.
Took 48.384 minutes.
AUC:              0.9553
NDCG:             0.1671
Recall at 10:     0.0893
Precision at 10:  0.0904


In [None]:
# Item Co-count - Within Basket Recommendations
# My Scores
# Validation
# AUC:              0.8932
# NDCG:             0.1533
# Recall at 10:     0.0798
# Precision at 10:  0.0332

# Test
# AUC:              0.8917
# NDCG:             0.1527
# Recall at 10:     0.0791
# Precision at 10:  0.0335

# Item Co-count - Next Basket Recommendations
# My Scores
# Validation
# AUC:              0.9575
# NDCG:             0.1678
# Recall at 10:     0.0896
# Precision at 10:  0.0891

# Test
# AUC:              0.9553
# NDCG:             0.1671
# Recall at 10:     0.0893
# Precision at 10:  0.0904