In [1]:
# Better GPU tracebacks
# import os
# os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [2]:
import faiss
import pathlib
import time
from argparse import ArgumentParser
from tqdm import tqdm

import numpy as np
import pybloomfilter as pbf
import torch as th

from pytorch_lightning import seed_everything, Trainer
from ranking_metrics_torch.precision_recall import precision_at, recall_at
from ranking_metrics_torch.cumulative_gain import ndcg_at
from torch_factorization_models.implicit_mf import ImplicitMatrixFactorization
from torch_factorization_models.movielens import MovielensDataset, MovielensDataModule

In [3]:
from practicalrecs_examples.pipeline import RecsPipeline

from practicalrecs_examples.ann_search import *
from practicalrecs_examples.dithering import *
from practicalrecs_examples.filtering import *
from practicalrecs_examples.matrix_factorization import *

In [4]:
seed_everything(42)  # same seed used to create splits in training

42

### Load dataset

In [5]:
movielens_module = MovielensDataModule(
    "/home/karl/Projects/datasets/ml-20m/",
    batch_size=128
)
movielens_module.setup()

In [6]:
movielens = movielens_module.dataset
preprocessor = movielens.preprocessor
user_xformer = preprocessor.named_transformers_['user_id']
item_xformer = preprocessor.named_transformers_['item_id']

### Load the model

In [7]:
parser = ArgumentParser(add_help=False)
parser = Trainer.add_argparse_args(parser)
parser = ImplicitMatrixFactorization.add_model_specific_args(parser)

args = parser.parse_args(args=[])
args.num_users = 138287
args.num_items = 20720
# args.use_biases = False
args.embedding_dim = 32
args.eval_cutoff = th.tensor([100])

args

Namespace(accumulate_grad_batches=1, amp_backend='native', amp_level='O2', auto_lr_find=False, auto_scale_batch_size=False, auto_select_gpus=False, benchmark=False, beta1=0.9, beta2=0.999, check_val_every_n_epoch=1, checkpoint_callback=True, default_root_dir=None, deterministic=False, distributed_backend=None, early_stop_callback=False, embedding_dim=32, eval_cutoff=tensor([100]), fast_dev_run=False, gpus=<function Trainer._gpus_arg_default at 0x7f766b94fdc0>, gradient_clip_val=0, learning_rate=0.1, limit_test_batches=1.0, limit_train_batches=1.0, limit_val_batches=1.0, log_gpu_memory=None, log_save_interval=100, logger=True, loss='logistic', max_epochs=1000, max_steps=None, min_epochs=1, min_steps=None, momentum=0.9, num_items=20720, num_nodes=1, num_processes=1, num_sanity_val_steps=2, num_users=138287, optimizer='sgd', overfit_batches=0.0, overfit_pct=None, precision=32, prepare_data_per_node=True, process_position=0, profiler=None, progress_bar_refresh_rate=1, reload_dataloaders_ev

In [8]:
model = ImplicitMatrixFactorization(args)

state_dict = th.load("../models/mf_example.pt")

# preprocessor = state_dict['preprocessor']
del state_dict['preprocessor']
state_dict['global_bias_idx'] = th.LongTensor([0])

model.load_state_dict(state_dict)

<All keys matched successfully>

In [9]:
if th.cuda.is_available():
    model.cuda()
    
movielens_module.dataset.to_(device=model.device)

In [10]:
val_dataloader = movielens_module.val_dataloader(by_user=True)

In [11]:
train_dataloader = movielens_module.train_dataloader(by_user=True)

### Model

In [12]:
model_metrics = model.compute_validation_metrics(
    tqdm(val_dataloader),
    model.eval_predict,
)

100%|██████████| 1081/1081 [01:29<00:00, 12.12it/s]


In [13]:
print(f"Precision: {model_metrics['precision']:.4f}")
print(f"Recall: {model_metrics['recall']:.4f}")
print(f"NDCG: {model_metrics['ndcg']:.4f}")

Precision: 0.0163
Recall: 0.5125
NDCG: 0.2070


### ANN Search Indexing

In [14]:
import faiss

dim = model.hparams.embedding_dim
dim

32

In [15]:
item_vectors = np.array(model.item_embeddings.weight.cpu().data)

In [16]:
res = faiss.StandardGpuResources()

flat_config = faiss.GpuIndexFlatConfig()
flat_config.device = 0

# Create an index and add item vectors
start = time.perf_counter()

exact_index = faiss.GpuIndexFlatIP(res, dim, flat_config)  
exact_index.add(item_vectors)

end = time.perf_counter()

elapsed = (end - start) * 1000

print(f"Indexed: {exact_index.ntotal} items")
print(f"Elapsed: {elapsed:.4f} ms")

Indexed: 20720 items
Elapsed: 188.8579 ms


In [17]:
# Create an index for approximate search with product quantization

start = time.perf_counter()
res = faiss.StandardGpuResources()

approx_index = faiss.index_factory(dim, "IVF1024,PQ32", faiss.METRIC_INNER_PRODUCT)
cloner_options = faiss.GpuClonerOptions()

approx_index = faiss.index_cpu_to_gpu(res, 0, approx_index, cloner_options)

approx_index.train(item_vectors)
approx_index.add(item_vectors)

approx_index.nprobe = 30

end = time.perf_counter()

elapsed = (end - start) * 1000

print(f"Indexed: {approx_index.ntotal} items")
print(f"Elapsed: {elapsed:.4f} ms")

Indexed: 20720 items
Elapsed: 24339.9576 ms


### Bloom filters

In [18]:
train_dataloader.dataset.num_users

138287

In [19]:
bloom_filters = {}

for user_id in tqdm(range(train_dataloader.dataset.num_users)):
    interactions = train_dataloader.dataset[user_id]["interactions"].coalesce()    
    item_ids = interactions.indices()[1]
    
    bloom = pbf.BloomFilter(10, 0.1)
    bloom.update(item_ids)
        
    bloom_filters[user_id] = bloom

100%|██████████| 138287/138287 [00:54<00:00, 2532.60it/s]


### Pipeline

In [20]:
def pipeline_fn(pipeline):
    def pipeline_predict(user_ids, num_items):
        user_scores = []
        for user_id in user_ids:
            user_id = int(user_id.cpu().item())

            interactions = train_dataloader.dataset[user_id]["interactions"].coalesce()
            item_ids = interactions.indices()[1]

            scores = pipeline.recommend(user_id, item_ids)

            user_scores.append(scores)

        return th.stack(user_scores)
    
    return pipeline_predict

In [21]:
num_candidates = 250
num_recs = 100

In [22]:
stages = [
    # Retrieval
    UserAvgEmbeddingFetcher(model),
    ANNSearch(approx_index, args.num_items, num_candidates),
    # Filtering
    BloomFilter(bloom_filters),
    CandidatePadding(args.num_items, num_candidates),
    # Scoring
    MatrixFactorizationScoring(model),
    # Ordering
    DitheredOrdering(num_recs, epsilon=1.5),
]

full_pipeline = RecsPipeline(*stages)

In [23]:
pipeline_metrics = model.compute_validation_metrics(
    tqdm(val_dataloader),
    pipeline_fn(full_pipeline)
)

print(f"\nPrecision: {pipeline_metrics['precision']:.4f}")
print(f"Recall: {pipeline_metrics['recall']:.4f}")
print(f"NDCG: {pipeline_metrics['ndcg']:.4f}")

100%|██████████| 1081/1081 [04:34<00:00,  3.93it/s]


Precision: 0.0077
Recall: 0.3055
NDCG: 0.1792





### Ideal Retrieval

In [24]:
# Learned user embedding, exact NN search, idealized results

ideal_retrieval_stages = [
    # Retrieval
    UserEmbeddingFetcher(model),
    IdealizedANNSearch(val_dataloader.dataset, exact_index, args.num_items, num_candidates),
    # Filtering
    BloomFilter(bloom_filters),
    CandidatePadding(args.num_items, num_candidates),
    # Scoring
    MatrixFactorizationScoring(model),
    # Ordering
    DitheredOrdering(num_recs, epsilon=1.5),
]

ideal_retrieval = RecsPipeline(*ideal_retrieval_stages)

In [25]:
ideal_retrieval_metrics = model.compute_validation_metrics(
    tqdm(val_dataloader),
    pipeline_fn(ideal_retrieval)
)

print(f"\nPrecision: {ideal_retrieval_metrics['precision']:.4f}")
print(f"Recall: {ideal_retrieval_metrics['recall']:.4f}")
print(f"NDCG: {ideal_retrieval_metrics['ndcg']:.4f}")

100%|██████████| 1081/1081 [05:13<00:00,  3.45it/s]


Precision: 0.0109
Recall: 0.4227
NDCG: 0.1997





In [26]:
# TODO: Explore combinations of exact search, learned user vs item embeddings, etc

In [27]:
# Learned user embedding, exact NN search, no idealization

learned_exact_nonideal_stages = [
    # Retrieval
    UserEmbeddingFetcher(model),
    ANNSearch(exact_index, args.num_items, num_candidates),
    # Filtering
    BloomFilter(bloom_filters),
    CandidatePadding(args.num_items, num_candidates),
    # Scoring
    MatrixFactorizationScoring(model),
    # Ordering
    DitheredOrdering(num_recs, epsilon=1.5),
]

learned_exact_nonideal_retrieval = RecsPipeline(*learned_exact_nonideal_stages)

In [28]:
learned_exact_nonideal_metrics = model.compute_validation_metrics(
    tqdm(val_dataloader),
    pipeline_fn(learned_exact_nonideal_retrieval)
)

print(f"\nPrecision: {learned_exact_nonideal_metrics['precision']:.4f}")
print(f"Recall: {learned_exact_nonideal_metrics['recall']:.4f}")
print(f"NDCG: {learned_exact_nonideal_metrics['ndcg']:.4f}")

100%|██████████| 1081/1081 [04:40<00:00,  3.85it/s]


Precision: 0.0103
Recall: 0.4143
NDCG: 0.2023





In [29]:
# Learned user embedding, approx NN search, no idealization

learned_approx_nonideal_stages = [
    # Retrieval
    UserEmbeddingFetcher(model),
    ANNSearch(approx_index, args.num_items, num_candidates),
    # Filtering
    BloomFilter(bloom_filters),
    CandidatePadding(args.num_items, num_candidates),
    # Scoring
    MatrixFactorizationScoring(model),
    # Ordering
    DitheredOrdering(num_recs, epsilon=1.5),
]

learned_approx_nonideal_retrieval = RecsPipeline(*learned_approx_nonideal_stages)

In [30]:
learned_approx_nonideal_metrics = model.compute_validation_metrics(
    tqdm(val_dataloader),
    pipeline_fn(learned_approx_nonideal_retrieval)
)

print(f"\nPrecision: {learned_approx_nonideal_metrics['precision']:.4f}")
print(f"Recall: {learned_approx_nonideal_metrics['recall']:.4f}")
print(f"NDCG: {learned_approx_nonideal_metrics['ndcg']:.4f}")

100%|██████████| 1081/1081 [04:27<00:00,  4.04it/s]


Precision: 0.0103
Recall: 0.4124
NDCG: 0.2034





In [31]:
# Item embeddings, exact NN search, idealized results

items_exact_ideal_stages = [
    # Retrieval
    ItemEmbeddingsFetcher(model),
    IdealizedANNSearch(val_dataloader.dataset, exact_index, args.num_items, num_candidates),
    # Filtering
    BloomFilter(bloom_filters),
    CandidatePadding(args.num_items, num_candidates),
    # Scoring
    MatrixFactorizationScoring(model),
    # Ordering
    DitheredOrdering(num_recs, epsilon=1.5),
]

items_exact_ideal_retrieval = RecsPipeline(*items_exact_ideal_stages)

In [32]:
items_exact_ideal_metrics = model.compute_validation_metrics(
    tqdm(val_dataloader),
    pipeline_fn(items_exact_ideal_retrieval)
)

print(f"\nPrecision: {items_exact_ideal_metrics['precision']:.4f}")
print(f"Recall: {items_exact_ideal_metrics['recall']:.4f}")
print(f"NDCG: {items_exact_ideal_metrics['ndcg']:.4f}")

  0%|          | 0/1081 [00:00<?, ?it/s]


ValueError: not enough values to unpack (expected 2, got 1)

In [None]:
# TODO: Item embeddings, exact NN search, idealized results
# TODO: Item embeddings, exact NN search, no idealization
# TODO: Item embeddings, approx NN search, no idealization

In [None]:
# Parts to idealize:
# - Ideal embeddings? -> always return correct items
# - Ideal search -> always find closest embeddings
# - Ideal user representations -> always match learned user representations
# - Ideal user representations, v2 -> always search near interacted items

In [None]:
# Just for fun: ANN search with averaged user embedding AND item embeddings

### Ideal Filtering

In [None]:
# TODO: Make an idealized filtering stage that filters out exactly the right items

In [None]:
ideal_filtering_stages = [
    # Retrieval
    UserAvgEmbeddingFetcher(model),
    ANNSearch(approx_index, args.num_items, num_candidates),
    # Filtering
#     BloomFilter(bloom_filters),
    CandidatePadding(args.num_items, num_candidates),
    # Scoring
    MatrixFactorizationScoring(model),
    # Ordering
    DitheredOrdering(num_recs, epsilon=1.5),
]

ideal_filtering = RecsPipeline(*ideal_filtering_stages)

In [None]:
ideal_filtering_metrics = model.compute_validation_metrics(
    tqdm(val_dataloader),
    pipeline_fn(ideal_filtering)
)

print(f"\nPrecision: {ideal_filtering_metrics['precision']:.4f}")
print(f"Recall: {ideal_filtering_metrics['recall']:.4f}")
print(f"NDCG: {ideal_filtering_metrics['ndcg']:.4f}")

### Ideal Scoring

In [None]:
def ideal_scoring(user_ids, num_items):
    k = 250
    dithering_eps = 1.5
    
    user_scores = []
    for user_id in user_ids:
        interactions = train_dataloader.dataset[user_id]["interactions"].coalesce()
        item_ids = interactions.indices()[1]
        
        item_embeddings, user_embedding, user_avg_embedding = \
                fetch_embeddings(model, user_id, item_ids)
        
        candidates = user_embedding_candidates(approx_index, user_avg_embedding, k, args.num_items)
        filtered = filter_candidates(user_id.cpu().item(), candidates)
        raw_scores = score_candidates(model, user_avg_embedding, filtered, k, args.num_items)
        
        # Move interacted items from validation set to the top
        val_interactions = val_dataloader.dataset[user_id]["interactions"].coalesce()
        val_item_ids = val_interactions.indices()[1]
        boosted_scores = raw_scores.clone().detach()
        boosted_scores[val_item_ids] += 10.0
        
        scores = dither_scores(boosted_scores, k, dithering_eps)
        
        user_scores.append(scores)
        
    return th.stack(user_scores)

In [None]:
ideal_scoring_metrics = model.compute_validation_metrics(
    tqdm(val_dataloader),
    ideal_scoring
)

print(f"\nPrecision: {ideal_scoring_metrics['precision']:.4f}")
print(f"Recall: {ideal_scoring_metrics['recall']:.4f}")
print(f"NDCG: {ideal_scoring_metrics['ndcg']:.4f}")

### Ideal Ordering

In [None]:
def ideal_ordering(user_ids, num_items):
    k = 250
    dithering_eps = 1.5
    
    user_scores = []
    for user_id in user_ids:
        interactions = train_dataloader.dataset[user_id]["interactions"].coalesce()
        item_ids = interactions.indices()[1]
        
        item_embeddings, user_embedding, user_avg_embedding = \
                fetch_embeddings(model, user_id, item_ids)
        
        candidates = user_embedding_candidates(approx_index, user_avg_embedding, k, args.num_items)
        filtered = filter_candidates(user_id.cpu().item(), candidates)
        raw_scores = score_candidates(model, user_avg_embedding, filtered, k, args.num_items)
#         scores = dither_scores(raw_scores, k, dithering_eps)
        
        user_scores.append(raw_scores)
        
    return th.stack(user_scores)

In [None]:
ideal_ordering_metrics = model.compute_validation_metrics(
    tqdm(val_dataloader),
    ideal_ordering
)

print(f"\nPrecision: {ideal_ordering_metrics['precision']:.4f}")
print(f"Recall: {ideal_ordering_metrics['recall']:.4f}")
print(f"NDCG: {ideal_ordering_metrics['ndcg']:.4f}")

### Improved Retrieval

In [None]:
def item_embedding_candidates(index, item_embeddings, k, num_items):
    if len(item_embeddings) > 0:
        neighbors_per = max(1, k // max(1,len(item_embeddings)))
        neighbor_scores, neighbor_indices = index.search(np.array(item_embeddings.cpu()), neighbors_per)
    else:
        neighbor_indices = th.randint(num_items, (k,))
        
    return th.tensor(neighbor_indices).flatten().unique()

In [None]:
def item_based_retrieval(user_ids, num_items):
    k = 250
    dithering_eps = 1.5
    
    user_scores = []
    for user_id in user_ids:  
        interactions = train_dataloader.dataset[user_id]["interactions"].coalesce()
        item_ids = interactions.indices()[1]
        
        item_embeddings, user_embedding, user_avg_embedding = \
                fetch_embeddings(model, user_id, item_ids)
        
        candidates = item_embedding_candidates(approx_index, item_embeddings, k, args.num_items)
        filtered = filter_candidates(user_id.cpu().item(), candidates)
        raw_scores = score_candidates(model, user_avg_embedding, filtered, k, args.num_items)
        scores = dither_scores(raw_scores, k, dithering_eps)
        
        user_scores.append(scores)
        
    return th.stack(user_scores)

In [None]:
item_based_retrieval_metrics = model.compute_validation_metrics(
    tqdm(val_dataloader),
    item_based_retrieval
)

print(f"\nPrecision: {item_based_retrieval_metrics['precision']:.4f}")
print(f"Recall: {item_based_retrieval_metrics['recall']:.4f}")
print(f"NDCG: {item_based_retrieval_metrics['ndcg']:.4f}")

In [None]:
def item_based_retrieval_ideal_filtering(user_ids, num_items):
    k = 250
    dithering_eps = 1.5
    
    user_scores = []
    for user_id in user_ids:   
        interactions = train_dataloader.dataset[user_id]["interactions"].coalesce()
        item_ids = interactions.indices()[1]
        
        item_embeddings, user_embedding, user_avg_embedding = \
                fetch_embeddings(model, user_id, item_ids)
        
        candidates = item_embedding_candidates(approx_index, item_embeddings, k, args.num_items)
#         filtered = filter_candidates(user_id.cpu().item(), candidates)
        raw_scores = score_candidates(model, user_avg_embedding, candidates, k, args.num_items)
        scores = dither_scores(raw_scores, k, dithering_eps)
        
        user_scores.append(scores)
        
    return th.stack(user_scores)

In [None]:
item_based_retrieval_ideal_filtering_metrics = model.compute_validation_metrics(
    tqdm(val_dataloader),
    item_based_retrieval_ideal_filtering
)

print(f"\nPrecision: {item_based_retrieval_ideal_filtering_metrics['precision']:.4f}")
print(f"Recall: {item_based_retrieval_ideal_filtering_metrics['recall']:.4f}")
print(f"NDCG: {item_based_retrieval_ideal_filtering_metrics['ndcg']:.4f}")

In [None]:
def item_based_retrieval_ideal_scoring(user_ids, num_items):
    k = 250
    dithering_eps = 1.5
    
    user_scores = []
    for user_id in user_ids:
        interactions = train_dataloader.dataset[user_id]["interactions"].coalesce()
        item_ids = interactions.indices()[1]
        
        item_embeddings, user_embedding, user_avg_embedding = \
                fetch_embeddings(model, user_id, item_ids)
        
        candidates = item_embedding_candidates(approx_index, item_embeddings, k, args.num_items)
        filtered = filter_candidates(user_id.cpu().item(), candidates)
        raw_scores = score_candidates(model, user_avg_embedding, filtered, k, args.num_items)
        scores = dither_scores(raw_scores, k, dithering_eps)
        
        # Move interacted items from validation set to the top
        val_interactions = val_dataloader.dataset[user_id]["interactions"].coalesce()
        val_item_ids = val_interactions.indices()[1]
        boosted_scores = raw_scores.clone().detach()
        boosted_scores[val_item_ids] += 10.0
        
        user_scores.append(scores)
        
    return th.stack(user_scores)

In [None]:
item_based_retrieval_ideal_scoring_metrics = model.compute_validation_metrics(
    tqdm(val_dataloader),
    item_based_retrieval_ideal_scoring
)

print(f"\nPrecision: {item_based_retrieval_ideal_scoring_metrics['precision']:.4f}")
print(f"Recall: {item_based_retrieval_ideal_scoring_metrics['recall']:.4f}")
print(f"NDCG: {item_based_retrieval_ideal_scoring_metrics['ndcg']:.4f}")

In [None]:
def item_based_retrieval_ideal_ordering(user_ids, num_items):
    k = 250
    dithering_eps = 1.5
    
    user_scores = []
    for user_id in user_ids:
        interactions = train_dataloader.dataset[user_id]["interactions"].coalesce()
        item_ids = interactions.indices()[1]
        
        item_embeddings, user_embedding, user_avg_embedding = \
                fetch_embeddings(model, user_id, item_ids)
        
        candidates = item_embedding_candidates(approx_index, item_embeddings, k, args.num_items)
        filtered = filter_candidates(user_id.cpu().item(), candidates)
        raw_scores = score_candidates(model, user_avg_embedding, filtered, k, args.num_items)
#         scores = dither_scores(raw_scores, k, dithering_eps)
        
        user_scores.append(raw_scores)
        
    return th.stack(user_scores)

In [None]:
item_based_retrieval_ideal_ordering_metrics = model.compute_validation_metrics(
    tqdm(val_dataloader),
    item_based_retrieval_ideal_ordering
)

print(f"\nPrecision: {item_based_retrieval_ideal_ordering_metrics['precision']:.4f}")
print(f"Recall: {item_based_retrieval_ideal_ordering_metrics['recall']:.4f}")
print(f"NDCG: {item_based_retrieval_ideal_ordering_metrics['ndcg']:.4f}")

### Improved Filtering

In [None]:
# TODO: Try out better Bloom filter parameters

### Improved Scoring

In [None]:
# TODO: Try out using the learned user embeddings for scoring

### Improved Ordering

In [None]:
# TODO: Try reducing the dithering eps