In [9]:
# import os
# os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [10]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [11]:
import pathlib
import time

import faiss
import numpy as np
import pybloomfilter as pbf
import torch as th
from practicalrecs_examples.ann_search import *
from practicalrecs_examples.dithering import *
from practicalrecs_examples.filtering import *
from practicalrecs_examples.matrix_factorization import *
from practicalrecs_examples.notebooks.utils import *
from practicalrecs_examples.pipeline import *
from pytorch_lightning import seed_everything
from ranking_metrics_torch.cumulative_gain import ndcg_at
from ranking_metrics_torch.precision_recall import precision_at, recall_at
from torch_factorization_models.movielens import MovielensDataModule
from tqdm import tqdm

In [12]:
# same seed used to create splits in training
seed_everything(42)  

42

### Load the model

In [13]:
model = load_model("../models/mf_example.pt")

if th.cuda.is_available():
    model.cuda()

### Load the dataset

In [14]:
movielens_module = MovielensDataModule("../datasets/ml-20m/", batch_size=128)
movielens_module.setup()

movielens_module.dataset.to_(device=model.device)

In [15]:
val_dataloader = movielens_module.val_dataloader(by_user=True)

In [16]:
train_dataloader = movielens_module.train_dataloader(by_user=True)

### Build Nearest Neighbor Search Indices

In [17]:
dim = model.hparams.embedding_dim
item_vectors = np.array(model.item_embeddings.weight.cpu().data)

In [18]:
exact_index = build_nn_search_index(item_vectors, dim, "Flat")

In [19]:
approx_index = build_nn_search_index(item_vectors, dim, "IVF1024,PQ32", nprobe=30)

### Build Bloom Filters

In [20]:
# Build the bloom filters consumed by the BloomFilter filtering stage from the
# training dataset (tqdm only adds a progress bar).
# NOTE(review): these settings (expected_items=10, fp_rate=0.1) are the ones
# replaced by expected_items=100, fp_rate=0.01 in the
# "Evaluate Improved Filtering" section.
bloom_filters = build_bloom_filters(
    tqdm(train_dataloader.dataset),
    expected_items=10,
    fp_rate=0.1
)

100%|██████████| 138287/138287 [00:40<00:00, 3405.51it/s]


### Compute Raw Model-Only Metrics

In [21]:
# TODO: Pass in the number of recs to evaluate here?
model_metrics = model.compute_validation_metrics(
    tqdm(val_dataloader),
    model.eval_predict,
)

100%|██████████| 1081/1081 [01:23<00:00, 12.87it/s]


In [22]:
print_metrics(model_metrics)

Precision: 0.0152
Recall: 0.4629
NDCG: 0.2005


### Evaluate The Initial Pipeline

In [23]:
num_users = model.hparams.num_users
num_items = model.hparams.num_items
num_candidates = 250
num_recs = 100

In [24]:
# TODO: Move this to a method param?
model.eval_cutoff = num_recs

In [25]:
# TODO: Move this into the library?
def pipeline_fn(pipeline, dataset=None):
    """Adapt a recommendation pipeline to a batched prediction callable.

    The returned function has the ``(user_ids, num_items)`` signature that is
    passed as the second argument to ``model.compute_validation_metrics``.

    Args:
        pipeline: Object exposing ``recommend(user_id, item_ids)`` and
            returning one tensor of scores per call.
        dataset: Optional indexable source of per-user interactions, where
            ``dataset[user_id]["interactions"]`` is a sparse tensor. When
            omitted, the notebook-level ``train_dataloader.dataset`` is looked
            up each time the returned function is called, which is the
            original behaviour.

    Returns:
        A function ``pipeline_predict(user_ids, num_items)`` that returns the
        per-user score tensors stacked along a new first dimension.
    """
    def pipeline_predict(user_ids, num_items):
        # `num_items` is unused here; it is accepted only to match the
        # signature of the prediction callable.
        # Resolve the fallback at call time so a later rebinding of the
        # notebook global is still picked up.
        interactions_source = (
            train_dataloader.dataset if dataset is None else dataset
        )

        user_scores = []
        for user_id in user_ids:
            # Convert the tensor element to a plain int before indexing.
            user_id = int(user_id.cpu().item())

            interactions = interactions_source[user_id]["interactions"].coalesce()
            # Row 1 of the coalesced indices is passed on as the item ids.
            item_ids = interactions.indices()[1]

            user_scores.append(pipeline.recommend(user_id, item_ids))

        # th.stack requires at least one user and equally shaped score tensors.
        return th.stack(user_scores)

    return pipeline_predict

In [26]:
# Epsilon passed to DitheredOrdering in every baseline defined in this notebook.
dithering_eps=5.0

# Initial pipeline, one list of components per stage:
#   retrieval - averaged user embedding searched against the approximate index
#   filtering - bloom filters, followed by CandidatePadding
#   scoring   - matrix-factorization model
#   ordering  - dithered ordering of num_recs items
default_stages = RecsPipelineStages(
    retrieval = [
        UserAvgEmbeddingFetcher(model),
        ANNSearch(approx_index, num_items, num_candidates),
    ],
    filtering = [
        BloomFilter(bloom_filters),
        CandidatePadding(num_items, num_candidates),
    ],
    scoring = [
        MatrixFactorizationScoring(model),
    ],
    ordering = [
        DitheredOrdering(num_recs, epsilon=dithering_eps),
    ]
)

# The later experiments call build(overrides=...) on this builder to replace
# individual stages.
base_pipeline_builder = RecsPipelineBuilder(defaults=default_stages)

# No overrides: the baseline pipeline itself.
base_pipeline = base_pipeline_builder.build()

In [27]:
pipeline_metrics = model.compute_validation_metrics(
    tqdm(val_dataloader),
    pipeline_fn(base_pipeline)
)

100%|██████████| 1081/1081 [04:26<00:00,  4.05it/s]


In [28]:
print_times(base_pipeline, num_users)

ANNSearch: 47.94s (0.35 ms/user)
MatrixFactorizationScoring: 45.03s (0.33 ms/user)
DitheredOrdering: 29.58s (0.21 ms/user)
BloomFilter: 11.90s (0.09 ms/user)
UserAvgEmbeddingFetcher: 6.56s (0.05 ms/user)
CandidatePadding: 6.53s (0.05 ms/user)


In [29]:
print_metrics(pipeline_metrics)

Precision: 0.0058
Recall: 0.2163
NDCG: 0.1581


### Evaluate Idealized Retrieval

#### Learned user embedding, exact NN search, idealized results

In [42]:
# Override only the retrieval stage: learned user embedding plus the exact
# index, with IdealizedANNSearch additionally given the validation dataset.
# NOTE(review): unlike the item-embedding retrieval variants further down, no
# UserAvgEmbeddingFetcher is re-run after the search here, so scoring may see
# the learned user embedding instead of the averaged one - confirm whether
# that is intended.
ideal_retrieval_stages = RecsPipelineStages(
    retrieval = [
        UserEmbeddingFetcher(model),
        IdealizedANNSearch(val_dataloader.dataset, exact_index, num_items, num_candidates),
    ]
)

ideal_retrieval = base_pipeline_builder.build(overrides=ideal_retrieval_stages)

In [43]:
ideal_retrieval_metrics = model.compute_validation_metrics(
    tqdm(val_dataloader),
    pipeline_fn(ideal_retrieval)
)

100%|██████████| 1081/1081 [05:30<00:00,  3.27it/s]


In [44]:
print_metrics(ideal_retrieval_metrics)

Precision: 0.0115
Recall: 0.4344
NDCG: 0.2108


### Evaluate Idealized Filtering

In [45]:
ideal_filtering_stages = RecsPipelineStages(
    filtering = [
        IdealizedFilter(train_dataloader.dataset),
        CandidatePadding(num_items, num_candidates),
    ]
)

ideal_filtering = base_pipeline_builder.build(overrides=ideal_filtering_stages)

In [46]:
ideal_filtering_metrics = model.compute_validation_metrics(
    tqdm(val_dataloader),
    pipeline_fn(ideal_filtering)
)

  candidates = th.tensor(user_recs.candidates, dtype=th.long)
100%|██████████| 1081/1081 [05:09<00:00,  3.50it/s]


In [47]:
print_metrics(ideal_filtering_metrics)

Precision: 0.0105
Recall: 0.2857
NDCG: 0.1616


#### Omit filtering entirely (for comparison)

In [48]:
no_filtering_stages = RecsPipelineStages(
    filtering = [
        CandidatePadding(num_items, num_candidates),
    ]
)

no_filtering = base_pipeline_builder.build(overrides=no_filtering_stages)

In [49]:
no_filtering_metrics = model.compute_validation_metrics(
    tqdm(val_dataloader),
    pipeline_fn(no_filtering)
)

100%|██████████| 1081/1081 [04:15<00:00,  4.23it/s]


In [50]:
print_metrics(no_filtering_metrics)

Precision: 0.0089
Recall: 0.2587
NDCG: 0.1488


### Evaluate Idealized Scoring

In [51]:
ideal_scoring_stages = RecsPipelineStages(
    scoring = [
        IdealizedMatrixFactorizationScoring(model, val_dataloader.dataset),
    ]
)

ideal_scoring = base_pipeline_builder.build(overrides=ideal_scoring_stages)

In [52]:
ideal_scoring_metrics = model.compute_validation_metrics(
    tqdm(val_dataloader),
    pipeline_fn(ideal_scoring)
)

100%|██████████| 1081/1081 [05:05<00:00,  3.54it/s]


In [53]:
print_metrics(ideal_scoring_metrics)

Precision: 0.0072
Recall: 0.2816
NDCG: 0.4054


### Evaluate Idealized Ordering

#### Order descending by score (omitting dithering)

In [54]:
ideal_ordering_stages = RecsPipelineStages(ordering = [])

ideal_ordering = base_pipeline_builder.build(overrides=ideal_ordering_stages)

In [55]:
ideal_ordering_metrics = model.compute_validation_metrics(
    tqdm(val_dataloader),
    pipeline_fn(ideal_ordering)
)

100%|██████████| 1081/1081 [04:02<00:00,  4.45it/s]


In [56]:
print_metrics(ideal_ordering_metrics)

Precision: 0.0058
Recall: 0.2163
NDCG: 0.1679


### Evaluate Idealized Everything

In [None]:
ideal_everything_stages = RecsPipelineStages(
    retrieval = [
        UserEmbeddingFetcher(model),
        IdealizedANNSearch(val_dataloader.dataset, exact_index, num_items, num_candidates),
    ],
    filtering = [
        IdealizedFilter(train_dataloader.dataset),
        CandidatePadding(num_items, num_candidates),
    ],
    scoring = [
        IdealizedMatrixFactorizationScoring(model, val_dataloader.dataset),
    ],
    ordering = []
)

ideal_everything = base_pipeline_builder.build(overrides=ideal_everything_stages)

In [None]:
ideal_everything_metrics = model.compute_validation_metrics(
    tqdm(val_dataloader),
    pipeline_fn(ideal_everything)
)

In [None]:
print_metrics(ideal_everything_metrics)

### Evaluate Improved Filtering

In [30]:
improved_bloom_filters = build_bloom_filters(
    tqdm(train_dataloader.dataset),
    expected_items=100,
    fp_rate=0.01
)

100%|██████████| 138287/138287 [00:46<00:00, 2960.95it/s]


In [31]:
improved_filtering_stages = RecsPipelineStages(
    filtering = [
        BloomFilter(improved_bloom_filters),
        CandidatePadding(num_items, num_candidates),
    ]
)

improved_filtering = base_pipeline_builder.build(overrides=improved_filtering_stages)

In [32]:
improved_filtering_metrics = model.compute_validation_metrics(
    tqdm(val_dataloader),
    pipeline_fn(improved_filtering)
)

100%|██████████| 1081/1081 [05:11<00:00,  3.47it/s]


In [33]:
print_metrics(improved_filtering_metrics)

Precision: 0.0102
Recall: 0.2836
NDCG: 0.1611


#### New baseline for further improvements

In [34]:
# Same stages as the initial pipeline, except that the BloomFilter stage now
# uses improved_bloom_filters.
# Note: this rebinds the name `default_stages`; base_pipeline_builder was
# constructed with the earlier object and is unaffected by the rebinding.
default_stages = RecsPipelineStages(
    retrieval = [
        UserAvgEmbeddingFetcher(model),
        ANNSearch(approx_index, num_items, num_candidates),
    ],
    filtering = [
        BloomFilter(improved_bloom_filters),
        CandidatePadding(num_items, num_candidates),
    ],
    scoring = [
        MatrixFactorizationScoring(model),
    ],
    ordering = [
        DitheredOrdering(num_recs, epsilon=dithering_eps),
    ]
)

# Builder used for the re-evaluations on top of improved filtering.
improved_filtering_pipeline_builder = RecsPipelineBuilder(defaults=default_stages)

In [57]:
ideal_retrieval = improved_filtering_pipeline_builder.build(overrides=ideal_retrieval_stages)

In [58]:
ideal_retrieval_metrics = model.compute_validation_metrics(
    tqdm(val_dataloader),
    pipeline_fn(ideal_retrieval)
)

100%|██████████| 1081/1081 [05:13<00:00,  3.44it/s]


In [59]:
print_metrics(ideal_retrieval_metrics)

Precision: 0.0207
Recall: 0.5657
NDCG: 0.2359


In [60]:
ideal_scoring = improved_filtering_pipeline_builder.build(overrides=ideal_scoring_stages)

In [61]:
ideal_scoring_metrics = model.compute_validation_metrics(
    tqdm(val_dataloader),
    pipeline_fn(ideal_scoring)
)

100%|██████████| 1081/1081 [05:02<00:00,  3.57it/s]


In [62]:
print_metrics(ideal_scoring_metrics)

Precision: 0.0149
Recall: 0.4165
NDCG: 0.4253


In [63]:
ideal_ordering = improved_filtering_pipeline_builder.build(overrides=ideal_ordering_stages)

In [64]:
ideal_ordering_metrics = model.compute_validation_metrics(
    tqdm(val_dataloader),
    pipeline_fn(ideal_ordering)
)

100%|██████████| 1081/1081 [04:03<00:00,  4.44it/s]


In [65]:
print_metrics(ideal_ordering_metrics)

Precision: 0.0102
Recall: 0.2836
NDCG: 0.1680


### Improved Retrieval

#### Learned user embedding, exact NN search, no idealization

In [None]:
# Retrieval with the learned user embedding against the exact index.
learned_exact_nonideal_stages = RecsPipelineStages(
    retrieval = [
        UserEmbeddingFetcher(model),
        ANNSearch(exact_index, num_items, num_candidates),
        # Presumably the same purpose as in the item-embedding variants below:
        # re-fetching user average embeddings keeps modified retrieval from
        # affecting scoring.
        UserAvgEmbeddingFetcher(model)
    ]
)

learned_exact_nonideal_retrieval = improved_filtering_pipeline_builder.build(overrides=learned_exact_nonideal_stages)

In [None]:
learned_exact_nonideal_metrics = model.compute_validation_metrics(
    tqdm(val_dataloader),
    pipeline_fn(learned_exact_nonideal_retrieval)
)

In [None]:
print_metrics(learned_exact_nonideal_metrics)

#### Learned user embedding, approx NN search, no idealization

In [None]:
# Retrieval with the learned user embedding against the approximate index.
learned_approx_nonideal_stages = RecsPipelineStages(
    retrieval = [
        UserEmbeddingFetcher(model),
        ANNSearch(approx_index, num_items, num_candidates),
        # Presumably the same purpose as in the item-embedding variants below:
        # re-fetching user average embeddings keeps modified retrieval from
        # affecting scoring.
        UserAvgEmbeddingFetcher(model)
    ]
)

learned_approx_nonideal_retrieval = improved_filtering_pipeline_builder.build(overrides=learned_approx_nonideal_stages)

In [None]:
learned_approx_nonideal_metrics = model.compute_validation_metrics(
    tqdm(val_dataloader),
    pipeline_fn(learned_approx_nonideal_retrieval)
)

In [None]:
print_metrics(learned_approx_nonideal_metrics)

#### Averaged user embedding, exact NN search, no idealization

In [None]:
avg_exact_nonideal_stages = RecsPipelineStages(
    retrieval = [
        UserAvgEmbeddingFetcher(model),
        ANNSearch(exact_index, num_items, num_candidates),
    ]
)

avg_exact_nonideal_retrieval = improved_filtering_pipeline_builder.build(overrides=avg_exact_nonideal_stages)

In [None]:
avg_exact_nonideal_metrics = model.compute_validation_metrics(
    tqdm(val_dataloader),
    pipeline_fn(avg_exact_nonideal_retrieval)
)

In [None]:
print_metrics(avg_exact_nonideal_metrics)

#### Averaged user embedding, approx NN search, no idealization

In [None]:
avg_approx_nonideal_stages = RecsPipelineStages(
    retrieval = [
        UserAvgEmbeddingFetcher(model),
        ANNSearch(approx_index, num_items, num_candidates),
    ]
)

avg_approx_nonideal_retrieval = improved_filtering_pipeline_builder.build(overrides=avg_approx_nonideal_stages)

In [None]:
avg_approx_nonideal_metrics = model.compute_validation_metrics(
    tqdm(val_dataloader),
    pipeline_fn(avg_approx_nonideal_retrieval)
)

In [None]:
print_metrics(avg_approx_nonideal_metrics)

#### Item embeddings, exact NN search, idealized results

In [None]:
items_exact_ideal_stages = RecsPipelineStages(
    retrieval = [
        ItemEmbeddingsFetcher(model),
        UseItemEmbeddingsAsUserEmbeddings(),
        IdealizedANNSearch(val_dataloader.dataset, exact_index, num_items, num_candidates),
        # Re-fetching user average embeddings keeps modified retrieval from affecting scoring
        UserAvgEmbeddingFetcher(model),
    ]
)

items_exact_ideal_retrieval = improved_filtering_pipeline_builder.build(overrides=items_exact_ideal_stages)

In [None]:
items_exact_ideal_metrics = model.compute_validation_metrics(
    tqdm(val_dataloader),
    pipeline_fn(items_exact_ideal_retrieval)
)

In [None]:
print_metrics(items_exact_ideal_metrics)

#### Item embeddings, exact NN search, no idealization

In [None]:
items_exact_nonideal_stages = RecsPipelineStages(
    retrieval = [
        ItemEmbeddingsFetcher(model),
        UseItemEmbeddingsAsUserEmbeddings(),
        ANNSearch(exact_index, num_items, num_candidates),
        # Re-fetching user average embeddings keeps modified retrieval from affecting scoring
        UserAvgEmbeddingFetcher(model),
    ]
)

items_exact_nonideal_retrieval = improved_filtering_pipeline_builder.build(overrides=items_exact_nonideal_stages)

In [None]:
items_exact_nonideal_metrics = model.compute_validation_metrics(
    tqdm(val_dataloader),
    pipeline_fn(items_exact_nonideal_retrieval)
)

In [None]:
print_metrics(items_exact_nonideal_metrics)

#### Item embeddings, approx NN search, no idealization

In [66]:
items_approx_nonideal_stages = RecsPipelineStages(
    retrieval = [
        ItemEmbeddingsFetcher(model),
        UseItemEmbeddingsAsUserEmbeddings(),
        ANNSearch(approx_index, num_items, num_candidates),
        # Re-fetching user average embeddings keeps modified retrieval from affecting scoring
        UserAvgEmbeddingFetcher(model),
    ]
)

items_approx_nonideal_retrieval = improved_filtering_pipeline_builder.build(overrides=items_approx_nonideal_stages)

In [67]:
items_approx_nonideal_metrics = model.compute_validation_metrics(
    tqdm(val_dataloader),
    pipeline_fn(items_approx_nonideal_retrieval)
)

100%|██████████| 1081/1081 [05:09<00:00,  3.49it/s]


In [68]:
print_metrics(items_approx_nonideal_metrics)

Precision: 0.0081
Recall: 0.2979
NDCG: 0.1870


#### Item + averaged user embeddings, approx NN search, no idealization

In [None]:
both_approx_nonideal_stages = RecsPipelineStages(
    retrieval = [
        UserAvgEmbeddingFetcher(model),
        ItemEmbeddingsFetcher(model),
        UseItemEmbeddingsAsUserEmbeddings(append=True),
        ANNSearch(approx_index, num_items, num_candidates),
        # Re-fetching user average embeddings keeps modified retrieval from affecting scoring
        UserAvgEmbeddingFetcher(model),
    ]
)

both_approx_nonideal_retrieval = improved_filtering_pipeline_builder.build(overrides=both_approx_nonideal_stages)

In [None]:
both_approx_nonideal_metrics = model.compute_validation_metrics(
    tqdm(val_dataloader),
    pipeline_fn(both_approx_nonideal_retrieval)
)

In [None]:
print_metrics(both_approx_nonideal_metrics)

#### New baseline for further improvements

In [69]:
# Same as the improved-filtering baseline, except that retrieval now uses the
# item-embedding + approximate-index variant evaluated above.
# Note: this rebinds the name `default_stages` again; builders constructed
# earlier keep the objects they were given.
default_stages = RecsPipelineStages(
    retrieval = [
        ItemEmbeddingsFetcher(model),
        UseItemEmbeddingsAsUserEmbeddings(),
        ANNSearch(approx_index, num_items, num_candidates),
        # Re-fetching user average embeddings keeps modified retrieval from affecting scoring
        UserAvgEmbeddingFetcher(model),
    ],
    filtering = [
        BloomFilter(improved_bloom_filters),
        CandidatePadding(num_items, num_candidates),
    ],
    scoring = [
        MatrixFactorizationScoring(model),
    ],
    ordering = [
        DitheredOrdering(num_recs, epsilon=dithering_eps),
    ]
)

# Builder used for the re-evaluations on top of improved retrieval.
improved_retrieval_pipeline_builder = RecsPipelineBuilder(defaults=default_stages)

In [70]:
ideal_scoring = improved_retrieval_pipeline_builder.build(overrides=ideal_scoring_stages)

In [71]:
ideal_scoring_metrics = model.compute_validation_metrics(
    tqdm(val_dataloader),
    pipeline_fn(ideal_scoring)
)

100%|██████████| 1081/1081 [05:44<00:00,  3.14it/s]


In [72]:
print_metrics(ideal_scoring_metrics)

Precision: 0.0090
Recall: 0.3321
NDCG: 0.4041


In [73]:
ideal_ordering = improved_retrieval_pipeline_builder.build(overrides=ideal_ordering_stages)

In [74]:
ideal_ordering_metrics = model.compute_validation_metrics(
    tqdm(val_dataloader),
    pipeline_fn(ideal_ordering)
)

100%|██████████| 1081/1081 [04:55<00:00,  3.66it/s]


In [75]:
print_metrics(ideal_ordering_metrics)

Precision: 0.0081
Recall: 0.2979
NDCG: 0.2039


### Improved Ordering

#### Reduced dithering

In [88]:
# NOTE(review): epsilon=5.0 is the same value as dithering_eps, which the
# builder's default DitheredOrdering already uses, so this run repeats the
# baseline ordering rather than reducing dithering. Its execution count
# (In [88]) is higher than those of the sweep cells that follow, i.e. it was
# run after them.
reduced_dithering_stages = RecsPipelineStages(
    ordering = [
        DitheredOrdering(num_recs, epsilon=5.0),
    ]
)

reduced_dithering = improved_retrieval_pipeline_builder.build(overrides=reduced_dithering_stages)

In [89]:
reduced_dithering_metrics = model.compute_validation_metrics(
    tqdm(val_dataloader),
    pipeline_fn(reduced_dithering)
)

100%|██████████| 1081/1081 [05:06<00:00,  3.53it/s]


In [90]:
print_metrics(reduced_dithering_metrics)

Precision: 0.0081
Recall: 0.2978
NDCG: 0.1858


In [76]:
reduced_dithering_stages = RecsPipelineStages(
    ordering = [
        DitheredOrdering(num_recs, epsilon=3.0),
    ]
)

reduced_dithering = improved_retrieval_pipeline_builder.build(overrides=reduced_dithering_stages)

In [77]:
reduced_dithering_metrics = model.compute_validation_metrics(
    tqdm(val_dataloader),
    pipeline_fn(reduced_dithering)
)

100%|██████████| 1081/1081 [05:12<00:00,  3.46it/s]


In [78]:
print_metrics(reduced_dithering_metrics)

Precision: 0.0081
Recall: 0.2979
NDCG: 0.1903


In [79]:
reduced_dithering_stages = RecsPipelineStages(
    ordering = [
        DitheredOrdering(num_recs, epsilon=2.5),
    ]
)

reduced_dithering = improved_retrieval_pipeline_builder.build(overrides=reduced_dithering_stages)

In [80]:
reduced_dithering_metrics = model.compute_validation_metrics(
    tqdm(val_dataloader),
    pipeline_fn(reduced_dithering)
)

100%|██████████| 1081/1081 [05:07<00:00,  3.51it/s]


In [81]:
print_metrics(reduced_dithering_metrics)

Precision: 0.0081
Recall: 0.2979
NDCG: 0.1919


In [82]:
reduced_dithering_stages = RecsPipelineStages(
    ordering = [
        DitheredOrdering(num_recs, epsilon=2.0),
    ]
)

reduced_dithering = improved_retrieval_pipeline_builder.build(overrides=reduced_dithering_stages)

In [83]:
reduced_dithering_metrics = model.compute_validation_metrics(
    tqdm(val_dataloader),
    pipeline_fn(reduced_dithering)
)

100%|██████████| 1081/1081 [05:08<00:00,  3.51it/s]


In [84]:
print_metrics(reduced_dithering_metrics)

Precision: 0.0081
Recall: 0.2979
NDCG: 0.1943


In [85]:
reduced_dithering_stages = RecsPipelineStages(
    ordering = [
        DitheredOrdering(num_recs, epsilon=1.5),
    ]
)

reduced_dithering = improved_retrieval_pipeline_builder.build(overrides=reduced_dithering_stages)

In [86]:
reduced_dithering_metrics = model.compute_validation_metrics(
    tqdm(val_dataloader),
    pipeline_fn(reduced_dithering)
)

100%|██████████| 1081/1081 [05:14<00:00,  3.43it/s]


In [87]:
print_metrics(reduced_dithering_metrics)

Precision: 0.0081
Recall: 0.2978
NDCG: 0.1981


### Improved Scoring

#### Scoring with learned user embeddings (i.e. using the actual model)

In [None]:
# TODO: Use improved retrieval

In [35]:
# Override only the scoring stage: fetch the learned user embedding right
# before MatrixFactorizationScoring (presumably so scoring uses it instead of
# the averaged embedding left by retrieval - confirm).
learned_scoring_stages = RecsPipelineStages(
    scoring = [
        UserEmbeddingFetcher(model),
        MatrixFactorizationScoring(model),
    ]
)

# Built on improved_filtering_pipeline_builder, so the item-embedding
# retrieval from improved_retrieval_pipeline_builder is not included.
learned_scoring = improved_filtering_pipeline_builder.build(overrides=learned_scoring_stages)

In [36]:
learned_scoring_metrics = model.compute_validation_metrics(
    tqdm(val_dataloader),
    pipeline_fn(learned_scoring)
)

100%|██████████| 1081/1081 [04:53<00:00,  3.68it/s]


In [37]:
print_metrics(learned_scoring_metrics)

Precision: 0.0128
Recall: 0.3535
NDCG: 0.1951
