In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pathlib
import time
from collections import defaultdict

import faiss
import matplotlib.pyplot as plt
import numpy as np
import pybloomfilter as pbf
import torch as th
from pytorch_lightning import seed_everything
from ranking_metrics_torch.cumulative_gain import ndcg_at
from ranking_metrics_torch.precision_recall import precision_at, recall_at
from torch_factorization_models.movielens import MovielensDataModule
from tqdm.notebook import tqdm

from practicalrecs_examples.ann_search import *
from practicalrecs_examples.dithering import *
from practicalrecs_examples.evaluation import *
from practicalrecs_examples.filtering import *
from practicalrecs_examples.matrix_factorization import *
from practicalrecs_examples.notebooks.utils import *
from practicalrecs_examples.pipeline import *

In [3]:
# Useful for troubleshooting CUDA errors
# import os
# os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [4]:
# same seed used to create splits in training
random_seed = seed_everything(42)  

In [5]:
# CUDA device id
# NOTE(review): the index is hard-coded to device 1; the commented-out line below
# picks the last visible device instead — confirm which is intended before running
# on a machine with a different number of GPUs.
# gpu_id = th.cuda.device_count() - 1
gpu_id = 1
# Device string passed to the dataset's `to_` call when CUDA is available.
device_id = f"cuda:{gpu_id}"

In [6]:
# Set default parameters
dithering_eps = 3.0

### Load the dataset

In [7]:
# Build the MovieLens data module from the local ml-25m directory and run its setup.
movielens_module = MovielensDataModule("../datasets/ml-25m/", batch_size=512)
movielens_module.setup()

# Only move the dataset to the configured device when CUDA is actually present.
if th.cuda.is_available():
    movielens_module.dataset.to_(device=device_id)

In [8]:
val_dataloader = movielens_module.val_dataloader(by_user=True)

In [9]:
train_dataloader = movielens_module.train_dataloader(by_user=True)

### Create Evaluation Harness

In [10]:
# Shared evaluation harness used by every experiment below.
# presumably num_candidates is the size of the retrieved candidate set and num_recs
# the length of the final list (later cells pass harness.num_recs as a top-k cutoff)
# — confirm against EvaluationHarness.
harness = EvaluationHarness(
    num_candidates=250,
    num_recs=100,
    use_cuda=th.cuda.is_available(),
    gpu_id=gpu_id
)

### Build Bloom Filters

In [11]:
# Build Bloom filters from the training dataset under the name "cap10-fp0.1"
# (the name mirrors expected_items=10 and fp_rate=0.1); later evaluation cells
# refer to them by that name. tqdm only wraps the dataset to show progress.
filters = harness.artifacts.build_bloom_filters(
    "cap10-fp0.1",
    tqdm(train_dataloader.dataset),
    expected_items=10,
    fp_rate=0.1)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=162342.0), HTML(value='')))




### Evaluate Model With Binary Cross-Entropy Loss

#### Load the model

In [12]:
harness.artifacts.load_model("bce", "../models/celestial-bee-469-bce.pt")

#### Compute Raw Model-Only Metrics

In [15]:
# Evaluate the "bce" model directly on the validation set.
# NOTE(review): the recorded output of this cell is `KeyError: 'precision'`, while the
# dict shown in the next cell only has 'recall' and 'ndcg' keys — print_metrics and
# evaluate_model appear to disagree about which metrics exist; confirm and fix upstream.
model_metrics = harness.evaluate_model("bce", val_dataloader)
print_metrics(model_metrics)

KeyError: 'precision'

In [16]:
model_metrics

{'recall': tensor([0.5000, 0.3333, 0.2500,  ..., 0.7500, 0.4000, 0.3333], device='cuda:1',
        dtype=torch.float64),
 'ndcg': tensor([0.2522, 0.2204, 0.1789,  ..., 0.2632, 0.3984, 0.1574], device='cuda:1',
        dtype=torch.float64)}

In [24]:
print_metrics(model_metrics)

Recall: nan
Ndcg: nan


#### Build Nearest Neighbor Search Indices

In [None]:
harness.artifacts.build_ann_indices("bce")

#### Evaluate The End-To-End Pipeline

In [None]:
# Register the "base" template for the "bce" model. It lists all four stages
# (retrieval, filtering, scoring, ordering); later cells name this template and
# replace individual stages.
harness.pipelines.create_template(
    "bce", "base",
    RecsPipelineStages(
        retrieval = [
            UserAvgEmbeddingFetcher(),
            ANNSearch(),
        ],
        filtering = [
            BloomFilter(),
            CandidatePadding(),
        ],
        scoring = [
            # Re-fetching user avg embeddings keeps retrieval changes from affecting scoring
            UserAvgEmbeddingFetcher(),
            MatrixFactorizationScoring(),
        ],
        ordering = [
            # Dithering strength comes from the notebook-level default set above.
            DitheredOrdering(epsilon=dithering_eps),
        ]
    )
)

In [None]:
# Create the "base" pipeline, then evaluate it with the "bce" model, the
# approximate index and the cap10-fp0.1 Bloom filters, and print the metrics.
harness.pipelines.create_pipeline("base")
metrics = harness.evaluate_pipeline(
    pipeline="base",
    model="bce",
    index="approx",
    filters="cap10-fp0.1",
    train=train_dataloader,
    val=tqdm(val_dataloader)
)
print_metrics(metrics)

### Evaluate Idealized Everything

In [None]:
# "ideal" pipeline: every stage of the "base" template is replaced with its
# idealized counterpart. The idealized retrieval and scoring stages are given the
# validation dataset and the idealized filter the training dataset.
harness.create_pipeline("ideal", template_name="base",
    stages = RecsPipelineStages(
        retrieval = [
            UserEmbeddingFetcher(),
            IdealizedANNSearch(val_dataloader.dataset),
        ],
        filtering = [
            IdealizedFilter(train_dataloader.dataset),
            CandidatePadding(),
        ],
        scoring = [
            # Re-fetching user avg embeddings keeps retrieval changes from affecting scoring
            UserAvgEmbeddingFetcher(),
            IdealizedMatrixFactorizationScoring(val_dataloader.dataset),
        ],
        # No ordering stage, i.e. no dithering is applied.
        ordering = []
    )
)

In [None]:
# Evaluate the fully idealized pipeline against the exact index and print the metrics.
metrics = harness.evaluate_pipeline(
    pipeline="ideal",
    model="bce",
    index="exact",
    filters="cap10-fp0.1",
    train=train_dataloader,
    val=tqdm(val_dataloader),
)
print_metrics(metrics)

### Evaluate Idealized Retrieval

#### Learned user embedding, exact NN search, idealized results

In [None]:
# "ideal-retrieval": only the retrieval stage is specified here; presumably the
# remaining stages come from the "base" template — confirm in create_pipeline.
harness.create_pipeline("ideal-retrieval", template_name="base",
    stages = RecsPipelineStages(
        retrieval = [
            UserEmbeddingFetcher(),
            IdealizedANNSearch(val_dataloader.dataset),
        ]
    )
)

In [None]:
# Evaluate the pipeline with idealized retrieval (exact index) and print the metrics.
metrics = harness.evaluate_pipeline(
    pipeline="ideal-retrieval",
    model="bce",
    index="exact",
    filters="cap10-fp0.1",
    train=train_dataloader,
    val=tqdm(val_dataloader),
)
print_metrics(metrics)

### Evaluate Idealized Filtering

In [None]:
# "ideal-filtering": replaces the Bloom filter with IdealizedFilter, which is given
# the training dataset; only the filtering stage is specified here.
harness.create_pipeline("ideal-filtering", template_name="base",
    stages = RecsPipelineStages(
        filtering = [
            IdealizedFilter(train_dataloader.dataset),
            CandidatePadding(),
        ]
    )
)

In [None]:
# Evaluate the pipeline with idealized filtering (exact index) and print the metrics.
metrics = harness.evaluate_pipeline(
    pipeline="ideal-filtering",
    model="bce",
    index="exact",
    filters="cap10-fp0.1",
    train=train_dataloader,
    val=tqdm(val_dataloader),
)
print_metrics(metrics)

### Evaluate Idealized Scoring

In [None]:
# "ideal-scoring": replaces the scoring stage with the idealized scorer, which is
# given the validation dataset; only the scoring stage is specified here.
harness.create_pipeline("ideal-scoring", template_name="base",
    stages = RecsPipelineStages(
        scoring = [
            # Re-fetching user avg embeddings keeps retrieval changes from affecting scoring
            UserAvgEmbeddingFetcher(),
            IdealizedMatrixFactorizationScoring(val_dataloader.dataset),
        ]
    )
)

In [None]:
# Evaluate the pipeline with idealized scoring (exact index) and print the metrics.
metrics = harness.evaluate_pipeline(
    pipeline="ideal-scoring",
    model="bce",
    index="exact",
    filters="cap10-fp0.1",
    train=train_dataloader,
    val=tqdm(val_dataloader),
)
print_metrics(metrics)

### Evaluate Idealized Ordering

#### Order descending by score (omitting dithering)

In [None]:
# "ideal-ordering": an empty ordering stage, so the dithering step of the
# "base" template is omitted.
harness.create_pipeline("ideal-ordering", template_name="base",
    stages = RecsPipelineStages(ordering = [])
)

In [None]:
# Evaluate the pipeline without dithering (exact index) and print the metrics.
metrics = harness.evaluate_pipeline(
    pipeline="ideal-ordering",
    model="bce",
    index="exact",
    filters="cap10-fp0.1",
    train=train_dataloader,
    val=tqdm(val_dataloader),
)
print_metrics(metrics)

### Improved Retrieval

#### Learned user embedding, exact NN search, no idealization

In [None]:
# "learned-vector-retrieval": fetch user embeddings via UserEmbeddingFetcher
# before the nearest-neighbour search.
harness.create_pipeline("learned-vector-retrieval", template_name="base",
    stages = RecsPipelineStages(
        retrieval = [
            UserEmbeddingFetcher(),
            ANNSearch(),
            # presumably restores the averaged embedding for the later stages — confirm
            UserAvgEmbeddingFetcher()
        ]
    )
)

In [None]:
# Learned user embedding retrieval, evaluated against the exact index.
metrics = harness.evaluate_pipeline(
    pipeline="learned-vector-retrieval",
    model="bce",
    index="exact",
    filters="cap10-fp0.1",
    train=train_dataloader,
    val=tqdm(val_dataloader),
)
print_metrics(metrics)

#### Learned user embedding, approx NN search, no idealization

In [None]:
# Same pipeline as the previous cell, evaluated against the approximate index.
metrics = harness.evaluate_pipeline(
    pipeline="learned-vector-retrieval",
    model="bce",
    index="approx",
    filters="cap10-fp0.1",
    train=train_dataloader,
    val=tqdm(val_dataloader),
)
print_metrics(metrics)

#### Averaged user embedding, exact NN search, no idealization

In [None]:
# "avg-vector-retrieval": the same retrieval stage list as the "base" template,
# registered under its own name for comparison with the other retrieval variants.
harness.create_pipeline("avg-vector-retrieval", template_name="base",
    stages = RecsPipelineStages(
        retrieval = [
            UserAvgEmbeddingFetcher(),
            ANNSearch(),
        ]
    )
)

In [None]:
# Averaged user embedding retrieval, evaluated against the exact index.
metrics = harness.evaluate_pipeline(
    pipeline="avg-vector-retrieval",
    model="bce",
    index="exact",
    filters="cap10-fp0.1",
    train=train_dataloader,
    val=tqdm(val_dataloader),
)
print_metrics(metrics)

#### Averaged user embedding, approx NN search, no idealization

In [None]:
# Averaged user embedding retrieval, evaluated against the approximate index.
metrics = harness.evaluate_pipeline(
    pipeline="avg-vector-retrieval",
    model="bce",
    index="approx",
    filters="cap10-fp0.1",
    train=train_dataloader,
    val=tqdm(val_dataloader),
)
print_metrics(metrics)

#### Item embeddings, exact NN search, no idealization

In [None]:
# "item-vectors-retrieval": fetch item embeddings and use them in place of the
# user embeddings as the nearest-neighbour search input.
harness.create_pipeline("item-vectors-retrieval", template_name="base",
    stages = RecsPipelineStages(
        retrieval = [
            ItemEmbeddingsFetcher(),
            UseItemEmbeddingsAsUserEmbeddings(),
            ANNSearch(),
        ]
    )
)

In [None]:
# Item-embedding retrieval, evaluated against the exact index.
metrics = harness.evaluate_pipeline(
    pipeline="item-vectors-retrieval",
    model="bce",
    index="exact",
    filters="cap10-fp0.1",
    train=train_dataloader,
    val=tqdm(val_dataloader),
)
print_metrics(metrics)

#### Item embeddings, approx NN search, no idealization

In [None]:
# Item-embedding retrieval, evaluated against the approximate index.
metrics = harness.evaluate_pipeline(
    pipeline="item-vectors-retrieval",
    model="bce",
    index="approx",
    filters="cap10-fp0.1",
    train=train_dataloader,
    val=tqdm(val_dataloader),
)
print_metrics(metrics)

#### New baseline for further improvements

In [None]:
# New baseline template: the base stages with retrieval switched to item embeddings.
# Registered with the same (model, name, stages) call shape as the other
# create_template cells, because the filter sweep further down looks it up as
# harness.templates["bce"]["improved-retrieval"].
harness.create_template("bce", "improved-retrieval",
    RecsPipelineStages(
        retrieval = [
            ItemEmbeddingsFetcher(),
            UseItemEmbeddingsAsUserEmbeddings(),
            ANNSearch(),
        ],
        filtering = [
            BloomFilter(),
            CandidatePadding(),
        ],
        scoring = [
            # Re-fetching user avg embeddings keeps retrieval changes from affecting scoring
            UserAvgEmbeddingFetcher(),
            MatrixFactorizationScoring(),
        ],
        ordering = [
            DitheredOrdering(epsilon=dithering_eps),
        ]
    )
)

In [None]:
def evaluate_idealized_stages(model_name, base_name):
    """Evaluate a base template with each stage swapped for its idealized version.

    For each of the retrieval, filtering, scoring and ordering stages this builds a
    pipeline from ``harness.templates[base_name]`` using
    ``harness.stages["ideal-<stage>"]`` as overrides, computes ranking metrics for
    it, and stores both the pipeline and the metrics on the harness under
    ``"<base_name>-with-ideal-<stage>"``.

    Relies on the notebook globals ``harness``, ``train_dataloader`` and
    ``val_dataloader``.

    Args:
        model_name: key into ``harness.models`` for the model to evaluate.
        base_name: key into ``harness.templates`` for the template to start from.

    Returns:
        dict mapping each stage name to the metrics computed with that stage
        idealized. (This dict used to be built and then discarded.)
    """
    # NOTE(review): the reporting cells read harness.metrics[<model>][<name>], whereas
    # this function writes harness.metrics[<name>] — confirm which layout the harness uses.
    stage_names = ["retrieval", "filtering", "scoring", "ordering"]
    metrics = {}

    for stage_name in stage_names:
        ideal_name = f"ideal-{stage_name}"
        combined_name = f"{base_name}-with-{ideal_name}"

        # Base template with only this one stage replaced by its idealized version.
        harness.pipelines[combined_name] = \
            harness.templates[base_name].build(
                overrides=harness.stages[ideal_name]
            )

        harness.metrics[combined_name] = \
            harness.models[model_name].compute_ranking_metrics(
                build_prediction_fn(harness.pipelines[combined_name], train_dataloader),
                tqdm(val_dataloader),
                harness.num_recs
            )

        metrics[stage_name] = harness.metrics[combined_name]

    return metrics

In [None]:
# Call the notebook-level helper with (model, template), matching every other
# call site in this notebook; the reporting cell below reads the "bce" results.
evaluate_idealized_stages("bce", "improved-retrieval")

In [None]:
# Print the stored metrics for the "improved-retrieval" baseline with each stage
# idealized in turn.
for stage_name in ["retrieval", "filtering", "scoring", "ordering"]:
        print(f"With idealized {stage_name}")
        print("==============================")
        print_metrics(harness.metrics["bce"][f"improved-retrieval-with-ideal-{stage_name}"])
        print("\n")

### Evaluate Improved Filtering

In [None]:
def sweep_filter_params(model, template, capacities, error_rates):
    """Evaluate a template across a grid of Bloom filter capacities and error rates.

    For every (error rate, capacity) pair a filter set named
    ``cap<capacity>-fp<error_rate>`` is built from the training dataset, swapped
    into the template's filtering stage, and scored on the validation data.
    Relies on the notebook globals ``harness``, ``train_dataloader`` and
    ``val_dataloader``.

    Args:
        model: object exposing ``compute_validation_metrics``.
        template: pipeline template exposing ``build(overrides=...)``.
        capacities: iterable of expected item counts to try.
        error_rates: iterable of false-positive rates to try.

    Returns:
        dict mapping each error rate to a list of ``(capacity, metrics)`` tuples,
        in the order the capacities were given.
    """
    results_by_rate = {}

    for fp_rate in error_rates:
        per_capacity = []

        for expected_items in capacities:
            # Build the filter set for this grid point.
            bloom = harness.build_bloom_filters(
                f"cap{expected_items}-fp{fp_rate}",
                tqdm(train_dataloader.dataset),
                expected_items=expected_items,
                fp_rate=fp_rate
            )

            # Override only the filtering stage; everything else comes from the template.
            filtering_override = RecsPipelineStages(
                filtering = [BloomFilter(bloom), CandidatePadding()]
            )
            candidate_pipeline = template.build(overrides=filtering_override)

            scores = model.compute_validation_metrics(
                build_prediction_fn(candidate_pipeline, train_dataloader),
                tqdm(val_dataloader),
                harness.num_recs
            )
            per_capacity.append((expected_items, scores))

        results_by_rate[fp_rate] = per_capacity

    return results_by_rate

In [None]:
# Grid for the Bloom filter sweep: 3 capacities x 3 false-positive rates.
capacities = [100, 1000, 10000]
error_rates = [0.1, 0.01, 0.001]

# Sweep the filtering stage of the "improved-retrieval" template for the "bce" model.
bloom_filter_metrics = sweep_filter_params(
    harness.models["bce"],
    harness.templates["bce"]["improved-retrieval"],
    capacities,
    error_rates
)

In [None]:
capacities = [100, 1000, 10000]

In [None]:
# Reshape the sweep results for plotting: rows follow error_rates, columns follow
# the capacities in each result list.
# NOTE(review): .item() only works on single-element tensors; the model-only metrics
# displayed earlier are multi-element tensors — confirm compute_validation_metrics
# returns scalars.
filtering_plot_capacities = np.array(capacities)
filtering_plot_recalls = np.array([[m[1]['recall'].cpu().item() for m in bloom_filter_metrics[fp]] for fp in error_rates])
filtering_plot_ndcgs = np.array([[m[1]['ndcg'].cpu().item() for m in bloom_filter_metrics[fp]] for fp in error_rates])

In [None]:
# Recall vs. filter capacity, one line per false-positive rate (row order matches
# error_rates = [0.1, 0.01, 0.001]).
plt.figure(num=1, dpi=150, facecolor='w', edgecolor='k')
plt.plot(filtering_plot_capacities, filtering_plot_recalls[0], label="FP Rate=0.1")
plt.plot(filtering_plot_capacities, filtering_plot_recalls[1], label="FP Rate=0.01")
plt.plot(filtering_plot_capacities, filtering_plot_recalls[2], label="FP Rate=0.001")
# Dashed reference line at a hard-coded recall value; presumably copied from an
# idealized-filtering run — confirm the source of 0.2230.
plt.hlines(0.2230, filtering_plot_capacities[0], filtering_plot_capacities[-1], colors='k', linestyles='dashed', label='Ideal (Estimated)')
plt.xscale("log")
plt.xlabel("Filter Capacity")
plt.ylabel("Recall@100")
plt.legend()

In [None]:
import math

def compute_bytes(capacity, error_rate):
    """Estimate the size in bytes of a Bloom filter.

    Args:
        capacity: expected number of items stored in the filter.
        error_rate: target false-positive rate, strictly between 0 and 1.

    Returns:
        Whole number of bytes (the bit count floor-divided by 8); the bit count
        is never taken below 128, so the result is at least 16.
    """
    # At least one hash function, otherwise floor(log2(1 / error_rate)).
    hash_count = math.floor(math.log2(1 / error_rate))
    if hash_count < 1:
        hash_count = 1

    numerator = capacity * abs(math.log(error_rate))
    denominator = hash_count * (math.log(2) ** 2)
    bits_per_hash = math.ceil(numerator / denominator)

    # Enforce the 128-bit minimum before converting to bytes.
    total_bits = hash_count * bits_per_hash
    if total_bits < 128:
        total_bits = 128
    return total_bits // 8

def compute_kbytes(capacity, error_rate):
    """Estimated Bloom filter size in kilobytes (bytes from compute_bytes / 1024)."""
    size_in_bytes = compute_bytes(capacity, error_rate)
    return size_in_bytes / 1024

In [None]:
# Grid for the size estimate. Note that this rebinds `capacities` to a finer
# grid than the one used for the metric sweep above.
error_rates = [0.1, 0.01, 0.001]
capacities = [100, 300, 500, 1000, 3000, 5000, 10000]

# For each false-positive rate: a list of (capacity, estimated size in kB) pairs,
# in the same order as `capacities`.
bloom_filter_sizes = {
    error_rate: [
        (capacity, compute_kbytes(capacity, error_rate))
        for capacity in capacities
    ]
    for error_rate in error_rates
}

In [None]:
# Rebind the plotting capacities to the finer grid and lay the sizes out as a
# 2-D array: rows follow error_rates, columns follow capacities.
filtering_plot_capacities = np.array(capacities)
filtering_plot_sizes = np.array([[s[1] for s in bloom_filter_sizes[fp]] for fp in error_rates])

In [None]:
# Estimated filter size vs. capacity, one line per false-positive rate.
plt.figure(num=1, dpi=150, facecolor='w', edgecolor='k')
plt.plot(filtering_plot_capacities, filtering_plot_sizes[0], label="FP Rate=0.1")
plt.plot(filtering_plot_capacities, filtering_plot_sizes[1], label="FP Rate=0.01")
plt.plot(filtering_plot_capacities, filtering_plot_sizes[2], label="FP Rate=0.001")
# Dashed reference line at the hard-coded 3.5 kB size budget.
plt.hlines(3.5, filtering_plot_capacities[0], filtering_plot_capacities[-1], colors='lightgray', linestyles='dashed', label='3.5kB Budget')
plt.xscale("log")
plt.xlabel("Filter Capacity (Items)")
plt.ylabel("Filter Size (kBytes)")
plt.legend()

#### New Baseline For Further Improvements

In [None]:
# Build the filter set chosen from the sweep (capacity 5000, 10% false positives)
# under the name the later templates look up. The comma after the name was
# missing, which made this cell a syntax error.
harness.build_bloom_filters(
    "cap5000-fp0.1",
    tqdm(train_dataloader.dataset),
    expected_items=5000,
    fp_rate=0.1
)

In [None]:
# "improved-filtering" baseline template for the "bce" model.
# NOTE(review): BloomFilter() takes no filter set here, so nothing in this cell ties
# the template to the cap5000-fp0.1 filters built above — presumably the filter name
# is supplied at evaluation time; confirm.
harness.create_template("bce", "improved-filtering",
    RecsPipelineStages(
        retrieval = [
            ItemEmbeddingsFetcher(),
            UseItemEmbeddingsAsUserEmbeddings(),
            ANNSearch(),
        ],
        filtering = [
            BloomFilter(),
            CandidatePadding(),
        ],
        scoring = [
            # Re-fetching user avg embeddings keeps retrieval changes from affecting scoring
            UserAvgEmbeddingFetcher(),
            MatrixFactorizationScoring(),
        ],
        ordering = [
            DitheredOrdering(epsilon=dithering_eps),
        ]
    )
)

#### Re-evaluate Idealized Stages

In [None]:
evaluate_idealized_stages("bce", "improved-filtering")

In [None]:
# Report the idealized-stage results for the "improved-filtering" baseline that was
# just evaluated (this cell previously re-printed the "improved-retrieval" results).
for stage_name in ["retrieval", "filtering", "scoring", "ordering"]:
    print(f"With idealized {stage_name}")
    print("==============================")
    print_metrics(harness.metrics["bce"][f"improved-filtering-with-ideal-{stage_name}"])
    print("\n")

### Improved Ordering

#### Reduced dithering

In [None]:
def float_range(start, stop, step):
    """Yield evenly spaced values from ``start`` up to, but excluding, ``stop``.

    Each value is computed as ``start + k * step`` rather than by repeated
    addition, so rounding error does not accumulate and push an extra value
    under ``stop`` (e.g. 0..1 in steps of 0.1 yields 10 values, not 11).

    Args:
        start: first value yielded (if it is below ``stop``).
        stop: exclusive upper bound.
        step: positive increment between consecutive values.

    Raises:
        ValueError: if ``step`` is not positive; such a step could never reach
            ``stop`` and previously produced an endless generator.
    """
    if step <= 0:
        raise ValueError("step must be positive")

    index = 0
    value = start
    while value < stop:
        yield value
        index += 1
        value = start + index * step

In [None]:
import copy

# Sweep the dithering strength. For each epsilon: evaluate the pipeline with result
# caching on, re-run only the final (ordering) component on copies of the cached
# results, and measure how many of the top-`num_recs` positions the two runs share.
ordering_epsilons = list(float_range(1.0,3.75,0.25))

dithering_metrics = []

for epsilon in ordering_epsilons:
    pipeline_name = f"improved-ordering-{epsilon}"

    # NOTE(review): this sweep is keyed on "warp", but the "improved-filtering" template
    # was created for "bce" and the WARP model is only loaded further down — confirm
    # which model is intended here.
    harness.create_pipeline(
        "warp", pipeline_name, template_name="improved-filtering",
        stages = RecsPipelineStages(
            ordering = [
                DitheredOrdering(epsilon=epsilon),
            ]
        )
    )

    dithering_pipeline = harness.pipelines["warp"][pipeline_name]
    dithering_pipeline.caching = True

    m = harness.evaluate_pipeline(
        "warp", pipeline_name, train_dataloader, tqdm(val_dataloader)
    )

    # Keep the first run's results and give the pipeline a fresh cache for the re-run.
    initial_results = dithering_pipeline.cache
    dithering_pipeline.cache = {}

    for user_id in tqdm(initial_results.keys()):
        # Deep copy so the re-run cannot modify the first run's results.
        user_recs = copy.deepcopy(initial_results[user_id])
        user_recs = dithering_pipeline.components[-1].run(user_recs)
        dithering_pipeline.cache[user_id] = user_recs

    rerun_results = dithering_pipeline.cache

    dithering_pipeline.caching = False
    dithering_pipeline.cache = {}

    overlaps = []

    for user_id in tqdm(initial_results.keys()):
        _, initial_indices = th.topk(initial_results[user_id].scores, harness.num_recs)
        _, rerun_indices = th.topk(rerun_results[user_id].scores, harness.num_recs)

        # The score tensors may live on the GPU; NumPy cannot read CUDA tensors
        # directly, so copy the indices to host memory first.
        intersection = len(np.intersect1d(
            initial_indices.cpu().numpy(), rerun_indices.cpu().numpy()
        ))
        overlaps.append(intersection)

    # Drop the references to the cached results before the next epsilon.
    initial_results = None
    rerun_results = None

    overlap_counts = np.array(overlaps)
    m['median_overlap'] = np.median(overlap_counts)
    m['mean_overlap'] = np.mean(overlap_counts)
    m['min_overlap'] = np.min(overlap_counts)
    m['max_overlap'] = np.max(overlap_counts)

    dithering_metrics.append((epsilon, m))

In [None]:
# Flatten the sweep results into parallel arrays for plotting.
dithering_plot_epsilons = np.array([m[0] for m in dithering_metrics])
dithering_plot_ndcgs = np.array([m[1]['ndcg'].cpu().item() for m in dithering_metrics])
dithering_plot_overlaps = np.array([m[1]['median_overlap'] for m in dithering_metrics])
# Overlap is counted over the top harness.num_recs positions, so the items not
# shared between runs are num_recs minus the overlap (previously a hard-coded 100).
dithering_plot_novel_items = np.array([harness.num_recs - m[1]['median_overlap'] for m in dithering_metrics])

In [None]:
# NDCG as a function of dithering epsilon; the shaded band marks epsilon 1.5–3.0.
plt.figure(num=2, dpi=150, facecolor='w', edgecolor='k')
plt.plot(dithering_plot_epsilons, dithering_plot_ndcgs)
plt.xlabel("Dithering Epsilon")
plt.ylabel("NDCG")
plt.axvspan(1.5, 3.0, color='green', alpha=0.1, label="Typical Range")
plt.legend()

In [None]:
# Median overlap between two dithered runs as a function of epsilon; the shaded
# band marks epsilon 1.5–3.0.
plt.figure(num=2, dpi=150, facecolor='w', edgecolor='k')
plt.plot(dithering_plot_epsilons, dithering_plot_overlaps)
plt.xlabel("Dithering Epsilon")
plt.ylabel("Overlap (# of Items)")
plt.axvspan(1.5, 3.0, color='green', alpha=0.1, label="Typical Range")
plt.legend()

In [None]:
# NDCG against median overlap. There is a single unlabeled line, so no legend is
# drawn: calling plt.legend() with no labelled artists only emits a warning.
plt.figure(num=2, dpi=150, facecolor='w', edgecolor='k')
plt.plot(dithering_plot_overlaps, dithering_plot_ndcgs)
plt.xlabel("Overlap (# of Items)")
plt.ylabel("NDCG")

#### New Baseline For Further Improvements

In [None]:
adj_dithering_eps = 1.5

In [None]:
# "improved-ordering" baseline template for the "bce" model: the same stage list
# as "improved-filtering", but dithering uses the reduced epsilon adj_dithering_eps.
harness.create_template("bce", "improved-ordering",
    RecsPipelineStages(
        retrieval = [
            ItemEmbeddingsFetcher(),
            UseItemEmbeddingsAsUserEmbeddings(),
            ANNSearch(),
        ],
        filtering = [
            BloomFilter(),
            CandidatePadding(),
        ],
        scoring = [
            # Re-fetching user avg embeddings keeps retrieval changes from affecting scoring
            UserAvgEmbeddingFetcher(),
            MatrixFactorizationScoring(),
        ],
        ordering = [
            DitheredOrdering(epsilon=adj_dithering_eps),
        ]
    )
)

In [None]:
harness.create_pipeline("bce", "improved-ordering", template_name="improved-ordering")

In [None]:
# Evaluate the "improved-ordering" pipeline for the "bce" model and print the metrics.
# NOTE(review): this uses the positional (model, pipeline, train, val) call shape,
# unlike the keyword form used near the top of the notebook — confirm the signature.
metrics = harness.evaluate_pipeline(
    "bce", "improved-ordering", train_dataloader, tqdm(val_dataloader)
)
print_metrics(metrics)

In [None]:
evaluate_idealized_stages("bce", "improved-ordering")

In [None]:
# Print the stored metrics for the "improved-ordering" baseline with each stage
# idealized in turn.
for stage_name in ["retrieval", "filtering", "scoring", "ordering"]:
        print(f"With idealized {stage_name}")
        print("==============================")
        print_metrics(harness.metrics["bce"][f"improved-ordering-with-ideal-{stage_name}"])
        print("\n")

### Evaluate Model With BPR

#### Load the model

In [None]:
harness.load_model("bpr", "../models/pious-meadow-467-bpr.pt")

#### Compute Raw Model-Only Metrics

In [None]:
# Evaluate the "bpr" model directly on the validation set and print the metrics.
model_metrics = harness.evaluate_model("bpr", tqdm(val_dataloader))
print_metrics(model_metrics)

#### Build Nearest Neighbor Search Indices

In [None]:
harness.build_ann_indices("bpr")

#### Evaluate The End-To-End Pipelines

In [None]:
# Template registered under "bpr" whose retrieval and scoring stages still use the
# "bce" model and index; the pipelines created below replace those stages with
# BPR versions one step at a time.
harness.create_template("bpr", "improved-ordering",
    RecsPipelineStages(
        retrieval = [
            ItemEmbeddingsFetcher(harness.models["bce"]),
            UseItemEmbeddingsAsUserEmbeddings(),
            ANNSearch(harness.indices["bce"]["approx"]),
        ],
        filtering = [
            BloomFilter(harness.filters["cap5000-fp0.1"]),
            CandidatePadding(),
        ],
        scoring = [
            # Re-fetching user avg embeddings keeps retrieval changes from affecting scoring
            UserAvgEmbeddingFetcher(harness.models["bce"]),
            MatrixFactorizationScoring(harness.models["bce"]),
        ],
        ordering = [
            DitheredOrdering(epsilon=adj_dithering_eps),
        ]
    )
)

In [None]:
# "improved-scoring" for BPR: replace only the scoring stage with the "bpr" model.
harness.create_pipeline("bpr", "improved-scoring", template_name="improved-ordering",
    stages = RecsPipelineStages(
        scoring = [
            # Re-fetching user avg embeddings keeps retrieval changes from affecting scoring
            UserAvgEmbeddingFetcher(harness.models["bpr"]),
            MatrixFactorizationScoring(harness.models["bpr"]),
        ],
    )
)

In [None]:
# Evaluate the BPR-scored pipeline and print the metrics.
metrics = harness.evaluate_pipeline(
    "bpr", "improved-scoring", train_dataloader, tqdm(val_dataloader)
)
print_metrics(metrics)

In [None]:
# "improved-scoring-retrieval" for BPR: replace both retrieval and scoring with the
# "bpr" model. Retrieval here uses UserAvgEmbeddingFetcher against the BPR
# approximate index, not the item-embedding stages used in the template.
harness.create_pipeline("bpr", "improved-scoring-retrieval", template_name="improved-ordering",
    stages = RecsPipelineStages(
        retrieval = [
            UserAvgEmbeddingFetcher(harness.models["bpr"]),
            ANNSearch(harness.indices["bpr"]["approx"]),
        ],
        scoring = [
            # Re-fetching user avg embeddings keeps retrieval changes from affecting scoring
            UserAvgEmbeddingFetcher(harness.models["bpr"]),
            MatrixFactorizationScoring(harness.models["bpr"]),
        ],
    )
)

In [None]:
# Evaluate the pipeline with BPR retrieval and scoring and print the metrics.
metrics = harness.evaluate_pipeline(
    "bpr", "improved-scoring-retrieval", train_dataloader, tqdm(val_dataloader)
)
print_metrics(metrics)

### Evaluate Model With WARP Loss

In [None]:
harness.load_model("warp", "../models/good-sweep-1-warp-01.pt")

#### Build Nearest Neighbor Search Indices

In [None]:
harness.build_ann_indices("warp")

#### Compute Raw Model-Only Metrics

In [None]:
# Evaluate the "warp" model directly on the validation set and print the metrics.
model_metrics = harness.evaluate_model("warp", tqdm(val_dataloader))
print_metrics(model_metrics)

#### Evaluate The End-To-End Pipelines

In [None]:
# Template registered under "warp" whose retrieval and scoring stages still use the
# "bce" model and index; the pipelines created below replace those stages with
# WARP versions. Indices and filters are read from the harness, and dithering uses
# the reduced epsilon, exactly as in the matching BPR template
# (`ann_indices` / `bloom_filters` are not defined anywhere in this notebook).
harness.create_template("warp", "improved-ordering",
    RecsPipelineStages(
        retrieval = [
            ItemEmbeddingsFetcher(harness.models["bce"]),
            UseItemEmbeddingsAsUserEmbeddings(),
            ANNSearch(harness.indices["bce"]["approx"]),
        ],
        filtering = [
            BloomFilter(harness.filters["cap5000-fp0.1"]),
            CandidatePadding(),
        ],
        scoring = [
            # Re-fetching user avg embeddings keeps retrieval changes from affecting scoring
            UserAvgEmbeddingFetcher(harness.models["bce"]),
            MatrixFactorizationScoring(harness.models["bce"]),
        ],
        ordering = [
            DitheredOrdering(epsilon=adj_dithering_eps),
        ]
    )
)

In [None]:
# "improved-scoring" for WARP: replace only the scoring stage with the "warp" model.
harness.create_pipeline("warp", "improved-scoring", template_name="improved-ordering",
    stages = RecsPipelineStages(
        scoring = [
            # Re-fetching user avg embeddings keeps retrieval changes from affecting scoring
            UserAvgEmbeddingFetcher(harness.models["warp"]),
            MatrixFactorizationScoring(harness.models["warp"]),
        ],
    )
)

In [None]:
# Evaluate the WARP-scored pipeline and print the metrics.
metrics = harness.evaluate_pipeline(
    "warp", "improved-scoring", train_dataloader, tqdm(val_dataloader)
)
print_metrics(metrics)

In [None]:
# "improved-scoring-retrieval" for WARP: replace both retrieval and scoring with the
# "warp" model. The index is read from the harness, as in the BPR cells
# (`ann_indices` is not defined anywhere in this notebook).
harness.create_pipeline("warp", "improved-scoring-retrieval", template_name="improved-ordering",
    stages = RecsPipelineStages(
        retrieval = [
            ItemEmbeddingsFetcher(harness.models["warp"]),
            UseItemEmbeddingsAsUserEmbeddings(),
            ANNSearch(harness.indices["warp"]["approx"]),
        ],
        scoring = [
            # Re-fetching user avg embeddings keeps retrieval changes from affecting scoring
            UserAvgEmbeddingFetcher(harness.models["warp"]),
            MatrixFactorizationScoring(harness.models["warp"]),
        ],
    )
)

In [None]:
# Evaluate the pipeline with WARP retrieval and scoring and print the metrics.
metrics = harness.evaluate_pipeline(
    "warp", "improved-scoring-retrieval", train_dataloader, tqdm(val_dataloader)
)
print_metrics(metrics)

#### New Baseline For Further Improvements

In [None]:
# New baseline template for WARP: retrieval and scoring both use the "warp" model.
# Indices and filters are read from the harness, as in the BPR cells
# (`ann_indices` / `bloom_filters` are not defined anywhere in this notebook).
harness.create_template("warp", "improved-scoring",
    RecsPipelineStages(
        retrieval = [
            ItemEmbeddingsFetcher(harness.models["warp"]),
            UseItemEmbeddingsAsUserEmbeddings(),
            ANNSearch(harness.indices["warp"]["approx"]),
        ],
        filtering = [
            BloomFilter(harness.filters["cap5000-fp0.1"]),
            CandidatePadding(),
        ],
        scoring = [
            # Re-fetching user avg embeddings keeps retrieval changes from affecting scoring
            UserAvgEmbeddingFetcher(harness.models["warp"]),
            MatrixFactorizationScoring(harness.models["warp"]),
        ],
        ordering = [
            DitheredOrdering(epsilon=adj_dithering_eps),
        ]
    )
)

#### Re-evaluate Idealized Stages

In [None]:
# TODO: Sort out how to handle creating idealized stages

In [None]:
# Idealized retrieval on top of the WARP baseline. The exact index is read from the
# harness, as in the BPR cells (`ann_indices` is not defined anywhere in this notebook).
harness.create_pipeline("warp", "ideal-retrieval", template_name="improved-scoring",
    stages = RecsPipelineStages(
        retrieval = [
            UserEmbeddingFetcher(harness.models["warp"]),
            IdealizedANNSearch(val_dataloader.dataset, harness.indices["warp"]["exact"]),
        ]
    )
)

In [None]:
# Idealized filtering on top of the WARP baseline: IdealizedFilter is given the
# training dataset and takes the place of the Bloom filter.
harness.create_pipeline("warp", "ideal-filtering", template_name="improved-scoring",
    stages = RecsPipelineStages(
        filtering = [
            IdealizedFilter(train_dataloader.dataset),
            CandidatePadding(),
        ]
    )
)

In [None]:
# Idealized scoring on top of the WARP baseline: the idealized scorer is given
# the "warp" model and the validation dataset.
harness.create_pipeline("warp", "ideal-scoring", template_name="improved-scoring",
    stages = RecsPipelineStages(
        scoring = [
            # Re-fetching user avg embeddings keeps retrieval changes from affecting scoring
            UserAvgEmbeddingFetcher(harness.models["warp"]),
            IdealizedMatrixFactorizationScoring(harness.models["warp"], val_dataloader.dataset),
        ]
    )
)

In [None]:
# Idealized ordering on top of the WARP baseline: an empty ordering stage, so no
# dithering is applied.
harness.create_pipeline("warp", "ideal-ordering", template_name="improved-scoring",
    stages = RecsPipelineStages(
        ordering = []
    )
)

In [None]:
evaluate_idealized_stages("warp", "improved-scoring")

In [None]:
# Report the results stored by the evaluate_idealized_stages("warp", "improved-scoring")
# call in the previous cell. That call used to be repeated here, which ran all four
# evaluations a second time.
for stage_name in ["retrieval", "filtering", "scoring", "ordering"]:
    print(f"With idealized {stage_name}")
    print("==============================")
    print_metrics(harness.metrics["warp"][f"improved-scoring-with-ideal-{stage_name}"])
    print("\n")

In [None]:
# TODO: Move filtering before scoring
# TODO: Compare pipeline-only improvements to model-only improvements

In [None]:
# TODO: Try injecting most popular items into the candidate set