# Ноутбук с решением кейса

Итоги всех моделей: 


| Модель                    | ndcg@10 | recall@10 | map@10 | serendipity@10 | coverage |
| :------------------------ | :------ | :-------- | :----- | :------------- | :------- |
| **Item2item + BM25**      | 0.090   | 0.072     | 0.049  | 0.081          | 0.615    |
| **LightGBM Reranker**     | 0.029   | 0.023     | 0.012  | 0.031          | 0.024    |
| **EASE_2**                | 0.028   | 0.023     | 0.013  | 0.021          | 0.264    |
| **EASE_1**                | 0.021   | 0.016     | 0.009  | 0.018          | 0.398    |
| **MultiChannelRecommender** | 0.014   | 0.009     | 0.006  | 0.013          | 0.254    |
| **SVD**                   | 0.011   | 0.007     | 0.004  | 0.012          | 0.038    |



In [1]:
import pandas as pd
import polars as pl
import numpy as np
import typing as t
import matplotlib.pyplot as plt
from abc import ABC, abstractmethod
#from tqdm.notebook import tqdm
from tqdm import tqdm
from IPython.display import HTML
from dataclasses import dataclass
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import normalize


def dataframe_to_html(df: pl.DataFrame, columns: list) -> str:
    """
    Convert a Polars DataFrame to an HTML table with specified columns.

    Args:
        df: Polars DataFrame containing the data.
        columns: List of column names to include in the HTML table.

    Returns:
        str: HTML string representing the table.
    """
    html = ["<table border='1'>"]

    html.append("<tr>")
    for col in columns:
        html.append(f"<th>{col}</th>")
    html.append("</tr>")

    for row in df.iter_rows(named=True):
        html.append("<tr>")
        for col in columns:
            value = row[col]
            if col == "image_url":
                html.append(f"<td><img src='{value}' width='100' /></td>")
            else:
                html.append(f"<td>{value}</td>")
        html.append("</tr>")

    html.append("</table>")
    return "\n".join(html)


In [2]:
data_folder = "data/"

train = pl.read_parquet(data_folder + "train.pq")
test_exploded = pl.read_parquet(data_folder + "test.pq")
test = test_exploded.group_by("user_id", maintain_order=True).agg(pl.col("item_id"))

In [3]:
books = pl.read_parquet(data_folder + "books.pq")

In [4]:
n_books = books.shape[0]
print(f"N books: {n_books}")

n_cold_books = n_books - train["item_id"].n_unique()
print(f"N 'cold' books: {n_cold_books}")

N books: 34322
N 'cold' books: 3022


In [5]:
# ACHTUNG! DO NOT TOUCH

@dataclass
class AtKMetric(ABC):
    k: int

    @property
    @abstractmethod
    def name(self) -> str:
        raise NotImplementedError

    @property
    def full_name(self) -> str:
        return f"{self.name}@{self.k}"

    @abstractmethod
    def __call__(self, *, preds_col: str = "preds", ground_truth_col: str = "ground_truth") -> pl.Expr:
        raise NotImplementedError


class NDCG(AtKMetric):
    @property
    def name(self) -> str:
        return "ndcg"

    def __call__(self, *, preds_col: str = "preds", ground_truth_col: str = "ground_truth") -> pl.Expr:
        def _dcg(scores: np.ndarray) -> float:
            # Handle empty arrays
            if len(scores) == 0:
                return 0.0
            # Use vectorized operations for better performance
            positions = np.arange(1, len(scores) + 1, dtype=np.float64)
            return np.sum((np.power(2, scores) - 1) / np.log2(positions + 1))

        def ndcg(predicted: list[t.Any], gt_items: list[t.Any]) -> float:
            # Handle empty predictions or ground truth
            if not predicted or not gt_items:
                return 0.0
                
            # Take only top-k predictions
            predicted = predicted[:self.k]
            
            # Create relevance scores (1 for relevant items, 0 otherwise)
            relevance = np.array([1 if x in gt_items else 0 for x in predicted], dtype=np.float64)
            
            rank_dcg = _dcg(relevance)
            if rank_dcg == 0.0:
                return 0.0
                
            # Ideal DCG: sort ground truth by relevance (all 1s) and take top-k
            ideal_relevance = np.ones(min(len(gt_items), self.k), dtype=np.float64)
            ideal_dcg = _dcg(ideal_relevance)
            
            if ideal_dcg == 0.0:
                return 0.0
                
            return rank_dcg / ideal_dcg

        return pl.struct([preds_col, ground_truth_col]).map_elements(
            lambda x: ndcg(x[preds_col], x[ground_truth_col]),
            return_dtype=pl.Float64
        ).alias(self.full_name)


class Recall(AtKMetric):
    @property
    def name(self) -> str:
        return "recall"

    def __call__(self, *, preds_col: str = "preds", ground_truth_col: str = "ground_truth") -> pl.Expr:
        def recall(predicted: list[t.Any], gt_items: list[t.Any]) -> float:
            # Handle empty ground truth
            if not gt_items:
                return 0.0
                
            # Take only top-k predictions
            predicted = predicted[:self.k]
            
            # Calculate intersection
            intersection = len(set(gt_items).intersection(set(predicted)))
            return intersection / len(gt_items)

        return pl.struct([preds_col, ground_truth_col]).map_elements(
            lambda x: recall(x[preds_col], x[ground_truth_col]),
            return_dtype=pl.Float64
        ).alias(self.full_name)


def coverage(df: pl.DataFrame, n_items_in_catalog: int, preds_col: str = "preds") -> float:
    if n_items_in_catalog <= 0:
        return 0.0
        
    unique_recommended = df.select(
        pl.col(preds_col).explode().n_unique()
    ).item()
    return unique_recommended / n_items_in_catalog


def evaluate_recommender(
    df: pl.DataFrame,
    model_preds_col: str,
    ground_truth_col: str = "item_id",
    n_items_in_catalog: int = n_books,
    k: int = 10,
) -> dict:
    metrics = [
        NDCG(k=k),
        Recall(k=k),
    ]
    result = {}
    
    # Create a copy to avoid modifying the original dataframe
    result_df = df.clone()
    
    for metric in metrics:
        result_df = result_df.with_columns(
            metric(preds_col=model_preds_col, ground_truth_col=ground_truth_col)
        )
        result[metric.full_name] = round(result_df.select(pl.col(metric.full_name).mean()).item(), 3)

    if n_items_in_catalog is not None:
        result["coverage"] = round(coverage(result_df, n_items_in_catalog, preds_col=model_preds_col), 3)
    
    return result

In [6]:
class MeanAveragePrecision(AtKMetric):
    @property
    def name(self) -> str:
        return "map"

    def __call__(self, *, preds_col: str = "preds", ground_truth_col: str = "ground_truth") -> pl.Expr:
        def map_k(predicted: list[t.Any], gt_items: list[t.Any]) -> float:
            if not predicted or not gt_items:
                return 0.0
            
            predicted = predicted[:self.k]
            gt_set = set(gt_items)
            
            avg_precision = 0.0
            hits = 0
            
            for i, p in enumerate(predicted):
                if p in gt_set:
                    hits += 1
                    avg_precision += hits / (i + 1)
            
            return avg_precision / min(len(gt_items), self.k)

        return pl.struct([preds_col, ground_truth_col]).map_elements(
            lambda x: map_k(x[preds_col], x[ground_truth_col]),
            return_dtype=pl.Float64
        ).alias(self.full_name)

class Serendipity(AtKMetric):
    def __init__(self, k: int, baseline_col: str = "toppopular_recs"):
        super().__init__(k=k)
        self.baseline_col = baseline_col
        
    @property
    def name(self) -> str:
        return "serendipity"
        
    def __call__(self, *, preds_col: str = "preds", ground_truth_col: str = "ground_truth") -> pl.Expr:
        def compute_serendipity(row):
            preds = row[preds_col][:self.k]
            ground_truth = row[ground_truth_col]
            baseline = row[self.baseline_col][:self.k]
            
            #не из TopPopular и релевантные 
            unexpected_correct = set(preds) - set(baseline)
            unexpected_correct &= set(ground_truth)
            
            if not ground_truth:
                return 0.0
                
            # нормализуем по числу релевантных 
            return len(unexpected_correct) / min(len(ground_truth), self.k)
            
        return pl.struct([preds_col, ground_truth_col, self.baseline_col]).map_elements(
            lambda x: compute_serendipity(x),
            return_dtype=pl.Float64
        ).alias(self.full_name)


def evaluate_recommender_modified(
    df: pl.DataFrame,
    model_preds_col: str,
    ground_truth_col: str = "item_id",
    n_items_in_catalog: int = n_books,
    k: int = 10,
    baseline_col: str = "toppopular_recs"  
) -> dict:
    metrics = [
        NDCG(k=k),
        Recall(k=k),
        MeanAveragePrecision(k=k),
    ]
    
    if baseline_col in df.columns and baseline_col != model_preds_col:
        metrics.append(Serendipity(k=k, baseline_col=baseline_col))
    
    result = {}
    
    # Create a copy to avoid modifying the original dataframe
    result_df = df.clone()
    
    for metric in metrics:
        result_df = result_df.with_columns(
            metric(preds_col=model_preds_col, ground_truth_col=ground_truth_col)
        )
        result[metric.full_name] = round(result_df.select(pl.col(metric.full_name).mean()).item(), 3)

    if n_items_in_catalog is not None:
        result["coverage"] = round(coverage(result_df, n_items_in_catalog, preds_col=model_preds_col), 3)
            
    return result

## Запустим baseline еше раз для сравнения

In [7]:
class BaseRecommender(ABC):
    def __init__(self):
        self.trained = False

    @abstractmethod
    def fit(self, df: pl.DataFrame, **kwargs) -> None:
        # реализация может быть любой, никаких ограничений
        # не забудьте про self.trained = True
        self.trained = True

    @abstractmethod
    def predict(self, df: pl.DataFrame, topn: int = 10, **kwargs) -> list[np.ndarray]:
        # реализация может быть любой, НО
        # должен возвращаться список массивов из item_id, которые есть в `books`, чтобы корректно работал подсчет метрик
        pass

In [8]:
class RandomRecommender:
    def __init__(self):
        self.trained = False
        self.item_ids = None

    def fit(self, df: pl.DataFrame, item_id_col: str = "item_id") -> None:
        # сохраняем уникальные айтемы
        self.item_ids = df[item_id_col].unique().to_numpy()
        self.trained = True

    def predict(self, df: pl.DataFrame, topn: int = 10) -> list[np.ndarray]:
        assert self.trained
        # случайно выбираем `topn` айтемов
        preds = []
        for _ in tqdm(range(len(df))):
            preds.append(np.random.choice(self.item_ids, size=topn, replace=False))
        return preds


random_rec = RandomRecommender()
random_rec.fit(books)  # выбираем из всего доступного каталога
test = test.with_columns(
    random_recs=pl.Series(random_rec.predict(test))
)
print(test.head())
evaluate_recommender_modified(df=test, model_preds_col="random_recs")

100%|██████████| 185828/185828 [00:59<00:00, 3125.10it/s]


shape: (5, 3)
┌─────────────────────────────────┬───────────────────────┬─────────────────────────┐
│ user_id                         ┆ item_id               ┆ random_recs             │
│ ---                             ┆ ---                   ┆ ---                     │
│ str                             ┆ list[i64]             ┆ array[i64, 10]          │
╞═════════════════════════════════╪═══════════════════════╪═════════════════════════╡
│ 00000377eea48021d3002730d56aca… ┆ [13252]               ┆ [4549, 3786, … 23]      │
│ 00009ab2ed8cbfceda5a59da409663… ┆ [2328]                ┆ [17711, 20522, … 14156] │
│ 00009e46d18f223a82b22da38586b6… ┆ [28636, 30197]        ┆ [30088, 5896, … 9325]   │
│ 0001085188e302fc6b2568de45a5f5… ┆ [2159, 2969, … 33630] ┆ [26856, 18786, … 5704]  │
│ 00014c578111090720e20f5705eba0… ┆ [45, 3513, … 33273]   ┆ [13082, 14100, … 7391]  │
└─────────────────────────────────┴───────────────────────┴─────────────────────────┘


{'ndcg@10': 0.001, 'recall@10': 0.0, 'map@10': 0.0, 'coverage': 1.0}

In [8]:
class TopPopular:
    def __init__(self):
        self.trained = False
        self.recommendations = None

    def fit(self, df: pl.DataFrame, item_id_col: str = "item_id") -> None:
        # считаем популярность айтемов
        self.recommendations = (
            df.lazy()
            .group_by(item_id_col)
            .agg(count=pl.len())
            .sort("count", descending=True)
            .collect()
            [item_id_col]
            .to_numpy()
        )
        self.trained = True

    def predict(self, df: pl.DataFrame, topn: int = 10) -> list[np.ndarray]:
        assert self.trained
        # возвращаем для всех одно и то же
        return [self.recommendations[:topn]] * len(df)


toppop = TopPopular()
toppop.fit(train)
test = test.with_columns(
    toppopular_recs=pl.Series(toppop.predict(test))
)
print(test.head())
evaluate_recommender_modified(df=test, model_preds_col="toppopular_recs")

shape: (5, 3)
┌─────────────────────────────────┬───────────────────────┬────────────────────────┐
│ user_id                         ┆ item_id               ┆ toppopular_recs        │
│ ---                             ┆ ---                   ┆ ---                    │
│ str                             ┆ list[i64]             ┆ array[i64, 10]         │
╞═════════════════════════════════╪═══════════════════════╪════════════════════════╡
│ 00000377eea48021d3002730d56aca… ┆ [13252]               ┆ [4058, 15514, … 13159] │
│ 00009ab2ed8cbfceda5a59da409663… ┆ [2328]                ┆ [4058, 15514, … 13159] │
│ 00009e46d18f223a82b22da38586b6… ┆ [28636, 30197]        ┆ [4058, 15514, … 13159] │
│ 0001085188e302fc6b2568de45a5f5… ┆ [2159, 2969, … 33630] ┆ [4058, 15514, … 13159] │
│ 00014c578111090720e20f5705eba0… ┆ [45, 3513, … 33273]   ┆ [4058, 15514, … 13159] │
└─────────────────────────────────┴───────────────────────┴────────────────────────┘


{'ndcg@10': 0.033, 'recall@10': 0.033, 'map@10': 0.016, 'coverage': 0.0}

In [11]:
tags_as_strings = books["tags"].map_elements(lambda x: " ".join(x), return_dtype=pl.String)

vectorizer = CountVectorizer(min_df=0.2, max_df=0.8, max_features=32)
X = vectorizer.fit_transform(tags_as_strings)
embeddings = normalize(X.toarray()).tolist()

books = books.with_columns(
    pl.Series("embedding", embeddings)
)
display(books.head())

item_id,series,tags,title,description,url,image_url,authors,embedding_new,embedding
i64,list[str],list[str],str,str,str,str,list[struct[2]],list[f64],list[f64]
0,[null],"[""e-book"", ""young-adult"", … ""y-a""]","""Hallie Hath No Fury . . .""","""There are two sides to every s…","""https://www.goodreads.com/book…","""https://images.gr-assets.com/b…","[{""1879494"",""""}]","[0.550546, 0.033266, … -0.019082]","[0.0, 0.585206, … 0.0]"
1,"[""149079""]","[""primary"", ""melissa-j--morgan"", … ""fiction""]","""Hide and Shriek: Super Special…","""The girls go on an overnight a…","""https://www.goodreads.com/book…","""https://s.gr-assets.com/assets…","[{""21740"",""""}]","[0.585932, 0.176821, … -0.004595]","[0.0, 0.0, … 0.199007]"
2,[null],"[""friendship"", ""middle-reader"", … ""my-library""]","""Dear Mom, You're Ruining My Li…","""Samantha Slayton worries about…","""https://www.goodreads.com/book…","""https://s.gr-assets.com/assets…","[{""18946"",""""}]","[0.5157, 0.278442, … -0.002641]","[0.0, 0.27735, … 0.0]"
3,"[""151088""]","[""summer-2017"", ""bullying"", … ""re-read""]","""Bratfest at Tiffany's (Clique …","""Massie Block: The Briarwood bo…","""https://www.goodreads.com/book…","""https://images.gr-assets.com/b…","[{""4605"",""""}]","[0.638572, 0.326413, … -0.04838]","[0.0, 0.0, … 0.0]"
4,"[""812067""]","[""rosemary-vernon"", ""young-adult"", … ""to-read""]","""Questions of Love (Sweet Dream…","""When Sammi Edwards is chosen t…","""https://www.goodreads.com/book…","""https://images.gr-assets.com/b…","[{""792676"",""""}]","[0.324899, 0.076244, … -0.013045]","[0.0, 0.2, … 0.0]"


In [12]:
class ItemkNN:
    def __init__(self):
        self.trained: bool = False
        self.train_most_liked_item: pl.DataFrame | None = None
        self.items_embeddings: np.ndarray | None = None
        self.items_item_ids: np.ndarray | None = None

    def fit(
        self,
        df: pl.DataFrame,
        items_df: pl.DataFrame,
        item_id_col: str = "item_id"
    ) -> None:
        self.items_embeddings = np.array(items_df["embedding"].to_list())
        self.items_item_ids = items_df[item_id_col].to_numpy()

        # find the most liked item for each user
        self.train_most_liked_item = (
            df.lazy()
            .sort("rating", descending=True)
            .group_by("user_id")
            .agg(pl.col(item_id_col).first().alias(item_id_col))
            .join(
                items_df.lazy().select([item_id_col, "embedding"]),
                on=item_id_col,
                how="left"
            )
            .collect()
        )
        self.trained = True

    def _get_batch_predictions(
        self,
        batch_user_embeddings: np.ndarray,
        batch_current_items: np.ndarray,
        batch_user_mask: np.ndarray,
        topn: int
    ) -> list[np.ndarray]:
        batch_predictions = []
        
        # compute similarity scores for all items in batch
        batch_scores = batch_user_embeddings @ self.items_embeddings.T

        for user_scores, current_item, has_embedding in zip(
            batch_scores, batch_current_items, batch_user_mask
        ):
            if not has_embedding:
                batch_predictions.append([])
                continue

            # get top (topn + 1) most similar items
            top_indices = np.argpartition(-user_scores, topn + 1)[:topn + 1]
            
            # filter out current item and take topn
            recommended_items = []
            for item_idx in top_indices:
                if self.items_item_ids[item_idx] != current_item:
                    recommended_items.append(self.items_item_ids[item_idx])
                if len(recommended_items) >= topn:
                    break

            batch_predictions.append(recommended_items)

        return batch_predictions

    def predict(
        self,
        df: pl.DataFrame,
        topn: int = 10,
        batch_size: int = 5_000
    ) -> list[np.ndarray]:
        assert self.trained
        
        user_data = (
            df.select(["user_id"])
            .join(self.train_most_liked_item, on="user_id", how="left")
        )

        user_embeddings = np.array(user_data["embedding"].to_list())
        current_items = user_data["item_id"].to_numpy()
        user_mask = user_data["embedding"].is_not_null().to_numpy()

        predictions = []
        n_users = len(user_data)

        for batch_start in tqdm(range(0, n_users, batch_size)):
            batch_end = min(batch_start + batch_size, n_users)
            
            batch_predictions = self._get_batch_predictions(
                user_embeddings[batch_start:batch_end],
                current_items[batch_start:batch_end],
                user_mask[batch_start:batch_end],
                topn
            )
            predictions.extend(batch_predictions)

        return predictions


item_knn = ItemkNN()
item_knn.fit(train, books)
test = test.with_columns(
    item_knn_recs=pl.Series(item_knn.predict(test))
)
print(test.head())
evaluate_recommender_modified(df=test, model_preds_col="item_knn_recs")

100%|██████████| 38/38 [01:38<00:00,  2.60s/it]


shape: (5, 4)
┌────────────────────────┬───────────────────────┬────────────────────────┬────────────────────────┐
│ user_id                ┆ item_id               ┆ toppopular_recs        ┆ item_knn_recs          │
│ ---                    ┆ ---                   ┆ ---                    ┆ ---                    │
│ str                    ┆ list[i64]             ┆ array[i64, 10]         ┆ list[i64]              │
╞════════════════════════╪═══════════════════════╪════════════════════════╪════════════════════════╡
│ 00000377eea48021d30027 ┆ [13252]               ┆ [4058, 15514, … 13159] ┆ [17956, 12482, …       │
│ 30d56aca…              ┆                       ┆                        ┆ 31641]                 │
│ 00009ab2ed8cbfceda5a59 ┆ [2328]                ┆ [4058, 15514, … 13159] ┆ [16282, 986, … 21243]  │
│ da409663…              ┆                       ┆                        ┆                        │
│ 00009e46d18f223a82b22d ┆ [28636, 30197]        ┆ [4058, 15514, … 13159] ┆ [

{'ndcg@10': 0.008,
 'recall@10': 0.008,
 'map@10': 0.003,
 'serendipity@10': 0.008,
 'coverage': 0.764}

## 1: Двухэтапная модель с LightGBM

Двухэтапный пайплайн, сочетающий генерацию кандидатов и их последующее ранжирование - попытка объединить базовые идеи и сделать лучше

1. Генерация кандидатов

Для каждого юзера $u$ делаем первоначальный набор кандидатов $C_u$:
   - топ-N рекомендаций от модели коллаборативной фильтрации ItemKNN
   - топ-N популярных айтемов 

$$ C_u = \text{ItemKNN}(u, N) \cup \text{TopPopular}(N) $$

2. Ранжирование с помощью LightGBM

Обучение на позитивных примерах (реальные взаимодействия (user, item) из train) и негативных примерах (hard negative sampling -  в качестве негативных примеров берутся айтемы, которые ItemKNN порекомендовала пользователю, но с которыми он в действительности не взаимодействовал)

Для каждого примера (user, item) формируем вектор признаков, включающий признаки пользователя (количество взаимодействий, средний рейтинг)и признаки товара (количество взаимодействий, средний рейтинг, эмбеддинги из CountVectorizer)

Для каждого пользователя $u$ и каждого кандидата $i \in C_u$ LightGBM вычисляет скор $S(u, i)$

In [12]:
import lightgbm as lgb
import joblib
import os

In [None]:
class LightGBMReranker:
    def __init__(self, item_knn_recommender, popular_recommender, n_candidates=100):
        self.trained = False
        self.model = lgb.LGBMClassifier(random_state=42, n_estimators=200, learning_rate=0.1, num_leaves=31)
        self.item_knn = item_knn_recommender
        self.popular = popular_recommender
        self.n_candidates = n_candidates
        
        self.user_features = None
        self.item_features = None
        self.feature_names = None

    def _prepare_features(self, df: pl.DataFrame, items_df: pl.DataFrame):
        user_features = df.group_by("user_id").agg(
            user_interactions=pl.len(),
            user_avg_rating=pl.col("rating").mean()
        )
        item_features = df.group_by("item_id").agg(
            item_interactions=pl.len(),
            item_avg_rating=pl.col("rating").mean()
        )
        item_embeddings = items_df.select(["item_id", "embedding"])
        embedding_cols = [f"emb_{i}" for i in range(len(item_embeddings['embedding'][0]))]
        item_embeddings_wide = item_embeddings.with_columns(
            [pl.col("embedding").list.get(i).alias(f"emb_{i}") for i in range(len(embedding_cols))]
        ).drop("embedding")

        self.user_features = user_features
        self.item_features = item_features.join(item_embeddings_wide, on="item_id", how="left")

    def fit(self, df: pl.DataFrame, items_df: pl.DataFrame, n_neg_samples_per_pos: int = 4, max_samples: int = 2_000_000):
        self._prepare_features(df, items_df)
        
        positives = df.select(["user_id", "item_id"]).with_columns(label=pl.lit(1, dtype=pl.Int8))
        train_users_df = df.select("user_id").unique()
        knn_candidates_list = self.item_knn.predict(train_users_df, topn=self.n_candidates)
        
        user_id_col = []
        item_id_col = []
        for user_id, candidates in zip(train_users_df["user_id"].to_list(), knn_candidates_list):
            if len(candidates) > 0:
                user_id_col.extend([user_id] * len(candidates))
                item_id_col.extend(candidates)
        
        candidates_df = pl.DataFrame({"user_id": user_id_col, "item_id": item_id_col})
        negatives = candidates_df.join(positives, on=["user_id", "item_id"], how="anti")
        negatives = negatives.with_columns(label=pl.lit(0, dtype=pl.Int8))
        
        n_pos = len(positives)
        n_neg = len(negatives)
        if n_neg > n_pos * n_neg_samples_per_pos:
            negatives = negatives.sample(n=n_pos * n_neg_samples_per_pos)

        train_data = pl.concat([positives, negatives]).sample(fraction=1.0, shuffle=True)
        
        if len(train_data) > max_samples:
            train_data = train_data.sample(n=max_samples, shuffle=True)

        train_df = train_data.join(self.user_features, on="user_id", how="left")
        train_df = train_df.join(self.item_features, on="item_id", how="left").fill_null(0)

        y = train_df["label"]
        X = train_df.drop(["user_id", "item_id", "label"])
        self.feature_names = X.columns
        self.model.fit(X.to_numpy(), y.to_numpy())
        self.trained = True

    def predict(self, df: pl.DataFrame, topn: int = 10) -> list[np.ndarray]:
        test_users_df = df.select("user_id").unique()
        candidates_knn_list = self.item_knn.predict(test_users_df, topn=self.n_candidates)
        candidates_popular = self.popular.predict(test_users_df, topn=self.n_candidates)[0]

        all_preds = []
        
        for i, user_id in enumerate(tqdm(test_users_df["user_id"].to_list())):
            user_candidates = set(candidates_knn_list[i]) | set(candidates_popular)
            if not user_candidates:
                all_preds.append(np.array([]))
                continue

            candidates_df = pl.DataFrame({"user_id": [user_id] * len(user_candidates), "item_id": list(user_candidates)})
            
            predict_df = candidates_df.join(self.user_features, on="user_id", how="left")
            predict_df = predict_df.join(self.item_features, on="item_id", how="left").fill_null(0)
    
            X_predict = predict_df[self.feature_names]
            scores = self.model.predict_proba(X_predict.to_pandas())[:, 1]
            
            results_df = pl.DataFrame({"item_id": predict_df["item_id"], "score": scores})
            top_items= results_df.sort("score", descending=True).head(topn)["item_id"].to_numpy()
            all_preds.append(top_items)
            
        return all_preds

    def save_model(self, path="models/lgbm_reranker.joblib"):
        if not os.path.exists("models"):
            os.makedirs("models")
        model_data= {'model': self.model, 'user_features': self.user_features, 'item_features': self.item_features, 'feature_names': self.feature_names}
        joblib.dump(model_data, path)

lgbm_reranker = LightGBMReranker(item_knn_recommender=item_knn, popular_recommender=toppop)
lgbm_reranker.fit(train, books)
lgbm_reranker.save_model()

100%|██████████| 70/70 [02:42<00:00,  2.33s/it]


[LightGBM] [Info] Number of positive: 517987, number of negative: 1482013
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.059393 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 9178
[LightGBM] [Info] Number of data points in the train set: 2000000, number of used features: 36
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.258993 -> initscore=-1.051206
[LightGBM] [Info] Start training from score -1.051206


In [14]:
test_users_df = test.select("user_id").unique()
lgbm_preds = lgbm_reranker.predict(test_users_df)

preds_df = pl.DataFrame({
    "user_id": test_users_df["user_id"],
    "lgbm_recs": pl.Series(lgbm_preds)
})
test = test.join(preds_df, on="user_id", how="left")
print(test.head())
evaluate_recommender_modified(df=test, model_preds_col="lgbm_recs")

100%|██████████| 38/38 [01:13<00:00,  1.92s/it]
100%|██████████| 185828/185828 [28:17<00:00, 109.45it/s]


shape: (5, 6)
┌────────────────┬────────────────┬────────────────┬───────────────┬───────────────┬───────────────┐
│ user_id        ┆ item_id        ┆ random_recs    ┆ toppopular_re ┆ item_knn_recs ┆ lgbm_recs     │
│ ---            ┆ ---            ┆ ---            ┆ cs            ┆ ---           ┆ ---           │
│ str            ┆ list[i64]      ┆ array[i64, 10] ┆ ---           ┆ list[i64]     ┆ array[i64,    │
│                ┆                ┆                ┆ array[i64,    ┆               ┆ 10]           │
│                ┆                ┆                ┆ 10]           ┆               ┆               │
╞════════════════╪════════════════╪════════════════╪═══════════════╪═══════════════╪═══════════════╡
│ 00000377eea480 ┆ [13252]        ┆ [6469, 30479,  ┆ [4058, 15514, ┆ [17956,       ┆ [960, 27314,  │
│ 21d3002730d56a ┆                ┆ … 23297]       ┆ … 13159]      ┆ 12482, …      ┆ … 5729]       │
│ ca…            ┆                ┆                ┆               ┆ 31641]  

{'ndcg@10': 0.029,
 'recall@10': 0.023,
 'map@10': 0.012,
 'serendipity@10': 0.031,
 'coverage@k': 0.024}

In [16]:
HTML(dataframe_to_html(books.filter(pl.col("item_id").is_in(test.sample(1)["lgbm_recs"][0].to_list())), ["item_id", "title", "description", "image_url"]))

item_id,title,description,image_url
960,Fangirl,"This special edition includes fan art, a ribbon bookmark, a Q&A with the author, and an excerpt from her new book Carry On. In Rainbow Rowell's Fangirl, Cath is a Simon Snow fan. Okay, the whole world is a Simon Snow fan, but for Cath, being a fan is her life-and she's really good at it. She and her twin sister, Wren, ensconced themselves in the Simon Snow series when they were just kids; it's what got them through their mother leaving. Reading. Rereading. Hanging out in Simon Snow forums, writing Simon Snow fan fiction, dressing up like the characters for every movie premiere. Cath's sister has mostly grown away from fandom, but Cath can't let go. She doesn't want to. Now that they're going to college, Wren has told Cath she doesn't want to be roommates. Cath is on her own, completely outside of her comfort zone. She's got a surly roommate with a charming, always-around boyfriend, a fiction-writing professor who thinks fan fiction is the end of the civilized world, a handsome classmate who only wants to talk about words . . . And she can't stop worrying about her dad, who's loving and fragile and has never really been alone. For Cath, the question is: Can she do this? Can she make it without Wren holding her hand? Is she ready to start living her own life? And does she even want to move on if it means leaving Simon Snow behind? A New York Times Book Review Notable Children's Book of 2013 A New York TimesBest Seller!",
5398,Matilda,"Matilda is a little girl who is far too good to be true. At age five-and-a-half she's knocking off double-digit multiplication problems and blitz-reading Dickens. Even more remarkably, her classmates love her even though she's a super-nerd and the teacher's pet. But everything is not perfect in Matilda's world. For starters she has two of the most idiotic, self-centred parents who ever lived. Then there's the large, busty nightmare of a school mistress, Mrs (""The"") Trunchbull, a former hammer- throwing champion who flings children at will and is approximately as sympathetic as a bulldozer. Fortunately for Matilda, she has the inner resources to deal with such annoyances: astonishing intelligence, saintly patience and an innate talent for revenge. She warms up with some practical jokes aimed at her hapless parents, but the true test comes when she rallies in defence of her teacher, the sweet Miss Honey, against the diabolical Trunchbull. There is never any doubt that Matilda will carry the day. Even so, this wonderful story is far from predictable--the big surprise comes when Matilda discovers a new, mysterious facet of her mental dexterity. Roald Dahl, while keeping the plot moving imaginatively, also has an unerring ear for emotional truth. The truly reader cares about Matilda because, in addition to all her other gifts, she has real feelings.",
5729,Heartless,"In this modern-day take on the classic Frankenstein tale, as told from the monster's perspective, Jolene Hall is dead--sort of. She can walk, think and talk, but her heart doesn't beat and her lungs stopped breathing ages ago. After Jo is abducted and subjected to horrific experiments, she wakes up to find her body is a mosaic of jagged wounds and stapled flesh. Jo Hall has a choice: turn herself in to the authorities, or team up with her roommate Lucy and her boyfriend Eli to find a way to save herself. To Jo, the choice is clear. She'd like to know who turned her into a monster, and she'd like to live to see another sunrise. But that choice has drastic repercussions. On a trip deep into the snowy White Mountains, to a hidden laboratory filled with danger and bodies of the dead, Jo and Lucy find more 'creatures' just like her. Part body, part machine, run by batteries and electricity, these girls are killers, created by a shadowy Order with a penchant for chaos...and murder. To make matters worse, a photo on a wall of victims reveals Lucy is next in line to be ""recruited"" into this army of beautiful, walking corpses. When Jo's physical condition takes a turn for the worse, and the Order kidnaps those she loves most, saving her best friend and the man she loves might just mean sacrificing herself...or what's left of her.",
10443,"Scarlet (The Lunar Chronicles, #2)","Cinder, the cyborg mechanic, returns in the second thrilling installment of the best-selling Lunar Chronicles. She's trying to break out of prison -- even though if she succeeds, she'll be the Commonwealth's most wanted fugitive. Halfway around the world, Scarlet Benoit's grandmother is missing. It turns out there are many things Scarlet doesn't know about her grandmother or the grave danger she has lived in her whole life. When Scarlet encounters Wolf, a street fighter who may have information as to her grandmother's whereabouts, she is loath to trust this stranger, but is inexplicably drawn to him, and he to her. As Scarlet and Wolf unravel one mystery, they encounter another when they meet Cinder. Now, all of them must stay one step ahead of the vicious Lunar Queen Levana, who will do anything for the handsome Prince Kai to become her husband, her king, her prisoner. An unabridged recording (11 hours, 22 minutes).",
18061,"Illuminae (The Illuminae Files, #1)","Listening time 11hours 41 minutes This morning, Kady thought breaking up with Ezra was the hardest thing she'd have to do. This afternoon, her planet was invaded. The year is 2575, and two rival megacorporations are at war over a planet that's little more than an ice-covered speck at the edge of the universe. Too bad nobody thought to warn the people living on it. With enemy fire raining down on them, Kady and Ezra--who are barely even talking to each other--are forced to fight their way onto an evacuating fleet, with an enemy warship in hot pursuit. But their problems are just getting started. A deadly plague has broken out and is mutating, with terrifying results; the fleet's AI, which should be protecting them, may actually be their enemy; and nobody in charge will say what's really going on. As Kady hacks into a tangled web of data to find the truth, it's clear only one person can help her bring it all to light: the ex-boyfriend she sworeshe'd never speak to again. Told through a fascinating dossier of hacked documents--including emails, schematics, military files, IMs, medical reports, interviews, and more--Illuminaeis the first book in a heart-stopping, high-octane trilogy about lives interrupted, the price of truth, and the courage of everyday heroes.",
21045,"Beautiful Creatures (Caster Chronicles, #1)","Is falling in love the beginning... Or the end? In Ethan Wate's hometown there lies the darkest of secrets. There is a girl. Slowly, she pulled the hood from her head. Green eyes, black hair. Lena Duchannes. There is a curse. On the Sixteenth Moon, the Sixteenth Year, the Book will take what it's been promised. And no one can stop it. In the end, there is a grave. Lena and Ethan become bound together by a deep, powerful love. But Lena is cursed and on her sixteenth birthday, her fate will be decided. Ethan never even saw it coming. * Don't miss the Warner Brothers and Alcon Entertainment blockbuster movie of Beautiful Creatures directed by Richard LaGravenese (P.S. I Love You) and featuring an all star cast including Emma Thompson, Jeremy Irons, Viola Davies and hot young Hollywood talent Alice Englert, Alden Ehrenreich and Emmy Rossum. Praise for Beautiful Creatures: 'This novel has been generating Twilight-level buzz.'-Teen Vogue About the authors: @kamigarciais a superstitious American southerner who can make biscuits by hand and pies from scratch! She attended George Washington University and is a teacher and reading specialist. She lives in Los Angeles, California with her family. @mstohlhas written and designed many successful video games, which is why her two beagles are named Zelda and Kirby. She has degrees from Yale and Stanford Universities in the US and has also studied in the prestigious creative writing department at UEA, Norwich. She lives in Santa Monica, California with her family. Also available in the series: Beautiful Darkness, Beautiful Chaosand Beautiful Redemption.",
24530,"The Crown (The Selection, #5)","Listening time 7 hours 12 minutes Kiera Cass's #1 New York Times bestselling Selection series has captured the hearts of readers from its very first page. Now the end of the journey is here. Prepare to be swept off your feet by The Crown--the eagerly awaited, wonderfully romantic fifth and final book in the Selection series. In The Heir, a new era dawned in the world of The Selection. Twenty years have passed since America Singer and Prince Maxon fell in love, and their daughter is the first princess to hold a Selection of her own. Eadlyn didn't think she would find a real partner among the Selection's thirty-five suitors, let alone true love. But sometimes the heart has a way of surprising you...and now Eadlyn must make a choice that feels more difficult--and more important--than she ever expected.",
24546,"Aristotle and Dante Discover the Secrets of the Universe (Aristotle and Dante Discover the Secrets of the Universe, #1)","A lyrical novel about family and friendship from critically acclaimed author Benjamin Alire Saenz. Aristotle is an angry teen with a brother in prison. Dante is a know-it-all who has an unusual way of looking at the world. When the two meet at the swimming pool, they seem to have nothing in common. But as the loners start spending time together, they discover that they share a special friendship-the kind that changes lives and lasts a lifetime. And it is through this friendship that Ari and Dante will learn the most important truths about themselves and the kind of people they want to be. Duration:7 hours 32 minutes",
27314,"Vampire Academy (Vampire Academy, #1)","Richelle Mead celebrates 10 years of Vampire Academy with an exclusive, never-before-seen collection of stories that sheds new light on the world and its players: The Turn and the Flame takes a deeper look into the dark stain on the Ozera dynasty... From the Journal of Vasilisa Dragomir unearths the princess's private thoughts from a transformative period of her life... The Meeting gives us a glimpse of Rose Hathaway through Dimitri's eyes... Hello My Name Is Rose Hathaway tracks the shenanigans that ensue when Rose and Dimitri become unlikely teammates in a high-stakes scavenger hunt... Lissa Dragomir is a Moroi princess: a mortal vampire with a rare gift for harnessing the earth's magic. She must be protected at all times from Strigoi; the fiercest vampires--the ones who never die. The powerful blend of human and vampire blood that flows through Rose Hathaway, Lissa's best friend, makes her a Dhampir. Rose is dedicated to a dangerous life of protecting Lissa from Strigoi, who are hell-bent on making Lissa one of them. After two years of freedom, Rose and Lissa are caught and dragged back to St. Vladimir's Academy, where vampire royalty and their guardians-to-be prepare for a life fraught with danger. Rose and Lissa must navigate their treacherous world and never once let their guard down, lest the evil undead make Lissa one of them forever. But soon Rose finds herself gripped by temptation of forbidden love, leaving Lissa exposed to Strigoi attack. Now she must choose between the best friend she lives for, and the man she can't live without....",
30523,"The Heir (The Selection, #4)","Princess Eadlyn has grown up hearing endless stories about how her mother and father met. Twenty years ago, America Singer entered the Selection and won the heart of Prince Maxon--and they lived happily ever after. Eadlyn has always found their fairy-tale story romantic, but she has no interest in trying to repeat it. If it were up to her, she'd put off marriage for as long as possible. But a princess's life is never entirely her own, and Eadlyn can't escape her very own Selection--no matter how fervently she protests. Eadlyn doesn't expect her story to end in romance. But as the competition begins, one entry may just capture Eadlyn's heart, showing her all the possibilities that lie in front of her . . . and proving that finding her own happily ever after isn't as impossible as she's always thought.",


In [52]:
HTML(dataframe_to_html(books.filter(pl.col("item_id").is_in(test.sample(1)["lgbm_recs"][0].to_list())), ["item_id", "title", "description", "image_url"]))

item_id,title,description,image_url
960,Fangirl,"This special edition includes fan art, a ribbon bookmark, a Q&A with the author, and an excerpt from her new book Carry On. In Rainbow Rowell's Fangirl, Cath is a Simon Snow fan. Okay, the whole world is a Simon Snow fan, but for Cath, being a fan is her life-and she's really good at it. She and her twin sister, Wren, ensconced themselves in the Simon Snow series when they were just kids; it's what got them through their mother leaving. Reading. Rereading. Hanging out in Simon Snow forums, writing Simon Snow fan fiction, dressing up like the characters for every movie premiere. Cath's sister has mostly grown away from fandom, but Cath can't let go. She doesn't want to. Now that they're going to college, Wren has told Cath she doesn't want to be roommates. Cath is on her own, completely outside of her comfort zone. She's got a surly roommate with a charming, always-around boyfriend, a fiction-writing professor who thinks fan fiction is the end of the civilized world, a handsome classmate who only wants to talk about words . . . And she can't stop worrying about her dad, who's loving and fragile and has never really been alone. For Cath, the question is: Can she do this? Can she make it without Wren holding her hand? Is she ready to start living her own life? And does she even want to move on if it means leaving Simon Snow behind? A New York Times Book Review Notable Children's Book of 2013 A New York TimesBest Seller!",
5398,Matilda,"Matilda is a little girl who is far too good to be true. At age five-and-a-half she's knocking off double-digit multiplication problems and blitz-reading Dickens. Even more remarkably, her classmates love her even though she's a super-nerd and the teacher's pet. But everything is not perfect in Matilda's world. For starters she has two of the most idiotic, self-centred parents who ever lived. Then there's the large, busty nightmare of a school mistress, Mrs (""The"") Trunchbull, a former hammer- throwing champion who flings children at will and is approximately as sympathetic as a bulldozer. Fortunately for Matilda, she has the inner resources to deal with such annoyances: astonishing intelligence, saintly patience and an innate talent for revenge. She warms up with some practical jokes aimed at her hapless parents, but the true test comes when she rallies in defence of her teacher, the sweet Miss Honey, against the diabolical Trunchbull. There is never any doubt that Matilda will carry the day. Even so, this wonderful story is far from predictable--the big surprise comes when Matilda discovers a new, mysterious facet of her mental dexterity. Roald Dahl, while keeping the plot moving imaginatively, also has an unerring ear for emotional truth. The truly reader cares about Matilda because, in addition to all her other gifts, she has real feelings.",
5729,Heartless,"In this modern-day take on the classic Frankenstein tale, as told from the monster's perspective, Jolene Hall is dead--sort of. She can walk, think and talk, but her heart doesn't beat and her lungs stopped breathing ages ago. After Jo is abducted and subjected to horrific experiments, she wakes up to find her body is a mosaic of jagged wounds and stapled flesh. Jo Hall has a choice: turn herself in to the authorities, or team up with her roommate Lucy and her boyfriend Eli to find a way to save herself. To Jo, the choice is clear. She'd like to know who turned her into a monster, and she'd like to live to see another sunrise. But that choice has drastic repercussions. On a trip deep into the snowy White Mountains, to a hidden laboratory filled with danger and bodies of the dead, Jo and Lucy find more 'creatures' just like her. Part body, part machine, run by batteries and electricity, these girls are killers, created by a shadowy Order with a penchant for chaos...and murder. To make matters worse, a photo on a wall of victims reveals Lucy is next in line to be ""recruited"" into this army of beautiful, walking corpses. When Jo's physical condition takes a turn for the worse, and the Order kidnaps those she loves most, saving her best friend and the man she loves might just mean sacrificing herself...or what's left of her.",
10443,"Scarlet (The Lunar Chronicles, #2)","Cinder, the cyborg mechanic, returns in the second thrilling installment of the best-selling Lunar Chronicles. She's trying to break out of prison -- even though if she succeeds, she'll be the Commonwealth's most wanted fugitive. Halfway around the world, Scarlet Benoit's grandmother is missing. It turns out there are many things Scarlet doesn't know about her grandmother or the grave danger she has lived in her whole life. When Scarlet encounters Wolf, a street fighter who may have information as to her grandmother's whereabouts, she is loath to trust this stranger, but is inexplicably drawn to him, and he to her. As Scarlet and Wolf unravel one mystery, they encounter another when they meet Cinder. Now, all of them must stay one step ahead of the vicious Lunar Queen Levana, who will do anything for the handsome Prince Kai to become her husband, her king, her prisoner. An unabridged recording (11 hours, 22 minutes).",
12952,"Glass Sword (Red Queen, #2)","If there's one thing Mare Barrow knows, it's that she's different. Mare Barrow's blood is red--the color of common folk--but her Silver ability, the power to control lightning, has turned her into a weapon that the royal court tries to control. The crown calls her an impossibility, a fake, but as she makes her escape from Maven, the prince--the friend--who betrayed her, Mare uncovers something startling: she is not the only one of her kind. Pursued by Maven, now a vindictive king, Mare sets out to find and recruit other Red-and-Silver fighters to join in the struggle against her oppressors. But Mare finds herself on a deadly path, at risk of becoming exactly the kind of monster she is trying to defeat. Will she shatter under the weight of the lives that are the cost of rebellion? Or have treachery and betrayal hardened her forever? The electrifying next installment in the Red Queen series escalates the struggle between the growing rebel army and the blood-segregated world they've always known--and pits Mare against the darkness that has grown in her soul.?",
18061,"Illuminae (The Illuminae Files, #1)","Listening time 11hours 41 minutes This morning, Kady thought breaking up with Ezra was the hardest thing she'd have to do. This afternoon, her planet was invaded. The year is 2575, and two rival megacorporations are at war over a planet that's little more than an ice-covered speck at the edge of the universe. Too bad nobody thought to warn the people living on it. With enemy fire raining down on them, Kady and Ezra--who are barely even talking to each other--are forced to fight their way onto an evacuating fleet, with an enemy warship in hot pursuit. But their problems are just getting started. A deadly plague has broken out and is mutating, with terrifying results; the fleet's AI, which should be protecting them, may actually be their enemy; and nobody in charge will say what's really going on. As Kady hacks into a tangled web of data to find the truth, it's clear only one person can help her bring it all to light: the ex-boyfriend she sworeshe'd never speak to again. Told through a fascinating dossier of hacked documents--including emails, schematics, military files, IMs, medical reports, interviews, and more--Illuminaeis the first book in a heart-stopping, high-octane trilogy about lives interrupted, the price of truth, and the courage of everyday heroes.",
19472,"Truthwitch (The Witchlands, #1)","On a continent ruled by three empires, some are born with a ""witchery"", a magical skill that sets them apart from others. In the Witchlands, there are almost as many types of magic as there are ways to get in trouble--as two desperate young women know all too well. Safiya is a Truthwitch, able to discern truth from lie. It's a powerful magic that many would kill to have on their side, especially amongst the nobility to which Safi was born. So Safi must keep her gift hidden, lest she be used as a pawn in the struggle between empires. Iseult, a Threadwitch, can see the invisible ties that bind and entangle the lives around her--but she cannot see the bonds that touch her own heart. Her unlikely friendship with Safi has taken her from life as an outcast into one of reckless adventure, where she is a cool, wary balance to Safi's hotheaded impulsiveness. Safi and Iseult just want to be free to live their own lives, but war is coming to the Witchlands. With the help of the cunning Prince Merik (a Windwitch and ship's captain) and the hindrance of a Bloodwitch bent on revenge, the friends must fight emperors, princes, and mercenaries alike, who will stop at nothing to get their hands on a Truthwitch. Length: 14 hrs and 41 mins",
24530,"The Crown (The Selection, #5)","Listening time 7 hours 12 minutes Kiera Cass's #1 New York Times bestselling Selection series has captured the hearts of readers from its very first page. Now the end of the journey is here. Prepare to be swept off your feet by The Crown--the eagerly awaited, wonderfully romantic fifth and final book in the Selection series. In The Heir, a new era dawned in the world of The Selection. Twenty years have passed since America Singer and Prince Maxon fell in love, and their daughter is the first princess to hold a Selection of her own. Eadlyn didn't think she would find a real partner among the Selection's thirty-five suitors, let alone true love. But sometimes the heart has a way of surprising you...and now Eadlyn must make a choice that feels more difficult--and more important--than she ever expected.",
24546,"Aristotle and Dante Discover the Secrets of the Universe (Aristotle and Dante Discover the Secrets of the Universe, #1)","A lyrical novel about family and friendship from critically acclaimed author Benjamin Alire Saenz. Aristotle is an angry teen with a brother in prison. Dante is a know-it-all who has an unusual way of looking at the world. When the two meet at the swimming pool, they seem to have nothing in common. But as the loners start spending time together, they discover that they share a special friendship-the kind that changes lives and lasts a lifetime. And it is through this friendship that Ari and Dante will learn the most important truths about themselves and the kind of people they want to be. Duration:7 hours 32 minutes",
27314,"Vampire Academy (Vampire Academy, #1)","Richelle Mead celebrates 10 years of Vampire Academy with an exclusive, never-before-seen collection of stories that sheds new light on the world and its players: The Turn and the Flame takes a deeper look into the dark stain on the Ozera dynasty... From the Journal of Vasilisa Dragomir unearths the princess's private thoughts from a transformative period of her life... The Meeting gives us a glimpse of Rose Hathaway through Dimitri's eyes... Hello My Name Is Rose Hathaway tracks the shenanigans that ensue when Rose and Dimitri become unlikely teammates in a high-stakes scavenger hunt... Lissa Dragomir is a Moroi princess: a mortal vampire with a rare gift for harnessing the earth's magic. She must be protected at all times from Strigoi; the fiercest vampires--the ones who never die. The powerful blend of human and vampire blood that flows through Rose Hathaway, Lissa's best friend, makes her a Dhampir. Rose is dedicated to a dangerous life of protecting Lissa from Strigoi, who are hell-bent on making Lissa one of them. After two years of freedom, Rose and Lissa are caught and dragged back to St. Vladimir's Academy, where vampire royalty and their guardians-to-be prepare for a life fraught with danger. Rose and Lissa must navigate their treacherous world and never once let their guard down, lest the evil undead make Lissa one of them forever. But soon Rose finds herself gripped by temptation of forbidden love, leaving Lissa exposed to Strigoi attack. Now she must choose between the best friend she lives for, and the man she can't live without....",


По метрикам - хуже TopPopular, но лучше остальных моделей (исключая coverage).Тут походу появились айтемы, рекомендуемые всем, которые не относятся к самым популярным - судя по serendipity, холодных айтемов стало больше, по сравнению с предыдущими моделями, но мы полностью пожертвовали точностью остальных рекомендаций. Короче, делаем дальше, перебор

## 2: Матричная факторизация: SVD

In [13]:
from scipy.sparse import csr_matrix
from sklearn.decomposition import TruncatedSVD

Коллаборативная фильтрация на основе SVD: используем SVD для матричной факторизации

1.  Исходная матрица взаимодействий $R$ (размера $|U| \times |I|$) аппроксимируется произведением двух матриц меньшей размерности: матрицы латентных векторов пользователей $P$ и айтемов $Q$
    $$R \approx P \times Q^T$$
2.  Скор релевантности для пары (user, item)  $\hat{r}_{ui} = \langle p_u,q_i \rangle $, далее сортируем по скору

In [13]:
class TruncatedSVDRecommender:
    def __init__(self, n_components=100, random_state=42):
        self.trained = False
        self.model = TruncatedSVD(n_components=n_components, random_state=random_state)
        
        self.user_map, self.item_map = None, None
        self.user_embeddings, self.item_embeddings = None, None
        self.train_interactions = None
        self.item_inv_map_array = None

    def fit(self, df: pl.DataFrame):
        interactions_df = df.group_by("user_id").agg(pl.col("item_id"))
    
        self.train_interactions = dict(zip(
            interactions_df["user_id"].to_list(), 
            interactions_df["item_id"].to_list()
        ))

        self.user_map = {uid: i for i, uid in enumerate(df["user_id"].unique())}
        self.item_map = {iid: i for i, iid in enumerate(df["item_id"].unique())}
        
        item_inv_map = {i: iid for iid, i in self.item_map.items()}
        self.item_inv_map_array = np.array([item_inv_map[i] for i in range(len(item_inv_map))])

        rows = df["user_id"].replace(self.user_map)
        cols = df["item_id"].replace(self.item_map)
        values = df["rating"] if "rating" in df.columns else np.ones(len(df))
        
        interactions_matrix = csr_matrix((values, (rows, cols)), shape=(len(self.user_map), len(self.item_map)))
        self.user_embeddings = self.model.fit_transform(interactions_matrix)
        self.item_embeddings = self.model.components_.T
        
        self.trained = True

    def predict(self, df: pl.DataFrame, topn: int = 10, batch_size: int = 4096) -> list[np.ndarray]:
        test_user_ids = df["user_id"].unique().to_list()
        
        final_preds = [np.array([])] * len(test_user_ids)
        
        user_jobs = [(i, self.user_map.get(uid)) for i, uid in enumerate(test_user_ids)]
        known_user_jobs = [(orig_idx, user_idx) for orig_idx, user_idx in user_jobs if user_idx is not None]

        if not known_user_jobs:
            return final_preds
        
        original_indices, known_user_indices = zip(*known_user_jobs)

        for i in tqdm(range(0, len(known_user_indices), batch_size), desc="Predicting in batches"):
            batch_user_indices = known_user_indices[i : i + batch_size]
            batch_original_indices = original_indices[i : i + batch_size]
            
            batch_scores = self.user_embeddings[list(batch_user_indices)] @ self.item_embeddings.T

            batch_user_ids = [test_user_ids[orig_idx] for orig_idx in batch_original_indices]
            for j, user_id in enumerate(batch_user_ids):
                seen_items = self.train_interactions.get(user_id, [])
                if seen_items: 
                    seen_indices = [self.item_map[iid] for iid in seen_items if iid in self.item_map]
                    batch_scores[j, seen_indices] = -np.inf

            top_indices = np.argpartition(-batch_scores, topn, axis=1)[:, :topn]
            top_scores = batch_scores[np.arange(batch_scores.shape[0])[:, None], top_indices]
            sorted_within_top_indices = np.argsort(-top_scores, axis=1)
            final_batch_indices = top_indices[np.arange(top_indices.shape[0])[:, None], sorted_within_top_indices]
            
            recommendations = self.item_inv_map_array[final_batch_indices]
            
            for j, orig_idx in enumerate(batch_original_indices):
                final_preds[orig_idx] = recommendations[j]
                
        return final_preds

svd_rec = TruncatedSVDRecommender(n_components=128)
svd_rec.fit(train)

test_users_df = test.select("user_id").unique()
svd_preds = svd_rec.predict(test_users_df)

preds_df = pl.DataFrame({
    "user_id": test_users_df["user_id"],
    "svd_recs": pl.Series(svd_preds)
})

test = test.join(preds_df, on="user_id", how="left")

print(test.head())
evaluate_recommender_modified(df=test, model_preds_col="svd_recs")

Predicting in batches: 100%|██████████| 46/46 [04:50<00:00,  6.31s/it]


shape: (5, 6)
┌────────────────┬────────────────┬────────────────┬───────────────┬───────────────┬───────────────┐
│ user_id        ┆ item_id        ┆ random_recs    ┆ toppopular_re ┆ item_knn_recs ┆ svd_recs      │
│ ---            ┆ ---            ┆ ---            ┆ cs            ┆ ---           ┆ ---           │
│ str            ┆ list[i64]      ┆ array[i64, 10] ┆ ---           ┆ list[i64]     ┆ array[i64,    │
│                ┆                ┆                ┆ array[i64,    ┆               ┆ 10]           │
│                ┆                ┆                ┆ 10]           ┆               ┆               │
╞════════════════╪════════════════╪════════════════╪═══════════════╪═══════════════╪═══════════════╡
│ 00000377eea480 ┆ [13252]        ┆ [10746, 32113, ┆ [4058, 15514, ┆ [17956,       ┆ [22896,       │
│ 21d3002730d56a ┆                ┆ … 22743]       ┆ … 13159]      ┆ 12482, …      ┆ 22898, …      │
│ ca…            ┆                ┆                ┆               ┆ 31641]  

{'ndcg@10': 0.011,
 'recall@10': 0.007,
 'map@10': 0.004,
 'serendipity@10': 0.012,
 'coverage@k': 0.038}

In [14]:
HTML(dataframe_to_html(books.filter(pl.col("item_id").is_in(test.sample(1)["svd_recs"][0].to_list())), ["item_id", "title", "description", "image_url"]))

item_id,title,description,image_url
1471,"Crank (Crank, #1)","In Crank, Ellen Hopkins chronicles the turbulent and often disturbing relationship between Kristina, a character based on her own daughter, and the ""monster,"" the highly addictive drug crystal meth, or ""crank."" Kristina is introduced to the drug while visiting her largely absent and ne'er-do-well father. While under the influence of the monster, Kristina discovers her sexy alter-ego, Bree: ""there is no perfect daughter, / no gifted high school junior, / no Kristina Georgia Snow. / There is only Bree."" Bree will do all the things good girl Kristina won't, including attracting the attention of dangerous boys who can provide her with a steady flow of crank. Soon, her grades plummet, her relationships with family and friends deteriorate, and she needs more and more of the monster just to get through the day. Kristina hits her lowest point when she is raped by one of her drug dealers and becomes pregnant as a result. Her decision to keep the baby slows her drug use, but doesn't stop it, and the author leaves the reader with the distinct impression that Kristina/Bree may never be free from her addiction.",
2683,"The Sisterhood of the Traveling Pants (Sisterhood, #1)","FOUR VERY DIFFERENT FRIENDS. ONE PAIR OF MAGICAL PANTS. AND A SUMMER APART ... We, the Sisterhood, hereby instate the following rules to govern the use of the Traveling Pants: 1. You must never wash the Pants. 2. You must never double-cuff the Pants. It's tacky. There will never be a time when this will not be tacky. 3. You must never say the word ""phat"" while wearing the Pants. You must also never think ""I am fat"" while wearing the Pants. 4. You must never let a boy take off the Pants (although you may take them off yourself in his presence). 5. You must not pick your nose while wearing the Pants. You may, however, scratch casually at your nostril while really kind of picking. 6. Upon our reunion, you must follow the proper procedures for documenting your time in the Pants. 7. You must write to your Sisters throughout the summer, no matter how much fun you are having without them. 8. You must pass the Pants along to your Sister according to the specifications set down by the Sisterhood. Failures to comply will result in a sever spanking upon our reunion. 9. You must not wear the Pants with a tucked-in shirt and belt. See rule #2. 10. Remember: Pants = love. Love your pals. Love yourself.",
2870,"The Princess Diaries (The Princess Diaries, #1)","She's just a New York City girl living with her artist mom.... But, as it turns out, her Dad is the Prince of Genovia (So that's why a limo meets her at the airport). Shock of the Century! Like it or not, Mia Thermopolis is prime princess material. Mia must take princess lessons from her dreaded grandme re, the dowager princess of Genovia, who thinks Mia has a thing or two to learn before she steps up to the throne. Well, her father can lecture her until he's royal-blue in the face about her princessly duty -- no way is she moving to Genovia and leaving Manhattan behind. But what's a girl to do when her name is Princess Amelia Mignonette Grimaldi Thermopolis Renaldo? The Princess Diaries was among the 2001 Quick Picks for Reluctant Young Readers (American Library Association), Books for the Teen Age 2001 (New York Public Library), and the 2001 Best Books for Young Adults (American Library Association).""",
7097,This Is Where It Ends,"10:00 a.m.The principal of Opportunity High School finishes her speech, welcoming the entire student body to a new semester and encouraging them to excel and achieve. 10:02 a.m.The students get up to leave the auditorium for their next class. 10:03 a.m.The auditorium doors won't open. 10:05 a.m.Someone starts shooting. Told from four different perspectives over the span of fifty-four harrowing minutes, terror reigns as one student's calculated revenge turns into the ultimate game of survival.",
7375,Hate List,"The powerful and timely contemporary classic by Jennifer Brown -- now including the bonus Hate Listnovella Say Something! Five months ago, Valerie Leftman's boyfriend, Nick, opened fire on their school cafeteria. Shot trying to stop him, Valerie inadvertently saved the life of a classmate, but was implicated in the shootings because of the list she helped create. A list of people and things she and Nick hated. The list he used to pick his targets. Now, after a summer of seclusion, Val is forced to confront her guilt as she returns to school to complete her senior year. Haunted by the memory of the boyfriend she still loves and navigating rocky relationships with her family, former friends, and the girl whose life she saved, Val must come to grips with the tragedy that took place and her role in it, in order to make amends and move on with her life. Jennifer Brown's critically acclaimed novel now features a brand-new cover and the bonus novella Say Something, another arresting story that digs deeper into the powerful themes of Hate List.",
11965,Wintergirls,"In her most emotionally wrenching, lyrically written book since the multiple-award-winning Speak, Laurie Halse Anderson explores a girl's descent into the powerful vortex of anorexia, and her painful path toward recovery. ""Dead girl walking,"" the boys say in the halls. ""Tell us your secret,"" the girls whisper, one toilet to another. I am that girl. I am the space between my thighs, daylight shining through. I am the bones they want, wired on a porcelain frame. Lia and Cassie are best friends, wintergirls frozen in matchstick bodies, competitors in a deadly contest to see who can be the skinniest. But what comes after size zero and size double-zero? When Cassie succumbs to the demons within, Lia feels she is being haunted by her friend's restless spirit. In her most emotionally wrenching, lyrically written book since the multiple-award-winning Speak, Laurie Halse Anderson explores Lia's descent into the powerful vortex of anorexia, and her painful path toward recovery.",
16005,"Princess in Love (The Princess Diaries, #3)","It would seem that 14-year-old Mia Thermopolis (""five foot nine inches tall, with no visible breasts, feet the size of snowshoes"") has the kind of life every Manhattan teenager could only dream of: She is, in her spare time, the princess of the European country of Genovia. Alas, the Royal Privilege is more like a Predicament. Not only does she have to endure daily princess lessons from her critical Grandmere (""It isn't as if I'm going to show up at the castle and start hurling olives at the ladies-in-waiting""), but her new stepfather is also her algebra teacher, her mother is pregnant and vomiting, she doesn't like her boyfriend very much, and she's convinced the real love of her life--her best friend's older brother--thinks of her as a kid. Written in diary form like Louise Rennison's award-winning Angus, Thongs, and Full-Frontal Snogging, Meg Cabot's endearing and often hilarious novel Princess in Love--third in the series after The Princess Diariesand Princess in the Spotlight--is sure to appeal to teen readers who will be able to relate to Mia--a young woman who would like people to know that ""behind this mutant facade beats the heart of a person who is striving, just like everybody else in this world, to find self-actualization."" (Ages 12 and older) --Karin Snelson",
23926,Stolen: A Letter to My Captor,"It happened like this. I was stolen from an airport. Taken from everything I knew, everything I was used to. Taken to sand and heat, dirt and danger. And he expected me to love him. This is my story. A letter from nowhere. Sixteen-year-old Gemma is kidnapped from Bangkok airport and taken to the Australian Outback. This wild and desolate landscape becomes almost a character in the book, so vividly is it described. Ty, her captor, is no stereotype. He is young, fit and completely gorgeous. This new life in the wilderness has been years in the planning. He loves only her, wants only her. Under the hot glare of the Australian sun, cut off from the world outside, can the force of his love make Gemma love him back? The story takes the form of a letter, written by Gemma to Ty, reflecting on those strange and disturbing months in the outback. Months when the lines between love and obsession, and love and dependency, blur until they don't exist--almost.",
30486,"Princess in the Spotlight (The Princess Diaries, #2)","She's just a New York City girl living with her artist mom...News Flash: Dad is prince of Genovia. (So that's why a limo meets her at the airport ) Downer: Dad can't have any more kids. (So no heir to the throne.) Shock of the Century: Like it or not, Mia Thermopolis is prime princess material. Mia must take princess lessons from her dreaded grandmere, the dowager princess of Genovia, who thinks Mia has a thing or two to learn before she steps up to the throne. Well, her father can lecture her until he's royal-blue in the face about her princessly duty--no way is she moving to Genovia and leaving Manhattan behind. But what's a girl to do when her name is ""Princess Amelia Mignonette Grimaldi Thermopolis Renaldo""?",
34048,"Princess in Waiting (The Princess Diaries, #4)","Never before has the world seen such a princess. Nor have her own subjects, for that matter. Mia's royal introduction to Genovia has mixed results: while her fashion sense is widely applauded, her position on the installation of public parking meters is met with resistance. But the politics of bureaucracy are nothing next to Mia's real troubles. Between canceled dates with her long--sought--after royal consort, a second semester of the dreaded Algebra, more princess lessons from Grandmere as a result of the Genovian parking--meter thing, and the inability to stop gnawing on her fingernails, isn't there anything Mia is good at besides inheriting an unwanted royal title?",


In [15]:
HTML(dataframe_to_html(books.filter(pl.col("item_id").is_in(test.sample(1)["svd_recs"][0].to_list())), ["item_id", "title", "description", "image_url"]))

item_id,title,description,image_url
2889,Simon vs. the Homo Sapiens Agenda,"Sixteen-year-old and not-so-openly gay Simon Spier prefers to save his drama for the school musical. But when an email falls into the wrong hands, his secret is at risk of being thrust into the spotlight. Now Simon is actually being blackmailed: if he doesn't play wingman for class clown Martin, his sexual identity will become everyone's business. Worse, the privacy of Blue, the pen name of the boy he's been emailing, will be compromised. With some messy dynamics emerging in his once tight-knit group of friends, and his email correspondence with Blue growing more flirtatious every day, Simon's junior year has suddenly gotten all kinds of complicated. Now, change-averse Simon has to find a way to step out of his comfort zone before he's pushed out--without alienating his friends, compromising himself, or fumbling a shot at happiness with the most confusing, adorable guy he's never met.",
3625,"Will Grayson, Will Grayson","This is an Alternate Cover Edition for ISBN-13: 978-0142418475 ; ISBN-10: 0142418471 Will Grayson meets Will Grayson. One cold night, in a most unlikely corner of Chicago, two strangers are about to cross paths. From that moment on, their world will collide and Iives intertwine. It's not that far from Evanston to Naperville, but Chiacago suburbanites Will Grayson and Will Grayson might as well live on different planets. When fate delivers them both to the same surprising crossroads, the Will Graysons find their lives overlapping and hurtling in new and unexpected directions. With a push from friends new and old - including the massive, and massively fabulous, Tiny Cooper, offensive lineman and musical theater auteur extraordinaire - Will and Will begin building toward respective romantic turns-of-heart and the epic production of history's most awesome high school musical.",
8303,"Every Day (Every Day, #1)","Every day a different body. Every day a different life. Every day in love with the same girl. There's never any warning about where it will be or who it will be. A has made peace with that, even established guidelines by which to live: Never get too attached. Avoid being noticed. Do not interfere. It's all fine until the morning that A wakes up in the body of Justin and meets Justin's girlfriend, Rhiannon. From that moment, the rules by which A has been living no longer apply. Because finally A has found someone he wants to be with--day in, day out, day after day.",
10019,The Disreputable History of Frankie Landau-Banks,"Frankie Landau-Banks, at age 16: No longer the kind of girl to take ""no"" for an answer and possibly a criminal mastermind. This is the story of how she got that way. Frankie Landau-Banks at age 14: Debate Club. Her father's ""bunny rabbit."" A mildly geeky girl attending a highly competitive boarding school. Frankie Landau-Banks at age 15: A knockout figure. A sharp tongue. A chip on her shoulder. And a gorgeous new senior boyfriend: the supremely goofy, word-obsessed Matthew Livingston. Frankie Landau-Banks. No longer the kind of girl to take ""no"" for an answer. Especially when ""no"" means she's excluded from her boyfriend's all-male secret society. Not when her ex-boyfriend shows up in the strangest of places. Not when she knows she's smarter than any of them. When she knows Matthew's lying to her. And when there are so many, many pranks to be done. Frankie Landau-Banks, at age 16: Possibly a criminal mastermind. This is the story of how she got that way.",
14981,Eleanor and Park,"Two misfits. One extraordinary love. Eleanor... Red hair, wrong clothes. Standing behind him until he turns his head. Lying beside him until he wakes up. Making everyone else seem drabber and flatter and never good enough...Eleanor. Park... He knows she'll love a song before he plays it for her. He laughs at her jokes before she ever gets to the punch line. There's a place on his chest, just below his throat, that makes her want to keep promises...Park. Set over the course of one school year, this is the story of two star-crossed sixteen-year-olds--smart enough to know that first love almost never lasts, but brave and desperate enough to try.",
18847,Only Ever Yours,"Where women are created for the pleasure of men, beauty is the first duty of every girl. In Louise O'Neill's world of Only Every Yours women are no longer born naturally, girls (called ""eves"") are raised in Schools and trained in the arts of pleasing men until they come of age. Freida and Isabel are best friends. Now, aged sixteen and in their final year, they expect to be selected as companions--wives to powerful men. All they have to do is ensure they stay in the top ten beautiful girls in their year. The alternatives--life as a concubine, or a chastity (teaching endless generations of girls)--are too horrible to contemplate. But as the intensity of final year takes hold, the pressure to be perfect mounts. Isabel starts to self-destruct, putting her beauty--her only asset--in peril. And then into this sealed female environment, the boys arrive, eager to choose a bride. Freida must fight for her future--even if it means betraying the only friend, the only love, she has ever known.",
19528,Dangerous Girls,"Elise is dead. And someone must pay. Anna, her boyfriend Tate, her bst friend Elise and a group of close friends set off on a debaucherous Spring Break trip to Aruba. But paradise soon turns into a living nightmare when Elise is brutally murdered. Soon Anna finds herself trapped in a foreign country, fighting for her freedom. As she awaits the judge's decree, it becomes clear that everyone is questioning her innocence. To the rest of the world, Anna isn't just guilty, but dangerous. And the truth is more shocking than you could ever imagine . . . 'The best teen thriller I've ever read' Wondrous Reads 'Told with the same ""who can I trust?"" tone of The Girl On the Train, Dangerous Girls is the perfect mystery' bustle.com 'I haven't wanted to talk about a book this much since Gone Girl' goodreads.com 'Tension to the very last page . . . a compulsively readable, hair-raising snapshot of [the] 21st-century' Kirkus Reviews",
28766,"Girl Online (Girl Online, #1)","GIRL ONLINE is the stunning debut romance novel by YouTube phenomenon Zoe Sugg, aka Zoella. I had no idea GirlOnline would take off the way it has - I can't believe I now have 5432 followers, thanks so much! - and the thought of opening up to you all about this is terrifying, but here goes... Penny has a secret. Under the alias GirlOnline, she blogs about school dramas, boys, her mad, whirlwind family - and the panic attacks she's suffered from lately. When things go from bad to worse, her family whisks her away to New York, where she meets the gorgeous, guitar-strumming Noah. Suddenly Penny is falling in love - and capturing every moment of it on her blog. But Noah has a secret too. One that threatens to ruin Penny's cover - and her closest friendship - forever.",
32281,The Strange and Beautiful Sorrows of Ava Lavender,"A 2015 William C. Morris YA Debut Award Finalist Magical realism, lyrical prose, and the pain and passion of human love haunt this hypnotic generational saga. Foolish love appears to be the Roux family birthright, an ominous forecast for its most recent progeny, Ava Lavender. Ava in all other ways a normal girl is born with the wings of a bird. In a quest to understand her peculiar disposition and a growing desire to fit in with her peers, sixteen-year old Ava ventures into the wider world, ill-prepared for what she might discover and naive to the twisted motives of others. Others like the pious Nathaniel Sorrows, who mistakes Ava for an angel and whose obsession with her grows until the night of the summer solstice celebration. That night, the skies open up, rain and feathers fill the air, and Ava s quest and her family s saga build to a devastating crescendo. First-time author Leslye Walton has constructed a layered and unforgettable mythology of what it means to be born with hearts that are tragically, exquisitely human. """,
33329,Panic (Panic #1),"From New York Times bestselling author Lauren Oliver comes an extraordinary novel of fear, friendship, courage, and hope. Panic began as so many things do in Carp, a dead-end town of 12,000 people in the middle of nowhere: because it was summer, and there was nothing else to do. Heather never thought she would compete in Panic, a legendary game played by graduating seniors, where the stakes are high and the payoff is even higher. She'd never thought of herself as fearless, the kind of person who would fight to stand out. But when she finds something, and someone, to fight for, she will discover that she is braver than she ever thought. Dodge has never been afraid of Panic. His secret will fuel him, and get him all the way through the game, he's sure of it. But what he doesn't know is that he's not the only one with a secret. Everyone has something to play for. For Heather and Dodge, the game will bring new alliances, unexpected revelations, and the possibility of first love for each of them--and the knowledge that sometimes the very things we fear are those we need the most. Already optioned by Universal Pictures in a major deal, this gritty, spellbinding novel captures both the raw energy of fear mixed with excitement, as well as the aching need to find a place to belong.",


Метрики получились низкие, но и надеяться на нормальное качество рекомендаций тут наверное и не стоило, попробуем улучшить. Модель предлагает много книг в рамках одной и той же серии, но честно говоря, не вижу объяснения, почему coverage получается настолько низким, постараемся сделать и его лучше в том числе

In [17]:
!pip install faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.12.0-cp313-cp313-macosx_14_0_arm64.whl.metadata (5.1 kB)
Downloading faiss_cpu-1.12.0-cp313-cp313-macosx_14_0_arm64.whl (3.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.4/3.4 MB[0m [31m1.8 MB/s[0m  [33m0:00:01[0m eta [36m0:00:01[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.12.0


Гибридная рекомендательная система с взвешенным скорингом

Модель вычисляет итоговую релевантность товара для пользователя, объединяя рекомендаций из трех различных источников - с целью попробовать поднять coverage 

1. Итоговый скор $S_{final}$ для юзера $u$ и айтема $i$ формируется как взвешенная сумма нормализованных оценок

$$S_{final}(u, i) = w_{collab} \cdot \text{norm}(S_{collab}) + w_{content} \cdot \text{norm}(S_{content}) + w_{pop} \cdot \text{norm}(S_{pop})$$

2. Составляющие
    * Коллаборативная фильтрация ($S_{collab}$):SVD, обученный на матрице взаимодействий user-item. Вычисляется как скалярное произведение латентных векторов \(p_u \cdot q_i\) (тут только теплые айтемы)

    * Контентная фильтрация ($S_{content}$): усредняем контентные эмбеддинги книг с рейтингом, для ускорения используется FAISS (здесь все айтемы)

    * Наиболее популярные: скор на основе логарифма числа взаимодействий

In [14]:
import faiss 
from sklearn.preprocessing import minmax_scale

class MultiChannelRecommender:
    def __init__(self, n_components=128, random_state=42, weights=None):
        if weights is None:
            self.weights = {'collab': 1.0, 'content': 0.7, 'pop': 0.2}
        else:
            self.weights = weights
            
        self.svd = TruncatedSVD(n_components=n_components, random_state=random_state)
        self.trained = False
        self.user_map, self.warm_item_map, self.all_item_map = None, None, None
        self.user_embeddings, self.warm_item_embeddings = None, None
        self.user_content_profiles, self.all_item_embeddings = None, None
        self.pop_scores = None
        self.train_interactions = None
        self.all_inv_map_array = None
        self.warm_item_indices_in_all = None
        self.faiss_index = None 

    def fit(self, df: pl.DataFrame, items_df: pl.DataFrame):
        interactions_df = df.group_by("user_id").agg(pl.col("item_id"))
        self.train_interactions = dict(zip(interactions_df["user_id"].to_list(), interactions_df["item_id"].to_list()))

        warm_items = df["item_id"].unique().to_list()
        all_items = items_df["item_id"].unique().to_list()
        
        self.user_map = {uid: i for i, uid in enumerate(df["user_id"].unique())}
        self.warm_item_map = {iid: i for i, iid in enumerate(warm_items)}
        self.all_item_map = {iid: i for i, iid in enumerate(all_items)}
        self.all_inv_map_array = np.array(all_items)
        self.warm_item_indices_in_all = np.array([self.all_item_map[iid] for iid in warm_items])

        rows = df["user_id"].replace(self.user_map)
        cols = df["item_id"].replace(self.warm_item_map)
        values = df["rating"] if "rating" in df.columns else np.ones(len(df))
        interactions_matrix = csr_matrix((values, (rows, cols)), shape=(len(self.user_map), len(self.warm_item_map)))
        self.user_embeddings = self.svd.fit_transform(interactions_matrix)
        self.warm_item_embeddings = self.svd.components_.T

        item_emb_map = {row["item_id"]: row["embedding"] for row in items_df.iter_rows(named=True)}
        self.all_item_embeddings = np.array(items_df["embedding"].to_list()).astype('float32')
        
        # нормализуем эмбеддинги для FAISS (L2-норма)
        faiss.normalize_L2(self.all_item_embeddings)
        
        #FAISS индекс
        d = self.all_item_embeddings.shape[1]
        self.faiss_index = faiss.IndexFlatIP(d) 
        self.faiss_index.add(self.all_item_embeddings)
        
        self.user_content_profiles = np.zeros((len(self.user_map), d), dtype='float32')
        for user_id, user_idx in self.user_map.items():
            user_items = self.train_interactions.get(user_id, [])
            if user_items:
                embs = [item_emb_map[iid] for iid in user_items if iid in item_emb_map]
                if embs: self.user_content_profiles[user_idx] = np.mean(embs, axis=0)
        
        # нормализуем профили пользователей
        faiss.normalize_L2(self.user_content_profiles)
        
        pop_counts_df = df['item_id'].value_counts()
        pop_counts = dict(zip(pop_counts_df["item_id"].to_list(), pop_counts_df["count"].to_list()))
        self.pop_scores = np.log1p([pop_counts.get(iid, 0) for iid in all_items]).astype('float32')

        self.trained = True

    def predict(self, df: pl.DataFrame, topn: int = 10, batch_size: int = 2048) -> list[np.ndarray]:
        test_user_ids = df["user_id"].unique().to_list()
        final_preds = [np.array([])] * len(test_user_ids)
        
        user_jobs = [(i, self.user_map.get(uid)) for i, uid in enumerate(test_user_ids)]
        known_user_jobs = [(orig_idx, user_idx) for orig_idx, user_idx in user_jobs if user_idx is not None]

        if not known_user_jobs: return final_preds
        
        original_indices, known_user_indices = zip(*known_user_jobs)
        for i in tqdm(range(0, len(known_user_indices), batch_size), desc="Predicting in batches"):
            batch_user_indices = list(known_user_indices[i : i + batch_size])
            batch_original_indices = list(original_indices[i : i + batch_size])
            batch_size_current = len(batch_user_indices)
            
            collab_scores = self.user_embeddings[batch_user_indices] @ self.warm_item_embeddings.T
            
            #кандидат и их оценки от FAISS
            profiles = self.user_content_profiles[batch_user_indices]
            content_scores_raw, content_indices = self.faiss_index.search(profiles, 50)
            minmax_scale(collab_scores, axis=1, copy=False)
            pop_scores_norm = minmax_scale(self.pop_scores)
            
            final_scores = np.zeros((batch_size_current, len(self.all_item_map)))
            rows = np.arange(batch_size_current)[:, np.newaxis]
            final_scores[rows, content_indices] = self.weights['content'] * content_scores_raw

            #добавляем остальные оценки
            final_scores += self.weights['pop'] * pop_scores_norm
            final_scores[:, self.warm_item_indices_in_all] += self.weights['collab'] * collab_scores

            batch_user_ids = [test_user_ids[orig_idx] for orig_idx in batch_original_indices]
            for j, user_id in enumerate(batch_user_ids):
                seen_items = self.train_interactions.get(user_id, [])
                if seen_items:
                    seen_indices = [self.all_item_map[iid] for iid in seen_items if iid in self.all_item_map]
                    final_scores[j, seen_indices] = -np.inf

            top_indices = np.argpartition(-final_scores, topn, axis=1)[:, :topn]
            recs = self.all_inv_map_array[top_indices]
            
            top_scores = final_scores[np.arange(batch_size_current)[:, None], top_indices]
            sorted_indices = np.argsort(-top_scores, axis=1)
            final_recs = recs[np.arange(batch_size_current)[:, None], sorted_indices]

            for j, orig_idx in enumerate(batch_original_indices):
                final_preds[orig_idx] = final_recs[j]
                
        return final_preds

mc_rec = MultiChannelRecommender(n_components=128)
mc_rec.fit(train, books)

test_users_df = test.select("user_id").unique()
mc_preds = mc_rec.predict(test_users_df)

preds_df = pl.DataFrame({
    "user_id": test_users_df["user_id"],
    "mc_recs": pl.Series(mc_preds)
})

test = test.join(preds_df, on="user_id", how="left")

print(test.head())
evaluate_recommender_modified(df=test, model_preds_col="mc_recs")

Predicting in batches: 100%|██████████| 91/91 [06:18<00:00,  4.16s/it]


shape: (5, 6)
┌────────────────┬────────────────┬────────────────┬───────────────┬───────────────┬───────────────┐
│ user_id        ┆ item_id        ┆ random_recs    ┆ toppopular_re ┆ item_knn_recs ┆ mc_recs       │
│ ---            ┆ ---            ┆ ---            ┆ cs            ┆ ---           ┆ ---           │
│ str            ┆ list[i64]      ┆ array[i64, 10] ┆ ---           ┆ list[i64]     ┆ array[i64,    │
│                ┆                ┆                ┆ array[i64,    ┆               ┆ 10]           │
│                ┆                ┆                ┆ 10]           ┆               ┆               │
╞════════════════╪════════════════╪════════════════╪═══════════════╪═══════════════╪═══════════════╡
│ 00000377eea480 ┆ [13252]        ┆ [21638, 33394, ┆ [4058, 15514, ┆ [17956,       ┆ [17311,       │
│ 21d3002730d56a ┆                ┆ … 13087]       ┆ … 13159]      ┆ 12482, …      ┆ 18150, …      │
│ ca…            ┆                ┆                ┆               ┆ 31641]  

{'ndcg@10': 0.014,
 'recall@10': 0.009,
 'map@10': 0.006,
 'serendipity@10': 0.013,
 'coverage': 0.254}

In [15]:
HTML(dataframe_to_html(books.filter(pl.col("item_id").is_in(test.sample(1)["mc_recs"][0].to_list())), ["item_id", "title", "description", "image_url"]))

item_id,title,description,image_url
8092,Tiger Lily,"Before Peter Pan belonged to Wendy, he belonged to the girl with the crow feather in her hair. . . . Fifteen-year-old Tiger Lily doesn't believe in love stories or happy endings. Then she meets the alluring teenage Peter Pan in the forbidden woods of Neverland and immediately falls under his spell. Peter is unlike anyone she's ever known. Impetuous and brave, he both scares and enthralls her. As the leader of the Lost Boys, the most fearsome of Neverland's inhabitants, Peter is an unthinkable match for Tiger Lily. Soon, she is risking everything--her family, her future--to be with him. When she is faced with marriage to a terrible man in her own tribe, she must choose between the life she's always known and running away to an uncertain future with Peter. With enemies threatening to tear them apart, the lovers seem doomed. But it's the arrival of Wendy Darling, an English girl who's everything Tiger Lily is not, that leads Tiger Lily to discover that the most dangerous enemies can live inside even the most loyal and loving heart. From the New York Times bestselling author of Peaches comes a magical and bewitching story of the romance between a fearless heroine and the boy who wouldn't grow up.",
8303,"Every Day (Every Day, #1)","Every day a different body. Every day a different life. Every day in love with the same girl. There's never any warning about where it will be or who it will be. A has made peace with that, even established guidelines by which to live: Never get too attached. Avoid being noticed. Do not interfere. It's all fine until the morning that A wakes up in the body of Justin and meets Justin's girlfriend, Rhiannon. From that moment, the rules by which A has been living no longer apply. Because finally A has found someone he wants to be with--day in, day out, day after day.",
13726,"Library of Souls (Miss Peregrine’s Peculiar Children, #3)","A New York Times #1 best seller A boy with extraordinary powers. An army of deadly monsters. An epic battle for the future of peculiardom. The adventure that began with Miss Peregrine's Home for Peculiar Childrenand continued in Hollow Citycomes to a thrilling conclusion with Library of Souls. As the story opens, sixteen-year-old Jacob discovers a powerful new ability, and soon he's diving through history to rescue his peculiar companions from a heavily guarded fortress. Accompanying Jacob on his journey are Emma Bloom, a girl with fire at her fingertips, and Addison MacHenry, a dog with a nose for sniffing out lost children. They'll travel from modern-day London to the labyrinthine alleys of Devil's Acre, the most wretched slum in all of Victorian England. It's a place where the fate of peculiar children everywhere will be decided once and for all. Like its predecessors, Library of Soulsblends thrilling fantasy with never-before-published vintage photography to create a one-of-a-kind reading experience. From the Hardcover edition.",
16498,"The Wrath and the Dawn (The Wrath and the Dawn, #1)","This is an alternate cover edition for . One Life to One Dawn. In a land ruled by a murderous boy-king, each dawn brings heartache to a new family. Khalid, the eighteen-year-old Caliph of Khorasan, is a monster. Each night he takes a new bride only to have a silk cord wrapped around her throat come morning. When sixteen-year-old Shahrzad's dearest friend falls victim to Khalid, Shahrzad vows vengeance and volunteers to be his next bride. Shahrzad is determined not only to stay alive, but to end the caliph's reign of terror once and for all. Night after night, Shahrzad beguiles Khalid, weaving stories that enchant, ensuring her survival, though she knows each dawn could be her last. But something she never expected begins to happen: Khalid is nothing like what she'd imagined him to be. This monster is a boy with a tormented heart. Incredibly, Shahrzad finds herself falling in love. How is this possible? It's an unforgivable betrayal. Still, Shahrzad has come to understand all is not as it seems in this palace of marble and stone. She resolves to uncover whatever secrets lurk and, despite her love, be ready to take Khalid's life as retribution for the many lives he's stolen. Can their love survive this world of stories and secrets? Inspired by A Thousand and One Nights, The Wrath and the Dawnis a sumptuous and enthralling read from beginning to end.",
18847,Only Ever Yours,"Where women are created for the pleasure of men, beauty is the first duty of every girl. In Louise O'Neill's world of Only Every Yours women are no longer born naturally, girls (called ""eves"") are raised in Schools and trained in the arts of pleasing men until they come of age. Freida and Isabel are best friends. Now, aged sixteen and in their final year, they expect to be selected as companions--wives to powerful men. All they have to do is ensure they stay in the top ten beautiful girls in their year. The alternatives--life as a concubine, or a chastity (teaching endless generations of girls)--are too horrible to contemplate. But as the intensity of final year takes hold, the pressure to be perfect mounts. Isabel starts to self-destruct, putting her beauty--her only asset--in peril. And then into this sealed female environment, the boys arrive, eager to choose a bride. Freida must fight for her future--even if it means betraying the only friend, the only love, she has ever known.",
23906,"The Naturals (The Naturals, #1)","Seventeen-year-old Cassie is a natural at reading people. Piecing together the tiniest details, she can tell you who you are and what you want. But it's not a skill that she's ever taken seriously. That is, until the FBI come knocking: they've begun a classified program that uses exceptional teenagers to crack infamous cold cases, and they need Cassie. What Cassie doesn't realize is that there's more at risk than a few unsolved homicides--especially when she's sent to live with a group of teens whose gifts are as unusual as her own. Sarcastic, privileged Michael has a knack for reading emotions, which he uses to get inside Cassie's head--and under her skin. Brooding Dean shares Cassie's gift for profiling, but keeps her at arm's length. Soon, it becomes clear that no one in the Naturals program is what they seem. And when a new killer strikes, danger looms closer than Cassie could ever have imagined. Caught in a lethal game of cat and mouse with a killer, the Naturals are going to have to use all of their gifts just to survive.",
26228,"These Broken Stars (Starbound, #1)","It's a night like any other on board the Icarus. Then, catastrophe strikes: the massive luxury spaceliner is yanked out of hyperspace and plummets to the nearest planet. Lilac LaRoux and Tarver Merendsen survive. And they seem to be alone. Lilac is the daughter of the richest man in the universe. Tarver comes from nothing, a young war hero who learned long ago that girls like Lilac are more trouble than they're worth. But with only each other to rely on, Lilac and Tarver must work together, making a tortuous journey across the eerie, deserted terrain to seek help. Then, against all odds, Lilac and Tarver find a strange blessing in the tragedy that has thrown them into each other's arms. Without the hope of a future together in their own world, they begin to wonder - would they be better off staying in this place forever? Everything changes when they uncover the truth behind the chilling whispers that haunt their every step. Lilac and Tarver may find a way off this planet. But they won't be the same people who landed on it. The first in a sweeping science fiction trilogy, These Broken Stars is a timeless love story about hope and survival in the face of unthinkable odds.",
27003,"Six of Crows (Six of Crows, #1)",Ketterdam: a bustling hub of international trade where anything can be had for the right price - and no one knows that better than criminal prodigy Kaz Brekker. Kaz is offered a chance at a deadly heist that could make him rich beyond his wildest dreams. But he can't pull it off alone. A convict with a thirst for revenge. A sharpshooter who can't walk away from a wager. A runaway with a privileged past. A spy known as the Wraith. A Heartrender using her magic to survive the slums. A thief with a gift for unlikely escapes. Six dangerous outcasts. One impossible heist. Kaz's crew is the only thing that might stand between the world and destruction - if they don't kill each other first. Listening Length: 15 hours and 25 minutes,
28589,"Cress (The Lunar Chronicles, #3)","The #1 New York Times Bestselling Series! In this third book in Marissa Meyer's bestselling Lunar Chronicles series, Cinder and Captain Thorne are fugitives on the run, now with Scarlet and Wolf in tow. Together, they're plotting to overthrow Queen Levana and prevent her army from invading Earth. Their best hope lies with Cress, a girl trapped on a satellite since childhood who's only ever had her netscreens as company. All that screen time has made Cress an excellent hacker. Unfortunately, she's being forced to work for Queen Levana, and she's just received orders to track down Cinder and her handsome accomplice. When a daring rescue of Cress goes awry, the group is splintered. Cress finally has her freedom, but it comes at a higher price than she'd ever expected. Meanwhile, Queen Levana will let nothing prevent her marriage to Emperor Kai, especially the cyborg mechanic. Cress, Scarlet, and Cinder may not have signed up to save the world, but they may be the only hope the world has.",
33273,The Rest of Us Just Live Here,"Six starred reviews! A new YA novel from novelist Patrick Ness, author of the Carnegie Medal- and Kate Greenaway Medal-winning A Monster Callsand the critically acclaimed Chaos Walking trilogy, The Rest of Us Just Live Hereis a bold and irreverent novel that powerfully reminds us that there are many different types of remarkable. What if you aren't the Chosen One? The one who's supposed to fight the zombies, or the soul-eating ghosts, or whatever the heck this new thing is, with the blue lights and the death? What if you're like Mikey? Who just wants to graduate and go to prom and maybe finally work up the courage to ask Henna out before someone goes and blows up the high school. Again. Because sometimes there are problems bigger than this week's end of the world, and sometimes you just have to find the extraordinary in your ordinary life. Even if your best friend is worshipped by mountain lions. ALA Best Fiction for Young Adults * Cooperative Children's Book Center CCBC Choice * Michael Printz Award shortlist * Kirkus Best Book of the Year * VOYAPerfect Ten * NYPL Top Ten Best Books of the Year for Teens * Chicago Public Library Best Teen Books of the Year * Publishers Marketplace Buzz Books * ABC Best Books for Children * Bank Street Best Books List",


In [17]:
HTML(dataframe_to_html(books.filter(pl.col("item_id").is_in(test.sample(1)["mc_recs"][0].to_list())), ["item_id", "title", "description", "image_url"]))

item_id,title,description,image_url
1166,"Snow Like Ashes (Snow Like Ashes, #1)","A heartbroken girl. A fierce warrior. A hero in the making. Sixteen years ago the Kingdom of Winter was conquered and its citizens enslaved, leaving them without magic or a monarch. Now, the Winterians' only hope for freedom is the eight survivors who managed to escape, and who have been waiting for the opportunity to steal back Winter's magic and rebuild the kingdom ever since. Orphaned as an infant during Winter's defeat, Meira has lived her whole life as a refugee, raised by the Winterians' general, Sir. Training to be a warrior--and desperately in love with her best friend, and future king, Mather -- she would do anything to help her kingdom rise to power again. So when scouts discover the location of the ancient locket that can restore Winter's magic, Meira decides to go after it herself. Finally, she's scaling towers, fighting enemy soldiers, and serving her kingdom just as she's always dreamed she would. But the mission doesn't go as planned, and Meira soon finds herself thrust into a world of evil magic and dangerous politics - and ultimately comes to realize that her destiny is not, never has been, her own. Sara Raasch's debut fantasy is a lightning-fast tale of loyalty, love, and finding one's destiny.",
5237,"The Revenge of Seven (Lorien Legacies, #5)","The fifth book of the #1 New York Timesbestselling I Am Number Four series! The Garde have suffered an unbearable loss. Number Five has betrayed them. Eight is gone forever. Ella has been kidnapped. The others are now scattered. In Chicago, John makes the unlikeliest of allies: a Mogadorian named Adam, who has switched sides. Adam knows the Mogs secrets, and he knows where to hit them: their command base near Washington, DC. But during the assault, John learns he and the Garde might be too late. The Mogadorians have commenced their ultimate invasion plans. Meanwhile, Six, Nine, and Marina make their way through the Florida Everglades, hot on the trail of the traitorous Five. With the development of a new Legacy, Marina finally has the power to fight back--if her thirst for revenge doesn't consume her first. The Garde may be broken and divided once more, but they will not be defeated. As long as one still stands, the battle for Earth's survival is not lost. Michael Bay, director of Transformers, raved: ""Number Four is a hero for this generation."" This epic young adult series is perfect for fans of action-packed science fiction like The Fifth Wave series by Rick Yancey, The Maze Runner series by James Dashner, and Orson Scott Card's Ender's Game. The battle for Earth's survival wages on. Read the rest of the series: #6: The Fate of Ten, and #7: United as One. Don't miss the first book in the brand-new I Am Number Four spin-off series: Generation One.",
13738,"Half Bad (The Half Bad Trilogy, #1)","approx 8h 36m Wanted by no one. Hunted by everyone. Sixteen-year-old Nathan lives in a cage: beaten, shackled, trained to kill. In a modern-day England where two warring factions of witches live amongst humans, Nathan is an abomination, the illegitimate son of the world's most terrifying and violent witch, Marcus. Nathan's only hope for survival is to escape his captors, track down Marcus, and receive the three gifts that will bring him into his own magical powers--before it's too late. But how can Nathan find his father when there is no one safe to trust, not even family, not even the girl he loves? Half Badis an international sensation and the start of a brilliant trilogy: a gripping tale of alienation and the indomitable will to survive.",
13774,"Storm Siren (Storm Siren, #1)","""I raise my chin as the buyers stare. Yes. Look. You don't want me. Because, eventually, accidentally, I will destroy you."" In a world at war, a slave girl's lethal curse could become one kingdom's weapon of salvation. If the curse--and the girl--can be controlled. As a slave in the war-weary kingdom of Faelen, seventeen-year-old Nym isn't merely devoid of rights, her Elemental kind are only born male and always killed at birth -- meaning, she shouldn't even exist. Standing on the auction block beneath smoke-drenched mountains, Nym faces her fifteenth sell. But when her hood is removed and her storm-summoning killing curse revealed, Nym is snatched up by a court advisor and given a choice: be trained as the weapon Faelen needs to win the war, or be killed. Choosing the former, Nym is unleashed into a world of politics, bizarre parties, and rumors of an evil more sinister than she's being prepared to fight . . . not to mention the handsome trainer whose dark secrets lie behind a mysterious ability to calm every lightning strike she summons. But what if she doesn't want to be the weapon they've all been waiting for? Set in a beautifully eclectic world of suspicion, super abilities, and monsters, Storm Siren is a story of power. And whoever controls that power will win.",
13817,"This Shattered World (Starbound, #2)","Jubilee Chase and Flynn Cormac should never have met. Lee is captain of the forces sent to Avon to crush the terraformed planet's rebellious colonists, but she has her own reasons for hating the insurgents. Rebellion is in Flynn's blood. His sister died in the original uprising against the powerful corporate conglomerate that rules Avon with an iron fist. These corporations make their fortune by terraforming uninhabitable planets across the universe and recruiting colonists to make the planets livable, with the promise of a better life for their children. But they never fulfilled their promise on Avon, and decades later, Flynn is leading the rebellion. Desperate for any advantage against the military occupying his home, Flynn does the only thing that makes sense when he and Lee cross paths: he returns to base with her as prisoner. But as his fellow rebels prepare to execute this tough-talking girl with nerves of steel, Flynn makes another choice that will change him forever. He and Lee escape base together, caught between two sides in a senseless war. The stunning second novel in the Starbound trilogy is an unforgettable story of love and forgiveness in a world torn apart by war.",
14656,"Unspoken (The Lynburn Legacy, #1)","A modern, magical twist on the Gothic Romance and Girl Detective genres, this book will appeal to fans of both Beautiful Creaturesand the Mortal Instrumentsseries. Reviewers have praised the take-charge heroine and the spellbinding romance. Bound together. Worlds apart. Kami Glass is in love with someone she's never met--a boy she's talked to in her head since she was born. This has made her an outsider in the sleepy English town of Sorry-in-the-Vale, but she has learned ways to turn that to her advantage. Her life seems to be in order, until disturbing events begin to occur. There has been screaming in the woods and the manor overlooking the town has lit up for the first time in 10 years. . . . The Lynburn family, who ruled the town a generation ago and who all left without warning, have returned. Now Kami can see that the town she has known and loved all her life is hiding a multitude of secrets--and a murderer. The key to it all just might be the boy in her head. The boy she thought was imaginary is real, and definitely and deliciously dangerous. ""A sparklingfantasy that will make you laughand break your heart."" --Cassandra Clare, New York Timesbestselling author ""A darkly funny, deliciously thrilling Gothic."" --Kelley Armstrong, New York Timesbestselling author ""Readers will laugh, shiver, and maybe even swoonover this modern Gothic novel."" --Melissa Marr, New York Timesbestselling author ""Breathtaking--a compulsive, rocketing read.""--Tamora Pierce, New York Timesbestselling author ""Captures the reader with true magic.""--Esther Friesner, author of Nobody's Princess ""A laugh-out-loud delight."" --Publishers Weekly From the Hardcover edition.",
27796,"The Retribution of Mara Dyer (Mara Dyer, #3)","It had to end sometime, but Mara had no idea it would end like this. Experience the mind-blowing conclusion to the New York Times bestselling Mara Dyer trilogy. Mara Dyer wants to believe there's more to the lies she's been told. There is. She doesn't stop to think about where her quest for the truth might lead. She should. She never had to imagine how far she would go for vengeance. She will now. Loyalties are betrayed, guilt and innocence tangle, and fate and chance collide in this shocking conclusion to Mara Dyer's story. Retribution has arrived.",
28505,"Dreams of Gods & Monsters (Daughter of Smoke & Bone, #3)","The incredible final novel in Laini Taylor's breathtakingly imaginative Daughter of Smoke and Bone series, DREAMS OF GODS AND MONSTERS. By way of a staggering deception, Karou has taken control of the chimaera rebellion and is intent on steering its course away from dead-end vengeance. The future rests on her, if there can even be a future for the chimaera in war-ravaged Eretz. Common enemy, common cause. When Jael's brutal seraph army trespasses into the human world, the unthinkable becomes essential, and Karou and Akiva must ally their enemy armies against the threat. It is a twisted version of their long-ago dream, and they begin to hope that it might forge a way forward for their people. And, perhaps, for themselves. Toward a new way of living, and maybe even love. But there are bigger threats than Jael in the offing. A vicious queen is hunting Akiva, and, in the skies of Eretz ... something is happening. Massive stains are spreading like bruises from horizon to horizon; the great winged stormhunters are gathering as if summoned, ceaselessly circling, and a deep sense of wrong pervades the world. What power can bruise the sky From the streets of Rome to the caves of the Kirin and beyond, humans, chimaera and seraphim will fight, strive, love, and die in an epic theater that transcends good and evil, right and wrong, friend and enemy. At the very barriers of space and time, what do gods and monsters dream of? And does anything else matter?",
29065,"The Queen of Zombie Hearts (White Rabbit Chronicles, #3)","We're all mad here. I thought I had nothing left to give. I thought wrong. They started the war. Now I will end it. In the stunning conclusion to the wildly popular White Rabbit Chronicles, Alice 'Ali' Bell thinks the worst is behind her. She's ready to take the next step with boyfriend Cole Holland, the leader of the zombie slayers...until Anima Industries, the agency controlling the zombies, launches a sneak attack, killing four of her friends. It's then she realizes that humans can be more dangerous than monsters...and the worst has only begun. As the surviving slayers prepare for war, Ali discovers she, too, can control the zombies...and she isn't the girl she thought she was. She's connected to the woman responsible for killing and turning Cole's mother. How can their relationship endure? As secrets come to light, and more slayers are taken or killed, Ali will fight harder than ever to bring down Anima even sacrificing her own life for those she loves. ""I enjoyed every moment of this book... If you want a completely new twist on zombies, then pick up the White Rabbit Chronicles. You won't be sorry!"" USA Today's Happy Ever After blog on Alice in Zombieland",
31381,"Shatter Me (Shatter Me, #1)","This is an alternate cover edition for ASIN B00526VVN2. I have a curse I have a gift I am a monster I'm more than human My touch is lethal My touch is power I am their weapon I will fight back Juliette hasn't touched anyone in exactly 264 days. The last time she did, it was an accident, but The Reestablishment locked her up for murder. No one knows why Juliette's touch is fatal. As long as she doesn't hurt anyone else, no one really cares. The world is too busy crumbling to pieces to pay attention to a 17-year-old girl. Diseases are destroying the population, food is hard to find, birds don't fly anymore, and the clouds are the wrong color. The Reestablishment said their way was the only way to fix things, so they threw Juliette in a cell. Now so many people are dead that the survivors are whispering war - and The Reestablishment has changed its mind. Maybe Juliette is more than a tortured soul stuffed into a poisonous body. Maybe she's exactly what they need right now. Juliette has to make a choice: Be a weapon. Or be a warrior.",


Пока, мне кажется, наиболее удачное сочетание метрик получилось - coverage стал сильно более адекватным, по recall, precision, ndcg тоже улучшили обычный SVD, но в меньшей степени. Глазами видно, что рекомендации разнообразные и вроде бы адекватные, хотя и разрозненные и с уклоном в мистику/детективы/триллеры. Попробую чуть подкрутить веса

In [18]:
mc_rec = MultiChannelRecommender(n_components=128, weights={'collab': 0.8, 'content': 0.8, 'pop': 0.3})
mc_rec.fit(train, books)

test_users_df = test.select("user_id").unique()
mc_preds = mc_rec.predict(test_users_df)

preds_df = pl.DataFrame({
    "user_id": test_users_df["user_id"],
    "mc_recs_2": pl.Series(mc_preds)
})

test = test.join(preds_df, on="user_id", how="left")

print(test.head())
evaluate_recommender_modified(df=test, model_preds_col="mc_recs_2")

Predicting in batches: 100%|██████████| 91/91 [10:41<00:00,  7.05s/it]


shape: (5, 7)
┌──────────────┬─────────────┬─────────────┬─────────────┬─────────────┬─────────────┬─────────────┐
│ user_id      ┆ item_id     ┆ random_recs ┆ toppopular_ ┆ item_knn_re ┆ mc_recs     ┆ mc_recs_2   │
│ ---          ┆ ---         ┆ ---         ┆ recs        ┆ cs          ┆ ---         ┆ ---         │
│ str          ┆ list[i64]   ┆ array[i64,  ┆ ---         ┆ ---         ┆ array[i64,  ┆ array[i64,  │
│              ┆             ┆ 10]         ┆ array[i64,  ┆ list[i64]   ┆ 10]         ┆ 10]         │
│              ┆             ┆             ┆ 10]         ┆             ┆             ┆             │
╞══════════════╪═════════════╪═════════════╪═════════════╪═════════════╪═════════════╪═════════════╡
│ 00000377eea4 ┆ [13252]     ┆ [21638,     ┆ [4058,      ┆ [17956,     ┆ [17311,     ┆ [1164,      │
│ 8021d3002730 ┆             ┆ 33394, …    ┆ 15514, …    ┆ 12482, …    ┆ 18150, …    ┆ 21429, …    │
│ d56aca…      ┆             ┆ 13087]      ┆ 13159]      ┆ 31641]      ┆ 1831

{'ndcg@10': 0.015,
 'recall@10': 0.009,
 'map@10': 0.007,
 'serendipity@10': 0.014,
 'coverage': 0.246}

В целом, примерно то же самое

In [24]:
HTML(dataframe_to_html(books.filter(pl.col("item_id").is_in(test.sample(1)["mc_recs_2"][0].to_list())), ["item_id", "title", "description", "image_url"]))

item_id,title,description,image_url
2804,"The Unbecoming of Mara Dyer (Mara Dyer, #1)","Mara Dyer doesn't think life can get any stranger. She wakes from a coma in hospital with no memory of how she got there or of the bizarre accident that caused the deaths of her best friends and her boyfriend, yet left her mysteriously unharmed. The doctors suggest that starting over in a new city, a new school, would be good for her and just to let the memories gradually come back on their own. But Mara's new start is anything but comforting. She sees the faces of her dead friends everywhere, and when she suddenly begins to see other people's deaths right before they happen, Mara wonders whether she's going crazy! And if dealing with all this wasn't enough, Noah Shaw, the most beautiful boy she has ever seen can't seem to leave her alone... but as her life unravels around her, Mara can't help but wonder if Noah has another agenda altogether...",
9454,"Ruin and Rising (The Grisha, #3)","The capital has fallen. The Darkling rules Ravka from his shadow throne. Now the nation's fate rests with a broken Sun Summoner, a disgraced tracker, and the shattered remnants of a once-great magical army. Deep in an ancient network of tunnels and caverns, a weakened Alina must submit to the dubious protection of the Apparat and the zealots who worship her as a Saint. Yet her plans lie elsewhere, with the hunt for the elusive firebird and the hope that an outlaw prince still survives. Alina will have to forge new alliances and put aside old rivalries as she and Mal race to find the last of Morozova's amplifiers. But as she begins to unravel the Darkling's secrets, she reveals a past that will forever alter her understanding of the bond they share and the power she wields. The firebird is the one thing that stands between Ravka and destruction--and claiming it could cost Alina the very future she's fighting for. Ruin and Risingis the thrilling final installment in Leigh Bardugo's Grisha Trilogy.",
16096,"Siege and Storm (The Grisha, #2)","The second book in The Grisha Trilogy by the #1 New York Timesbestselling author of Six of Crows. Darkness never dies. Hunted across the True Sea, haunted by the lives she took on the Fold, Alina must try to make a life with Mal in an unfamiliar land, all while keeping her identity as the Sun Summoner a secret. But she can't outrun her past or her destiny for long. The Darkling has emerged from the Shadow Fold with a terrifying new power and a dangerous plan that will test the very boundaries of the natural world. With the help of a notorious privateer, Alina returns to the country she abandoned, determined to fight the forces gathering against Ravka. But as her power grows, Alina slips deeper into the Darkling's game of forbidden magic, and farther away from Mal. Somehow, she will have to choose between her country, her power, and the love she always thought would guide her--or risk losing everything to the oncoming storm. Siege and Storm is the second book in The Grisha Trilogy by Leigh Bardugo. Book one, Shadow and Bone, is a New York Timesbestseller, and book 3, Ruin and Rising, is an Amazon Best Book of the Year and a USA Todaybestseller. This title has Common Core connections, and this deluxe paperback edition features bonus materials like an interview with Leigh Bardugo, a bonus short story, ""The Tailor,"" and more.",
17256,"Shadow and Bone (The Grisha, #1)","Alina Starkov doesn't expect much from life. Orphaned by the Border Wars, the one thing she could rely on was her best friend and fellow refugee, Mal. And lately not even that seems certain. Drafted into the army of their war-torn homeland, they're sent on a dangerous mission into the Fold, a swath of unnatural darkness crawling with monsters who feast on human flesh. When their convoy is attacked, all seems lost until Alina reveals a dormant power that not even she knew existed. Ripped from everything she knows, she is whisked away to the royal court to be trained as a member of the Grisha, magical elite led by the mysterious Darkling. He believes she is the answer the people have been waiting for: the one person with the power to destroy the Fold. Swept up in a world of luxury and illusion, envied as the Darkling's favorite, Alina struggles to fit into her new life without Mal by her side. But as the threat to the kingdom mounts, Alina discovers a secret that sets her on a collision course with the most powerful forces in the kingdom. Now only her past can save her... and only she can save the future. An unabridged recording on 8 CDs (8 hours, 55 minutes).",
26228,"These Broken Stars (Starbound, #1)","It's a night like any other on board the Icarus. Then, catastrophe strikes: the massive luxury spaceliner is yanked out of hyperspace and plummets to the nearest planet. Lilac LaRoux and Tarver Merendsen survive. And they seem to be alone. Lilac is the daughter of the richest man in the universe. Tarver comes from nothing, a young war hero who learned long ago that girls like Lilac are more trouble than they're worth. But with only each other to rely on, Lilac and Tarver must work together, making a tortuous journey across the eerie, deserted terrain to seek help. Then, against all odds, Lilac and Tarver find a strange blessing in the tragedy that has thrown them into each other's arms. Without the hope of a future together in their own world, they begin to wonder - would they be better off staying in this place forever? Everything changes when they uncover the truth behind the chilling whispers that haunt their every step. Lilac and Tarver may find a way off this planet. But they won't be the same people who landed on it. The first in a sweeping science fiction trilogy, These Broken Stars is a timeless love story about hope and survival in the face of unthinkable odds.",
26920,"The Kiss of Deception (The Remnant Chronicles, #1)","A princess must find her place in a reborn world. She flees on her wedding day. She steals ancient documents from the Chancellor's secret collection. She is pursued by bounty hunters sent by her own father. She is Princess Lia, seventeen, First Daughter of the House of Morrighan. The Kingdom of Morrighan is steeped in tradition and the stories of a bygone world, but some traditions Lia can't abide. Like having to marry someone she's never met to secure a political alliance. Fed up and ready for a new life, Lia flees to a distant village on the morning of her wedding. She settles in among the common folk, intrigued when two mysterious and handsome strangers arrive--and unaware that one is the jilted prince and the other an assassin sent to kill her. Deceptions swirl and Lia finds herself on the brink of unlocking perilous secrets--secrets that may unravel her world--even as she feels herself falling in love.",
27796,"The Retribution of Mara Dyer (Mara Dyer, #3)","It had to end sometime, but Mara had no idea it would end like this. Experience the mind-blowing conclusion to the New York Times bestselling Mara Dyer trilogy. Mara Dyer wants to believe there's more to the lies she's been told. There is. She doesn't stop to think about where her quest for the truth might lead. She should. She never had to imagine how far she would go for vengeance. She will now. Loyalties are betrayed, guilt and innocence tangle, and fate and chance collide in this shocking conclusion to Mara Dyer's story. Retribution has arrived.",
28025,"The Assassin's Blade (Throne of Glass, #0.1-0.5)","Discover where Celaena Sardothien's thrilling saga began Celaena Sardothien is her kingdom's most feared assassin. Though she works for the powerful and ruthless Assassin's Guild, Celaena yields to no one and trusts only her fellow killer-for-hire, Sam. When Celaena's scheming master, Arobynn Hamel, dispatches her on missions that take her from remote islands to hostile deserts, she finds herself acting independently of his wishes--and questioning her own allegiance. Along the way, she makes friends and enemies alike, and discovers that she feels far more for Sam than just friendship. But by defying Arobynn's orders, Celaena risks unimaginable punishment, and with Sam by her side, he is in danger, too. They will have to risk it all if they hope to escape Arobynn's clutches--and if they fail, they'll lose not just a chance at freedom, but their lives... A prequel to Throne of Glass, this collection of five novellas offers listeners a deeper look into the history of this cunning assassin and her enthralling--and deadly--world. Included in this volume: The Assassin and the Pirate Lord The Assassin and the Healer The Assassin and the Desert The Assassin and the Underworld The Assassin and the Empire",
29366,"Monsters of Men (Chaos Walking, #3)","In the riveting conclusion to the acclaimed dystopian series, a boy and girl caught in the chaos of war face devastating choices that will decide the fate of a world. As a world-ending war surges around them, Todd and Viola face monstrous decisions. The indigenous Spackle, thinking and acting as one, have mobilized to avenge their murdered people. Ruthless human leaders prepare to defend their factions at all costs, even as a convoy of new settlers approaches. And as the ceaseless Noise lays all thoughts bare, the projected will of the few threatens to overwhelm the desperate desire of the many. The consequences of each action, each word, are unspeakably vast: To follow a tyrant or a terrorist? To save the life of the one you love most, or thousands of strangers? To believe in redemption, or assume it is lost? Becoming adults amid the turmoil, Todd and Viola question all they have known, racing through horror and outrage toward a shocking finale.",
32892,"The Crown of Embers (Fire and Thorns, #2)","The second book in Rae Carson's award-winning The Girl of Fire and Thorns fantasy trilogy, perfect for fans of Game of Thronesand Kristin Cashore. Tamora Pierce called the first book, The Girl of Fire and Thorns, ""A unique and engrossing read!"" A seventeen-year-old princess turned war queen faces sorcery, adventure, untold power, and romance as she fulfills her epic destiny. In The Girl of Fire and Thorns, Elisa won the war. She saved her kingdom. But no one prepared her for how hard it is to recover from a battle, or to rule a people who still don't trust her. She's still fighting--against assassination attempts and more--and her enemies lie both outside her court and within it. So Elisa will cross the ocean in search of the perilous, uncharted, and mythical source of the Godstone's power. With her go a one-eyed warrior, a loyal friend, an enemy defector, and the man she is falling in love with. A breathtaking, romantic, and dangerous second volume to Rae Carson's ambitious trilogy. Cinda Williams Chima proclaimed about the first book, The Girl of Fire and Thorns, ""I LOVED this book!"" and Veronica Roth agreed, saying, ""Definitely recommended.""",


In [25]:
HTML(dataframe_to_html(books.filter(pl.col("item_id").is_in(test.sample(1)["mc_recs_2"][0].to_list())), ["item_id", "title", "description", "image_url"]))

item_id,title,description,image_url
3395,"Twilight / Life and Death (Twilight, #1, 1.75)","WHEN ISABELLA SWAN MOVES TO THEgloomy town of Forks and meets the mysterious, alluring Edward Cullen, her life takes a thrilling and terrifying turn. With his porcelain skin, golden eyes, mesmerizing voice, and supernatural gifts, Edward is both irresistible and enigmatic. WHAT BELLA DOESN'T REALIZE IS THE CLOSERshe gets to him, the more she is putting herself and those around her at risk. And, it might be too late to turn back... DEEPLY SEDUCTIVE AND EXTRAORDINARILYsuspenseful, Twilighthas enraptured millions and become a modern classic, redefining genres within young adult literature and inspiring a phenomenon that has had readers yearning for more. This special tenth anniversary dual edition includes a foreword by the author as well as a complete reimagining of the original novel. Turn this book over to read Life and Death. / WHEN BEAUFORT SWAN MOVES TO THEgloomy town of Forks and meets the mysterious, alluring Edythe Cullen, his life takes a thrilling and terrifying turn. With her porcelain skin, golden eyes, mesmerizing voice, and supernatural gifts, Edythe is both irresistible and enigmatic. WHAT BEAU DOESN'T REALIZE IS THEcloser he gets to her, the more he is putting himself and those around him at risk. And, it might be too late to turn back... IN CELEBRATION OF THE TENTH ANNIVERSARYof Twilight, Stephenie Meyer has crafted Life and Death, a bold and compelling reimagining of the iconic love story that will surprise and enthrall readers. This special dual edition includes a foreword by the author as well as the complete original novel. Turn this book over to read Twilight.",
9238,Sanctum (Asylum #2),"In this chilling, fast-paced sequel to the New York Timesbestselling Asylum, which Publishers Weeklycalled ""a strong YA debut that reveals the enduring impact of buried trauma on a place,"" three teens must return to the asylum that still haunts their dreams to end the nightmare once and for all. With the page-turning suspense and horror that made Asylumsuch a standout, and featuring found photographs from real vintage carnivals, Sanctumis a mind-bending reading experience that's perfect for fans of the smash hit Miss Peregrine's Home for Peculiar Children. Dan, Abby, and Jordan remain traumatized by the summer they shared in the Brookline asylum. Much as they'd love to move on, many questions remain, and someone is determined to keep the terror alive, sending the teens photos of an old-timey carnival, with no note and no name. Forsaking their plan never to go back, the teens return to New Hampshire College under the guise of a weekend for prospective students, and there they realize that the carnival from the photos is not only real, it's here on campus, apparently for the first time in many years. Sneaking away from sample classes and college parties, Dan and his friends lead a tour of their own--one through the abandoned houses and hidden places of a surrounding town. Camford is hiding a terrible past, and the truth behind Dan's connection to the asylum's evil warden is more terrifying than Dan ever imagined.",
9454,"Ruin and Rising (The Grisha, #3)","The capital has fallen. The Darkling rules Ravka from his shadow throne. Now the nation's fate rests with a broken Sun Summoner, a disgraced tracker, and the shattered remnants of a once-great magical army. Deep in an ancient network of tunnels and caverns, a weakened Alina must submit to the dubious protection of the Apparat and the zealots who worship her as a Saint. Yet her plans lie elsewhere, with the hunt for the elusive firebird and the hope that an outlaw prince still survives. Alina will have to forge new alliances and put aside old rivalries as she and Mal race to find the last of Morozova's amplifiers. But as she begins to unravel the Darkling's secrets, she reveals a past that will forever alter her understanding of the bond they share and the power she wields. The firebird is the one thing that stands between Ravka and destruction--and claiming it could cost Alina the very future she's fighting for. Ruin and Risingis the thrilling final installment in Leigh Bardugo's Grisha Trilogy.",
14106,"The Sword of Summer (Magnus Chase and the Gods of Asgard, #1)","Magnus Chase has seen his share of trouble. Ever since that terrible night two years ago when his mother told him to run, he has lived alone on the streets of Boston, surviving by his wits, staying one step ahead of the police and truant officers. One day, he's tracked down by an uncle he barely knows-a man his mother claimed was dangerous. Uncle Randolph tells him an impossible secret: Magnus is the son of a Norse god. The Viking myths are true. The gods of Asgard are preparing for war. Trolls, giants, and worse monsters are stirring for doomsday. To prevent Ragnarok, Magnus must search the Nine Worlds for a weapon that has been lost for thousands of years. When an attack by fire giants forces him to choose between his own safety and the lives of hundreds of innocents, Magnus makes a fatal decision. Sometimes, the only way to start a new life is to die . . .",
16096,"Siege and Storm (The Grisha, #2)","The second book in The Grisha Trilogy by the #1 New York Timesbestselling author of Six of Crows. Darkness never dies. Hunted across the True Sea, haunted by the lives she took on the Fold, Alina must try to make a life with Mal in an unfamiliar land, all while keeping her identity as the Sun Summoner a secret. But she can't outrun her past or her destiny for long. The Darkling has emerged from the Shadow Fold with a terrifying new power and a dangerous plan that will test the very boundaries of the natural world. With the help of a notorious privateer, Alina returns to the country she abandoned, determined to fight the forces gathering against Ravka. But as her power grows, Alina slips deeper into the Darkling's game of forbidden magic, and farther away from Mal. Somehow, she will have to choose between her country, her power, and the love she always thought would guide her--or risk losing everything to the oncoming storm. Siege and Storm is the second book in The Grisha Trilogy by Leigh Bardugo. Book one, Shadow and Bone, is a New York Timesbestseller, and book 3, Ruin and Rising, is an Amazon Best Book of the Year and a USA Todaybestseller. This title has Common Core connections, and this deluxe paperback edition features bonus materials like an interview with Leigh Bardugo, a bonus short story, ""The Tailor,"" and more.",
16498,"The Wrath and the Dawn (The Wrath and the Dawn, #1)","This is an alternate cover edition for . One Life to One Dawn. In a land ruled by a murderous boy-king, each dawn brings heartache to a new family. Khalid, the eighteen-year-old Caliph of Khorasan, is a monster. Each night he takes a new bride only to have a silk cord wrapped around her throat come morning. When sixteen-year-old Shahrzad's dearest friend falls victim to Khalid, Shahrzad vows vengeance and volunteers to be his next bride. Shahrzad is determined not only to stay alive, but to end the caliph's reign of terror once and for all. Night after night, Shahrzad beguiles Khalid, weaving stories that enchant, ensuring her survival, though she knows each dawn could be her last. But something she never expected begins to happen: Khalid is nothing like what she'd imagined him to be. This monster is a boy with a tormented heart. Incredibly, Shahrzad finds herself falling in love. How is this possible? It's an unforgivable betrayal. Still, Shahrzad has come to understand all is not as it seems in this palace of marble and stone. She resolves to uncover whatever secrets lurk and, despite her love, be ready to take Khalid's life as retribution for the many lives he's stolen. Can their love survive this world of stories and secrets? Inspired by A Thousand and One Nights, The Wrath and the Dawnis a sumptuous and enthralling read from beginning to end.",
17206,"Never Fade (The Darkest Minds, #2)","I used to dream about turning back time, about reclaiming the things I'd lost and the person I used to be. But not anymore. Ruby never asked for the abilities that almost cost her her life. Now she must call upon them on a daily basis, leading dangerous missions to bring down a corrupt government and breaking into the minds of her enemies. Other kids in the Children's League call Ruby ""Leader"", but she knows what she really is: a monster. When Ruby is entrusted with an explosive secret, she must embark on her most dangerous mission yet: leaving the Children's League behind. Crucial information about the disease that killed most of America's children--and turned Ruby and the others who lived into feared and hated outcasts -- has survived every attempt to destroy it. But the truth is only saved in one place: a flashdrive in the hands of Liam Stewart, the boy Ruby once believed was her future -- and who now wouldn't recognize her. As Ruby sets out across a desperate, lawless country to find Liam -- and answers about the catastrophe that has ripped both her life and America apart -- she is torn between old friends and the promise she made to serve the League. Ruby will do anything to protect the people she loves. But what if winning the war means losing herself?",
22565,"The Young Elites (The Young Elites, #1)","I am tired of being used, hurt, and cast aside. Adelina Amouteru is a survivor of the blood fever. A decade ago, the deadly illness swept through her nation. Most of the infected perished, while many of the children who survived were left with strange markings. Adelina's black hair turned silver, her lashes went pale, and now she has only a jagged scar where her left eye once was. Her cruel father believes she is a malfetto, an abomination, ruining their family's good name and standing in the way of their fortune. But some of the fever's survivors are rumored to possess more than just scars--they are believed to have mysterious and powerful gifts, and though their identities remain secret, they have come to be called the Young Elites. Teren Santoro works for the king. As Leader of the Inquisition Axis, it is his job to seek out the Young Elites, to destroy them before they destroy the nation. He believes the Young Elites to be dangerous and vengeful, but it's Teren who may possess the darkest secret of all. Enzo Valenciano is a member of the Dagger Society. This secret sect of Young Elites seeks out others like them before the Inquisition Axis can. But when the Daggers find Adelina, they discover someone with powers like they've never seen. Adelina wants to believe Enzo is on her side, and that Teren is the true enemy. But the lives of these three will collide in unexpected ways, as each fights a very different and personal battle. But of one thing they are all certain: Adelina has abilities that shouldn't belong in this world. A vengeful blackness in her heart. And a desire to destroy all who dare to cross her. It is my turn to use. My turn to hurt.",
26228,"These Broken Stars (Starbound, #1)","It's a night like any other on board the Icarus. Then, catastrophe strikes: the massive luxury spaceliner is yanked out of hyperspace and plummets to the nearest planet. Lilac LaRoux and Tarver Merendsen survive. And they seem to be alone. Lilac is the daughter of the richest man in the universe. Tarver comes from nothing, a young war hero who learned long ago that girls like Lilac are more trouble than they're worth. But with only each other to rely on, Lilac and Tarver must work together, making a tortuous journey across the eerie, deserted terrain to seek help. Then, against all odds, Lilac and Tarver find a strange blessing in the tragedy that has thrown them into each other's arms. Without the hope of a future together in their own world, they begin to wonder - would they be better off staying in this place forever? Everything changes when they uncover the truth behind the chilling whispers that haunt their every step. Lilac and Tarver may find a way off this planet. But they won't be the same people who landed on it. The first in a sweeping science fiction trilogy, These Broken Stars is a timeless love story about hope and survival in the face of unthinkable odds.",
34228,"Miss Peregrine’s Home for Peculiar Children (Miss Peregrine’s Peculiar Children, #1)","Librarian's Note: This is alternate cover edition #1 ISBN 10: 1594744769 ISBN13: 9781594744761 See:  A mysterious island. An abandoned orphanage. A strange collection of curious photographs. A horrific family tragedy sets sixteen-year-old Jacob journeying to a remote island off the coast of Wales, where he discovers the crumbling ruins of Miss Peregrine's Home for Peculiar Children. As Jacob explores its abandoned bedrooms and hallways, it becomes clear that the children were more than just peculiar. They may have been dangerous. They may have been quarantined on a deserted island for good reason. And somehow--impossible though it seems--they may still be alive. A spine-tingling fantasy illustrated with haunting vintage photography. Note: All information herein, such as number of pages, publisher, etc., refer to this alternate cover edition and may or may not coincide with the main entry for this ISBN or any other alternate covers. ~",


## 3: Линейные модели: EASE (SANSA)

In [28]:
!pip install numpy==2.2

Collecting numpy==2.2
  Downloading numpy-2.2.0-cp313-cp313-macosx_14_0_arm64.whl.metadata (62 kB)
Downloading numpy-2.2.0-cp313-cp313-macosx_14_0_arm64.whl (5.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.1/5.1 MB[0m [31m3.2 MB/s[0m  [33m0:00:02[0m eta [36m0:00:01[0m
[?25hInstalling collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 2.2.6
    Uninstalling numpy-2.2.6:
      Successfully uninstalled numpy-2.2.6
Successfully installed numpy-2.2.0


In [10]:
#EASE (Embarrassingly Shallow Autoencoder) через SANSA
from scipy.sparse import csr_matrix
from sansa import SANSA, SANSAConfig, ICFGramianFactorizerConfig, UMRUnitLowerTriangleInverterConfig

class EASERecommender:
    def __init__(self, l2=500.0, weight_matrix_density=7e-5):
        factorizer_config = ICFGramianFactorizerConfig()
        inverter_config = UMRUnitLowerTriangleInverterConfig()
        config = SANSAConfig(
            l2=l2,
            weight_matrix_density=weight_matrix_density,
            gramian_factorizer_config=factorizer_config,
            lower_triangle_inverter_config=inverter_config
        )
        self.model = SANSA(config)
        self.trained = False
        
        self.user_map = None
        self.item_map = None
        self.train_interactions = None
        self.item_inv_map_array = None
        self.popular_items_indices = None

    def fit(self, df: pl.DataFrame):
        interactions_df = df.group_by("user_id").agg(pl.col("item_id"))
        self.train_interactions = dict(zip(
            interactions_df["user_id"].to_list(),
            interactions_df["item_id"].to_list()
        ))
        
        self.user_map = {uid: i for i, uid in enumerate(df["user_id"].unique())}
        self.item_map = {iid: i for i, iid in enumerate(df["item_id"].unique())}
        
        item_inv_map = {i: iid for iid, i in self.item_map.items()}
        self.item_inv_map_array = np.array([item_inv_map[i] for i in range(len(item_inv_map))])

        popular_items = df['item_id'].value_counts().head(20)['item_id'].to_list()
        self.popular_items_indices = [self.item_map[iid] for iid in popular_items if iid in self.item_map]

        rows = df["user_id"].replace(self.user_map)
        cols = df["item_id"].replace(self.item_map)
        values = np.ones(len(df))
        
        interactions_matrix = csr_matrix((values, (rows, cols)), shape=(len(self.user_map), len(self.item_map)))
        self.model.fit(interactions_matrix)
        self.trained = True

    def predict(self, df: pl.DataFrame, topn: int = 10) -> list[np.ndarray]:
        assert self.trained
        
        test_user_ids = df["user_id"].unique().to_list()
        
        test_user_map = {uid: i for i, uid in enumerate(test_user_ids)}
        
        rows, cols, data = [], [], []
        for user_id, row_idx in test_user_map.items():
            user_history = self.train_interactions.get(user_id, [])
            
            #дополняем короткие истории популярными 
            if 0 < len(user_history) < topn:
                history_indices = {self.item_map[iid] for iid in user_history if iid in self.item_map}
                
                #добавляем популярные, пока история не станет достаточно длинной
                for pop_idx in self.popular_items_indices:
                    if len(history_indices) >= topn: break
                    history_indices.add(pop_idx)
                
                for item_idx in history_indices:
                    rows.append(row_idx)
                    cols.append(item_idx)
                    data.append(1)
            
            #для пользователей с достаточной историей или без оставляем как есть
            elif len(user_history) >= topn:
                 for item_id in user_history:
                    if item_id in self.item_map:
                        rows.append(row_idx)
                        cols.append(self.item_map[item_id])
                        data.append(1)

        test_interactions_matrix = csr_matrix(
            (data, (rows, cols)),
            shape=(len(test_user_ids), len(self.item_map))
        )
        
        top_k_indices, _ = self.model.recommend(test_interactions_matrix, k=topn, mask_input=True)
        recommendations = self.item_inv_map_array[top_k_indices]
        return list(recommendations)

In [11]:
ease_rec = EASERecommender()
ease_rec.fit(train)
test_users_df = test.select("user_id").unique()
ease_preds = ease_rec.predict(test_users_df)

preds_df = pl.DataFrame({
    "user_id": test_users_df["user_id"],
    "ease_recs": pl.Series(ease_preds)
})

test = test.join(preds_df, on="user_id", how="left")

INFO:sansa.model:Computing column norms of X^TX...
INFO:sansa.model:Scaling columns of X by computed norms...
INFO:sansa.model:Computing LDL^T decomposition of permuted item-item matrix...
INFO:sansa.core.factorizers:Computing incomplete Cholesky decomposition of X^TX + 500.0*I...
INFO:sansa.core.factorizers:Finding a fill-in reducing ordering (method = colamd)...
INFO:sansa.core.factorizers:Computing X^TX...
INFO:sansa.core.factorizers:
                X^TX info:
                    shape = (31300, 31300) 
                    nnz = 264972836 
                    density = 27.046600% 
                    size = 2119.9 MB
                
                Attempting incomplete factorization of a relatively dense matrix (27.046600% dense). 
                This is unstable:
                 - the factorization might fail and automatically restart with additional regularization
                 - the resulting approximate factor might be of lesser quality
                You may want to tr

In [12]:
print(test.head())
evaluate_recommender_modified(df=test, model_preds_col="ease_recs")

shape: (5, 5)
┌───────────────────┬───────────────────┬───────────────────┬───────────────────┬──────────────────┐
│ user_id           ┆ item_id           ┆ random_recs       ┆ toppopular_recs   ┆ ease_recs        │
│ ---               ┆ ---               ┆ ---               ┆ ---               ┆ ---              │
│ str               ┆ list[i64]         ┆ array[i64, 10]    ┆ array[i64, 10]    ┆ array[i64, 10]   │
╞═══════════════════╪═══════════════════╪═══════════════════╪═══════════════════╪══════════════════╡
│ 00000377eea48021d ┆ [13252]           ┆ [4549, 3786, …    ┆ [4058, 15514, …   ┆ [27003, 15514, … │
│ 3002730d56aca…    ┆                   ┆ 23]               ┆ 13159]            ┆ 24671]           │
│ 00009ab2ed8cbfced ┆ [2328]            ┆ [17711, 20522, …  ┆ [4058, 15514, …   ┆ [11475, 10108, … │
│ a5a59da409663…    ┆                   ┆ 14156]            ┆ 13159]            ┆ 15193]           │
│ 00009e46d18f223a8 ┆ [28636, 30197]    ┆ [30088, 5896, …   ┆ [4058, 15514, …

{'ndcg@10': 0.021,
 'recall@10': 0.016,
 'map@10': 0.009,
 'serendipity@10': 0.018,
 'coverage': 0.398}

In [13]:
ease_rec = EASERecommender(l2=250, weight_matrix_density=2e-4)
ease_rec.fit(train)

INFO:sansa.model:Computing column norms of X^TX...
INFO:sansa.model:Scaling columns of X by computed norms...
INFO:sansa.model:Computing LDL^T decomposition of permuted item-item matrix...
INFO:sansa.core.factorizers:Computing incomplete Cholesky decomposition of X^TX + 250*I...
INFO:sansa.core.factorizers:Finding a fill-in reducing ordering (method = colamd)...
INFO:sansa.core.factorizers:Computing X^TX...
INFO:sansa.core.factorizers:
                X^TX info:
                    shape = (31300, 31300) 
                    nnz = 264972836 
                    density = 27.046600% 
                    size = 2119.9 MB
                
                Attempting incomplete factorization of a relatively dense matrix (27.046600% dense). 
                This is unstable:
                 - the factorization might fail and automatically restart with additional regularization
                 - the resulting approximate factor might be of lesser quality
                You may want to try 

In [14]:
test_users_df = test.select("user_id").unique()
ease_preds = ease_rec.predict(test_users_df)

preds_df = pl.DataFrame({
    "user_id": test_users_df["user_id"],
    "ease_recs": pl.Series(ease_preds)
})

test = test.join(preds_df, on="user_id", how="left")

print(test.head())
evaluate_recommender_modified(df=test, model_preds_col="ease_recs_right")

shape: (5, 6)
┌────────────────┬────────────────┬────────────────┬───────────────┬───────────────┬───────────────┐
│ user_id        ┆ item_id        ┆ random_recs    ┆ toppopular_re ┆ ease_recs     ┆ ease_recs_rig │
│ ---            ┆ ---            ┆ ---            ┆ cs            ┆ ---           ┆ ht            │
│ str            ┆ list[i64]      ┆ array[i64, 10] ┆ ---           ┆ array[i64,    ┆ ---           │
│                ┆                ┆                ┆ array[i64,    ┆ 10]           ┆ array[i64,    │
│                ┆                ┆                ┆ 10]           ┆               ┆ 10]           │
╞════════════════╪════════════════╪════════════════╪═══════════════╪═══════════════╪═══════════════╡
│ 00000377eea480 ┆ [13252]        ┆ [4549, 3786, … ┆ [4058, 15514, ┆ [27003,       ┆ [33370,       │
│ 21d3002730d56a ┆                ┆ 23]            ┆ … 13159]      ┆ 15514, …      ┆ 28386, …      │
│ ca…            ┆                ┆                ┆               ┆ 24671]  

{'ndcg@10': 0.028,
 'recall@10': 0.023,
 'map@10': 0.013,
 'serendipity@10': 0.021,
 'coverage': 0.264}

P.S: столбец ease_recs содержит результаты запуска модели с параметрами по умолчанию, ease_recs_right - с другими (l2=250, density=2e-4), попытка сделать модель чуть сложнее. Как видно, получили прирост всех метрик, кроме coverage - получается, хотя и модель находит больше сложных взаимосвязей, рекомендует она все-таки меньшую часть каталога. Заметно, что есть трейд офф между чрезмерной рекомендацией холодных айтемов и теплых

Сравним результаты 

ease_recs - наиболее разреженная и быстрая модель

In [15]:
HTML(dataframe_to_html(books.filter(pl.col("item_id").is_in(test.sample(1)["ease_recs"][0].to_list())), ["item_id", "title", "description", "image_url"]))

item_id,title,description,image_url
3792,"Thomas’s First Memory of the Flare (Maze Runner, #2.5)","Short flashback that occurs between ""The Scorch Trials"" and ""The Death Cure.""",
7478,The Red Pyramid (Kane Chronicles #1),"From the author of the blockbuster best-selling series Percy Jackson and the Olympianscomes the first installment of the Kane Chronicles, combining nonstop action, humor, and suspense in an epic tale of loyalty and heroism. Since his mother's death six years ago, Carter Kane has been living out of a suitcase, traveling the globe with his father, the brilliant Egyptologist Dr. Julius Kane. But while Carter's been homeschooled, his younger sister, Sadie, has been living with their grandparents in London. Sadie has just what Carter wants -- school friends and a chance at a ""normal"" life. But Carter has just what Sadie longs for -- time with their father. After six years of living apart, the siblings have almost nothing in common. Until now. On Christmas Eve, Sadie and Carter are reunited when their father brings them to the British Museum, with a promise that he's going to ""make things right."" But all does not go according to plan; Carter and Sadie watch as Julius summons a mysterious figure, who quickly banishes their father and causes a fiery explosion. Soon Carter and Sadie discover that the gods of Ancient Egypt are waking, and the worst of them -- Set -- has a frightening scheme. To save their father, they must embark on a dangerous journey -- a quest that brings them ever closer to the truth about their family and its links to the House of Life, a secret order that has existed since the time of the pharaohs. An unabridged recording on 12 CDs (14 hrs, 42 mins).",
15513,"The Scorch Trials (The Maze Runner, #2)",Solving the Maze was supposed to be the end. Thomas was sure that escape from the Maze would mean freedom for him and the Gladers. But WICKED isn't done yet. Phase Two has just begun. The Scorch. There are no rules. There is no help. You either make it or you die. The Gladers have two weeks to cross through the Scorch--the most burned-out section of the world. And WICKED has made sure to adjust the variables and stack the odds against them. Friendships will be tested. Loyalties will be broken. All bets are off. There are others now. Their survival depends on the Gladers' destruction--and they're determined to survive.,
15728,"United as One (Lorien Legacies, #7)","The seventh and final book in the #1 New York Times bestselling I Am Number Four series! With United as One, this action-packed series comes to a surprising, breathtaking, and utterly satisfying conclusion. The Garde didn't start this war, but they'll do whatever it takes to end it once and for all. . . . The Mogadorians have invaded Earth. Their warships loom over our most populated cities, and no country will risk taking them head on. The Garde are all that stand in the way, but they'll need an army of their own to win this fight. They've teamed up with the US military, but it might not be enough. The Garde need reinforcements, and they've found them in the most unexpected place. Teenagers from across the globe, like John Smith's best friend, Sam, have developed abilities. So John and the others must get to them before the Mogs, because if they don't their enemies will use these gifted teens for their own sinister plan. But after all the Mogadorians have taken from John--his home, his family, his friends, and the person he loves most--he might not want to put any more lives in danger. He's got nothing left to lose, and he's just discovered he has been given an incredible new Legacy. Now he can turn himself into the ultimate weapon. So will he risk his life to save the world, or will he realize that power in numbers will save us all?",
18406,The Kill Order,"Read the fourth book in the #1 New York Timesbestselling Maze Runner series, perfect for fans of The Hunger Gamesand Divergent. The first book, The Maze Runner, is now a major motion picture featuring the star of MTV's Teen Wolf, Dylan O'Brien; Kaya Scodelario; Aml Ameen; Will Poulter; and Thomas Brodie-Sangster and the second book, The Scorch Trials, will soon be a movie that hits theaters September 18, 2015! Also look for James Dashner's newest novels, The Eye of Mindsand The Rule of Thoughts, the first two books in the Mortality Doctrine series. They thought the end came in a flash. Before WICKED was formed, before the Glade was built, before Thomas entered the Maze, sun flares hit the earth, killing most of the population. The worst is yet to come. Mark and Trina were there when it happened. They survived. But now a virus is spreading. A virus that fills humans with murderous rage. There is no cure. No escape. They're convinced that there's a way to save those who are left--ifthey can stay alive. Because in this new, devastated world, every life has a price. And to some you're worth more dead than alive. The end is only the beginning. Praise for the Maze Runner series: ""[A] mysterious survival saga that passionate fans describe as a fusion of Lord of the Flies, The Hunger Games, and Lost.""--EW.com ""Wonderful action writing--fast-paced...but smart and well observed.""--Newsday ""[A] nail-biting must-read.""--Seventeen.com ""Breathless, cinematic action."" --Publishers Weekly ""Heart-pounding to the very last moment."" --Kirkus Reviews ""Exclamation-worthy."" --Romantic Times [STAR] ""James Dashner's illuminating prequel [The Kill Order] will thrill fans of this Maze Runner [series] and prove just as exciting for readers new to the series.""--Shelf Awareness, Starred From the Hardcover edition.",
19449,Demigods & Magicians: Percy and Annabeth Meet the Kanes,"Magic, monsters, and mayhem abound when Percy Jackson and Annabeth Chase meet Carter and Sadie Kane for the first time. Weird creatures are appearing in unexpected places, and the demigods and magicians have to team up to take them down. As they battle with Celestial Bronze and glowing hieroglyphs, the four heroes find that they have a lot in common--and more power than they ever thought possible. But will their combined forces be enough to foil an ancient enemy who is mixing Greek and Egyptian incantations for an evil purpose? Rick Riordan wields his usual storytelling magic in this adrenaline-fueled adventure.",
23005,"The Fever Code (The Maze Runner, #0.6)","All will be revealed in the fifth book in James Dashner's #1 New York Times bestselling Maze Runner series. This is the story that fans all over the world have been waiting for--the story of how Thomas and WICKED built the Maze. You will not want to miss it. Once there was a world's end. The forests burned, the lakes and rivers dried up, and the oceans swelled. Then came a plague, and fever spread across the globe. Families died, violence reigned, and man killed man. Next came WICKED, who were looking for an answer. And then they found the perfect boy. The boy's name was Thomas, and Thomas built a maze. Now there are secrets. There are lies. And there are loyalties history could never have foreseen. This is the story of that boy, Thomas, and how he built a maze that only he could tear down. All will be revealed. A prequel to the worldwide Maze Runner phenomenon, The Fever Code is the book that holds all the answers. How did WICKED find the Gladers? Who are Group B? And what side are Thomas and Teresa really on? Lies will be exposed. Secrets will be uncovered. Loyalties will be proven. Fans will never see the truth coming. Before there was the Maze, there was The Fever Code.",
29890,"The Son of Neptune (The Heroes of Olympus, #2)","Seven half-bloods shall answer the call, To storm or fire the world must fall. An oath to keep with a final breath, And foes bear arms to the Doors of Death. Percy is confused.When he awoke from his long sleep, he didn't know much more than his name. His brain fuzz is lingering, even after the wolf Lupa told him he is a demigod and trained him to fight with the pen/sword in his pocket. Somehow Percy manages to make it to a camp for half-bloods, despite the fact that he has to keep killing monsters along the way. But the camp doesn't ring any bells with him. The only thing he can recall from his past is another name: Annabeth. Hazel is supposed to be dead.When she lived before, she didn't do a very good job of it. Sure, she was an obedient daughter, even when her mother was possessed by greed. But that was the problem -- when the Voice took over her mother and commanded Hazel to use her ""gift"" for an evil purpose, Hazel couldn't say no. Now because of her mistake, the future of the world is at risk. Hazel wished she could ride away from it all on the stallion that appears in her dreams. Frank is a klutz.His grandmother says he is descended from heroes and can be anything he wants to be, but he doesn't see it. He doesn't even know who his father is. He keeps hoping Apollo will claim him, because the only thing he is good at is archery -- although not good enough to win camp war games. His bulky physique makes him feel like an ox, especially infront of Hazel, his closest friend at camp. He trusts her completely -- enough to share the secret he holds close to his heart. Beginning at the ""other"" camp for half-bloods and extending as far as the land beyond the gods, this breathtaking second installment of the Heroes of Olympus series introduces new demigods, revives fearsome monsters, and features other remarkable creatures, all destined to play a part in the Prophesy of Seven.",
30154,"Gunner Skale (The Mortality Doctrine, #0.5)","From James Dashner, the #1 New York Timesbestselling author of the Maze Runner series, which includes The Maze Runnerand The Scorch Trials--both #1 movies worldwide--comes an original thirty-page e-short returning fans to the fast-paced, high-tech world from The Eye of Minds and The Rule of Thoughts, the first two books in the Mortality Doctrine series. Find out how Gunner Skale, the best gamer on the VirtNet, becomes a legend in this story available exclusively online. Praise for The Rule of Thoughts, book two in the Mortality Doctrine series: ""[An] excellent sequel...smartly pacedand paired with well-balanced characters that feel real.""-Kirkus Reviews ""An exhilarating adventure story with touches of Anthony Horowitz's Alex Rider books and Orson Scott Card's Ender's Game.""-Booklist.com ""Dashner's descriptions are screenplay-ready...This book will satisfythe author's fans...[and readers] in search of an adrenaline rush.""-School Library Journal Praise for The Eye of Minds, Book One in the Mortality Doctrine series ""More realistic and addictive than any video game--The Eye of Minds sucked me in from the very first page. The Girl with the Dragon Tattoo meets The Matrixin a vividly rendered world of gamers, hackers, and cyber-terrorists. I can't wait to read the next book in the series!""--Kami Garcia, #1 New York Times bestselling coauthor of Beautiful Creaturesand author of Unbreakable ""A gripping page-turner, Dashner's latest is sure to please.""--BookPage ""Full of action [and] a rather surprising twistthat will leave you flipping pages.""--fanboynation.com ""An adrenaline rush.""--School Library Journal ""In typical Dashner style, this is quick and involving, with the main frustration being the wait time until the next book.""--Booklist ""High on concept, this is an intriguing read for the digital generation.""--Kirkus Reviews ""Dashner once again creates a dystopian world in which nothing is what it seems.""-VOYA A Junior Library Guild Selection A YALSA Teens Top Ten Pick",
30197,"Red Queen (Red Queen, #1)","Mare Barrow's world is divided by blood--those with red and those with silver. Mare and her family are lowly Reds, destined to serve the Silver elite whose supernatural abilities make them nearly gods. Mare steals what she can to help her family survive, but when her best friend is conscripted into the army she gambles everything to win his freedom. A twist of fate leads her to the royal palace itself, where, in front of the king and all his nobles, she discovers a power of her own--an ability she didn't know she had. Except . . . her blood is Red. To hide this impossibility, the king forces her into the role of a lost Silver princess and betroths her to one of his own sons. As Mare is drawn further into the Silver world, she risks her new position to aid the Scarlet Guard--the leaders of a Red rebellion. Her actions put into motion a deadly and violent dance, pitting prince against prince--and Mare against her own heart. From debut author Victoria Aveyard comes a lush, vivid fantasy series where loyalty and desire can tear you apart and the only certainty is betrayal.",


In [16]:
HTML(dataframe_to_html(books.filter(pl.col("item_id").is_in(test.sample(1)["ease_recs"][0].to_list())), ["item_id", "title", "description", "image_url"]))

item_id,title,description,image_url
4911,"Sweet, Hereafter (Heaven, #3)",Coretta Scott King Award-winner Angela Johnson concludes her Heaven trilogy with a poignant tale of discovering where--and with whom--you belong.,
5222,"Predator's Gold (The Hungry City Chronicles, #2)","The Jenny was drifting across the shoulder of a big volcano. Beyond it there were no more mountains, just an endless blue-white plain stretching to the horizon. They were at the mercy of the wind, and it was carrying them helplessly into the Ice Wastes. After two years of carefree traveling in the Jenny Haniver, Tom and Hester find themselves back in danger. Fleeing from the grim aviators of the Green Storm, they stumble onto the ice city of Anchorage just in time. But Anchorage is not a safe refuge: Devastated by plague, and haunted by thieves -- or perhaps ghosts -- the city is barely lurching along. The savage Huntsmen of Arkangel are closing in, and the young margravine must make a last desperate bid for survival. She sets a course for the Dead Continent -- America ... In this breathtaking sequel to the award-winning Mortal Engines, Philip Reeve plunges us into a ruthless and terrifyingly believable world where cities eat each other, betrayal is as common as the blasted land the cities traverse, and loyalty holds the only chance of survival.",
8245,"The Opal Deception (Artemis Fowl, #4)","The evil pixie Opal Koboi has spent the last year in a self-induced coma, plotting her revenge on all those who foiled her attempt to destroy the LEPrecon fairy police. And Artemis Fowl is at the top of her list. After his last run-in with the fairies, Artemis had his mind wiped of his memories of the world belowground. But they have not forgotten about him. Once again, he must stop the human and fairy worlds from colliding--only this time, Artemis faces an enemy who may have finally outsmarted him.",
10389,Running Out of Time,"Run For Your Life Jessie lives with her family in the frontier village of Clifton, Indiana, in 1840 -- or so she believes. When diphtheria strikes the village and the children of Clifton start dying, Jessie's mother reveals a shocking secret -- it's actually 1996, and they are living in a reconstructed village that serves as a tourist site. In the world outside, medicine exists that can cure the dread disease, and Jessie's mother is sending her on a dangerous mission to bring back help. But beyond the walls of Clifton, Jessie discovers a world even more alien and threatening than she could have imagined, and soon she finds her own life in jeopardy. Can she get help before the children of Clifton, and Jessie herself, run out of time?",
15514,"The Hunger Games (The Hunger Games, #1)","Winning will make you famous. Losing means certain death. The nation of Panem, formed from a post-apocalyptic North America, is a country that consists of a wealthy Capitol region surrounded by 12 poorer districts. Early in its history, a rebellion led by a 13th district against the Capitol resulted in its destruction and the creation of an annual televised event known as the Hunger Games. In punishment, and as a reminder of the power and grace of the Capitol, each district must yield one boy and one girl between the ages of 12 and 18 through a lottery system to participate in the games. The 'tributes' are chosen during the annual Reaping and are forced to fight to the death, leaving only one survivor to claim victory. When 16-year-old Katniss's young sister, Prim, is selected as District 12's female representative, Katniss volunteers to take her place. She and her male counterpart Peeta, are pitted against bigger, stronger representatives, some of whom have trained for this their whole lives. , she sees it as a death sentence. But Katniss has been close to death before. For her, survival is second nature.",
19710,"The First Part Last (Heaven, #2)","Bobby's a classic urban teenager. He's restless. He's impulsive. But the thing that makes him different is this: He's going to be a father. His girlfriend, Nia, is pregnant, and their lives are about to change forever. Instead of spending time with friends, they'll be spending time with doctors, and next, diapers. They have options: keeping the baby, adoption. They want to do the right thing. If only it was clear what the right thing was.",
22288,"Day 21 (The Hundred, #2)","No one has set foot on Earth in centuries -- until now. It's been 21 days since the hundred landed on Earth. They're the only humans to set foot on the planet in centuries...or so they thought. Facing an unknown enemy, Wells attempts to keep the group together. Clarke strikes out for Mount Weather, in search of other Colonists, while Bellamy is determined to rescue his sister, no matter the cost. And back on the ship, Glass faces an unthinkable choice between the love of her life and life itself. In this pulse-pounding sequel to Kass Morgan's The 100, secrets are revealed, beliefs are challenged, and relationships are tested. And the hundred will struggle to survive the only way they can -- together.",
22663,Quarantine (Roswell #4),"No One Is Safe. There's a new corporation in town -- Meta-chem -- and Liz is psyched to land a scholarship program working in the lab. But when the giant pharmaceutical company creates a disease that accidentally infects the town, Liz is among the first to fall ill -- and, despite his powers, Max is unable to heal her. There's also a new girl in town, Sadie, who's come to Roswell searching for her half sister, who she believes is Maria. Maria doesn't know how to feel: what if she and Sadie do share a father? Maria hasn't seen him since she was seven years old, and she's not sure she wants to see him now. but when news of the quarantine breaks, Maria has no choice but to keep Sadie with her in town, all while trying to find out who is sick and whether anyone can be saved in time....",
25899,A Confusion of Princes,"Garth Nix, bestselling author of the Keys to the Kingdom series and Shade's Children, combines space opera with a coming-of-age story in his YA novelA Confusion of Princes. Superhuman. Immortal. Prince in a Galactic Empire. There has to be a catch.... Khemri learns the minute he becomes a Prince that princes need to be hard to kill--for they are always in danger. Their greatest threat? Other Princes. Every Prince wants to become Emperor and the surest way to do so is to kill, dishonor, or sideline any potential competitor. There are rules, but as Khemri discovers, rules can be bent and even broken. There are also mysteries. Khemri is drawn into the hidden workings of the Empire and is dispatched on a secret mission. In the ruins of space battle, he meets a young woman, called Raine, who challenges his view of the Empire, of Princes, and of himself. But Khemri is a Prince, and even if he wanted to leave the Empire behind, there are forces there that have very definite plans for his future.",
31822,"Holes (Holes, #1)","(Librarian's Note: Alternate Cover Edition for ISBN 9780440414803) Stanley Yelnats is under a curse. A curse that began with his no-good-dirty-rotten-pig-stealing-great-great-grandfather and has since followed generations of Yelnatses. Now Stanley has been unjustly sent to a boys' detention center, Camp Green Lake, where the boys build character by spending all day, every day digging holes exactly five feet wide and five feet deep. There is no lake at Camp Green Lake. But there are an awful lot of holes. It doesn't take long for Stanley to realize there's more than character improvement going on at Camp Green Lake. The boys are digging holes because the warden is looking for something. But what could be buried under a dried-up lake? Stanley tries to dig up the truth in this inventive and darkly humorous tale of crime and punishment--and redemption.",


ease_recs_right - менее разреженная и быстрая модель

In [18]:
HTML(dataframe_to_html(books.filter(pl.col("item_id").is_in(test.sample(1)["ease_recs_right"][0].to_list())), ["item_id", "title", "description", "image_url"]))

item_id,title,description,image_url
1164,My Life Next Door,"""One thing my mother never knew, and would disapprove of most of all, was that I watched the Garretts. All the time."" The Garretts are everything the Reeds are not. Loud, messy, affectionate. And every day from her rooftop perch, Samantha Reed wishes she was one of them . . . until one summer evening, Jase Garrett climbs up next to her and changes everything. As the two fall fiercely for each other, stumbling through the awkwardness and awesomeness of first love, Jase's family embraces Samantha - even as she keeps him a secret from her own. Then something unthinkable happens, and the bottom drops out of Samantha's world. She's suddenly faced with an impossible decision. Which perfect family will save her? Or is it time she saved herself? A transporting debut about family, friendship, first romance, and how to be true to one person you love without betraying another.",
12211,"The Transfer (Divergent, #0.1)","Complete your Divergent library with the Four stories! Fans of the Divergent series by #1 New York Times bestselling author Veronica Roth will be captivated by ""Four: The Transfer,"" the first of four stories, each between fifty and seventy-five pages long, set in the world of Divergent and told from Tobias's perspective. In ""The Transfer,"" readers witness Tobias's aptitude test, Choosing Day, and the moment he is given the infamous nickname ""Four.""",
13064,"Throne of Glass (Throne of Glass, #1)","There is an alternate cover edition for this ASIN . After serving out a year of hard labor in the salt mines of Endovier for her crimes, 18-year-old assassin Celaena Sardothien is dragged before the Crown Prince. Prince Dorian offers her her freedom on one condition: she must act as his champion in a competition to find a new royal assassin. Her opponents are men-thieves and assassins and warriors from across the empire, each sponsored by a member of the king's council. If she beats her opponents in a series of eliminations, she'll serve the kingdom for three years and then be granted her freedom. Celaena finds her training sessions with the captain of the guard, Westfall, challenging and exhilirating. But she's bored stiff by court life. Things get a little more interesting when the prince starts to show interest in her... but it's the gruff Captain Westfall who seems to understand her best. Then one of the other contestants turns up dead... quickly followed by another. Can Celaena figure out who the killer is before she becomes a victim? As the young assassin investigates, her search leads her to discover a greater destiny than she could possibly have imagined.",
19293,"The Blood of Olympus (The Heroes of Olympus, #5)","The Blood of Olympus is the fifth book in the bestselling Heroes of Olympusseries - set in the high-octane world of Percy Jackson. Though the Greek and Roman crew members of the Argo II have made progress in their many quests, they still seem no closer to defeating the earth mother, Gaea. Her giants have risen - all of them - and they're stronger than ever. They must be stopped before the Feast of Spes, when Gaea plans to have two demigods sacrificed in Athens. She needs their blood -the blood of Olympus - in order to wake. The demigods are having more frequent visions of a terrible battle at Camp Half-Blood. The Roman legion from Camp Jupiter, led by Octavian, is almost within striking distance. Though it is tempting to take the Athena Parthenos to Athens to use as a secret weapon, the friends know that the huge statue belongs back on Long Island, where it might be able to stop a war between the two camps. The Athena Parthenos will go west; the Argo II will go east. The gods, still suffering from multiple personality disorder, are useless. How can a handful of young demigods hope to persevere against Gaea's army of powerful giants? As dangerous as it is to head to Athens, they have no other option. They have sacrificed too much already. And if Gaea wakes, it is game over... Rick Riordan has now sold an incredible 55 million copies of his books worldwide 'A cracking read' - Sunday Express 'Explosive' - Big Issue 'Action-packed' - Telegraph Rick Riordan is an award-winning mystery writer. He lives in San Antonio, Texas, with his wife and two sons. Percy Jackson and the Lightning Thief, Rick's first novel featuring the heroic young demigod, was the overall winner of the Red House Children's Book Award in 2006 and is now the first in a blockbuster film franchise, starring Logan Lerman.",
23524,Amy & Roger's Epic Detour,"Amy Curry is having a terrible year. Her mother has decided to move across the country and needs Amy to get their car from California to Connecticut. There's just one small problem: Since her dad died this past spring, Amy hasn't been able to get behind the wheel. Enter Roger, the nineteen-year-old son of an old family friend, who turns out to be unexpectedly cute . . . and dealing with some baggage of his own. Meeting new people and coming to terms with her father's death were not what Amy had planned on this trip. And traveling the Loneliest Road in America, seeing the Colorado mountains, crossing the Kansas plains, and visiting diners, dingy motels, and Graceland were definitely not on the itinerary. But as they drive, Amy finds that the people you least expected are the ones you may need the most -- and that sometimes you have to get lost in order to find your way home. ""One of the most touching, irresistible, and feel-good road trips I've been on in a long, long while."" --Deb Caletti, National Book Award Finalist",
29421,"The Voicemail of Magnus Bane (The Bane Chronicles, #11)","The voice mail of Magnus Bane, High Warlock of Brooklyn, in the days following a certain incident in City of Lost Souls.",
30197,"Red Queen (Red Queen, #1)","Mare Barrow's world is divided by blood--those with red and those with silver. Mare and her family are lowly Reds, destined to serve the Silver elite whose supernatural abilities make them nearly gods. Mare steals what she can to help her family survive, but when her best friend is conscripted into the army she gambles everything to win his freedom. A twist of fate leads her to the royal palace itself, where, in front of the king and all his nobles, she discovers a power of her own--an ability she didn't know she had. Except . . . her blood is Red. To hide this impossibility, the king forces her into the role of a lost Silver princess and betroths her to one of his own sons. As Mare is drawn further into the Silver world, she risks her new position to aid the Scarlet Guard--the leaders of a Red rebellion. Her actions put into motion a deadly and violent dance, pitting prince against prince--and Mare against her own heart. From debut author Victoria Aveyard comes a lush, vivid fantasy series where loyalty and desire can tear you apart and the only certainty is betrayal.",
30704,"The Lost Herondale (Tales from the Shadowhunter Academy, #2)","Simon learns the worst crime a Shadowhunter can commit: desertion of their comrades. One of ten adventures in Tales from the Shadowhunter Academy. In the early nineteenth century, Tobias Herondale abandoned his fellow Shadowhunters in the heat of battle and left them to die. His life was forfeit, but Tobias never returned, and the Clave claimed his wife's life in exchange for Tobias's. Simon and his fellow students are shocked to learn of this brutality, especially when it is revealed the woman was pregnant. But what if the child survived...could there be a lost Herondale line out in the world today? This standalone e-only short story follows the adventures of Simon Lewis, star of the #1 New York Times bestselling series The Mortal Instruments, as he trains to become a Shadowhunter. Tales from the Shadowhunter Academy features characters from Cassandra Clare's Mortal Instruments, Infernal Devices, and the upcoming Dark Artifices and Last Hours series. The Lost Herondale is written by Cassandra Clare and Robin Wasserman.",
33370,Eleanor & Park,"""Eleanor & Parkreminded me not just what it's like to be young and in love with a girl, but also what it's like to be young and in love with a book.""-John Green, The New York Times Book Review Bono met his wife in high school, Park says. So did Jerry Lee Lewis, Eleanor answers. I'm not kidding, he says. You should be, she says, we're 16. What about Romeo and Juliet? Shallow, confused, then dead. I love you, Park says. Wherefore art thou, Eleanor answers. I'm not kidding,he says. You should be. Set over the course of one school year in 1986, this is the story of two star-crossed misfits-smart enough to know that first love almost never lasts, but brave and desperate enough to try. When Eleanor meets Park, you'll remember your own first love-and just how hard it pulled you under. A New York TimesBest Seller! A 2014 Michael L. Printz Honor Book for Excellence in Young Adult Literature Eleanor & Parkis the winner of the 2013 Boston GlobeHorn Book Award for Best Fiction Book. A Publishers WeeklyBest Children's Book of 2013 A New York Times Book Review Notable Children's Book of 2013 A Kirkus Reviews Best Teen Book of 2013 An NPR Best Book of 2013",
33851,All The Bright Places,"If a book like 'The fault in your stars' fills your empty moments, then this could be your next read. A New York best seller and soon to be a movie 'All Bright Places' starring Elle Fanning, is a compelling and beautiful love story that revolves around a boy who is eager to give-up living as death fascinates him. He tries several ways to kill himself but is always being saved by kind encounters. Theodore Finch (main male character) meets Violet Markey (main female character), who has bright hopes from future and is waiting her graduation days to end, while her aching heart struggles to overcome the grief of her sister's death. Story takes a quick leap with their encounter on the ledge of the bell tower at school, which leave a vague notion as who saves whom. Later when paired up for a project, both discover a new phase of their state. Flinch realizes that it's only with Violet that he can be in his true self that is being weird. On the other hand, Violet discovers that she has actually started living her day instead of counting them just because of Finch. Love blooms unaware of the fate's cruel arrangement. What's in future for the couple? Will Violet accept Finch's reality? It is a shattering, intense and powerful story of love. About the author :- Jennifer Niven is the author of 'All the Bright Places, ' which is her first novel for young adults. She has also written two narrative non-fiction books, naming The Ice Master and Ada Blackjack. Jennifer lives in Los Angeles. Her novel was first published on January 2015 by Knopf Publishing Group. The Ice Master holds position in the top ten nonfiction books of the year by Entertainment Weekly, while Ada Blackjack was listed amongst a Book Sense Top Ten Pick.",


In [19]:
HTML(dataframe_to_html(books.filter(pl.col("item_id").is_in(test.sample(1)["ease_recs_right"][0].to_list())), ["item_id", "title", "description", "image_url"]))

item_id,title,description,image_url
10260,"Saving Raphael Santiago (The Bane Chronicles, #6)","One of the more interesting jobs Magnus has ever been involved in... A distraught mother in 1950s New York hires Magnus to find her son, Raphael, but when Magnus finds him young Raphael is already beyond saving. Or is he? This standalone e-only short story illuminates the life of the enigmatic Magnus Bane, whose alluring personality populates the pages of the number-one New York Times bestselling series The Mortal Instruments and The Infernal Devices.",
13442,"The Rise of the Hotel Dumort (The Bane Chronicles, #5)","Magnus Bane watches the once-glamorous Hotel Dumont become something else altogether in 1970s New York City. One of ten adventures in The Bane Chronicles. Fifty years after the Jazz Age rise of the Hotel Dumont, immortal warlock Magnus Bane knows the Manhattan landmark is on the decline. The once-beautiful Hotel Dumont has fallen into a decayed thing, a ruin, as dead as a place can be. But the vampires don't mind... This standalone e-only short story illuminates the life of the enigmatic Magnus Bane, whose alluring personality populates the pages of the #1 New York Times bestselling series, The Mortal Instruments and The Infernal Devices series. This story in The Bane Chronicles, The Fall of the Hotel Dumort, is written by Maureen Johnson and Cassandra Clare.",
16918,"The Hammer of Thor (Magnus Chase and the Gods of Asgard, #2)","Thor's hammer is missing again. The thunder god has a disturbing habit of misplacing his weapon - the mightiest force in the Nine Worlds. But this time the hammer isn't just lost, it has fallen into enemy hands. If Magnus Chase and his friends can't retrieve the hammer quickly, the mortal worlds will be defenseless against an onslaught of giants. Ragnarok will begin. The Nine Worlds will burn. Unfortunately, the only person who can broker a deal for the hammer's return is the gods' worst enemy, Loki - and the price he wants is veryhigh.",
17377,"The Fall of the Hotel Dumort (The Bane Chronicles, #7)","Fifty years after the Jazz Age and the rise of the Hotel Dumort, immortal warlock Magnus Bane knows the Manhattan landmark is on the decline. The once-beautiful Hotel Dumort has fallen into a decayed thing, a ruin, as dead as a place can be. But the vampires don't mind... One of ten eBook short stories in The Bane Chronicles, a series about the enigmatic Magnus Bane from Cassandra Clare's internationally bestselling series The Infernal Devices and The Mortal Instruments.",
19080,"The Mark of Athena (The Heroes of Olympus, #3)","Annabeth is terrified. Just when she's about to be reunited with Percy--after six months of being apart, thanks to Hera--it looks like Camp Jupiter is preparing for war. As Annabeth and her friends Jason, Piper, and Leo fly in on the Argo II, she can't blame the Roman demigods for thinking the ship is a Greek weapon. With its steaming bronze dragon masthead, Leo's fantastical creation doesn't appear friendly. Annabeth hopes that the sight of their praetor Jason on deck will reassure the Romans that the visitors from Camp Half-Blood are coming in peace. And that's only one of her worries. In her pocket Annabeth carries a gift from her mother that came with an unnerving demand: Follow the Mark of Athena. Avenge me. Annabeth already feels weighed down by the prophecy that will send seven demigods on a quest to find--and close-- the Doors of Death. What more does Athena want from her? Annabeth's biggest fear, though, is that Percy might have changed. What if he's now attached to Roman ways? Does he still need his old friends? As the daughter of the goddess of war and wisdom, Annabeth knows she was born to be a leader, but never again does she want to be without Seaweed Brain by her side. Narrated by four different demigods, The Mark of Athena is an unforgettable journey across land and sea to Rome, where important discoveries, surprising sacrifices, and unspeakable horrors await. Climb aboard the Argo II, if you dare. . . . In The Son of Neptune, Percy, Hazel, and Frank met in Camp Jupiter, the Roman equivalent of Camp Halfblood, and traveled to the land beyond the gods to complete a dangerous quest. The third book in the Heroes of Olympus series will unite them with Jason, Piper, and Leo. But they number only six--who will complete the Prophecy of Seven? The Greek and Roman demigods will have to cooperate in order to defeat the giants released by the Earth Mother, Gaea. Then they will have to sail together to the ancient land to find the Doors of Death. What exactly are the Doors of Death? Much of the prophesy remains a mystery. . . . With old friends and new friends joining forces, a marvelous ship, fearsome foes, and an exotic setting, The Mark of Athena promises to be another unforgettable adventure by master storyteller Rick Riordan.",
23986,"The House of Hades (The Heroes of Olympus, #4)","At the conclusion of The Mark of Athena, Annabeth and Percy tumble into a pit leading straight to the Underworld. The other five demigods have to put aside their grief and follow Percy's instructions to find the mortal side of the Doors of Death. If they can fight their way through Gaea's forces, and Percy and Annabeth can survive the House of Hades, then the Seven will be able to seal the Doors from both sides and prevent the giants from raising Gaea. But, Leo wonders, if the Doors are sealed, how will Percy and Annabeth be able to escape? They have no choice. If the demigods don't succeed, Gaea's armies will never die. They have no time. In about a month, the Romans will march on Camp Half-Blood. The stakes are higher than ever in this adventure that dives into the depths of Tartarus. This paperback edition will include a new short story in which Percy and Annabeth fight alongside Carter and Sadie Kane.",
27096,"What to Buy the Shadowhunter Who Has Everything (The Bane Chronicles, #8)","Magnus Bane may or may not be dating Alec Lightwood, but he definitely needs to find him the perfect birthday present. One of ten adventures in The Bane Chronicles. Set in the time between City of Ashes and City of Glass, warlock Magnus Bane is determined to find the best birthday present possible for Alec Lightwood, the Shadowhunter he may or may not be dating. And he's also got to deal with the demon he's conjured up for a very irritating client...",
29890,"The Son of Neptune (The Heroes of Olympus, #2)","Seven half-bloods shall answer the call, To storm or fire the world must fall. An oath to keep with a final breath, And foes bear arms to the Doors of Death. Percy is confused.When he awoke from his long sleep, he didn't know much more than his name. His brain fuzz is lingering, even after the wolf Lupa told him he is a demigod and trained him to fight with the pen/sword in his pocket. Somehow Percy manages to make it to a camp for half-bloods, despite the fact that he has to keep killing monsters along the way. But the camp doesn't ring any bells with him. The only thing he can recall from his past is another name: Annabeth. Hazel is supposed to be dead.When she lived before, she didn't do a very good job of it. Sure, she was an obedient daughter, even when her mother was possessed by greed. But that was the problem -- when the Voice took over her mother and commanded Hazel to use her ""gift"" for an evil purpose, Hazel couldn't say no. Now because of her mistake, the future of the world is at risk. Hazel wished she could ride away from it all on the stallion that appears in her dreams. Frank is a klutz.His grandmother says he is descended from heroes and can be anything he wants to be, but he doesn't see it. He doesn't even know who his father is. He keeps hoping Apollo will claim him, because the only thing he is good at is archery -- although not good enough to win camp war games. His bulky physique makes him feel like an ox, especially infront of Hazel, his closest friend at camp. He trusts her completely -- enough to share the secret he holds close to his heart. Beginning at the ""other"" camp for half-bloods and extending as far as the land beyond the gods, this breathtaking second installment of the Heroes of Olympus series introduces new demigods, revives fearsome monsters, and features other remarkable creatures, all destined to play a part in the Prophesy of Seven.",
30197,"Red Queen (Red Queen, #1)","Mare Barrow's world is divided by blood--those with red and those with silver. Mare and her family are lowly Reds, destined to serve the Silver elite whose supernatural abilities make them nearly gods. Mare steals what she can to help her family survive, but when her best friend is conscripted into the army she gambles everything to win his freedom. A twist of fate leads her to the royal palace itself, where, in front of the king and all his nobles, she discovers a power of her own--an ability she didn't know she had. Except . . . her blood is Red. To hide this impossibility, the king forces her into the role of a lost Silver princess and betroths her to one of his own sons. As Mare is drawn further into the Silver world, she risks her new position to aid the Scarlet Guard--the leaders of a Red rebellion. Her actions put into motion a deadly and violent dance, pitting prince against prince--and Mare against her own heart. From debut author Victoria Aveyard comes a lush, vivid fantasy series where loyalty and desire can tear you apart and the only certainty is betrayal.",
34183,"The Midnight Heir (The Bane Chronicles, #4)","In Edwardian London, Magnus Bane discovers old friends and new enemies--including the son of his former comrade Will Herondale. One of ten adventures in The Bane Chronicles. Magnus thought he would never return to London, but he is lured by a handsome offer from Tatiana Blackthorn, whose plans--involving her beautiful young ward--are far more sinister than Magnus even suspects. In London at the turn of the century, Magnus finds old friends, and meets a very surprising young man . . . the sixteen-year-old James Herondale. This standalone e-only short story illuminates the life of the enigmatic Magnus Bane, whose alluring personality populates the pages of the #1 New York Times bestselling series, The Mortal Instruments and The Infernal Devices series. This story in The Bane Chronicles is written by Sarah Rees Brennan and Cassandra Clare.",


Видно, что менее разреженная имеет в целом менее разнообразные рекомендации,но судя по метрикам чаще попадает в редкие рекомендации, но в целом обе из них получились неплохими

## 4: Item2item + BM25

Item-to-Item коллаборативная фильтрация с весами BM25

Item-to-Item подход, который генерирует рекомендации на основе сходства между книгами - используются веса BM25 (Best Matching 25), которые позволяют более точно оценить значимость взаимодействия пользователя с книгой, снижая влияние популярных книг

- Вычисление веса взаимодействия с помощью BM25: вес $W(u, i)$ для  пользователя $u$ и книги $i$ рассчитывается

    $$W(u, i) = \text{IDF}(i) \cdot \frac{\text{TF}(u, i) \cdot (k_1 + 1)}{\text{TF}(u, i) + k_1 \cdot \left(1 - b + b \cdot \frac{|D_u|}{\text{avgDL}}\right)}$$

    где
    - $|D_u|$ — количество книг, прочитанных пользователем $u$.
    -   $\text{avgDL}$ — среднее количество прочитанных книг по всем пользователям
    -   $k_1$ и $b$ — гиперпараметры, контролирующие влияние TF и длины документа

-  Поиск похожих книг: на основе матрицы взвешенных взаимодействий для каждой книги находится $N$ ближайших соседей. Сходство между книгами $i$ и $j$ рассчитывается как косинусное расстояние 

- Рекомендации генерируются так: для юзера $u$ берется вся история его взаимодействий $\{i_1, i_2, \dots, i_k\}$, для каждой книги находятся соседи и скор считается по формуле

    $$S(u, j) = \sum_{i \in \text{History}(u)} \text{sim}(i, j)$$

In [20]:
from scipy.sparse import csr_matrix
from sklearn.preprocessing import normalize
from sklearn.neighbors import NearestNeighbors

class BM25ItemKNNRecommender:
    def __init__(self, n_neighbors: int = 200, k1: float = 1.2, b: float = 0.75):
        self.n_neighbors = n_neighbors
        self.k1 = k1
        self.b = b
        self.trained = False
        
        self.user_to_idx: dict[t.Any, int] = {}
        self.item_to_idx: dict[t.Any, int] = {}
        self.idx_to_item: dict[int, t.Any] = {}
        self.user_item_matrix: csr_matrix | None = None
        self.popular_items: list[t.Any] = []
        
        self.item_neighbors: np.ndarray | None = None
        self.item_neighbor_sims: np.ndarray | None = None

    def _calculate_bm25_weights(self, df: pl.DataFrame, user_col: str, item_col: str) -> csr_matrix:
        # (document length) - количество интеракций на пользователя
        user_interactions = df.group_by(user_col).agg(pl.count().alias("dl"))
        # (document frequency) - количество пользователей на айтем
        item_interactions = df.group_by(item_col).agg(pl.count().alias("df"))
        # idf 
        n_users = df[user_col].n_unique()
        
        item_interactions = item_interactions.with_columns(
            idf = (1 + (n_users - pl.col("df") + 0.5) / (pl.col("df") + 0.5)).log()
        )
        
        avg_dl = user_interactions["dl"].mean()  
        interactions_with_stats = df.join(user_interactions, on=user_col, how="left")
        interactions_with_stats = interactions_with_stats.join(item_interactions, on=item_col, how="left")
        
        tf = 1.0 
        interactions_with_weights = interactions_with_stats.with_columns(
            bm25_weight = pl.col("idf") * (tf * (self.k1 + 1)) / 
                         (tf + self.k1 * (1 - self.b + self.b * pl.col("dl") / avg_dl))
        )
        rows = [self.user_to_idx.get(u) for u in interactions_with_weights[user_col]]
        cols = [self.item_to_idx.get(i) for i in interactions_with_weights[item_col]]
        valid_indices = [(r, c, w) for r, c, w in zip(rows, cols, interactions_with_weights["bm25_weight"]) if r is not None and c is not None]
        rows, cols, weights = zip(*valid_indices)
        return csr_matrix(
            (weights, (rows, cols)),
            shape=(len(self.user_to_idx), len(self.item_to_idx))
        )

    def fit(self, df: pl.DataFrame, items_df: pl.DataFrame, user_id_col: str = "user_id", item_id_col: str = "item_id") -> None:
        users = df[user_id_col].unique().to_list()
        items = items_df[item_id_col].unique().to_list()
        
        self.user_to_idx = {u: i for i, u in enumerate(users)}
        self.item_to_idx = {it: j for j, it in enumerate(items)}
        self.idx_to_item = {j: it for it, j in self.item_to_idx.items()}

        rows = [self.user_to_idx[u] for u in df[user_id_col] if u in self.user_to_idx]
        cols = [self.item_to_idx[i] for i in df[item_id_col] if i in self.item_to_idx]
        self.user_item_matrix = csr_matrix((np.ones(len(rows)), (rows, cols)), shape=(len(users), len(items)))

        self.popular_items = (
            df.group_by(item_id_col).agg(pl.count())
            .sort("count", descending=True)
            .head(100)[item_id_col].to_list()
        )


        weighted_user_item_matrix = self._calculate_bm25_weights(df, user_id_col, item_id_col)
        item_matrix_bm25 = normalize(weighted_user_item_matrix.T, norm="l2", axis=1)
        knn_model = NearestNeighbors(
            n_neighbors=min(self.n_neighbors, item_matrix_bm25.shape[0]),
            metric="cosine", algorithm="brute"
        )
        knn_model.fit(item_matrix_bm25)
        dists, neigh = knn_model.kneighbors(item_matrix_bm25, n_neighbors=self.n_neighbors)
        self.item_neighbors = neigh
        self.item_neighbor_sims = 1.0 - dists
        self.trained = True

    def predict(self, df: pl.DataFrame, topn: int = 10) -> list[np.ndarray]:
        assert self.trained, "Model must be trained before calling predict."
        
        predictions = []
        for user_id in tqdm(df["user_id"].to_list(), desc="Predicting"):
            recs = []
            
            if user_id in self.user_to_idx:
                user_idx = self.user_to_idx[user_id]
                user_items_indices = self.user_item_matrix[user_idx].indices
                
                if len(user_items_indices) > 0:
                    candidate_scores = np.zeros(len(self.item_to_idx))
                    
                    for item_idx in user_items_indices:
                        neighbors = self.item_neighbors[item_idx]
                        similarities = self.item_neighbor_sims[item_idx]
                        candidate_scores[neighbors] += similarities
                    
                    candidate_scores[user_items_indices] = -1
                    top_indices = np.argpartition(-candidate_scores, topn)[:topn]
                    top_indices = top_indices[np.argsort(-candidate_scores[top_indices])]
                    recs = [self.idx_to_item[idx] for idx in top_indices]
            
            if len(recs) < topn:
                unseen_popular = [item for item in self.popular_items if item not in recs]
                recs.extend(unseen_popular)

            predictions.append(np.array(recs[:topn], dtype=np.int64))
        
        return predictions

In [21]:
bm25_knn = BM25ItemKNNRecommender(n_neighbors=200)
bm25_knn.fit(train, books)

test = test.with_columns(
    bm25_recs=pl.Series(bm25_knn.predict(test, topn=10))
)

(Deprecated in version 0.20.5)
  df.group_by(item_id_col).agg(pl.count())
(Deprecated in version 0.20.5)
  user_interactions = df.group_by(user_col).agg(pl.count().alias("dl"))
(Deprecated in version 0.20.5)
  item_interactions = df.group_by(item_col).agg(pl.count().alias("df"))
Predicting: 100%|██████████| 185828/185828 [00:43<00:00, 4255.94it/s]


In [30]:
print(test.head())

evaluate_recommender_modified(df=test, model_preds_col="bm25_recs")

shape: (5, 4)
┌────────────────────────┬───────────────────────┬────────────────────────┬────────────────────────┐
│ user_id                ┆ item_id               ┆ toppopular_recs        ┆ bm25_recs              │
│ ---                    ┆ ---                   ┆ ---                    ┆ ---                    │
│ str                    ┆ list[i64]             ┆ array[i64, 10]         ┆ array[i64, 10]         │
╞════════════════════════╪═══════════════════════╪════════════════════════╪════════════════════════╡
│ 00000377eea48021d30027 ┆ [13252]               ┆ [4058, 15514, … 13159] ┆ [12599, 118, … 27745]  │
│ 30d56aca…              ┆                       ┆                        ┆                        │
│ 00009ab2ed8cbfceda5a59 ┆ [2328]                ┆ [4058, 15514, … 13159] ┆ [20150, 32516, …       │
│ da409663…              ┆                       ┆                        ┆ 17509]                 │
│ 00009e46d18f223a82b22d ┆ [28636, 30197]        ┆ [4058, 15514, … 13159] ┆ [

{'ndcg@10': 0.09,
 'recall@10': 0.072,
 'map@10': 0.049,
 'serendipity@10': 0.081,
 'coverage': 0.615}

In [29]:
HTML(dataframe_to_html(books.filter(pl.col("item_id").is_in(test.sample(1)["bm25_recs"][0].to_list())), ["item_id", "title", "description", "image_url"]))

item_id,title,description,image_url
1274,"City of Lost Souls (The Mortal Instruments, #5)","What price is too high to pay, even for love? Plunge into fifth installment in the internationally bestselling Mortal Instruments series and ""prepare to be hooked"" (Entertainment Weekly)--now with a gorgeous new cover, a map, a new foreword, and exclusive bonus content! City of Lost Souls is a Shadowhunters novel. When Jace and Clary meet again, Clary is horrified to discover that the demon Lilith's magic has bound her beloved Jace together with her evil brother Sebastian, and that Jace has become a servant of evil. The Clave is out to destroy Sebastian, but there is no way to harm one boy without destroying the other. As Alec, Magnus, Simon, and Isabelle wheedle and bargain with Seelies, demons, and the merciless Iron Sisters to try to save Jace, Clary plays a dangerous game of her own. The price of losing is not just her own life, but Jace's soul. She's willing to do anything for Jace, but can she still trust him? Or is he truly lost? Love. Blood. Betrayal. Revenge. Darkness threatens to claim the Shadowhunters in the harrowing fifth book of the Mortal Instruments series.",
8039,"Fairest (The Lunar Chronicles, #3.5)","In this stunning bridge book between Cressand Winterin the bestselling Lunar Chronicles, Queen Levana's story is finally told. Mirror, mirror on the wall, Who is the fairest of them all? Fans of the Lunar Chronicles know Queen Levana as a ruler who uses her ""glamour"" to gain power. But long before she crossed paths with Cinder, Scarlet, and Cress, Levana lived a very different story - a story that has never been told . . . until now. Marissa Meyer spins yet another unforgettable tale about love and war, deceit and death. This extraordinary book includes full-color art and an excerpt from Winter, the next book in the Lunar Chronicles series.",
9722,City of Ashes (The Mortal Instruments #2),"Is love worth betraying everything? Plunge into the second adventure in the internationally bestselling Mortal Instruments series and ""prepare to be hooked"" (Entertainment Weekly). Clary Fray just wishes that her life would go back to normal. But what's normal when you're a demon-slaying Shadowhunter, your mother is in a magically induced coma, and you can suddenly see Downworlders like werewolves, vampires, and faeries? If Clary left the world of the Shadowhunters behind, it would mean more time with her best friend, Simon, who's becoming more than a friend. But the Shadowhunting world isn't ready to let her go--especially her handsome, infuriating, newfound brother, Jace. And Clary's only chance to help her mother is to track down rogue Shadowhunter Valentine, who is probably insane, certainly evil--and also her father. To complicate matters, someone in New York City is murdering Downworlder children. Is Valentine behind the killings--and if he is, what is he trying to do? When the second of the Mortal Instruments, the Soul-Sword, is stolen, the terrifying Inquisitor arrives to investigate and zooms right in on Jace. How can Clary stop Valentine if Jace is willing to betray everything he believes in to help their father? In this breathtaking sequel to City of Bones, Cassandra Clare lures her readers back into the dark grip of New York City's Downworld, where love is never safe, and power becomes the deadliest temptation.",
9767,"Clockwork Princess (The Infernal Devices, #3)","Danger and betrayal, love and loss, secrets and enchantment are woven together in the breathtaking finale to the #1 New York Times bestselling Infernal Devices Trilogy, prequel to the internationally bestselling Mortal Instruments series. THE INFERNAL DEVICES WILL NEVER STOP COMING A net of shadows begins to tighten around the Shadowhunters of the London Institute. Mortmain plans to use his Infernal Devices, an army of pitiless automatons, to destroy the Shadowhunters. He needs only one last item to complete his plan: he needs Tessa Gray. Charlotte Branwell, head of the London Institute, is desperate to find Mortmain before he strikes. But when Mortmain abducts Tessa, the boys who lay equal claim to her heart, Jem and Will, will do anything to save her. For though Tessa and Jem are now engaged, Will is as much in love with her as ever. As those who love Tessa rally to rescue her from Mortmain's clutches, Tessa realizes that the only person who can save her is herself. But can a single girl, even one who can command the power of angels, face down an entire army? Danger and betrayal, secrets and enchantment, and the tangled threads of love and loss intertwine as the Shadowhunters are pushed to the very brink of destruction in the breathtaking conclusion to the Infernal Devices trilogy.",
12599,"Clockwork Angel (The Infernal Devices, #1)","This is an alternate cover edition for . Magic is dangerous--but love is more dangerous still. When sixteen-year-old Tessa Gray crosses the ocean to find her brother, her destination is England, the time is the reign of Queen Victoria, and something terrifying is waiting for her in London's Downworld, where vampires, warlocks and other supernatural folk stalk the gaslit streets. Only the Shadowhunters, warriors dedicated to ridding the world of demons, keep order amidst the chaos. Kidnapped by the mysterious Dark Sisters, members of a secret organization called The Pandemonium Club, Tessa soon learns that she herself is a Downworlder with a rare ability: the power to transform, at will, into another person. What's more, the Magister, the shadowy figure who runs the Club, will stop at nothing to claim Tessa's power for his own. Friendless and hunted, Tessa takes refuge with the Shadowhunters of the London Institute, who swear to find her brother if she will use her power to help them. She soon finds herself fascinated by--and torn between--two best friends: James, whose fragile beauty hides a deadly secret, and blue-eyed Will, whose caustic wit and volatile moods keep everyone in his life at arm's length...everyone, that is, but Tessa. As their search draws them deep into the heart of an arcane plot that threatens to destroy the Shadowhunters, Tessa realizes that she may need to choose between saving her brother and helping her new friends save the world...and that love may be the most dangerous magic of all.",
16739,"Clockwork Prince (The Infernal Devices, #2)","In the magical underworld of Victorian London, Tessa Gray has at last found safety with the Shadowhunters. But that safety proves fleeting when rogue forces in the Clave plot to see her protector, Charlotte, replaced as head of the Institute. If Charlotte loses her position, Tessa will be out on the street--and easy prey for the mysterious Magister, who wants to use Tessa's powers for his own dark ends. With the help of the handsome, self-destructive Will and the fiercely devoted Jem, Tessa discovers that the Magister's war on the Shadowhunters is deeply personal. He blames them for a long-ago tragedy that shattered his life. To unravel the secrets of the past, the trio journeys from mist-shrouded Yorkshire to a manor house that holds untold horrors, from the slums of London to an enchanted ballroom where Tessa discovers that the truth of her parentage is more sinister than she had imagined. When they encounter a clockwork demon bearing a warning for Will, they realize that the Magister himself knows their every move--and that one of their own has betrayed them. Tessa finds her heart drawn more and more to Jem, though her longing for Will, despite his dark moods, continues to unsettle her. But something is changing in Will--the wall he has built around himself is crumbling. Could finding the Magister free Will from his secrets and give Tessa the answers about who she is and what she was born to do? As their dangerous search for the Magister and the truth leads the friends into peril, Tessa learns that when love and lies are mixed, they can corrupt even the purest heart. An unabridged recording on 13 CDs (15 hours, 30 minutes).",
23460,"Obsidian (Lux, #1)","From Jennifer L. Armentrout, author of the Covenantseries, comes the unputdownable first novel in the New York Timesbestselling Luxseries. Aliens are the new vampires, and sexy Daemon Black will set your pulse racing... STARTING OVER SUCKS When we moved to West Virginia right before my senior year, I'd pretty much resigned myself to thick accents, dodgy internet access, and a whole lot of boring... until I spotted my hot neighbor, with his looming height and eerie green eyes. Things were looking up. AND THEN HE OPENED HIS MOUTH Daemon is infuriating. Arrogant. Stab-worthy. We do not get along. At all. But when a stranger attacks me and Daemon literally freezes time with a wave of his hand, well, something... unexpected happens. THE HOT ALIEN LIVING NEXT DOOR MARKS ME You heard me. Alien. Turns out Daemon and his sister have a galaxy of enemies wanting to steal their abilities, and Daemon's touch has me lit up like the Vegas Strip. The only way I'm getting out of this alive is by sticking close to Daemon until my alien mojo fades. IF I DON'T KILL HIM FIRST, THAT IS",
28025,"The Assassin's Blade (Throne of Glass, #0.1-0.5)","Discover where Celaena Sardothien's thrilling saga began Celaena Sardothien is her kingdom's most feared assassin. Though she works for the powerful and ruthless Assassin's Guild, Celaena yields to no one and trusts only her fellow killer-for-hire, Sam. When Celaena's scheming master, Arobynn Hamel, dispatches her on missions that take her from remote islands to hostile deserts, she finds herself acting independently of his wishes--and questioning her own allegiance. Along the way, she makes friends and enemies alike, and discovers that she feels far more for Sam than just friendship. But by defying Arobynn's orders, Celaena risks unimaginable punishment, and with Sam by her side, he is in danger, too. They will have to risk it all if they hope to escape Arobynn's clutches--and if they fail, they'll lose not just a chance at freedom, but their lives... A prequel to Throne of Glass, this collection of five novellas offers listeners a deeper look into the history of this cunning assassin and her enthralling--and deadly--world. Included in this volume: The Assassin and the Pirate Lord The Assassin and the Healer The Assassin and the Desert The Assassin and the Underworld The Assassin and the Empire",
30197,"Red Queen (Red Queen, #1)","Mare Barrow's world is divided by blood--those with red and those with silver. Mare and her family are lowly Reds, destined to serve the Silver elite whose supernatural abilities make them nearly gods. Mare steals what she can to help her family survive, but when her best friend is conscripted into the army she gambles everything to win his freedom. A twist of fate leads her to the royal palace itself, where, in front of the king and all his nobles, she discovers a power of her own--an ability she didn't know she had. Except . . . her blood is Red. To hide this impossibility, the king forces her into the role of a lost Silver princess and betroths her to one of his own sons. As Mare is drawn further into the Silver world, she risks her new position to aid the Scarlet Guard--the leaders of a Red rebellion. Her actions put into motion a deadly and violent dance, pitting prince against prince--and Mare against her own heart. From debut author Victoria Aveyard comes a lush, vivid fantasy series where loyalty and desire can tear you apart and the only certainty is betrayal.",
31381,"Shatter Me (Shatter Me, #1)","This is an alternate cover edition for ASIN B00526VVN2. I have a curse I have a gift I am a monster I'm more than human My touch is lethal My touch is power I am their weapon I will fight back Juliette hasn't touched anyone in exactly 264 days. The last time she did, it was an accident, but The Reestablishment locked her up for murder. No one knows why Juliette's touch is fatal. As long as she doesn't hurt anyone else, no one really cares. The world is too busy crumbling to pieces to pay attention to a 17-year-old girl. Diseases are destroying the population, food is hard to find, birds don't fly anymore, and the clouds are the wrong color. The Reestablishment said their way was the only way to fix things, so they threw Juliette in a cell. Now so many people are dead that the survivors are whispering war - and The Reestablishment has changed its mind. Maybe Juliette is more than a tortured soul stuffed into a poisonous body. Maybe she's exactly what they need right now. Juliette has to make a choice: Be a weapon. Or be a warrior.",


In [14]:
HTML(dataframe_to_html(books.filter(pl.col("item_id").is_in(test.sample(1)["bm25_recs"][0].to_list())), ["item_id", "title", "description", "image_url"]))

item_id,title,description,image_url
3423,The Boy in the Black Suit,"A 2016 Coretta Scott King Author Honor book. Just when seventeen-year-old Matt thinks he can't handle one more piece of terrible news, he meets a girl who's dealt with a lot more--and who just might be able to clue him in on how to rise up when life keeps knocking him down--in this ""vivid, satisfying, and ultimately upbeat tale of grief, redemption, and grace"" (Kirkus Reviews) from the Coretta Scott King - John Steptoe Award-winning author of When I Was the Greatest. Matt wears a black suit every day. No, not because his mom died--although she did, and it sucks. But he wears the suit for his gig at the local funeral home, which pays way better than the Cluck Bucket, and he needs the income since his dad can't handle the bills (or anything, really) on his own. So while Dad's snagging bottles of whiskey, Matt's snagging fifteen bucks an hour. Not bad. But everything else? Not good. Then Matt meets Lovey. Crazy name, and she's been through more crazy stuff than he can imagine. Yet Lovey never cries. She's tough. Really tough. Tough in the way Matt wishes he could be. Which is maybe why he's drawn to her, and definitely why he can't seem to shake her. Because there's nothing more hopeful than finding a person who understands your loneliness--and who can maybe even help take it away.",
7209,American Street,"American Street is an evocative and powerful coming-of-age story perfect for fans of Everything, Everything; Bone Gap; and All American Boys. In this stunning debut novel, Pushcart-nominated author Ibi Zoboi draws on her own experience as a young Haitian immigrant, infusing this lyrical exploration of America with magical realism and vodouculture. On the corner of American Street and Joy Road, Fabiola Toussaint thought she would finally find une belle vie--a good life. But after they leave Port-au-Prince, Haiti, Fabiola's mother is detained by U.S. immigration, leaving Fabiola to navigate her loud American cousins, Chantal, Donna, and Princess; the grittiness of Detroit's west side; a new school; and a surprising romance, all on her own. Just as she finds her footing in this strange new world, a dangerous proposition presents itself, and Fabiola soon realizes that freedom comes at a cost. Trapped at the crossroads of an impossible choice, will she pay the price for the American dream?",
10547,Goodbye Stranger,"Masterly. . . . Sensitively explores togetherness, aloneness, betrayal and love. The New York Times Book Review 2016 Honor Book in Fiction, Boston Globe Horn Book Award A NYT Editors' Choice and NYT Notable Children's Books of 2015 This brilliant novel by Newbery Medal winner Rebecca Stead explores multiple perspectives on the bonds and limits of friendship. Bridge is an accident survivor who s wondering why she s still alive. Emily has new curves and an almost-boyfriend who wants a certain kind of picture. Tabitha sees through everybody s games or so she tells the world. The three girls are best friends with one rule: No fighting.Can it get them through seventh grade? This year everything is different for Sherm Russo as he gets to know Bridge Barsamian. What does it mean to fall for a girl as a friend? On Valentine s Day, an unnamed high school girl struggles with a betrayal. How long can she hide in plain sight? Each memorable character navigates the challenges of love and change in this captivating novel. Praise for Goodbye Stranger Six Starred Reviews Publishers WeeklyBest Books of 2015 School Library Journal Best Books of 2015 BooklistEditor s Choice 2015 The Horn Books Fanfare: Best Books of 2015 The Washington PostBest Books of 2015 The New York TimesNotable Children s Books of 2015 This astonishingly profound novel is not your average middle-school friendship tale. The Horn Book, Starred Stead shows how strongly love of all kinds can smooth the juddering path toward adulthood. Winsome, bighearted, and altogether rewarding. Booklist, Starred [Stead] captures the stomach-churning moments of a misstep or an unplanned betrayal and reworks these events with grace, humor, and polish into possibilities for kindness and redemption. Superb. Kirkus Reviews, Starred This memorable story about female friendships, silly bets, different kinds of love, and bad decisions is authentic in detail and emotion. Publishers Weekly, Starred Filled with humor [and] delightful coincidences. . . . An immensely satisfying addition for Stead s many fans. School Library Journal, Starred ""The author as usual deftly interweaves her plot strands into an organic whole, and between the multifocal plot and the exploration on growth and self-recognition. . .deeply explores mistakes, and forgiveness, and growing away from people as well as toward them."" The Bulletin, Starred This eloquent story of friendship, first love, and identity will resonate powerfully with readers. VOYA, Perfect Ten ""Stead raises questions about whether a relationship can survive change. If someone makes a mistake, can you forgive the person, if not the act? Can two people reconcile, if they are both willing to process what happened? Or is the change more systemic--has one of you become a stranger? . . . It's a question all of the characters ask themselves at some point in Stead's perfectly synchronized novel."" Shelf Awareness From the Hardcover edition.""",
11107,"Shadowshaper (Shadowshaper, #1)","Sierra Santiago was looking forward to a fun summer of making art, hanging out with her friends, and skating around Brooklyn. But then a weird zombie guy crashes the first party of the season. Sierra's near-comatose abuelo begins to say ""Lo siento"" over and over. And when the graffiti murals in Bed-Stuy start to weep.... Well, something stranger than the usual New York mayhem is going on. Sierra soon discovers a supernatural order called the Shadowshapers, who connect with spirits via paintings, music, and stories. Her grandfather once shared the order's secrets with an anthropologist, Dr. Jonathan Wick, who turned the Caribbean magic to his own foul ends. Now Wick wants to become the ultimate Shadowshaper by killing all the others, one by one. With the help of her friends and the hot graffiti artist Robbie, Sierra must dodge Wick's supernatural creations, harness her own Shadowshaping abilities, and save her family's past, present, and future.",
13307,Behind the Mountains,"First Person Fiction is dedicated to the immigrant experience in modern America. In ""Behind the Mountains"" Edwidge Danticat tells the story of Celiane and her family's struggles in Haiti and New York. It is election time in Haiti, and bombs are going off in the capital city of Port-au-Prince. During a visit from her home in rural Haiti, Celiane Esperance and her mother are nearly killed. Looking at her country with new eyes, Celiane gains a fresh resolve to be reunited with her father in Brooklyn, New York. The harsh winter and concrete landscape of her new home are a shock to Celiane, who witnesses her parents' struggle to earn a living, her brother's uneasy adjustment to American society, and her own encounters with learning difficulties and school violence.",
18309,Toning The Sweep,"Angela Johnson's Coretta Scott King Award winning novel that traces three generations of African American women as they learn one another's truths. Three generations of African American women, each holding on to a separate truth. Their story -- encompassing racism and murder as well as the family commonplaces that make a life -- is one that readers will never forget.",
21444,All American Boys,"In an unforgettable new novel from award-winning authors Jason Reynolds and Brendan Kiely, two teens--one black, one white--grapple with the repercussions of a single violent act that leaves their school, their community, and, ultimately, the country bitterly divided by racial tension. A bag of chips. That's all sixteen-year-old Rashad is looking for at the corner bodega. What he finds instead is a fist-happy cop, Paul Galluzzo, who mistakes Rashad for a shoplifter, mistakes Rashad's pleadings that he's stolen nothing for belligerence, mistakes Rashad's resistance to leave the bodega as resisting arrest, mistakes Rashad's every flinch at every punch the cop throws as further resistance and refusal to STAY STILL as ordered. But how can you stay still when someone is pounding your face into the concrete pavement? But there were witnesses: Quinn Collins--a varsity basketball player and Rashad's classmate who has been raised by Paul since his own father died in Afghanistan--and a video camera. Soon the beating is all over the news and Paul is getting threatened with accusations of prejudice and racial brutality. Quinn refuses to believe that the man who has basically been his savior could possibly be guilty. But then Rashad is absent. And absent again. And again. And the basketball team--half of whom are Rashad's best friends--start to take sides. As does the school. And the town. Simmering tensions threaten to explode as Rashad and Quinn are forced to face decisions and consequences they had never considered before. Written in tandem by two award-winning authors, this tour de force shares the alternating perspectives of Rashad and Quinn as the complications from that single violent moment, the type taken from the headlines, unfold and reverberate to highlight an unwelcome truth.",
23262,Coverup,"""Quick and fun with a satisfying resolution."" VOYA All Brad remembers about that party was that his friend Alden Whitlock drove him home. He also has the nagging feeling that while he was sleeping in the passenger seat, Alden hit something--or someone. Alden and his father, a powerful judge, deny that anything took place. Brad isn't so sure--especially when beautiful Ellen Hanson comes searching in town for her lost father. Suddenly Brad is forced to examine his cushioned world of privilege and to choose between the truth and deceit. Either way, Brad could lose....",
28970,The Lesser Blessed,"A fresh, funny look at growing up Native in the North, by award-winning author Richard Van Camp. Larry is a Dogrib Indian growing up in the small northern town of Fort Simmer. His tongue, his hallucinations and his fantasies are hotter than the sun. At sixteen, he loves Iron Maiden, the North and Juliet Hope, the high school ""tramp."" When Johnny Beck, a Metis from Hay River, moves to town, Larry is ready for almost anything. In this powerful and often very funny first novel, Richard Van Camp gives us one of the most original teenage characters in fiction. Skinny as spaghetti, nervy and self-deprecating, Larry is an appealing mixture of bravado and vulnerability. His past holds many terrors: an abusive father, blackouts from sniffing gasoline, an accident that killed several of his cousins. But through his friendship with Johnny, he's ready now to face his memories--and his future. Marking the debut of an exciting new writer, The Lesser Blessed is an eye-opening depiction of what it is to be a young Native man in the age of AIDS, disillusionment with Catholicism and a growing world consciousness. A coming-of-age story that any fan of The Catcher in the Rye will enjoy.",
31633,Gangs,"A Booklist Editors' Choice and a Society of School Librarians International (SSLI) Honor Book Street gangs have exploded worldwide. Tattoos, baggy pants, tagging, gangsta style, the unspoken threat -- it's all just around the corner in most of the world's major cities. From the streets of Los Angeles to the shantytowns of Cape Town, hundreds of thousands of ""at risk"" youth are deciding whether they should join their local gang. Violence, guns, the drug trade, racism, poverty, families under pressure and ever-widening slums all provide a witch's brew in which the youth gang tempts young males and females with a sense of identity and belonging that their world has denied them. Gangsexposes the roots of the problem as it moves from the banlieues of France to the favelas of Brazil. It offers a startling analysis of the complicity of the official adult world and some controversial ideas for reforms that might just undermine the appeal of gang life. For many of the world's young -- especially those who are poor -- joining a gang is a real career choice. It is a choice that can be as deadly for young gangsters as for their victims. Richard Swift shows us that we fail to understand gangs at our peril.",


Судя и по метрикам, и по самим рекомендациям, модель действительно получилась хорошая - высокий coverage сочетается с наилучшими полученными значениями и по остальным метрикам среди всех моделей. С применением веса, учитывающего популярность книги, получили желаемый результат - не испортив (и даже улучшив) рекомендации в целом, также рекомендуем и холодные айтемы, как и айтемы с небольшими взаимодействиями

## 5: Идея работы с эмбеддингами

Поработаем с эмбеддингами, потому что они тоже могут сильно влиять на качесто: рассмотрим каждое поле описания книги отдельно, для названий и описаний используем tf-idf, затем объединяем в матрицу и делаем LSA (снижаем размерность до 128)

In [31]:
import re
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from scipy.sparse import hstack
from sklearn.decomposition import TruncatedSVD
from sklearn.preprocessing import normalize

books_for_embedding = books.with_columns(
    authors_str = pl.col("authors").list.eval(
        pl.lit("author_") + pl.element().struct.field(books.schema['authors'].inner.fields[0].name)
    ).list.join(" ").fill_null(""),
    
    tags_str = pl.col("tags").list.eval(
        pl.lit("tag_") + pl.element().str.replace_all("-", "_", literal=True).str.replace_all(" ", "_", literal=True)
    ).list.join(" ").fill_null("")
)

title_vectorizer = TfidfVectorizer(max_features=5000, stop_words='english', ngram_range=(1, 2))
title_vecs = title_vectorizer.fit_transform(books_for_embedding['title'].fill_null(""))

desc_vectorizer = TfidfVectorizer(max_features=15000, stop_words='english')
desc_vecs = desc_vectorizer.fit_transform(books_for_embedding['description'].fill_null(""))

categorical_text = books_for_embedding.select(
    pl.concat_str(["authors_str", "tags_str"], separator=" ")
).to_series()
cat_vectorizer = CountVectorizer(max_features=10000, token_pattern=r'[\w_]+')
cat_vecs = cat_vectorizer.fit_transform(categorical_text)

combined_sparse_matrix = hstack([title_vecs, desc_vecs, cat_vecs]).tocsr()

n_embedding_dims = 128
lsa = TruncatedSVD(n_components=n_embedding_dims, random_state=42)
dense_embeddings_raw = lsa.fit_transform(combined_sparse_matrix)

normalized_embeddings = normalize(dense_embeddings_raw, norm='l2', axis=1)

books = books.with_columns(
    pl.Series("embedding_new", normalized_embeddings.tolist())
)

display(books.head())

item_id,series,tags,title,description,url,image_url,authors,embedding_new
i64,list[str],list[str],str,str,str,str,list[struct[2]],list[f64]
0,[null],"[""e-book"", ""young-adult"", … ""y-a""]","""Hallie Hath No Fury . . .""","""There are two sides to every s…","""https://www.goodreads.com/book…","""https://images.gr-assets.com/b…","[{""1879494"",""""}]","[0.550546, 0.033266, … -0.019082]"
1,"[""149079""]","[""primary"", ""melissa-j--morgan"", … ""fiction""]","""Hide and Shriek: Super Special…","""The girls go on an overnight a…","""https://www.goodreads.com/book…","""https://s.gr-assets.com/assets…","[{""21740"",""""}]","[0.585932, 0.176821, … -0.004595]"
2,[null],"[""friendship"", ""middle-reader"", … ""my-library""]","""Dear Mom, You're Ruining My Li…","""Samantha Slayton worries about…","""https://www.goodreads.com/book…","""https://s.gr-assets.com/assets…","[{""18946"",""""}]","[0.5157, 0.278442, … -0.002641]"
3,"[""151088""]","[""summer-2017"", ""bullying"", … ""re-read""]","""Bratfest at Tiffany's (Clique …","""Massie Block: The Briarwood bo…","""https://www.goodreads.com/book…","""https://images.gr-assets.com/b…","[{""4605"",""""}]","[0.638572, 0.326413, … -0.04838]"
4,"[""812067""]","[""rosemary-vernon"", ""young-adult"", … ""to-read""]","""Questions of Love (Sweet Dream…","""When Sammi Edwards is chosen t…","""https://www.goodreads.com/book…","""https://images.gr-assets.com/b…","[{""792676"",""""}]","[0.324899, 0.076244, … -0.013045]"


## Что еще можно сделать, чтобы улучшить рекомендации?

1. Улучшить признаки, больше использовав метаданные книг, например

    - Семантические признаки из текста: использовать языковые модели (например, на основе BERT) для получения эмбеддингов для описаний и названий книг

    - Графовые признаки: построить граф, где узлами являются пользователи, книги, авторы и серии. Использовать, например, Node2Vec, чтобы получить графовые эмбеддинги - получится найти более сложные зависимости, возможно

    - Временные признаки: учесть динамику интересов пользователя и старение популярности книг, то есть как-то еще использовать date_added

2. Архитектура моделей

    - Нейтросетевые подходы 
    - Графовые нейронные сети (GNN) - использовать граф взаимодействий, возможно, позволило бы получить очень высокие результаты

3. Гибридные модели/ансамбли: даже исходя из имеющихся результатов, хорошо видно, что при объединении различных моделей их сильные стороны могут дать хороший результат

    - Переранжирование (Reranking): сгенерировать кандидатов, используя эффективную модель (например, BM25ItemKNNRecommender) для генерации широкого списка релевантных кандидатов. А затем обучить несколько моделей, и возможно, ансамблировать 