In [None]:
import ast

import numpy as np
import polars as pl
from scipy.sparse import csr_matrix


In [None]:
# Load the raw recipe dump.
df = pl.read_csv("../povarenok/povarenok_recipes_2021_06_16.csv")

# Each "ingredients" cell is text that ast.literal_eval turns into a mapping;
# only the mapping's keys (the ingredient names) are kept, as a list of strings.
ingredient_names = pl.col("ingredients").map_elements(
    lambda raw: list(ast.literal_eval(raw).keys()),
    return_dtype=pl.List(pl.Utf8),
)

# Keep only recipes whose parsed ingredient list is present and non-empty.
has_ingredients = pl.col("ingredients").is_not_null() & (
    pl.col("ingredients").list.len() > 0
)

df = df.with_columns(ingredient_names.alias("ingredients")).filter(has_ingredients)

In [None]:
# One row per (recipe, ingredient) pair instead of one row per recipe.
df_exploded = df.explode(["ingredients"])

# Same frame without the "url" column.
df_exploded_wo_url = df_exploded.drop(["url"])


In [None]:
# Give every distinct recipe name a dense integer id. Sorting before numbering
# keeps the name -> id assignment reproducible from run to run.
unique_names = df_exploded_wo_url.select("name").unique().sort("name")
unique_names = unique_names.with_columns(pl.arange(0, pl.len()).alias("name_id"))

# Join back to the exploded dataframe so every row carries its name_id.
df = df_exploded_wo_url.join(unique_names, on="name", how="left")

# Same for ingredients. `unique()` does not promise any row order, so sort
# before numbering; otherwise the ids could change between kernel restarts.
# NOTE: the "ingridient" spelling is kept as-is because code outside this cell
# may refer to the `ingridient_id` column by name.
unique_ingridients = df.select("ingredients").unique().sort("ingredients")
unique_ingridients = unique_ingridients.with_columns(
    pl.arange(0, pl.len()).alias("ingridient_id")
)
df = df.join(unique_ingridients, on="ingredients", how="left")

In [None]:
class BaseItemRecommender:
    """Base class for item-to-item recommenders (ingredient-based).

    Builds a binary sparse item × feature matrix from a long-format table that
    holds one row per (item, feature) pair, plus the label <-> index mappings
    needed to address that matrix. Subclasses provide the similarity logic by
    overriding `find_similar_items` and `recommend_batch`.

    Attributes:
        item_col: Name of the column holding item labels.
        feature_col: Name of the column holding feature labels.
        id2item: Array of distinct item labels; position is the matrix row.
        id2feature: Array of distinct feature labels; position is the matrix column.
        item2id: Mapping from item label to matrix row index.
        feature2id: Mapping from feature label to matrix column index.
        interactions_matrix: CSR matrix (items × features), float32, with 1.0
            wherever the item has the feature.
        interactions_matrix_t: Transpose of `interactions_matrix` (features × items).
        row_lengths: Number of stored features per item.
        col_lengths: Number of stored items per feature.
    """

    def __init__(
        self,
        interactions: "pl.DataFrame",
        item_col: str = "name",
        feature_col: str = "ingredient",
    ):
        """Index the labels and build the sparse item × feature matrix.

        Args:
            interactions: Long-format table with one row per (item, feature) pair.
            item_col: Column of `interactions` that holds the item labels.
            feature_col: Column of `interactions` that holds the feature labels.
                Pass it explicitly when the column is not called "ingredient".
        """
        self.item_col = item_col
        self.feature_col = feature_col

        # Index -> label lookups; the position in each array is the matrix index.
        self.id2item = np.array(interactions[item_col].unique())
        self.id2feature = np.array(interactions[feature_col].unique())

        # Label -> index lookups (inverse of the arrays above).
        self.item2id = {item: idx for idx, item in enumerate(self.id2item)}
        self.feature2id = {feat: idx for idx, feat in enumerate(self.id2feature)}

        # Coordinates of every observed (item, feature) pair.
        rows = [self.item2id[i] for i in interactions[item_col]]
        cols = [self.feature2id[i] for i in interactions[feature_col]]
        data = np.ones(len(rows), dtype=np.float32)

        # Matrix: Items × Features
        self.interactions_matrix = csr_matrix(
            (data, (rows, cols)),
            shape=(len(self.id2item), len(self.id2feature)),
            dtype=np.float32,
        )
        # A pair that occurs several times in `interactions` (e.g. two recipes
        # sharing one name and one ingredient) is summed by scipy into a value
        # greater than 1. The matrix is meant to be a binary indicator, so
        # collapse duplicates and reset every stored value to 1.
        self.interactions_matrix.sum_duplicates()
        self.interactions_matrix.data[:] = 1.0

        # Transpose: Features × Items
        self.interactions_matrix_t = self.interactions_matrix.T

        # Non-zero counts
        self.row_lengths = np.array(
            self.interactions_matrix.getnnz(axis=1)
        ).ravel()  # ingredients per item
        self.col_lengths = np.array(
            self.interactions_matrix.getnnz(axis=0)
        ).ravel()  # items per ingredient

    def _get_item_features(self, item_idx: int) -> set[str]:
        """Return the feature labels (not indices) stored for one matrix row.

        Args:
            item_idx: Row index into `interactions_matrix`.

        Returns:
            Set of entries of `id2feature` whose columns are stored in that row.
        """
        return set(self.id2feature[self.interactions_matrix[item_idx].indices])

    def find_similar_items(self, item_id: str, topn: int = 10) -> list[str]:
        """Return up to `topn` items similar to `item_id`; implemented by subclasses.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    def recommend_batch(
        self, item_ids: list[str], topn: int = 10
    ) -> dict[str, list[str]]:
        """Return recommendations for several items at once; implemented by subclasses.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

In [None]:
# The exploded frame keeps ingredient names in the "ingredients" column, so the
# class default (feature_col="ingredient") has to be overridden here.
base = BaseItemRecommender(df, feature_col="ingredients")

# The matrix attribute is `interactions_matrix`. Only the first rows are
# densified for display; converting the whole recipes × ingredients matrix to a
# dense array would allocate every cell, zeros included.
base.interactions_matrix[:5].toarray()
