In [None]:
from __future__ import annotations

from itertools import islice
from typing import Callable, Dict, Hashable, Literal, Sequence, Tuple

import numpy as np
from numpy.typing import NDArray

In [None]:
import annoy

ModuleNotFoundError: ignored

In [None]:
from __future__ import annotations

from itertools import islice
from typing import Callable, Dict, Hashable, Literal, Sequence, Tuple

import numpy as np
from numpy.typing import NDArray


class AnnoyIndex:
    """
    Exhaustive distance calculator between one query vector and many rows.

    Despite the name, nothing is indexed: every call compares the query with
    each row of the training matrix.

    Parameters
    ----------
    metric:
        Name of the distance to compute. Supported values:

        * ``"euclidian"`` / ``"euclidean"`` - L2 norm of the difference;
        * ``"frobenius"`` / ``"manhattan"`` - L1 norm of the difference
          (the historical name ``"frobenius"`` is kept for compatibility);
        * ``"chebyshev"`` - L-infinity norm of the difference;
        * ``"cosine"`` - ``1 - cosine similarity``;
        * ``"ip"`` - negated inner product, so that a larger inner product
          yields a smaller "distance".
    """

    def __init__(self, metric):
        self.metric = metric

    def compute_distances(self, X_train, X_test):
        """
        Computes the distance from a single query vector to every row.

        Parameters
        ----------
        X_train:
            Array of shape (n, d); one candidate vector per row.
        X_test:
            The query vector. It is flattened before use, so both (d,) and
            (1, d) are accepted.

        Returns
        -------
        dists
            Float array of shape (n,); smaller values mean "closer" for every
            supported metric.

        Raises
        ------
        ValueError
            If ``self.metric`` is not one of the supported names.
        """
        train = np.asarray(X_train, dtype=float)
        # Flattening keeps np.linalg.norm in "vector norm" mode; a (1, d)
        # query would otherwise be treated as a matrix for ord=1 / ord=inf.
        query = np.asarray(X_test, dtype=float).ravel()
        metric = self.metric

        if metric in ("euclidian", "euclidean"):
            return np.linalg.norm(train - query, ord=2, axis=1)

        if metric in ("frobenius", "manhattan"):
            return np.linalg.norm(train - query, ord=1, axis=1)

        if metric == "chebyshev":
            return np.linalg.norm(train - query, ord=np.inf, axis=1)

        if metric == "cosine":
            norms = np.linalg.norm(train, axis=1) * np.linalg.norm(query)
            # A zero vector has no direction: treat its similarity as 0
            # (distance 1) instead of producing NaN.
            similarity = np.zeros(train.shape[0])
            np.divide(train @ query, norms, out=similarity, where=norms > 0)
            return 1.0 - similarity

        if metric == "ip":  # inner product
            # Negated so that ascending order ranks the best match first.
            return -(train @ query)

        raise ValueError(f"Unsupported metric: {metric!r}")


class AnnoyRecommender(AnnoyIndex):
    """
    Recommends items to a user by comparing the user's vector with item vectors.

    Two exhaustive strategies are available:

    * ``recommend_single_user`` ranks items by the distance selected through
      ``metric`` (smallest first);
    * ``recommend_bruteforce_single_user`` ranks items by ``sim_function``
      (largest first) and can restrict the result to a whitelist.

    No Annoy index is built by this class. ``_get_similar`` only works once an
    index object exposing ``get_nns_by_vector`` has been assigned to
    ``self.index``.

    Parameters
    ----------
    item_vectors:
        Array with one item vector per row.
    user_vectors:
        Array with one user vector per row.
    user_id_user_index_id_mapping:
        External user id -> row number in ``user_vectors``.
    item_id_item_index_id_mapping:
        External item id -> row number in ``item_vectors``.
    item_mappings_remap:
        Row number in ``item_vectors`` -> id returned by
        ``recommend_single_user``.
    top_k:
        Number of items returned by ``recommend_bruteforce_single_user``.
    dim:
        Stored as given; not read by any method of this class.
    sim_function:
        Called as ``sim_function(user_vector, item_vectors)`` where
        ``user_vector`` has been reshaped to a single row. It must return one
        score per item row; higher means more similar.
    metric:
        Distance name forwarded to the parent class.
    n_trees, n_jobs:
        Stored as given; not read by any method of this class.
    search_k, n_neighbors:
        Passed to ``self.index.get_nns_by_vector`` by ``_get_similar``.
    """

    def __init__(
        self,
        item_vectors: NDArray[np.float32],
        user_vectors: NDArray[np.float32],
        user_id_user_index_id_mapping: Dict[Hashable, int],
        item_id_item_index_id_mapping: Dict[Hashable, int],
        item_mappings_remap: Dict[Hashable, int],
        top_k: int,
        dim: int,
        sim_function: Callable[[np.ndarray, np.ndarray], np.ndarray],
        metric: str = 'euclidian',
        n_trees: int = 10,
        n_jobs: int = -1,
        search_k: int = -1,
        n_neighbors: int = 500,
    ) -> None:
        super().__init__(metric)  # stores self.metric
        self.item_vectors = item_vectors
        self.user_vectors = user_vectors
        self.uid_uiid_mapping = user_id_user_index_id_mapping  # users: external -> internal
        self.iid_iiid_mapping = item_id_item_index_id_mapping  # items: external -> internal
        self.uiid_uid_mapping = {v: k for k, v in user_id_user_index_id_mapping.items()}
        self.iiid_iid_mapping = {v: k for k, v in item_id_item_index_id_mapping.items()}
        self.top_k = top_k
        self.dim = dim
        self.sim_function = sim_function
        self.n_trees = n_trees
        self.n_jobs = n_jobs
        self.search_k = search_k
        self.n_neighbors = n_neighbors
        self.item_mappings_remap = item_mappings_remap
        # Nothing in this class builds an index; it has to be attached by the
        # caller before _get_similar can be used.
        self.index = None

    def fit(self, X_test, k):
        """
        Returns the internal indices of the ``k`` items closest to a user.

        Despite its name this method trains nothing; it performs one
        exhaustive nearest-item query.

        Parameters
        ----------
        X_test:
            External user id (a key of the user id mapping).
        k:
            Maximum number of item indices to return.

        Returns
        -------
        inds
            Integer array of at most ``k`` row numbers of ``item_vectors``,
            ordered from the smallest distance to the largest.

        Raises
        ------
        KeyError
            If ``X_test`` is not a known user id.
        ValueError
            If ``k`` is negative.
        """
        if k < 0:
            raise ValueError(f"k must be non-negative, got {k}")

        query = np.asarray(self.user_vectors[self.uid_uiid_mapping[X_test]])
        dists = self.compute_distances(np.asarray(self.item_vectors), query)

        # Slicing (instead of filling a fixed-size buffer) tolerates k larger
        # than the number of items; the stable sort keeps ties in row order.
        return np.argsort(dists, kind="stable")[:k]

    def recommend_single_user(self, user_id: Hashable, k: int) -> Sequence[Hashable]:
        """
        Recommends up to ``k`` items for ``user_id`` using the configured metric.

        Parameters
        ----------
        user_id:
            External user id.
        k:
            Maximum number of recommendations.

        Returns
        -------
        A list of ids obtained by passing the nearest item row numbers through
        ``item_mappings_remap``, closest item first.
        """
        nearest_items = self.fit(user_id, k)
        return [self.item_mappings_remap[idx] for idx in nearest_items.tolist()]

    def recommend_bruteforce_single_user(self, user_id: Hashable, item_whitelist: Sequence[Hashable]) -> Sequence[Hashable]:
        """
        Recommends ``top_k`` items for ``user_id`` ranked by ``sim_function``.

        Parameters
        ----------
        user_id:
            External user id.
        item_whitelist:
            External item ids that may be recommended. An empty sequence means
            "every known item".

        Returns
        -------
        A list of at most ``top_k`` external item ids, most similar first.
        """
        internal_uid, internal_item_whitelist = self._external_inputs_to_internal_item(
            user_id, item_whitelist
        )
        if len(item_whitelist) == 0:
            internal_item_whitelist = list(self.iid_iiid_mapping.values())

        user_vector = self.user_vectors[internal_uid, :].reshape(1, -1)
        scores = np.asarray(self.sim_function(user_vector, self.item_vectors))
        # Flatten before sorting so that both (1, n) and (n, 1) score layouts
        # are ranked across items.
        ranked = np.argsort(-scores.ravel()).tolist()
        top = self._get_filtered_top(
            candidates=ranked, allowed_items=internal_item_whitelist
        )
        return self._map_internal_to_external_id(top)

    def _external_inputs_to_internal_item(
        self, user_id: Hashable, item_whitelist: Sequence[Hashable] = ()
    ):
        """
        Translates external ids into internal row numbers.

        Parameters
        ----------
        user_id:
            External user id.
        item_whitelist:
            External item ids; ids missing from the item mapping are skipped
            because they can never be recommended.

        Returns
        -------
        internal_uid, internal_item_whitelist
            The user's row number and the list of whitelisted item row numbers.
        """
        internal_uid = self.uid_uiid_mapping[user_id]
        internal_item_whitelist = [
            self.iid_iiid_mapping[item_id]
            for item_id in item_whitelist
            if item_id in self.iid_iiid_mapping
        ]
        return internal_uid, internal_item_whitelist

    def _get_similar(
        self, user_vector: NDArray[np.float32]
    ) -> Sequence[int]:
        """
        Gets nearest neighbors from the index stored in ``self.index``

        Parameters
        ----------
        user_vector:
            The user's vector representation, passed to the index unchanged

        Returns
        -------
        nearest_neighbors
            Whatever ``self.index.get_nns_by_vector`` returns for
            ``n_neighbors`` neighbours without distances

        Raises
        ------
        AttributeError
            If no index has been assigned to ``self.index``
        """
        index = getattr(self, "index", None)
        if index is None:
            raise AttributeError(
                "AnnoyRecommender.index is not set; attach an index before calling _get_similar"
            )
        nearest_neighbors = index.get_nns_by_vector(
            user_vector,
            self.n_neighbors,
            search_k=self.search_k,
            include_distances=False,
        )
        return nearest_neighbors

    def _get_filtered_top(
        self, candidates: Sequence[int], allowed_items: Sequence[int]
    ) -> Sequence[int]:
        """
        Takes candidates, intersects with allowed items and returns top_k similar items

        Parameters
        ----------
        candidates:
            A sequence of candidates to recommend, best first
        allowed_items:
            A sequence of items allowed to recommend

        Returns
        -------
        A list with the first top_k candidates that are allowed, in the
        order in which they appear in ``candidates``
        """
        allowed_items_set = set(allowed_items)
        return list(
            islice(
                (cand for cand in candidates if cand in allowed_items_set), self.top_k
            )
        )

    def _map_internal_to_external_id(
        self, seq_to_map: Sequence[int]
    ) -> Sequence[Hashable]:
        """Maps item row numbers back to external item ids."""
        return [self.iiid_iid_mapping[item] for item in seq_to_map]


In [None]:
# Toy fixture: four item rows and three user rows, three components each.
item_vectors = np.array([[1, 1, 1], [2, 2, 2], [1, 1, 0], [2, 2, 1]])
user_vectors = np.array([[3, 3, 3], [2, 2, 0], [2, 1, 2]])

# External id -> row number in the matching array above.
user_map = dict(zip((10, 11, 12), range(3)))
item_map = dict(zip((100, 101, 102, 103), range(4)))

# Reverse lookup for items: row number -> external id.
keys = [*item_map]
item_mappings_remap = {row: item_id for item_id, row in item_map.items()}



In [None]:
def dot_similarity(user_vector, item_vectors):
    """
    Scores every item row against a single user row by inner product.

    recommend_bruteforce_single_user calls the similarity function with the
    user vector reshaped to one row and the full item matrix, so the item
    matrix has to be transposed. Plain ``np.dot`` on a (1, 3) and a (4, 3)
    array raises a shape error.

    Returns
    -------
    Array of shape (1, number of item rows); higher means more similar.
    """
    return user_vector @ item_vectors.T


recommender = AnnoyRecommender(
    item_vectors=item_vectors,
    user_vectors=user_vectors,
    user_id_user_index_id_mapping=user_map,
    item_id_item_index_id_mapping=item_map,
    item_mappings_remap=item_mappings_remap,
    top_k=2,
    dim=3,
    sim_function=dot_similarity,
    metric="euclidian",
)

In [None]:
# Request two recommendations; the returned list is the cell's output.
# NOTE(review): 100 is a key of item_map, not of user_map (whose keys are
# 10, 11 and 12), although it is passed as user_id - confirm which id space
# this call is meant to use.
recommender.recommend_single_user(k=2, user_id=100)

[102, 101]