In [2]:
!pip install implicit

Collecting implicit
  Downloading implicit-0.7.2-cp311-cp311-manylinux2014_x86_64.whl.metadata (6.1 kB)
Downloading implicit-0.7.2-cp311-cp311-manylinux2014_x86_64.whl (8.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.9/8.9 MB[0m [31m51.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: implicit
Successfully installed implicit-0.7.2


In [4]:
from typing import Dict, List, Sequence
import scipy.sparse as sp
import numpy as np
from implicit.als import AlternatingLeastSquares

UserLogs = Dict[str, Sequence[str]]

def build_model(user_logs: UserLogs,
                factors: int = 64, regularization: float = 0.01,
                iterations: int = 20):
    users = list(user_logs.keys())
    books = sorted({b for bl in user_logs.values() for b in bl})
    uid = {u: i for i, u in enumerate(users)}
    bid = {b: i for i, b in enumerate(books)}

    rows, cols, data = [], [], []
    for u, books_read in user_logs.items():
        for b in books_read:
            rows.append(uid[u])
            cols.append(bid[b])
            data.append(1.0)
    mat = sp.csr_matrix((data, (rows, cols)),
                        shape=(len(users), len(books)))

    model = AlternatingLeastSquares(factors=factors,
                                    regularization=regularization,
                                    iterations=iterations)
    model.fit(mat.T)

    return model, bid, books

def recommend(model, bid: Dict[str, int], books: List[str],
              input_titles: Sequence[str], top_k: int = 5) -> List[str]:
    import numpy as np
    if not input_titles:
        raise ValueError("input_titles пусто")

    ids = [bid[t] for t in input_titles if t in bid]
    if not ids:
        raise KeyError("Нет совпадений во входных названиях")

    user_vec = np.add.reduce([model.item_factors[i] for i in ids])
    sims = model.item_factors @ user_vec
    ranked = sorted(((score, i) for i, score in enumerate(sims)),
                    reverse=True)

    res = []
    for score, idx in ranked:
        title = books[idx]
        if title not in input_titles:
            res.append(title)
        if len(res) == top_k:
            break
    return res


In [5]:
logs = {
        "u1": ["Дюна – Фрэнк Герберт", "451° по Фаренгейту – Рэй Брэдбери"],
        "u2": ["Дюнкерк – Антони Бивор", "Убить пересмешника – Харпер Ли"],
        "u3": ["Гарри Поттер и философский камень – Дж. К. Роулинг",
               "Перси Джексон и похититель молний – Рик Риордан"],
        "u4": ["Дюна – Фрэнк Герберт", "Властелин колец – Дж. Р. Р. Толкин"],
    }
model, bid, books = build_model(logs)
print(recommend(model, bid, books, ["Дюна – Фрэнк Герберт"], 5))



  0%|          | 0/20 [00:00<?, ?it/s]

['Гарри Поттер и философский камень – Дж. К. Роулинг', '451° по Фаренгейту – Рэй Брэдбери', 'Властелин колец – Дж. Р. Р. Толкин']
