In [1]:
import numpy as np
import pandas as pd
from IPython.display import display


In [2]:
# Input rating matrix: one row per product, one column per user.
# A value of 0 means that the user has not rated the product.
input_data = np.array(
    [
        [5, 4, 5, 5, 5, 0, 5, 5, 4, 5, 5],  # product 0
        [5, 5, 3, 4, 5, 0, 5, 4, 4, 2, 5],  # product 1
        [5, 4, 4, 2, 5, 0, 5, 3, 3, 0, 3],  # product 2
        [5, 4, 3, 0, 3, 0, 5, 4, 3, 3, 5],  # product 3
        [5, 5, 4, 0, 0, 0, 5, 4, 4, 1, 5],  # product 4
        [5, 5, 5, 3, 1, 0, 5, 0, 4, 1, 0],  # product 5
    ],
    dtype=np.float64,
)
input_data

array([[5., 4., 5., 5., 5., 0., 5., 5., 4., 5., 5.],
       [5., 5., 3., 4., 5., 0., 5., 4., 4., 2., 5.],
       [5., 4., 4., 2., 5., 0., 5., 3., 3., 0., 3.],
       [5., 4., 3., 0., 3., 0., 5., 4., 3., 3., 5.],
       [5., 5., 4., 0., 0., 0., 5., 4., 4., 1., 5.],
       [5., 5., 5., 3., 1., 0., 5., 0., 4., 1., 0.]])

# Формирование рекомендаций для пользователя, уже оценившего другие товары

In [3]:
# Column index of the user for whom recommendations are built.
user_index = 3
# Row indices of all products in the rating matrix.
products_indices = list(range(input_data.shape[0]))

In [4]:
# Row indices of the products the selected user has not rated (a rating of 0 means "not rated").
not_rated_products_indices = tuple(np.nonzero(input_data[:, user_index] == 0)[0])
not_rated_products_indices

(np.int64(3), np.int64(4))

In [5]:
def get_cosine_similarity(array_1: np.ndarray, array_2: np.ndarray) -> float:
    """Compute the cosine similarity of two 1-D vectors.

    Args:
        array_1: First vector.
        array_2: Second vector, of the same length as ``array_1``.

    Returns:
        ``dot(array_1, array_2) / (||array_1|| * ||array_2||)``.
        If either vector has zero norm the similarity is undefined; ``0.0`` is
        returned in that case instead of NaN, so callers that compare the
        result against a threshold treat such vectors as "not similar".
    """
    norms_product = np.linalg.norm(array_1) * np.linalg.norm(array_2)
    if norms_product == 0:
        # Avoid 0/0 (NaN plus a RuntimeWarning) for all-zero vectors.
        return 0.0
    return np.dot(array_1, array_2) / norms_product


def get_cosine_similarity_matrix(data: "np.ndarray | None" = None) -> np.ndarray:
    """Build the pairwise cosine similarity matrix of the rows of a rating matrix.

    Args:
        data: 2-D matrix whose rows are compared with each other. When omitted,
            the notebook-level ``input_data`` is used, which keeps existing
            zero-argument calls working.

    Returns:
        A square float matrix with one row and one column per row of ``data``.
        Entry ``[i, j]`` is the cosine similarity of rows ``i`` and ``j``;
        the diagonal is left at 0.
    """
    if data is None:
        data = input_data

    products_count = data.shape[0]
    products_similarity_matrix = np.zeros((products_count, products_count), dtype=float)

    for i in range(products_count):
        # Cosine similarity is symmetric, so each pair is computed once
        # and written to both triangles of the matrix.
        for j in range(i + 1, products_count):
            similarity_value = get_cosine_similarity(data[i], data[j])
            products_similarity_matrix[i, j] = similarity_value
            products_similarity_matrix[j, i] = similarity_value

    return products_similarity_matrix


In [6]:
# Compute the product-to-product cosine similarity matrix and render it as a table.
similarity_matrix = get_cosine_similarity_matrix()
display(pd.DataFrame(data=similarity_matrix))

Unnamed: 0,0,1,2,3,4,5
0,0.0,0.967599,0.91097,0.922353,0.839049,0.792306
1,0.967599,0.0,0.961224,0.938138,0.877007,0.813302
2,0.91097,0.961224,0.0,0.918295,0.871721,0.853566
3,0.922353,0.938138,0.918295,0.0,0.945405,0.764306
4,0.839049,0.877007,0.871721,0.945405,0.0,0.814185
5,0.792306,0.813302,0.853566,0.764306,0.814185,0.0


In [7]:
# Arithmetic mean of the ratings the user has actually given; zero entries are skipped.
avg_user_rating = input_data[input_data[:, user_index] != 0, user_index].mean()
print(f'Средний пользовательский рейтинг пользователя #{user_index}: {avg_user_rating}')

Средний пользовательский рейтинг пользователя #3: 3.5


In [8]:
# Minimum cosine similarity for a product to count as "similar" to the target one.
# The value does not change between iterations, so it is defined once before the loop.
min_similarity_value = 0.85

for not_rated_product_index in not_rated_products_indices:
    # Similarity of the target product to every product (its own diagonal entry is 0).
    # The similarity matrix is symmetric, so this row also serves as the weight vector below.
    target_similarities = similarity_matrix[not_rated_product_index, :]

    # Products that are not similar enough to the target. The check is written as
    # `not (value >= threshold)` so that NaN similarities are rejected as well.
    not_similar_products = [
        i for i in products_indices
        if i != not_rated_product_index and not (target_similarities[i] >= min_similarity_value)
    ]

    # Other products the user has not rated either: they carry no rating to learn from.
    products_without_rating_on_product = [
        i for i in products_indices
        if i != not_rated_product_index and input_data[i, user_index] == 0
    ]

    # Everything that must be removed from the similarity table.
    indices_to_delete = sorted(set(not_similar_products + products_without_rating_on_product))

    # Products kept for the similarity table: the target itself plus its similar, rated neighbours.
    products_indices_of_similarity_for_similar_products = [
        i for i in products_indices if i not in indices_to_delete
    ]
    # Explicit integer dtype keeps indexing valid even when the list is empty.
    kept_products = np.array(products_indices_of_similarity_for_similar_products, dtype=int)
    similar_products_similarity = similarity_matrix[np.ix_(kept_products, kept_products)]
    display(
        pd.DataFrame(
            similar_products_similarity,
            columns=products_indices_of_similarity_for_similar_products,
            index=products_indices_of_similarity_for_similar_products,
        )
    )

    # Ratings the user gave to the neighbours (the target is excluded: it has no rating yet).
    similar_products_indices = [
        i for i in products_indices_of_similarity_for_similar_products
        if i != not_rated_product_index
    ]
    neighbour_products = np.array(similar_products_indices, dtype=int)
    similar_products_user_rankings = input_data[neighbour_products, user_index]
    display(pd.DataFrame(similar_products_user_rankings, columns=[user_index], index=similar_products_indices))

    print(f'Продукт {not_rated_product_index}:')
    # Weights are selected by explicit neighbour index, so they always line up
    # one-to-one with the ratings above (no reliance on the zero diagonal).
    cos_similarity = target_similarities[neighbour_products]
    total_weight = np.sum(np.abs(cos_similarity))
    if total_weight > 0:
        # Similarity-weighted average of the user's ratings of the neighbours.
        calculated_rating = np.sum(similar_products_user_rankings * cos_similarity) / total_weight
    else:
        # No similar rated product: the prediction is undefined. NaN compares as
        # "not greater" than the user's average, so the product is not recommended.
        calculated_rating = float('nan')
    print(f'Рассчитанная оценка: {calculated_rating}')
    print(
        f'Вердикт: {"РЕКОМЕНДОВАН" if calculated_rating > avg_user_rating else "НЕ РЕКОМЕНДОВАН"} '
        f'пользователю #{user_index}.\n'
    )


Unnamed: 0,0,1,2,3
0,0.0,0.967599,0.91097,0.922353
1,0.967599,0.0,0.961224,0.938138
2,0.91097,0.961224,0.0,0.918295
3,0.922353,0.938138,0.918295,0.0


Unnamed: 0,3
0,5.0
1,4.0
2,2.0


Продукт 3:
Рассчитанная оценка: 3.6709943484403884
Вердикт: РЕКОМЕНДОВАН пользователю #3.



Unnamed: 0,1,2,4
1,0.0,0.961224,0.877007
2,0.961224,0.0,0.871721
4,0.877007,0.871721,0.0


Unnamed: 0,3
1,4.0
2,2.0


Продукт 4:
Рассчитанная оценка: 3.0030231858536784
Вердикт: НЕ РЕКОМЕНДОВАН пользователю #3.



# Формирование рекомендаций для нового пользователя


In [9]:
# Switch to the "new user" scenario: column 5 of input_data contains only zeros, i.e. no ratings.
user_index = 5

In [10]:
# Replace zeros with NaN so that missing ratings are ignored when averaging ratings per product.
nan_instead_zeros_input_data = np.where(input_data == 0, np.nan, input_data)
nan_instead_zeros_input_data

array([[ 5.,  4.,  5.,  5.,  5., nan,  5.,  5.,  4.,  5.,  5.],
       [ 5.,  5.,  3.,  4.,  5., nan,  5.,  4.,  4.,  2.,  5.],
       [ 5.,  4.,  4.,  2.,  5., nan,  5.,  3.,  3., nan,  3.],
       [ 5.,  4.,  3., nan,  3., nan,  5.,  4.,  3.,  3.,  5.],
       [ 5.,  5.,  4., nan, nan, nan,  5.,  4.,  4.,  1.,  5.],
       [ 5.,  5.,  5.,  3.,  1., nan,  5., nan,  4.,  1., nan]])

In [11]:
# Mean rating of every product (one value per row); NaN entries are ignored by nanmean.
averages_products_rating = np.nanmean(nan_instead_zeros_input_data, axis=1)
averages_products_rating

array([4.8       , 4.2       , 3.77777778, 3.88888889, 4.125     ,
       3.625     ])

In [12]:
# A new user has no rating history, so the product with the highest mean rating is recommended.
max_rated_product_index = averages_products_rating.argmax()
print(
    f'Рекомендовать новому пользователю продукт #{max_rated_product_index} '
    f'(средний рейтинг == {averages_products_rating[max_rated_product_index]}).'
)

Рекомендовать новому пользователю продукт #0 (средний рейтинг == 4.8).
