In [1]:
import numpy as np

### 1. Hit rate

In [2]:
# был ли хотя бы 1 релевантный товар среди рекомендованных
def hit_rate(recommended_list, bought_list):
    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)
    flags = np.isin(bought_list, recommended_list)
    return (flags.sum() > 0) * 1

# был ли хотя бы 1 релевантный товар среди топ-k рекомендованных
def hit_rate_at_k(recommended_list, bought_list, k=5):
    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)
    flags = np.isin(bought_list, recommended_list[:k])
    return (flags.sum() > 0) * 1

Вопрос, топ k определяется маркетингом или что-то на подобие uplift?

In [3]:
recommended_list = [14, 156, 1134, 27, 1543, 3345, 533, 11, 143] #id товаров
bought_list = [521, 32, 143, 991]

print(hit_rate(recommended_list, bought_list))

hit_rate_at_k(recommended_list, bought_list, k=5)

1


0

### 2. Precision

*Precision* - доля релевантных товаров среди рекомендованных = Какой % рекомендованных товаров  юзер купил

- Пожалуй, самая приближенная к бизнес-метрикам и самая популярная метрика


In [9]:
# Precision= (# of recommended items that are relevant) / (# of recommended items)
def precision(recommended_list, bought_list):
    
    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)
    flags = np.isin(bought_list, recommended_list)
    return flags.sum() / len(recommended_list)

# Precision@k = (# of recommended items @k that are relevant) / (# of recommended items @k)
def precision_at_k(recommended_list, bought_list, k=None):
    
    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)
    flags = np.isin(bought_list, recommended_list[:k])
    return flags.sum() / len(recommended_list[:k])

# Money Precision@k = (revenue of recommended items @k that are relevant) / (revenue of recommended items @k)
def money_precision_at_k_(recommended_list, bought_list, prices_recommended, k=5):
    
    recommend_list = np.array(recommended_list)[:k]
    prices_recommended = np.array(prices_recommended)[:k]
    
    flags = np.isin(recommend_list, bought_list)
    
    precision = np.dot(flags, prices_recommended).sum() / prices_recommended.sum()
    
    return precision

In [7]:
recommended_list = [14, 156, 1134, 991, 27, 1543, 3345, 533, 11, 143] #id товаров
bought_list = [521, 32, 143, 991]

print(precision(recommended_list, bought_list))
precision_at_k(recommended_list, bought_list, k=3)

0.2


0.0

### 3. Recall

*Recall* - доля рекомендованных товаров среди релевантных = Какой % купленных товаров был среди рекомендованных

- Обычно используется для моделей пре-фильтрации товаров (убрать те товары, которые точно не будем рекомендовать)

In [10]:
# Recall= (# of recommended items that are relevant) / (# of relevant items)
def recall(recommended_list, bought_list):
    
    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)
    flags = np.isin(bought_list, recommended_list)
    return flags.sum() / len(bought_list)
    
# Recall@k = (# of recommended items @k that are relevant) / (# of relevant items)
def recall_at_k(recommended_list, bought_list, k=None):
    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)
    flags = np.isin(bought_list, recommended_list[:k])
    return flags.sum() / len(bought_list)


# Money Recall@k = (revenue of recommended items @k that are relevant) / (revenue of relevant items)
def money_recall_at_k(recommended_list, bought_list, prices_recommended, prices_bought, k=5):
    
    recommend_list = np.array(recommended_list)[:k]
    prices_recommended = np.array(prices_recommended)
    flags = np.isin(recommend_list, bought_list)
    
    return np.dot(flags, prices_recommended[:k]).sum() / prices_recommended.sum()

In [14]:
print(recall(recommended_list, bought_list))
recall_at_k(recommended_list,bought_list,k=10)

0.5


0.5

## Метрики ранжирования(Когда важен порядок рекомендаций)

### 4.AP@k
AP@k - average precision at k

$$AP@k = \frac{1}{r} \sum{[recommended_{relevant_i}] * precision@k}$$

- r - кол-во релевантный среди рекомендованных
- Суммируем по всем релевантным товарам
- Зависит от порядка реокмендаций

In [127]:
recommended_list = [221,21,3234,1,234,234,234,666] #id товаров
bought_list = [1,2,3,4,5,6,7,8,9]

def ap_k(recommended_list, bought_list, k=5):
    
    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)[:k]
    
    relevant_indexes = np.nonzero(np.isin(recommended_list, bought_list))[0]
    
    if len(relevant_indexes) == 0:
        return 0
    
    amount_relevant = len(relevant_indexes)
    
    sum_ = sum([precision_at_k(recommended_list, bought_list, k=index_relevant+1) for index_relevant in relevant_indexes])
    return sum_/amount_relevant

ap_k(recommended_list, bought_list, k=5)

0.25

### 5. MAP@k

MAP@k (Mean Average Precision@k)  
Среднее AP@k по всем юзерам
- Показывает средневзвешенную точность рекомендаций

$$MAP@k = \frac{1}{|U|} \sum_u{AP_k}$$

$$map@K = \frac{1}{N}\sum_{j=1}^N ap@K_j.$$
  
|U| - кол-во юзеров

In [128]:
# теперь список из 3 пользователей
recommended_list_3_users = [[143, 156, 1134, 991, 27, 1543, 3345, 533, 11, 43], 
                    [1134, 533, 14, 4, 15, 1543, 1, 99, 27, 3345],
                    [991, 3345, 27, 533, 43, 143, 1543, 156, 1134, 11]
                           ]

bought_list_3_users = [[521, 32, 11, 143],  # юзер 1
                       [143, 533, 991, 43, 15], # юзер 2
                       [1, 2,27]] # юзер 3

Сомневаюсь в правильности кода ниже

In [130]:
def map_k(recommended_list, bought_list, k=5):
    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)
    
    relevant_indexes = np.nonzero(np.isin(recommended_list, bought_list))[0]
    
    #if len(relevant_indexes) == 0:
    #    return 0
    
    sum_ = sum([ap_k(recommended_list, bought_list, k=index_relevant+1) for index_relevant in relevant_indexes])
    return sum_/bought_list.shape[0]

In [131]:
map_k(recommended_list_3_users,bought_list_3_users, k=5)

0.0

### Mean Reciprocal Rank ( MRR@k )


- Считаем для первых k рекоммендаций
- Найти ранк первого релевантного предсказания $k_u$
- Посчитать reciprocal rank = $\frac{1}{k_u}$

$$MRR = mean(\frac{1}{k_u})$$

In [138]:
def reciprocal_rank(recommended_list, bought_list, k=1):
    flags = np.isin(recommended_list, bought_list)
    
    if sum(flags) == 0:
        return 0
    
    sum_ = 0
    count = 0
    for i in range(1, len(flags)+1):
        if flags[i-1]:
            sum_ += 1/i
            count += 1
    
    return sum_/count

In [139]:
reciprocal_rank(recommended_list, bought_list, k=5)

0.25