## ML-метрики качества рекомендательных систем

In [1]:
import pandas as pd
import numpy as np

In [18]:
recommended_list = [0, 156, 1134, 991, 27, 1543, 3345, 533, 11, 43] #id товаров
bought_list = [521, 32, 143, 991, 3345]

## 1. Hit rate

In [8]:
def hit_rate(recommended_list, bought_list):
    
    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)
    
    flags = np.isin(bought_list, recommended_list)
    
    hit_rate = (flags.sum() > 0) * 1
    
    return hit_rate


def hit_rate_at_k(recommended_list, bought_list, k=5):
    
    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)
    
    recommended_list = recommended_list[:k]
    
    flags = np.isin(bought_list, recommended_list)
    
    hit_rate = (flags.sum() > 0) * 1
    
    # your_code
    
    return hit_rate

In [21]:
#k=7 in bought_list
recommended_list = [3, 156, 1134, 5, 27, 1543, 3345, 533, 11, 43] 
bought_list = [521, 32, 143, 991, 3345]

In [24]:
hit_rate(recommended_list, bought_list), hit_rate_at_k(recommended_list, bought_list)

(1, 0)

## 2. Precision

In [81]:
def precision(recommended_list, bought_list):
    
    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)
    
    flags = np.isin(bought_list, recommended_list)
    
    precision = flags.sum() / len(recommended_list)
    
    return precision


def precision_at_k(recommended_list, bought_list, k=5):
    
    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)
    
    bought_list = bought_list
    recommended_list = recommended_list[:k]
    
    flags = np.isin(bought_list, recommended_list)
    
    precision = flags.sum() / len(recommended_list)
    
    
    return precision


def money_precision_at_k(recommended_list, bought_list, prices_recommended, k=5):
    
    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)
    prices_recommended = np.array(prices_recommended)
    
    bought_list = bought_list
    recommended_list = recommended_list[:k]
    prices_recommended = prices_recommended[:k]
    
    flags = np.where(np.isin(recommended_list, bought_list), 1, 0)
    
    money_precision = (flags @ prices_recommended) / np.sum(prices_recommended)

    
    return money_precision

In [82]:
#k=1, 7 in bought_list
recommended_list = [32, 156, 1134, 5, 27, 1543, 3345, 533, 11, 43] 
prices_recommended = [400, 60, 40, 40 , 90, 60, 320, 200, 85, 70] 

bought_list = [521, 32, 143, 991, 3345]

In [83]:
money_precision_at_k(recommended_list, bought_list, prices_recommended)

0.6349206349206349

## 3. Recall

In [88]:
def recall(recommended_list, bought_list):
    
    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)
    
    flags = np.isin(bought_list, recommended_list)
    
    recall = flags.sum() / len(bought_list)
    
    return recall


def recall_at_k(recommended_list, bought_list, k=5):
    
    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)
    
    bought_list = bought_list
    recommended_list = recommended_list[:k]
    
    flags = np.isin(bought_list, recommended_list)
    
    recall = flags.sum() / len(bought_list)
    
    # your_code
    
    return recall


def money_recall_at_k(recommended_list, bought_list, prices_recommended, prices_bought, k=5):
    
    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)
    prices_recommended = np.array(prices_recommended)
    
    bought_list = bought_list
    recommended_list = recommended_list[:k]
    prices_recommended = prices_recommended[:k]
    
    flags = np.where(np.isin(recommended_list, bought_list), 1, 0)
    
    money_recall = (flags @ prices_recommended) / np.sum(prices_bought)
    
    return money_recall

In [84]:
#k=1,7,9 in bought_list
recommended_list = [32, 156, 1134, 5, 27, 1543, 3345, 533, 11, 43] 
prices_recommended = [400, 60, 40, 40 , 90, 60, 320, 200, 85, 70] 

bought_list = [521, 32, 143, 991, 3345, 15, 11, 18]
prices_bought= [100, 400, 38, 70 , 320, 65, 85, 250]

recall(recommended_list, bought_list), recall_at_k(recommended_list, bought_list)

(0.375, 0.125)

In [90]:
money_recall_at_k(recommended_list, bought_list, prices_recommended, prices_bought)

0.30120481927710846

## ML-метрики ранжирования

### 1. AP@k
AP@k - average precision at k

$$AP@k = \frac{1}{r} \sum{[recommended_{relevant_i}] * precision@k}$$

- r - кол-во релевантный среди рекомендованных
- Суммируем по всем релевантным товарам
- Зависит от порядка реокмендаций

In [151]:
def ap_k(recommended_list, bought_list, k=5):
    
    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)
    
    flags = np.isin(recommended_list, bought_list)
    
    if sum(flags) == 0:
        return 0
    
    sum_ = 0
    for i in range(0, k+1):
        
        if flags[i] == True:
            p_k = precision_at_k(recommended_list, bought_list, k=i+1)
            sum_ += p_k
            
    result = sum_ / sum(flags)
    
    return result

### 2. MAP@k

MAP@k (Mean Average Precision@k)  
Среднее AP@k по всем юзерам
- Показывает средневзвешенную точность рекомендаций

$$MAP@k = \frac{1}{|U|} \sum_u{AP_k}$$
  
|U| - кол-во юзеров

In [126]:
def map_k(recommended_list, bought_list, k=5):
    
    ap_k_list = []
    
    for user in recommended_list:
        ap_k_ = ap_k(recommended_list[user], bought_list[user], k=k)
        ap_k_list.append(ap_k_)

    ap_k_list = np.array(ap_k_list)
    
    map_k = np.mean(ap_k_list)
    
    return map_k

In [153]:
recommended_list = {
    'user_1': [15, 32, 153, 85, 72, 126, 203],
    'user_2': [11, 8, 15, 24, 38, 94, 98],
    'user_3': [53, 35, 14, 23, 43, 109, 100]
}

bought_list = {
    'user_1': [203, 32, 16, 43],
    'user_2': [94, 10, 11],
    'user_3': [15, 9, 7, 23, 100]
}

In [None]:
ap_k(recommended_list['user_1'], bought_list['user_1'])

In [156]:
map_k(recommended_list, bought_list)

0.34722222222222215

### Normalized discounted cumulative gain ( NDCG@k)


$$DCG = \frac{1}{|r|} \sum_u{\frac{[bought fact]}{discount(i)}}$$  

$discount(i) = i$ if $i <= 2$,   
$discount(i) = log_2(i)$ if $i > 2$


(!) Считаем для первых k рекоммендаций   
(!) - существуют вариации с другими $discount(i)$  
i - ранк рекомендованного товара  
|r| - кол-во рекомендованных товаров 

$$NDCG = \frac{DCG}{ideal DCG}$$

$DCG@5 = \frac{1}{5}*(1 / 1 + 0 / 2 + 0 / log(3) + 1 / log(4) + 0 / log(5))$  
$ideal DCG@5 = \frac{1}{5}*(1 / 1 + 1 / 2 + 1 / log(3) + 1 / log(4) + 1 / log(5))$  

$NDCG = \frac{DCG}{ideal DCG}$

In [182]:
# по желанию
def ndcg_at_k(recommended_list, bought_list, k=5):
    
    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)
    flags = np.where(np.isin(recommended_list, bought_list), 1, 0)
    
    ranks = []
    for i in range(1, k+1):
        if i<=2: 
            ranks.append(i)
        if i>2:
            ranks.append(np.log2(i))

    ranks = np.array(ranks)
    
    dcg = 1/k * (np.sum(flags[:k]/ranks))
    ideal = 1/k * (np.sum(np.ones(k)/ranks))
    ndcg_at_k = dcg / ideal
    
    return ndcg_at_k

In [183]:
#k=1,7,9 in bought_list
recommended_list = [32, 156, 1134, 5, 27, 1543, 3345, 533, 11, 43] 
bought_list = [521, 32, 143, 991, 3345, 15, 11, 18]

ndcg_at_k(recommended_list, bought_list)

0.32662592711430266

### 3. MRR@k
Mean Reciprocal Rank

- Считаем для первых k рекоммендаций
- Найти ранк первого релевантного предсказания $k_u$
- Посчитать reciprocal rank = $\frac{1}{k_u}$

$$MRR = mean(\frac{1}{k_u})$$

In [149]:
def reciprocal_rank(recommended_list, bought_list, k=5):
    
    reciprocal_ranks = []
    
    for i in range(k):
        if any(np.isin(bought_list, recommended_list[i])):
            reciprocal_rank = 1 / (i + 1)
            reciprocal_ranks.append(reciprocal_rank)

    reciprocal_ranks = np.array(reciprocal_ranks)    
    mean_reciprocal_rank = np.mean(reciprocal_ranks)

    return mean_reciprocal_rank

In [150]:
#k=1,7,9 in bought_list
recommended_list = [32, 156, 521, 5, 27, 1543, 3345, 533, 11, 43] 
bought_list = [521, 32, 143, 991, 3345, 15, 11, 18]

reciprocal_rank(recommended_list, bought_list)

0.6666666666666666

**1) Приведите еще примеры метрик для оценки рекомендаций/ранжирования (можно взять из интернета, или ваши знания)**

Метрики на основе ранговой корреляции: Ранговый коэффициент корреляции Кендэлла, Ранговый коэффициент корреляции Спирмена

Метрики на основе каскадной модели поведения: Expected reciprocal rank(ERR), PFound