# Метрики

## Imports

In [24]:
import pandas as pd
import numpy as np

from tqdm.auto import tqdm
from rectools import Columns

### Загрузим данные МТС Кион

Датасет необходимо скачать: https://ods.ai/competitions/competition-recsys-21/data

Оттуда нужны файлы interactions.csv, items.csv и users.csv

### Зафиксируем random seed

In [25]:
np.random.seed(23)

## Read data

In [26]:
interactions = pd.read_csv('interactions.csv', parse_dates=['last_watch_dt'])

interactions.rename(
    columns={
        'last_watch_dt': Columns.Datetime,
        'total_dur': Columns.Weight
    },
    inplace=True)


In [27]:
users = pd.read_csv('users.csv')
items = pd.read_csv('items.csv')

In [28]:
def headtail(df):
    return pd.concat([df.head(), df.tail()])

headtail(interactions)


Unnamed: 0,user_id,item_id,datetime,weight,watched_pct
0,176549,9506,2021-05-11,4250,72.0
1,699317,1659,2021-05-29,8317,100.0
2,656683,7107,2021-05-09,10,0.0
3,864613,7638,2021-07-05,14483,100.0
4,964868,9506,2021-04-30,6725,100.0
5476246,648596,12225,2021-08-13,76,0.0
5476247,546862,9673,2021-04-13,2308,49.0
5476248,697262,15297,2021-08-20,18307,63.0
5476249,384202,16197,2021-04-19,6203,100.0
5476250,319709,4436,2021-08-15,3921,45.0


In [29]:
interactions.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5476251 entries, 0 to 5476250
Data columns (total 5 columns):
 #   Column       Dtype         
---  ------       -----         
 0   user_id      int64         
 1   item_id      int64         
 2   datetime     datetime64[ns]
 3   weight       int64         
 4   watched_pct  float64       
dtypes: datetime64[ns](1), float64(1), int64(3)
memory usage: 208.9 MB


Выделим небольшой кусок из данных, чтобы не слишком страдать

In [30]:
sample_users = [57607, 403227, 70720]
df = interactions[interactions[Columns.User].isin(sample_users)].sort_values("user_id").reset_index(drop=True)
del df[Columns.Datetime], df[Columns.Weight], df['watched_pct']
df


Unnamed: 0,user_id,item_id
0,57607,4151
1,57607,10440
2,57607,13865
3,70720,4880
4,70720,4881
5,70720,6327
6,403227,6353
7,403227,1736
8,403227,5336
9,403227,181


In [31]:
print('Users', df[Columns.User].unique())
sample_items = df[Columns.Item].unique()
print('Items', sample_items)


Users [ 57607  70720 403227]
Items [ 4151 10440 13865  4880  4881  6327  6353  1736  5336   181]


## Regression

С регрессией все относительно просто. По (user, item) мы знаем таргет (рейтинг чаще всего) и по такой же паре предсказываем его

In [32]:
df['target'] = np.random.choice([3, 4, 5], df.shape[0])
df['predict'] = np.random.rand(df.shape[0]) * 3 + 2
df


Unnamed: 0,user_id,item_id,target,predict
0,57607,4151,5,2.658958
1,57607,10440,3,4.500017
2,57607,13865,4,3.280296
3,70720,4880,5,4.263719
4,70720,4881,3,4.990597
5,70720,6327,4,2.000249
6,403227,6353,3,4.927865
7,403227,1736,5,3.257364
8,403227,5336,4,4.044672
9,403227,181,5,4.257889


Общая оценка

In [33]:
mae = (df['target'] - df['predict']).abs().mean()
print(mae)

1.374467645324227


Оценка по пользователю с последюущим усреднением

In [34]:
df['diff'] = (df['target'] - df['predict']).abs()
average_mae = df.groupby(Columns.User)['diff'].mean()
print(average_mae.mean())
average_mae

1.4033728201405007


user_id
57607     1.520254
70720     1.575543
403227    1.114321
Name: diff, dtype: float64

Видно, что в данном случае метрики близки к друг другу, но это не всегда так

In [35]:
del df['target'], df['predict'], df['diff']

## Classification

Сгенерируем случайные рекомендации.

In [36]:
top_k = 5
recs = np.array([
    np.random.choice(sample_items, top_k, replace=False),
    np.random.choice(sample_items, top_k, replace=False),
    np.random.choice(sample_items, top_k, replace=False),
])
recs

array([[ 4151,  6353, 13865,  6327,  4880],
       [  181,  6327,  4151,  1736,  4881],
       [10440,  4880,  4881, 13865,  6327]])

Преобразуем в длинный датафрейм

In [37]:
df_recs = pd.DataFrame({
    Columns.User: np.repeat(sample_users, top_k),
    Columns.Item: recs.ravel()
})
df_recs


Unnamed: 0,user_id,item_id
0,57607,4151
1,57607,6353
2,57607,13865
3,57607,6327
4,57607,4880
5,403227,181
6,403227,6327
7,403227,4151
8,403227,1736
9,403227,4881


In [38]:
df_recs[Columns.Rank] = df_recs.groupby(Columns.User).cumcount() + 1
df_recs

Unnamed: 0,user_id,item_id,rank
0,57607,4151,1
1,57607,6353,2
2,57607,13865,3
3,57607,6327,4
4,57607,4880,5
5,403227,181,1
6,403227,6327,2
7,403227,4151,3
8,403227,1736,4
9,403227,4881,5


Ключевой момент. Именно ради него преобразовывали данные и именно это позволяет считать метрики быстрее.

In [39]:
df_recs = df.merge(df_recs, how='left', left_on=Columns.UserItem, right_on=Columns.UserItem)
df_recs = df_recs.sort_values(by=[Columns.User, Columns.Rank])
df_recs

Unnamed: 0,user_id,item_id,rank
0,57607,4151,1.0
2,57607,13865,3.0
1,57607,10440,
3,70720,4880,2.0
4,70720,4881,3.0
5,70720,6327,5.0
9,403227,181,1.0
7,403227,1736,4.0
6,403227,6353,
8,403227,5336,


### Precision@K

In [40]:
df_recs[f'TP@5'] = df_recs['rank'] <= 5
df_recs

Unnamed: 0,user_id,item_id,rank,TP@5
0,57607,4151,1.0,True
2,57607,13865,3.0,True
1,57607,10440,,False
3,70720,4880,2.0,True
4,70720,4881,3.0,True
5,70720,6327,5.0,True
9,403227,181,1.0,True
7,403227,1736,4.0,True
6,403227,6353,,False
8,403227,5336,,False


In [41]:
df_recs[df_recs[Columns.Rank].notnull()]

Unnamed: 0,user_id,item_id,rank,TP@5
0,57607,4151,1.0,True
2,57607,13865,3.0,True
3,70720,4880,2.0,True
4,70720,4881,3.0,True
5,70720,6327,5.0,True
9,403227,181,1.0,True
7,403227,1736,4.0,True


Посчитаем вручную Precision@5 (усредняем по юзерам): (2/5 + 3/5 + 2/5) / 3

In [42]:
(2/5 + 3/5 + 2/5) / 3

0.4666666666666666

Посчитаем через groupby

In [43]:
df_recs['TP@5/5'] = df_recs['TP@5'] / top_k

p5 = df_recs.groupby(Columns.User)['TP@5/5'].sum().mean()

print(f'Precision@5 = {p5}')

Precision@5 = 0.4666666666666666


In [44]:
df_recs

Unnamed: 0,user_id,item_id,rank,TP@5,TP@5/5
0,57607,4151,1.0,True,0.2
2,57607,13865,3.0,True,0.2
1,57607,10440,,False,0.0
3,70720,4880,2.0,True,0.2
4,70720,4881,3.0,True,0.2
5,70720,6327,5.0,True,0.2
9,403227,181,1.0,True,0.2
7,403227,1736,4.0,True,0.2
6,403227,6353,,False,0.0
8,403227,5336,,False,0.0


Используем тот факт, что мы знаем количество пользователей, а значит groupby не нужен

In [45]:
p5 = df_recs['TP@5/5'].sum() / len(sample_users)
print(f'Precision@5 = {p5}')

Precision@5 = 0.46666666666666673


### Recall@K

In [46]:
df_recs['actual'] = df_recs.groupby(Columns.User)[Columns.Item].transform('count')
df_recs

Unnamed: 0,user_id,item_id,rank,TP@5,TP@5/5,actual
0,57607,4151,1.0,True,0.2,3
2,57607,13865,3.0,True,0.2,3
1,57607,10440,,False,0.0,3
3,70720,4880,2.0,True,0.2,3
4,70720,4881,3.0,True,0.2,3
5,70720,6327,5.0,True,0.2,3
9,403227,181,1.0,True,0.2,4
7,403227,1736,4.0,True,0.2,4
6,403227,6353,,False,0.0,4
8,403227,5336,,False,0.0,4


In [47]:
df_recs['TP@5/actual'] = df_recs['TP@5'] / df_recs['actual']
df_recs

Unnamed: 0,user_id,item_id,rank,TP@5,TP@5/5,actual,TP@5/actual
0,57607,4151,1.0,True,0.2,3,0.333333
2,57607,13865,3.0,True,0.2,3,0.333333
1,57607,10440,,False,0.0,3,0.0
3,70720,4880,2.0,True,0.2,3,0.333333
4,70720,4881,3.0,True,0.2,3,0.333333
5,70720,6327,5.0,True,0.2,3,0.333333
9,403227,181,1.0,True,0.2,4,0.25
7,403227,1736,4.0,True,0.2,4,0.25
6,403227,6353,,False,0.0,4,0.0
8,403227,5336,,False,0.0,4,0.0


In [48]:
(2/3 + 3/3 + 2/4) / 3

0.7222222222222222

In [49]:
r5 = df_recs.groupby(Columns.User)['TP@5/actual'].sum().mean()
print(f'Recall@5 = {r5}')

Recall@5 = 0.7222222222222222


In [50]:
r5 = df_recs['TP@5/actual'].sum() / len(sample_users)
print(f'Recall@5 = {r5}')

Recall@5 = 0.7222222222222222


## Ranking

### MAP@K

In [51]:
df_recs

Unnamed: 0,user_id,item_id,rank,TP@5,TP@5/5,actual,TP@5/actual
0,57607,4151,1.0,True,0.2,3,0.333333
2,57607,13865,3.0,True,0.2,3,0.333333
1,57607,10440,,False,0.0,3,0.0
3,70720,4880,2.0,True,0.2,3,0.333333
4,70720,4881,3.0,True,0.2,3,0.333333
5,70720,6327,5.0,True,0.2,3,0.333333
9,403227,181,1.0,True,0.2,4,0.25
7,403227,1736,4.0,True,0.2,4,0.25
6,403227,6353,,False,0.0,4,0.0
8,403227,5336,,False,0.0,4,0.0


In [52]:
df_recs['cumTP@5'] = df_recs.groupby(Columns.User)['TP@5'].cumsum()
df_recs

Unnamed: 0,user_id,item_id,rank,TP@5,TP@5/5,actual,TP@5/actual,cumTP@5
0,57607,4151,1.0,True,0.2,3,0.333333,1
2,57607,13865,3.0,True,0.2,3,0.333333,2
1,57607,10440,,False,0.0,3,0.0,2
3,70720,4880,2.0,True,0.2,3,0.333333,1
4,70720,4881,3.0,True,0.2,3,0.333333,2
5,70720,6327,5.0,True,0.2,3,0.333333,3
9,403227,181,1.0,True,0.2,4,0.25,1
7,403227,1736,4.0,True,0.2,4,0.25,2
6,403227,6353,,False,0.0,4,0.0,2
8,403227,5336,,False,0.0,4,0.0,2


In [53]:
df_recs['Prec@5'] = df_recs['cumTP@5'] / df_recs[Columns.Rank]
df_recs

Unnamed: 0,user_id,item_id,rank,TP@5,TP@5/5,actual,TP@5/actual,cumTP@5,Prec@5
0,57607,4151,1.0,True,0.2,3,0.333333,1,1.0
2,57607,13865,3.0,True,0.2,3,0.333333,2,0.666667
1,57607,10440,,False,0.0,3,0.0,2,
3,70720,4880,2.0,True,0.2,3,0.333333,1,0.5
4,70720,4881,3.0,True,0.2,3,0.333333,2,0.666667
5,70720,6327,5.0,True,0.2,3,0.333333,3,0.6
9,403227,181,1.0,True,0.2,4,0.25,1,1.0
7,403227,1736,4.0,True,0.2,4,0.25,2,0.5
6,403227,6353,,False,0.0,4,0.0,2,
8,403227,5336,,False,0.0,4,0.0,2,


In [54]:
df_recs['Prec@5/actual'] = df_recs['Prec@5'] / df_recs['actual']
df_recs

Unnamed: 0,user_id,item_id,rank,TP@5,TP@5/5,actual,TP@5/actual,cumTP@5,Prec@5,Prec@5/actual
0,57607,4151,1.0,True,0.2,3,0.333333,1,1.0,0.333333
2,57607,13865,3.0,True,0.2,3,0.333333,2,0.666667,0.222222
1,57607,10440,,False,0.0,3,0.0,2,,
3,70720,4880,2.0,True,0.2,3,0.333333,1,0.5,0.166667
4,70720,4881,3.0,True,0.2,3,0.333333,2,0.666667,0.222222
5,70720,6327,5.0,True,0.2,3,0.333333,3,0.6,0.2
9,403227,181,1.0,True,0.2,4,0.25,1,1.0,0.25
7,403227,1736,4.0,True,0.2,4,0.25,2,0.5,0.125
6,403227,6353,,False,0.0,4,0.0,2,,
8,403227,5336,,False,0.0,4,0.0,2,,


In [55]:
map = df_recs.groupby(Columns.User)['Prec@5/actual'].sum()
print(map.mean())
map

0.5064814814814814


user_id
57607     0.555556
70720     0.588889
403227    0.375000
Name: Prec@5/actual, dtype: float64

## Naive vs Pandas

In [56]:
df = interactions[Columns.UserItem]
df

Unnamed: 0,user_id,item_id
0,176549,9506
1,699317,1659
2,656683,7107
3,864613,7638
4,964868,9506
...,...,...
5476246,648596,12225
5476247,546862,9673
5476248,697262,15297
5476249,384202,16197


In [57]:
target = df.values
target


array([[176549,   9506],
       [699317,   1659],
       [656683,   7107],
       ...,
       [697262,  15297],
       [384202,  16197],
       [319709,   4436]])

In [58]:
target[target[:, 0] == 176549][:, 1]  # таргет для 1 случайного юзера


array([ 9506, 15469,  9164, 12250,  5695, 11345,  7582, 10881,  5051,
        7453,  3258,  7626, 13243, 10761,  5600,  5533, 16197, 12302,
        6626,  9352, 10605,  1343,  8972,  6649, 11919,  1132,   899,
        5087, 14684,  4387,  4756, 15096,  1659,  1641, 10954,  2358,
       13018,  1105, 13424, 10732,  4360, 14689,  8211, 12324,   349,
       11654, 14006,  2956,  8437, 12770,  2722,   149, 10688, 14217,
        8283,  7000,  3182, 12104,  5171,  5411, 15915,  5956,  3834,
       11494,  6870, 15719,  2505, 15464, 14317, 11689, 10544, 15531,
       12448,  9728,   101,  7102, 11539, 16166,  4718,  4273,  7401,
       14470])

In [59]:
recs


array([[ 4151,  6353, 13865,  6327,  4880],
       [  181,  6327,  4151,  1736,  4881],
       [10440,  4880,  4881, 13865,  6327]])

In [60]:
sample_users

[57607, 403227, 70720]

#### Напишем функцию расчета precision@k "в лоб"

In [61]:
def precision_naive(target, users, recs, k):
    precision = []
    for i, user in enumerate(users):
        p = 0
        user_target = target[target[:, 0] == user][:, 1]
        for rec in recs[i]:
            if rec in user_target:
                p += 1
        precision.append(p / k)
    return sum(precision) / len(users)

In [62]:
precision_naive(target, sample_users, recs, 5)

0.4666666666666666

#### Напишем реализацию функции с использованием pandas

In [63]:
def precision_pandas(df, users, recs, k):
    df_recs = pd.DataFrame({
        Columns.User: np.repeat(users, k),
        Columns.Item: recs.ravel()
    })
    df_recs[Columns.Rank] = df_recs.groupby(Columns.User).cumcount() + 1
    df_recs = df.merge(df_recs, how='left', left_on=Columns.UserItem, right_on=Columns.UserItem)
    tp_k = f'TP@{k}'
    df_recs[tp_k] = df_recs[Columns.Rank] < (k + 1)
    p = df_recs[tp_k].sum() / k / len(users)
    return p

In [64]:
precision_pandas(df, sample_users, recs, 5)

0.4666666666666666

#### Посмотрим через `timeit`

In [65]:
%timeit precision_naive(target, sample_users, recs, 5)

58.5 ms ± 3.36 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [66]:
%timeit precision_pandas(df, sample_users, recs, 5)

2.75 s ± 438 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Почему так произошло?

#### Попробуем взять больше данных

In [67]:
def generate_subsample(users_count, top_k):
    users = np.random.choice(interactions[Columns.User].unique(), users_count, replace=False)
    df = interactions[interactions[Columns.User].isin(users)].reset_index(drop=True)
    del df[Columns.Datetime], df[Columns.Weight], df['watched_pct']

    recs = np.random.choice(df[Columns.Item], size=(users_count, top_k))
    return df, users, recs

In [68]:
top_k = 10
df, users, recs = generate_subsample(10000, top_k)
target = df.values

In [69]:
users

array([628072,  15993, 470329, ..., 721843, 987818,  28484])

In [70]:
precision_naive(target, users, recs, top_k)

0.030479999999999865

In [71]:
%timeit precision_naive(target, users, recs, top_k)

2.29 s ± 395 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [72]:
precision_pandas(df, users, recs, top_k)

0.03048

In [73]:
%timeit precision_pandas(df, users, recs, top_k)

69.1 ms ± 11.6 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


## Homework Метрики и бейзлайны**

Добавьте реализацию метрик (по аналогии с Precision) MAP, Recall и применить к данным выше (из семинара), полученным с помощью generate_subsample

k взять разным: 1, 5, 10

На выходе - сравнение

In [102]:
import pandas as pd
import numpy as np
from tqdm.auto import tqdm
from rectools import Columns

def check_input_data(df, users, recs, k):
    print(f"Total users: {len(users)}")
    print(f"Unique items in df: {df[Columns.Item].nunique()}")
    print(f"Average interactions per user: {df.groupby(Columns.User).size().mean():.2f}")
    print(f"Recommendations shape: {recs.shape}")
    print(f"Unique items in recommendations: {np.unique(recs).size}")
    print(f"Sample of recommendations:\n{recs[:5, :k]}")

def generate_subsample(users_count, top_k):
    # Выбираем пользователей с минимальным количеством взаимодействий
    user_interaction_counts = interactions[Columns.User].value_counts()
    eligible_users = user_interaction_counts[user_interaction_counts >= top_k].index
    users = np.random.choice(eligible_users, min(users_count, len(eligible_users)), replace=False)

    df = interactions[interactions[Columns.User].isin(users)].reset_index(drop=True)
    del df[Columns.Datetime], df[Columns.Weight], df['watched_pct']

    # Генерируем рекомендации без повторов
    all_items = df[Columns.Item].unique()
    recs = np.array([np.random.choice(all_items, size=top_k, replace=False) for _ in range(len(users))])

    return df, users, recs

def recall_naive(target, users, recs, k):
    recall = []
    for i, user in enumerate(users):
        user_target = set(target[target[:, 0] == user][:, 1])
        if len(user_target) > 0:
            r = sum(1 for item in recs[i][:k] if item in user_target)
            recall.append(r / len(user_target))
    print(f"Naive Recall: min={min(recall):.4f}, max={max(recall):.4f}, non-zero={sum(r > 0 for r in recall)}")
    return sum(recall) / len(recall) if recall else 0

def recall_pandas(df, users, recs, k):
    df_recs = pd.DataFrame({
        Columns.User: np.repeat(users, k),
        Columns.Item: recs[:, :k].ravel()
    })

    df_merged = pd.merge(df, df_recs, on=[Columns.User, Columns.Item], how='inner')
    df_merged['hit'] = 1

    user_hits = df_merged.groupby(Columns.User)['hit'].sum()
    user_relevant_counts = df.groupby(Columns.User)[Columns.Item].nunique()

    recall = (user_hits / user_relevant_counts).fillna(0)
    return recall.mean()

def map_naive(target, users, recs, k):
    ap_values = []
    for i, user in enumerate(users):
        user_target = set(target[target[:, 0] == user][:, 1])
        ap = 0
        hits = 0
        for j, item in enumerate(recs[i][:k], 1):
            if item in user_target:
                hits += 1
                ap += hits / j
        if len(user_target) > 0:
            ap_values.append(ap / min(len(user_target), k))
        else:
            ap_values.append(0)  # Добавляем 0 для пользователей без релевантных элементов
    print(f"Naive MAP: min={min(ap_values):.4f}, max={max(ap_values):.4f}, non-zero={sum(ap > 0 for ap in ap_values)}")
    return sum(ap_values) / len(ap_values)

def map_pandas(df, users, recs, k):
    df_recs = pd.DataFrame({
        Columns.User: np.repeat(users, k),
        Columns.Item: recs[:, :k].ravel(),
        'rank': np.tile(np.arange(1, k + 1), len(users))
    })

    df_merged = pd.merge(df, df_recs, on=[Columns.User, Columns.Item], how='inner')
    df_merged['hit'] = 1
    df_merged['precision'] = df_merged.groupby(Columns.User)['hit'].cumsum() / df_merged['rank']

    user_relevant_counts = df.groupby(Columns.User)[Columns.Item].nunique()

    if k == 1:
        user_ap = df_merged.groupby(Columns.User)['precision'].mean().fillna(0)
    else:
        user_ap = df_merged.groupby(Columns.User).apply(lambda x:
            x['precision'].sum() / min(user_relevant_counts[x.name], k)).fillna(0)

    map_score = user_ap.mean()

    return map_score

# Генерация подвыборки
users_count = 10000
top_k_values = [1, 5, 10]

for top_k in top_k_values:
    print(f"\nResults for k = {top_k}:")
    df, users, recs = generate_subsample(users_count, top_k)
    target = df.values

    check_input_data(df, users, recs, top_k)

    recall_naive_result = recall_naive(target, users, recs, top_k)
    recall_pandas_result = recall_pandas(df, users, recs, top_k)
    map_naive_result = map_naive(target, users, recs, top_k)
    map_pandas_result = map_pandas(df, users, recs, top_k)

    print(f"Recall (naive): {recall_naive_result:.4f}")
    print(f"Recall (pandas): {recall_pandas_result:.4f}")
    print(f"MAP (naive): {map_naive_result:.4f}")
    print(f"MAP (pandas): {map_pandas_result:.4f}")

    # Добавьте эти строки для проверки промежуточных результатов
    print("\nChecking intermediate results:")
    df_recs = pd.DataFrame({
        Columns.User: np.repeat(users, top_k),
        Columns.Item: recs[:, :top_k].ravel()
    })
    df_merged = pd.merge(df, df_recs, on=[Columns.User, Columns.Item], how='inner')
    print(f"Merged dataframe shape: {df_merged.shape}")
    print(f"Users with hits: {df_merged[Columns.User].nunique()}")

    print("\nPerformance tests:")
    %timeit recall_naive(target, users, recs, top_k)
    %timeit recall_pandas(df, users, recs, top_k)
    %timeit map_naive(target, users, recs, top_k)
    %timeit map_pandas(df, users, recs, top_k)


Results for k = 1:
Total users: 10000
Unique items in df: 5058
Average interactions per user: 5.61
Recommendations shape: (10000, 1)
Unique items in recommendations: 4350
Sample of recommendations:
[[13538]
 [ 8201]
 [14113]
 [15060]
 [13237]]
Naive Recall: min=0.0000, max=1.0000, non-zero=13
Naive MAP: min=0.0000, max=1.0000, non-zero=13
Recall (naive): 0.0003
Recall (pandas): 0.0003
MAP (naive): 0.0013
MAP (pandas): 1.0000

Checking intermediate results:
Merged dataframe shape: (13, 2)
Users with hits: 13

Performance tests:
Naive Recall: min=0.0000, max=1.0000, non-zero=13
Naive Recall: min=0.0000, max=1.0000, non-zero=13
Naive Recall: min=0.0000, max=1.0000, non-zero=13
Naive Recall: min=0.0000, max=1.0000, non-zero=13
Naive Recall: min=0.0000, max=1.0000, non-zero=13
Naive Recall: min=0.0000, max=1.0000, non-zero=13
Naive Recall: min=0.0000, max=1.0000, non-zero=13
Naive Recall: min=0.0000, max=1.0000, non-zero=13
1.84 s ± 270 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

- Recall: наивная и pandas реализации теперь дают идентичные результаты для всех значений k. Значения recall очень низкие, что ожидаемо для случайных рекомендаций.

- MAP: pandas реализация дает значение 1.0, что некорректно. Это происходит потому, что учитываются только пользователи с совпадениями, и для k=1 точность всегда будет 1. Для k = 5 и k = 10 значения MAP отличаются между наивной и pandas реализациями. Pandas версия дает более высокие значения (0.0946 и 0.0300) по сравнению с наивной (0.0009 и 0.0008).

- Pandas реализации значительно быстрее наивных для всех метрик и значений k. Время выполнения растет с увеличением k, что ожидаемо.

- Количество пользователей с релевантными рекомендациями (non-zero) растет с увеличением k, что логично. Максимальные значения метрик уменьшаются с ростом k, что также ожидаемо для случайных рекомендаций.

### Критерии успеха

- написана реализация хотя бы одной метрики (реализация "в лоб") - 1 балл
- написаны функции для двух метрик в наивном виде - +2 балла
- если одна метрика реализована двумя способами (наивный и с помощью pandas) +2 балла
- обе метрики сделаны двумя способами +1 балла

Bonus:
- если самостоятельно изучите одну дополнительную метрику (HR/MNAP/NDCG/etc) и добавите реализацию двумя способами, то еще +2 балла