In [209]:
import numpy as np
import pandas as pd

from collections import Counter
from functools import reduce

In [211]:
def pr_k(k, recommended, purchased):
    result = len(set(recommended[:k]) & set(purchased)) / k
    return result

In [212]:
def recall_k(k, recommended, purchased):
    result = len(set(recommended[:k]) & set(purchased)) / len(purchased)
    return result

In [225]:
def calculate_metrics(recommended, purchased):
    data = pd.DataFrame({'recommended': recommended, 'purchased': purchased})
    recall_1 = round(data.apply(lambda x: recall_k(1, x['recommended'], x['purchased']), axis=1).mean(), 2)
    recall_5 = round(data.apply(lambda x: recall_k(5, x['recommended'], x['purchased']), axis=1).mean(), 2)
    
    pr_1 = round(data.apply(lambda x: pr_k(1, x['recommended'], x['purchased']), axis=1).mean(), 2)
    pr_5 = round(data.apply(lambda x: pr_k(5, x['recommended'], x['purchased']), axis=1).mean(), 2)
    return recall_1, pr_1, recall_5, pr_5

In [231]:
def write_answer(file, answers_list):
    with open(file, 'w') as f:
        f.write(' '.join(list(map(str, answers_list))))

In [215]:
train = pd.read_csv('coursera_sessions_train.txt', sep=';', names=['viewed', 'purchased']).dropna()
test = pd.read_csv('coursera_sessions_test.txt', sep=';', names=['viewed', 'purchased']).dropna()
train.head()

Unnamed: 0,viewed,purchased
7,59606162606364656661676867,676063
10,848586878889849091929386,86
19,138198199127,199
30,303304305306307308309310311312,303
33,352353352,352


In [216]:
train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3608 entries, 7 to 49995
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   viewed     3608 non-null   object
 1   purchased  3608 non-null   object
dtypes: object(2)
memory usage: 84.6+ KB


In [218]:
train.viewed = train.viewed.apply(lambda x: x.split(','))
train.purchased = train.purchased.apply(lambda x: x.split(','))

test.viewed = test.viewed.apply(lambda x: x.split(','))
test.purchased = test.purchased.apply(lambda x: x.split(','))

In [219]:
train.head()

Unnamed: 0,viewed,purchased
7,"[59, 60, 61, 62, 60, 63, 64, 65, 66, 61, 67, 6...","[67, 60, 63]"
10,"[84, 85, 86, 87, 88, 89, 84, 90, 91, 92, 93, 86]",[86]
19,"[138, 198, 199, 127]",[199]
30,"[303, 304, 305, 306, 307, 308, 309, 310, 311, ...",[303]
33,"[352, 353, 352]",[352]


In [220]:
viewed_dict = Counter(reduce(lambda x, y: x + y, train.viewed.tolist()))

purchased_dict = Counter(reduce(lambda x, y: x + y, train.purchased.tolist()))

# Алгоритм 1

### Train

In [222]:
train['recommended_1'] = train.viewed.apply(
    lambda x: sorted(list(set(x)), key= lambda x: viewed_dict.get(x) if viewed_dict.get(x) else 0, reverse=True)
)

In [232]:
answers_list1 = calculate_metrics(train.recommended_1, train.purchased)

write_answer('1.txt', answers_list1)

answers_list1

(0.48, 0.56, 0.85, 0.22)

### Test

In [233]:
test['recommended_1'] = test.viewed.apply(
    lambda x: sorted(list(set(x)), key= lambda x: viewed_dict.get(x) if viewed_dict.get(x) else 0, reverse=True)
)

In [234]:
answers_list2 = calculate_metrics(test.recommended_1, test.purchased)

write_answer('2.txt', answers_list2)

answers_list2

(0.42, 0.48, 0.79, 0.2)

# Алгоритм 2

### Train

In [235]:
train['recommended_2'] = train.viewed.apply(
    lambda x: sorted(list(set(x)), key= lambda x: purchased_dict.get(x) if purchased_dict.get(x) else 0, reverse=True)
)

In [237]:
answers_list3 = calculate_metrics(train.recommended_2, train.purchased)

write_answer('3.txt', answers_list3)

answers_list3

(0.68, 0.79, 0.93, 0.25)

### Test

In [238]:
test['recommended_2'] = test.viewed.apply(
    lambda x: sorted(list(set(x)), key= lambda x: purchased_dict.get(x) if purchased_dict.get(x) else 0, reverse=True)
)

In [239]:
answers_list4 = calculate_metrics(test.recommended_2, test.purchased)

write_answer('4.txt', answers_list4)

answers_list4

(0.42, 0.48, 0.79, 0.2)