In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Для работы с матрицами
from scipy.sparse import csr_matrix

# Матричная факторизация
from implicit import als

import os, sys
module_path = os.path.abspath(os.path.join(os.pardir))
if module_path not in sys.path:
    sys.path.append(module_path)

# Написанные нами функции
from src.metrics import precision_at_k, recall_at_k, money_precision_at_k
from src.utils import prefilter_items, get_prices, postfilter_items
from src.recommenders import MainRecommender

In [2]:
data = pd.read_csv('/home/alex/Downloads/recsys/retail_train.csv')
item_features = pd.read_csv('/home/alex/Downloads/recsys/product.csv')

# column processing
item_features.columns = [col.lower() for col in item_features.columns]

item_features.rename(columns={'product_id': 'item_id'}, inplace=True)

n_items_before = data['item_id'].nunique()

data = prefilter_items(data, take_n_popular=300, item_features=item_features)
data = data.merge(item_features, on='item_id', how='left')


n_items_after = data['item_id'].nunique()
print('Decreased # items from {} to {}'.format(n_items_before, n_items_after))

Decreased # items from 89051 to 301


In [3]:
test_size_weeks = 3

data_train = data[data['week_no'] < data['week_no'].max() - test_size_weeks]
data_test = data[data['week_no'] >= data['week_no'].max() - test_size_weeks]
data_train.head(2)

Unnamed: 0,user_id,basket_id,day,item_id,quantity,sales_value,store_id,retail_disc,trans_time,week_no,coupon_disc,coupon_match_disc,price,manufacturer,department,brand,commodity_desc,sub_commodity_desc,curr_size_of_product
0,2295,31336488347,293,999999,2,3.0,403,-1.58,117,43,0.0,0.0,1.5,,,,,,
1,2295,31336488347,293,999999,1,2.5,403,-0.19,117,43,0.0,0.0,2.5,,,,,,


In [4]:
recommender = MainRecommender(data_train)



HBox(children=(IntProgress(value=0, max=15), HTML(value='')))




HBox(children=(IntProgress(value=0, max=301), HTML(value='')))




In [5]:
result = data_test.groupby('user_id')['item_id'].unique().reset_index()
result.columns=['user_id', 'actual']
result.head(2)

Unnamed: 0,user_id,actual
0,1,"[999999, 865456, 962568, 979707, 995242, 10049..."
1,3,"[999999, 1053690]"


In [6]:
result['candidates_own'] = result['user_id'].apply(lambda x: recommender.get_own_recommendations(x, N=100))

In [7]:
result['candidates_als'] = result['user_id'].apply(lambda x: recommender.get_als_recommendations(x, N=50))

In [8]:
result['final_recommendations'] = result.apply(lambda row: postfilter_items(row['candidates_als'], row['candidates_own'], data), axis=1)

In [9]:
result.head()

Unnamed: 0,user_id,actual,candidates_own,candidates_als,final_recommendations
0,1,"[999999, 865456, 962568, 979707, 995242, 10049...","[940947, 9527290, 995242, 1082212, 840361, 603...","[8090536, 1027569, 995242, 1082185, 1006184, 1...","[866211, 940947, 9527290, 8090536, 1027569]"
1,3,"[999999, 1053690]","[1053690, 1092026, 9527494, 910032, 951590, 91...","[1043751, 1056509, 1092026, 859075, 844179, 10...","[874972, 1053690, 9527494, 1043751, 1056509]"
2,6,"[999999, 840361, 845208, 995242, 1024306, 1037...","[1037863, 951412, 5569230, 965267, 1082185, 10...","[1082185, 866211, 916122, 934639, 878996, 1029...","[863447, 1037863, 951412, 1082185, 866211]"
3,7,"[999999, 898068, 909714, 1003188, 1082185, 110...","[1122358, 828867, 1110572, 1126899, 1106523, 1...","[1076056, 1082185, 909714, 1029743, 909894, 93...","[866211, 1122358, 828867, 1076056, 1082185]"
4,8,"[835098, 872137, 999999, 5569230, 5569471, 840...","[5569471, 862349, 1133018, 5569230, 12302069, ...","[844179, 8090536, 940766, 1076056, 840361, 868...","[866211, 5569471, 862349, 844179, 940766]"


In [10]:
result['final_recommendations_prices'] = result['final_recommendations'].apply(lambda x: get_prices(x, data))

In [11]:
result.apply(lambda row: money_precision_at_k(row['final_recommendations'], row['actual'], row['final_recommendations_prices'], 5), axis=1).mean()

0.16259112246551174