In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Sparse matrix support
from scipy.sparse import csr_matrix

# Matrix factorization
from implicit import als

import os, sys
# Put the parent of the current working directory on sys.path so that the
# `src` package imported below can be resolved
# (presumably the notebook lives one level below the project root — confirm).
module_path = os.path.abspath(os.path.join(os.pardir))
if module_path not in sys.path:  # avoid adding a duplicate entry when the cell is re-run
    sys.path.append(module_path)

# Helper functions written for this project
from src.metrics import precision_at_k, recall_at_k, money_precision_at_k
from src.utils import prefilter_items, get_prices, postfilter_items
from src.recommenders import MainRecommender

In [2]:
# Location of the input CSVs. Set the RECSYS_DATA_DIR environment variable to
# point somewhere else; the fallback is the directory this notebook was
# originally written against, so existing setups keep working unchanged.
DATA_DIR = os.environ.get('RECSYS_DATA_DIR', '/home/alex/Downloads/recsys')

data = pd.read_csv(os.path.join(DATA_DIR, 'retail_train.csv'))
item_features = pd.read_csv(os.path.join(DATA_DIR, 'product.csv'))
data_test = pd.read_csv(os.path.join(DATA_DIR, 'retail_test1.csv'))

# Column processing: lower-case every item-feature column name and expose the
# product key as `item_id` so it can be joined with the transaction frame.
# The frame is rebound instead of mutated with inplace=True.
item_features = (
    item_features
    .rename(columns=str.lower)
    .rename(columns={'product_id': 'item_id'})
)

# Distinct items before filtering, kept only for the report printed below.
n_items_before = data['item_id'].nunique()

# Prefilter the training transactions, then attach the item features to
# every remaining row (left join keeps all transactions).
data = prefilter_items(data, take_n_popular=300, item_features=item_features)
data = data.merge(item_features, on='item_id', how='left')

n_items_after = data['item_id'].nunique()
print('Decreased # items from {} to {}'.format(n_items_before, n_items_after))

Decreased # items from 89051 to 301


In [4]:
# Build the project recommender from the prefiltered, feature-enriched `data`.
# NOTE(review): the progress bars shown in the output suggest the models are
# fitted inside the constructor — confirm in src/recommenders.
recommender = MainRecommender(data)



HBox(children=(IntProgress(value=0, max=5), HTML(value='')))




HBox(children=(IntProgress(value=0, max=301), HTML(value='')))




In [5]:
# Ground truth: one row per test user with the array of distinct items
# that user bought in the test set, stored in the `actual` column.
result = (
    data_test
    .groupby('user_id')['item_id']
    .unique()
    .rename('actual')
    .reset_index()
)
result.head(2)

Unnamed: 0,user_id,actual
0,1,"[880007, 883616, 931136, 938004, 940947, 94726..."
1,2,"[820165, 820291, 826784, 826835, 829009, 85784..."


In [6]:
# Ask the recommender for N=100 "own" recommendations for every test user.
result['candidates_own'] = result['user_id'].map(
    lambda user_id: recommender.get_own_recommendations(user_id, N=100)
)

In [7]:
# Ask the recommender for N=50 ALS recommendations for every test user.
result['candidates_als'] = result['user_id'].map(
    lambda user_id: recommender.get_als_recommendations(user_id, N=50)
)

In [8]:
def _postfilter_row(row):
    """Combine one user's ALS and own candidates via `postfilter_items`."""
    return postfilter_items(row['candidates_als'], row['candidates_own'], data)


# Final list per user, produced row by row from the two candidate columns.
result['final_recommendations'] = result.apply(_postfilter_row, axis=1)

In [9]:
# Look up prices for each user's final recommendations from `data`.
result['final_recommendations_prices'] = result['final_recommendations'].map(
    lambda items: get_prices(items, data)
)

In [10]:
# Preview the first rows: ground truth, both candidate lists, the final
# recommendations and their prices.
result.head()

Unnamed: 0,user_id,actual,candidates_own,candidates_als,final_recommendations,final_recommendations_prices
0,1,"[880007, 883616, 931136, 938004, 940947, 94726...","[1082185, 995242, 1004906, 840361, 961554, 603...","[962568, 961554, 9527290, 979707, 1081177, 100...","[866211, 1082185, 995242, 962568, 961554]","[3.3967356910190745, 1.3441046581705522, 1.624..."
1,2,"[820165, 820291, 826784, 826835, 829009, 85784...","[1106523, 916122, 5569230, 1133018, 866211, 10...","[961554, 866211, 1106523, 1053690, 1068719, 11...","[866211, 1106523, 916122, 961554, 1053690]","[3.3967356910190745, 2.4783331220571982, 4.268..."
2,3,"[827683, 908531, 989069, 1071377, 1080155, 109...","[1106523, 1053690, 951590, 5568378, 878996, 84...","[5568378, 965766, 951590, 8090521, 5569327, 10...","[874972, 1106523, 1053690, 5568378, 965766]","[8.559660340632604, 2.4783331220571982, 1.4088..."
3,6,"[956902, 960791, 1037863, 1119051, 1137688, 84...","[1082185, 1029743, 1127831, 995242, 878996, 86...","[845208, 832678, 930118, 860299, 995242, 87899...","[866211, 1082185, 1029743, 845208, 832678]","[3.3967356910190745, 1.3441046581705522, 2.475..."
4,7,"[847270, 855557, 859987, 863407, 895454, 90663...","[1082185, 1106523, 866211, 1127831, 961554, 55...","[1127831, 1029743, 986912, 1096036, 898068, 11...","[866211, 1082185, 1106523, 1127831, 986912]","[3.3967356910190745, 1.3441046581705522, 2.478..."


In [11]:
def _money_precision_at_5(row):
    """Score one user's final recommendations with `money_precision_at_k`, k=5."""
    return money_precision_at_k(
        row['final_recommendations'],
        row['actual'],
        row['final_recommendations_prices'],
        5,
    )


# Average the per-user metric over all test users.
per_user_money_precision = result.apply(_money_precision_at_5, axis=1)
per_user_money_precision.mean()

0.1530957656353474

In [16]:
# Keep only the columns that go into the exported file.
final = result[['user_id', 'final_recommendations']]

In [19]:
# Write the recommendations to the current working directory, without the
# DataFrame index column.
final.to_csv('recommendations.csv', index=False)