In [1]:
import pandas as pd
import numpy as np

import os, sys
# Put the notebook's parent directory on the import path so the project-local
# `src.*` imports below resolve; nothing is added if the entry already exists.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

from src.metrics import precision_at_k, recall_at_k
from src.utils import pre_filter_items, train_test_split
from src.recommenders import MainRecommender, SecondLevelRecommender, DataTransformer

import warnings
# Install a blanket 'ignore' filter: every warning raised after this point is
# suppressed for the rest of the kernel session.
# NOTE(review): presumably done to keep cell output clean; it also hides
# deprecation and numerical warnings from the libraries used below — consider
# narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

In [2]:
# Read the three input CSV files in one pass. The target names on the left are
# matched positionally with the paths on the right, so keep both in the same order.
data, item_features, user_features = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/retail_train.csv',
        '../data/product.csv',
        '../data/hh_demographic.csv',
    )
)

In [3]:
# Filter the frame loaded from retail_train.csv with the project helper,
# passing the frame loaded from product.csv and take_n_popular=5000.
# NOTE(review): presumably keeps only the 5000 most popular items — confirm in
# src.utils.pre_filter_items.
# `data` is rebound to the helper's result, so re-running this cell alone
# applies the filter to an already-filtered frame.
data = pre_filter_items(data, item_features=item_features, take_n_popular=5000)

In [4]:
# Build the project's DataTransformer from the filtered `data` frame plus the
# user and item feature tables. The same object is reused in later cells for
# the validation frame, the metric helpers and the second-level training frame.
transformer = DataTransformer(data, user_features, item_features)

In [5]:
# Run the transformer's processing step; its result is read back from
# `transformer.data` in the following cell.
# NOTE(review): the return value is not captured, so transform() presumably
# updates the transformer in place — confirm in src.recommenders.
transformer.transform()

In [6]:
# Rebind `data` to the frame held by the transformer after transform().
# From here on `data` no longer refers to the pre-filtered frame from the
# earlier cell.
data = transformer.data

In [7]:
# Split `data` into four frames: train/validation for the first-level
# recommender (*_1) and train/validation for the second-level recommender (*_2).
# NOTE(review): the split criteria and sizes are defined in
# src.utils.train_test_split and are not visible here; if the split is random,
# a fixed seed is needed for the metrics below to be reproducible — confirm.
data_train_1, data_valid_1, data_train_2, data_valid_2 = train_test_split(data)

In [8]:
# First-level recommender, constructed from the level-1 training split.
recommender = MainRecommender(data_train_1)

In [9]:
# Fit the first-level recommender with n_factors=40.
# NOTE(review): 40 is an unexplained magic number — presumably the number of
# latent factors of the ALS model; consider a named constant in a config cell.
recommender.fit(n_factors=40)



Precision@5 и recall@200 на валидационном датасете первого уровня

In [10]:
# Build the level-1 evaluation frame from the level-1 validation and training
# splits.
# NOTE(review): presumably one row per validation user with the items actually
# bought — confirm in DataTransformer.valid_items.
result_1 = transformer.valid_items(data_valid_1, data_train_1)

In [11]:
# NOTE(review): disabled alternative to the `df_als_predictions_ranked` call in
# the next cell (same input, the non-`_ranked` method). This cell executes
# nothing; delete it if the alternative is no longer needed.
# result_1 = recommender.df_als_predictions(result_1)

In [12]:
# Pass the evaluation frame through the first-level recommender; the metric
# cells below read the 'als_candidates' column from the result.
# NOTE(review): presumably this call is what adds 'als_candidates' — confirm.
# `result_1` is both input and output, so re-running this cell alone feeds the
# method its own previous output.
result_1 = recommender.df_als_predictions_ranked(result_1)

In [13]:
# Recall of the 'als_candidates' column on the level-1 evaluation frame
# (displayed as the cell output).
# NOTE(review): k is not passed explicitly; the heading above says recall@200,
# so this presumably relies on the method's default — confirm.
transformer.eval_recall_at_k(result_1, 'als_candidates')

0.3567498518849593

In [14]:
# Precision of the 'als_candidates' column on the level-1 evaluation frame
# (displayed as the cell output).
# NOTE(review): k is not passed explicitly; the heading above says Precision@5,
# so this presumably relies on the method's default — confirm.
transformer.eval_precision_at_k(result_1, 'als_candidates')

0.14252873563218232

In [15]:
# Rebuild the level-2 training frame from the level-1 training split, the raw
# level-2 training split and the fitted first-level recommender.
# NOTE(review): presumably attaches first-level candidates and features for the
# second-level model — confirm in DataTransformer.prepare_train_df.
# `data_train_2` is overwritten, so re-running this cell without re-running the
# split cell passes the already-prepared frame back in.
data_train_2 = transformer.prepare_train_df(data_train_1, data_train_2, recommender)

In [16]:
# Second-level recommender, built from the prepared level-2 training frame,
# the transformer's `categorical` attribute and the first-level recommender's
# `overall_top_purchases` attribute.
# NOTE(review): presumably `categorical` lists the categorical feature columns
# and `overall_top_purchases` is a popularity fallback — confirm.
recommender_second = SecondLevelRecommender(data_train_2, transformer.categorical, recommender.overall_top_purchases)

In [17]:
# Fit the second-level recommender on the frame it was constructed with;
# no arguments are passed, so any hyperparameters come from its defaults.
recommender_second.fit()

In [18]:
# Ask the second-level recommender for predictions with argument 5 and preview
# the first rows. The displayed output shows one row per `user_id` with a
# five-item `candidates` list.
recs = recommender_second.df_predict(5)
recs.head()

Unnamed: 0,user_id,candidates
0,1,"[1029743, 856942, 896666, 8090521, 1106523]"
1,2,"[1029743, 1106523, 5569230, 1075368, 8090521]"
2,4,"[1029743, 1075368, 5569230, 1106523, 977374]"
3,6,"[1029743, 5569230, 1106523, 1127831, 5569792]"
4,7,"[1029743, 1106523, 1126899, 1122358, 1075368]"


Precision@5 на валидационном датасете второго уровня

In [19]:
# Score the second-level recommendations in `recs` against the level-2
# validation split; the heading above labels the displayed value as Precision@5.
recommender_second.eval_prediction(data_valid_2, recs)

0.18513750731421644

In [20]:
# Persist the recommendations without the index column.
# NOTE(review): the file is tab-separated despite its .csv extension, and the
# list-valued `candidates` column is written as its string representation —
# readers must pass sep='\t' and parse the lists back; confirm this is the
# format expected downstream.
recs.to_csv('../data/predictions.csv', sep='\t', index=False)