In [1]:
import pandas as pd
import numpy as np

import os, sys
# Put the parent of the current working directory on the import path so the
# `from src...` imports below can be resolved when the kernel is started from
# the notebook's own folder (presumably `src` lives one level up).
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)

from src.metrics import precision_at_k, recall_at_k
from src.utils import pre_filter_items, train_test_split
from src.recommenders import MainRecommender, SecondLevelRecommender, DataTransformer

import warnings
# Suppress every warning from here on so that library notices do not clutter
# the cell outputs.
# NOTE(review): a blanket 'ignore' also hides deprecation and numerical
# warnings; consider narrowing it by category or module.
warnings.filterwarnings('ignore')

In [2]:
# Load the three raw CSV inputs from ../data in one pass: the main retail data
# (`data`), the product table (`item_features`) and the household demographics
# table (`user_features`). Contents are inferred from the file names only —
# TODO confirm against the data dictionary.
data, item_features, user_features = map(
    pd.read_csv,
    (
        '../data/retail_train.csv',
        '../data/product.csv',
        '../data/hh_demographic.csv',
    ),
)

In [3]:
# Filter the raw data with the project's `pre_filter_items` helper before any
# modelling; going by the keyword name, 5000 is the number of most popular
# items to keep (TODO confirm in src.utils).
# NOTE(review): `data` is rebound to a filtered version of itself, so the raw
# frame is no longer available after this cell.
data = pre_filter_items(
    data,
    item_features=item_features,
    take_n_popular=5000,
)

In [4]:
# Hand the pre-filtered `data` frame plus the user and item feature tables to
# the project's DataTransformer; later cells use this object for preprocessing,
# building evaluation frames and computing metrics.
transformer = DataTransformer(data, user_features, item_features)

In [5]:
# Run the transformer's preprocessing step. The next cell reads the result back
# from `transformer.data`, so this call presumably updates the transformer in
# place rather than returning a frame — TODO confirm in src.recommenders.
transformer.transform()

In [6]:
# Rebind `data` to the frame held by the transformer after `transform()`.
# NOTE(review): this is the third meaning of `data` in the notebook
# (loaded -> pre-filtered -> transformed); stage-specific names would make
# partial re-runs safer.
data = transformer.data

In [7]:
# Split the transformed data into a train/validation pair for each of the two
# model levels (suffix _1 and _2). `train_test_split` here is the project
# helper imported from src.utils, not scikit-learn's, and is called with its
# default settings.
(
    data_train_1,
    data_valid_1,
    data_train_2,
    data_valid_2,
) = train_test_split(data)

In [8]:
# First-level model: build the project's MainRecommender on the level-1
# training split.
recommender = MainRecommender(data_train_1)

In [9]:
# Fit the first-level model. n_factors=40 is presumably the latent
# dimensionality of the underlying factorization (its output is later referred
# to as 'als_candidates') — TODO confirm.
# NOTE(review): no random seed is set in the visible cells, so unless the
# project code fixes one internally the metrics below may vary between runs.
recommender.fit(n_factors=40)



Precision@5 и recall@200 на валидационном датасете первого уровня

In [10]:
# Build the level-1 evaluation frame from the validation and training splits
# (presumably one row per user with the items actually bought — TODO confirm
# against DataTransformer.valid_items).
result_1 = transformer.valid_items(data_valid_1, data_train_1)

In [11]:
# Add the first-level model's predictions to the evaluation frame; the metric
# cells below refer to them by the name 'als_candidates' (presumably a column).
# NOTE(review): `result_1` is rebound to a function of itself, so re-running
# only this cell feeds the already-augmented frame back in; re-run the previous
# cell first.
result_1 = recommender.df_als_predictions(result_1)

In [12]:
# Recall@k of the first-level candidates on the level-1 validation data.
# k is not passed here, so it comes from the helper's default; the heading
# above states recall@200 — confirm the default matches.
transformer.eval_recall_at_k(result_1, 'als_candidates')

0.35386055262723093

In [13]:
# Precision@k of the first-level candidates on the level-1 validation data.
# k is not passed here, so it comes from the helper's default; the heading
# above states Precision@5 — confirm the default matches.
transformer.eval_precision_at_k(result_1, 'als_candidates')

0.1399300349825073

In [14]:
# Build the second-level training frame from both training splits and the
# fitted first-level recommender (presumably first-level candidates joined with
# features and a target label — TODO confirm in DataTransformer.prepare_train_df).
# NOTE(review): `data_train_2` is overwritten with a value derived from itself,
# so this cell cannot safely be re-run on its own.
data_train_2 = transformer.prepare_train_df(data_train_1, data_train_2, recommender)

In [15]:
# Second-level model, built from the prepared training frame and the
# transformer's `categorical` attribute (presumably the names of the
# categorical feature columns — TODO confirm).
recommender_second = SecondLevelRecommender(data_train_2, transformer.categorical)

In [17]:
# Fit the second-level model with its default settings.
# NOTE(review): the saved execution counter jumps from In [15] to In [17], so
# something was executed in between that is no longer in the notebook (or a
# cell was re-run); verify that Restart Kernel -> Run All reproduces the
# results below.
recommender_second.fit()

In [18]:
# Get the second-level model's recommendations as a frame and preview the
# first rows.
# NOTE(review): in the saved preview the candidate lists of different users
# consist of almost the same items; check whether the ranker is effectively
# ordering by overall popularity rather than personalising.
recs = recommender_second.df_predict()
recs.head()

Unnamed: 0,user_id,candidates
0,1,"[1029743, 1126899, 1070820, 1106523, 962967]"
1,2,"[1029743, 1106523, 1070820, 1126899, 1004906]"
2,4,"[1029743, 1106523, 1126899, 1070820, 1004906]"
3,6,"[1029743, 1106523, 1126899, 1070820, 854754]"
4,7,"[1029743, 1106523, 1126899, 1122358, 1070820]"


Precision@5 на валидационном датасете второго уровня

In [19]:
# Score the second-level recommendations against the level-2 validation split
# (the heading above labels this metric Precision@5).
recommender_second.eval_prediction(data_valid_2, recs)

0.14572849619660527

In [20]:
# Inspect the rows that the second-level recommender's frame holds for user 1
# with a positive label (target == 1). A callable indexer is used so the frame
# is referenced once and both columns are accessed the same way.
# NOTE(review): the saved output repeats identical (user_id, item_id) rows
# (e.g. item 8293439 appears three times); check whether duplicates in the
# second-level frame are intended.
recommender_second.data.loc[
    lambda frame: (frame['user_id'] == 1) & (frame['target'] == 1)
]

Unnamed: 0,user_id,item_id,target,manufacturer,department,brand,commodity_desc,sub_commodity_desc,curr_size_of_product,age_desc,...,user_factor_32,user_factor_33,user_factor_34,user_factor_35,user_factor_36,user_factor_37,user_factor_38,user_factor_39,user_factor_40,preds
129240,1,1004906,1.0,69,PRODUCE,Private,POTATOES,POTATOES RUSSET (BULK&BAG),5 LB,65+,...,1.194346,1.711777,1.621746,1.097204,9.189105,9.005104,9.935469,-2.756252,-9.645917,0.030068
129200,1,856942,1.0,159,GROCERY,National,BAKED BREAD/BUNS/ROLLS,FRUIT/BREAKFAST BREAD,16 OZ,65+,...,1.194346,1.711777,1.621746,1.097204,9.189105,9.005104,9.935469,-2.756252,-9.645917,0.029999
129201,1,856942,1.0,159,GROCERY,National,BAKED BREAD/BUNS/ROLLS,FRUIT/BREAKFAST BREAD,16 OZ,65+,...,1.194346,1.711777,1.621746,1.097204,9.189105,9.005104,9.935469,-2.756252,-9.645917,0.029999
129205,1,5577022,1.0,1194,GROCERY,National,REFRGRATD JUICES/DRNKS,DAIRY CASE 100% PURE JUICE - O,64 OZ,65+,...,1.194346,1.711777,1.621746,1.097204,9.189105,9.005104,9.935469,-2.756252,-9.645917,0.029999
129209,1,8293439,1.0,1519,GROCERY,National,FRZN NOVELTIES/WTR ICE,STICKS/ENROBED,6/4 OZ,65+,...,1.194346,1.711777,1.621746,1.097204,9.189105,9.005104,9.935469,-2.756252,-9.645917,0.029999
129210,1,8293439,1.0,1519,GROCERY,National,FRZN NOVELTIES/WTR ICE,STICKS/ENROBED,6/4 OZ,65+,...,1.194346,1.711777,1.621746,1.097204,9.189105,9.005104,9.935469,-2.756252,-9.645917,0.029999
129211,1,8293439,1.0,1519,GROCERY,National,FRZN NOVELTIES/WTR ICE,STICKS/ENROBED,6/4 OZ,65+,...,1.194346,1.711777,1.621746,1.097204,9.189105,9.005104,9.935469,-2.756252,-9.645917,0.029999
129250,1,940947,1.0,2082,MEAT-PCKGD,National,HEAT/SERVE,ENTREES,24 OZ,65+,...,1.194346,1.711777,1.621746,1.097204,9.189105,9.005104,9.935469,-2.756252,-9.645917,0.029999
129251,1,940947,1.0,2082,MEAT-PCKGD,National,HEAT/SERVE,ENTREES,24 OZ,65+,...,1.194346,1.711777,1.621746,1.097204,9.189105,9.005104,9.935469,-2.756252,-9.645917,0.029999
129252,1,940947,1.0,2082,MEAT-PCKGD,National,HEAT/SERVE,ENTREES,24 OZ,65+,...,1.194346,1.711777,1.621746,1.097204,9.189105,9.005104,9.935469,-2.756252,-9.645917,0.029999
