In [1]:
pip install -U turicreate

Collecting turicreate
[?25l  Downloading https://files.pythonhosted.org/packages/4f/ef/1847a704548ad4cbcabe09b3882181c190f5b696da8b2d082521c33ec187/turicreate-5.4-cp36-cp36m-manylinux1_x86_64.whl (87.4MB)
[K    100% |████████████████████████████████| 87.4MB 340kB/s 
Collecting mxnet<1.2.0,>=1.1.0 (from turicreate)
[?25l  Downloading https://files.pythonhosted.org/packages/96/98/c9877e100c3d1ac92263bfaba7bb8a49294e099046592040a2ff8620ac61/mxnet-1.1.0.post0-py2.py3-none-manylinux1_x86_64.whl (23.8MB)
[K    100% |████████████████████████████████| 23.8MB 2.1MB/s 
Collecting coremltools==2.1.0 (from turicreate)
[?25l  Downloading https://files.pythonhosted.org/packages/b9/9d/7ec5a2480c6afce4fcb99de1650b7abfd1457b2ef1de5ce39bf7bee8a8ae/coremltools-2.1.0-cp36-none-manylinux1_x86_64.whl (2.7MB)
[K    100% |████████████████████████████████| 2.7MB 12.2MB/s 
Collecting graphviz<0.9.0,>=0.8.1 (from mxnet<1.2.0,>=1.1.0->turicreate)
  Downloading https://files.pythonhosted.org/packages/53/39/4

In [0]:
import pandas as pd
import numpy as np
import time
from sklearn.model_selection import train_test_split
import turicreate as tc
from IPython.display import Latex

**DATA LOADING AND DATA PREPARATION**

In [0]:
# Only the customer and product identifiers are used downstream, so read just
# those two columns instead of loading everything and dropping seven columns.
# This also avoids depending on the BOM-prefixed name of the first header and
# on the pointless conversion of the row index to strings.
transactions = (
    pd.read_csv('ta_feng_all_months_merged.csv', usecols=['CUSTOMER_ID', 'PRODUCT_ID'])
    .rename(columns={'CUSTOMER_ID': 'customerId', 'PRODUCT_ID': 'products'})
)

In [4]:
transactions.shape

(817741, 2)

In [6]:
# Keep only the customer column and collapse repeated ids to one row each.
# `drop` already returns a new frame, so `transactions` itself is left untouched.
customers = transactions.drop(columns=['products']).drop_duplicates()
customers.head()

Unnamed: 0,customerId
0,1104905
1,418683
2,1057331
3,1849332
4,1981995


In [8]:
# Collapse the purchase log to one row per customer holding the list of all
# product ids that customer bought. `reset_index()` on the grouped Series yields
# the columns `customerId` and `products` directly, without the extra `index`
# column that rebuilding the frame and resetting its index used to add.
# NOTE: this overwrites `transactions`, so the cell must not be run twice in a row.
transactions = (
    transactions.groupby('customerId')['products']
    .apply(list)
    .reset_index()
)
transactions.head(2).set_index('customerId')['products'].apply(pd.Series).reset_index()

Unnamed: 0,customerId,0,1
0,1069,9556439880610,4710176008699
1,1113,4902105011621,4711271000014


In [0]:
# Count how many times each customer bought each product.
# The per-customer baskets are flattened into (customer, product) pairs and
# counted with a single group-by. This replaces widening every basket into its
# own columns and melting the padded matrix back, which was slow and relied on
# aggregating the grouping key itself.
purchase_pairs = [
    (customer, product)
    for customer, basket in zip(transactions['customerId'], transactions['products'])
    for product in basket
]
data = (
    pd.DataFrame(purchase_pairs, columns=['customerId', 'productId'])
    .groupby(['customerId', 'productId'])
    .size()
    .reset_index(name='purchase_count')
)
data['productId'] = data['productId'].astype(np.int64)

### Отнормируем число покупок каждого SKU, используя Rescaling (min-max normalization)

$$x' = \frac{x - \min(x)}{\max(x) - \min(x)}$$

In [0]:
def normalize_data(data):
    """Min-max scale purchase counts separately for every product.

    For each ``productId`` the counts are rescaled to
    ``(x - min) / (max - min)``, where ``min`` and ``max`` are taken over the
    customers who bought that product.

    The computation uses grouped transforms on the long table, so no dense
    customers x products matrix is built.

    Parameters
    ----------
    data : pandas.DataFrame
        Long table with the columns ``customerId``, ``productId`` and
        ``purchase_count``. Repeated (customer, product) rows are averaged.

    Returns
    -------
    pandas.DataFrame
        Columns ``customerId``, ``productId`` and ``scaled_purchase_freq``,
        ordered by product and then customer, with a fresh 0..n-1 index.
        Products whose count is the same for all of their buyers have
        ``max == min``; their scaled value is undefined (0/0) and those rows
        are dropped.
    """
    # Average repeated pairs so that every (customer, product) appears once.
    counts = (
        data.groupby(['customerId', 'productId'], as_index=False)['purchase_count']
        .mean()
    )

    per_product = counts.groupby('productId')['purchase_count']
    lowest = per_product.transform('min')
    spread = per_product.transform('max') - lowest

    # A zero spread gives 0/0 = NaN, which dropna() below removes.
    counts['scaled_purchase_freq'] = (counts['purchase_count'] - lowest) / spread

    return (
        counts.drop(columns=['purchase_count'])
        .dropna()
        .sort_values(['productId', 'customerId'])
        .reset_index(drop=True)
    )

data_norm = normalize_data(data)

**Добавим переменную purchase_dummy, равную единице, если клиент хотя бы один раз покупал соответствующий товар.**



In [0]:
def create_data_dummy(data):
    """Return a copy of ``data`` with an extra ``purchase_dummy`` column set to 1.

    The input frame is not modified.
    """
    return data.assign(purchase_dummy=1)

In [0]:
data_dummy = create_data_dummy(data)

In [15]:
data_dummy.head()

Unnamed: 0,customerId,productId,purchase_count,purchase_dummy
0,1069,4710176008699,1,1
1,1069,9556439880610,1,1
2,1113,4711271000014,1,1
3,1113,4902105011621,1,1
4,1823,20398576,1,1


### Определим функцию разбиения на обучающую и тестовую выборки 

In [0]:
def split_data(data, test_size=.2, random_state=42):
    """Split a pandas frame into train and test parts and wrap both as SFrames.

    Parameters
    ----------
    data : pandas.DataFrame
        Table to split row-wise.
    test_size : float, optional
        Share of rows placed in the test part (default 0.2).
    random_state : int or None, optional
        Seed forwarded to ``train_test_split``. The fixed default makes the
        split reproducible across kernel restarts; pass ``None`` for a
        different random split on every call.

    Returns
    -------
    tuple
        ``(train_data, test_data)`` as ``tc.SFrame`` objects.
    """
    train, test = train_test_split(data, test_size=test_size, random_state=random_state)
    return tc.SFrame(train), tc.SFrame(test)

**На этом этапе у нас имеются три датасета. Разделим каждый на обучающую и тестовую выборки**

In [0]:
# Each of the three datasets (raw counts, dummy, scaled counts) is split by its
# own call to split_data.
train_data, test_data = split_data(data)
train_data_dummy, test_data_dummy = split_data(data_dummy)
train_data_norm, test_data_norm = split_data(data_norm)

**Обучим два вида моделей — Popularity Model и Collaborative Filtering Model — на трёх имеющихся датасетах.**

In [0]:
def model(train_data, name, user_id, item_id, target, users_to_recommend, n_rec, n_display):
    """Train a Turi Create recommender and print a sample of its recommendations.

    NOTE: a later cell defines ``model`` again with the same signature; when the
    notebook runs top to bottom that later definition is the one in effect.

    Parameters
    ----------
    train_data : tc.SFrame
        Training interactions.
    name : str
        ``'popularity'`` for a popularity recommender, or ``'cosine'`` /
        ``'pearson'`` for an item-similarity recommender using that
        similarity type.
    user_id, item_id, target : str
        Column names forwarded to the recommender's ``create`` call.
    users_to_recommend : list
        Users passed to ``recommend(users=...)``.
    n_rec : int
        Number of recommendations per user (``k``).
    n_display : int
        Number of recommendation rows to print.

    Returns
    -------
    The trained recommender.

    Raises
    ------
    ValueError
        If ``name`` is not one of the supported model names.
    """
    if name == 'popularity':
        recommender = tc.popularity_recommender.create(train_data,
                                                       user_id=user_id,
                                                       item_id=item_id,
                                                       target=target)
    elif name in ('cosine', 'pearson'):
        # Both item-similarity variants differ only in the similarity type.
        recommender = tc.item_similarity_recommender.create(train_data,
                                                            user_id=user_id,
                                                            item_id=item_id,
                                                            target=target,
                                                            similarity_type=name)
    else:
        # Fail with a clear message instead of hitting an unbound local below.
        raise ValueError(
            "Unknown model name %r; expected 'popularity', 'cosine' or 'pearson'" % (name,)
        )

    recom = recommender.recommend(users=users_to_recommend, k=n_rec)
    recom.print_rows(n_display)
    return recommender

**1.1 Popularity model based on purchase counts**

In [0]:
def model(train_data, name, user_id, item_id, target, users_to_recommend, n_rec, n_display):
    """Train a Turi Create recommender and print a sample of its recommendations.

    NOTE: a definition of ``model`` with the same signature also appears in an
    earlier cell; this later one is the definition in effect when the notebook
    runs top to bottom.

    Parameters
    ----------
    train_data : tc.SFrame
        Training interactions.
    name : str
        ``'popularity'`` for a popularity recommender, or ``'cosine'`` /
        ``'pearson'`` for an item-similarity recommender using that
        similarity type.
    user_id, item_id, target : str
        Column names forwarded to the recommender's ``create`` call.
    users_to_recommend : list
        Users passed to ``recommend(users=...)``.
    n_rec : int
        Number of recommendations per user (``k``).
    n_display : int
        Number of recommendation rows to print.

    Returns
    -------
    The trained recommender.

    Raises
    ------
    ValueError
        If ``name`` is not one of the supported model names.
    """
    if name == 'popularity':
        recommender = tc.popularity_recommender.create(train_data,
                                                       user_id=user_id,
                                                       item_id=item_id,
                                                       target=target)
    elif name in ('cosine', 'pearson'):
        # Both item-similarity variants differ only in the similarity type.
        recommender = tc.item_similarity_recommender.create(train_data,
                                                            user_id=user_id,
                                                            item_id=item_id,
                                                            target=target,
                                                            similarity_type=name)
    else:
        # Fail with a clear message instead of hitting an unbound local below.
        raise ValueError(
            "Unknown model name %r; expected 'popularity', 'cosine' or 'pearson'" % (name,)
        )

    recom = recommender.recommend(users=users_to_recommend, k=n_rec)
    recom.print_rows(n_display)
    return recommender

In [0]:
# Column names shared by every model() call below.
user_id, item_id = 'customerId', 'productId'

# n_rec: recommendations requested per user; n_display: rows printed per model.
n_rec, n_display = 10, 30

# Recommend for every distinct customer id collected in `customers`.
users_to_recommend = customers[user_id].tolist()

In [21]:
# Popularity recommender trained on the raw purchase counts.
name = 'popularity'
target = 'purchase_count'
popularity_model = model(train_data, name, user_id, item_id, target, users_to_recommend, n_rec, n_display)

+------------+---------------+-------+------+
| customerId |   productId   | score | rank |
+------------+---------------+-------+------+
|  1104905   | 4711703122536 |  3.0  |  1   |
|  1104905   |  74570703074  |  3.0  |  2   |
|  1104905   | 4713045018096 |  2.0  |  3   |
|  1104905   | 4713645410122 |  2.0  |  4   |
|  1104905   | 2100035002364 |  2.0  |  5   |
|  1104905   |    20538538   |  2.0  |  6   |
|  1104905   | 4710498600847 |  2.0  |  7   |
|  1104905   | 4712172200015 |  2.0  |  8   |
|  1104905   | 8712045003565 |  2.0  |  9   |
|  1104905   | 4715828131510 |  2.0  |  10  |
|   418683   | 4711703122536 |  3.0  |  1   |
|   418683   |  74570703074  |  3.0  |  2   |
|   418683   | 4713045018096 |  2.0  |  3   |
|   418683   | 4713645410122 |  2.0  |  4   |
|   418683   | 2100035002364 |  2.0  |  5   |
|   418683   |    20538538   |  2.0  |  6   |
|   418683   | 4710498600847 |  2.0  |  7   |
|   418683   | 4712172200015 |  2.0  |  8   |
|   418683   | 8712045003565 |  2.

**1.2 Popularity model based on purchase dummy.**

In [22]:
# Popularity recommender trained on the purchase_dummy target.
name = 'popularity'
target = 'purchase_dummy'
pop_dummy = model(train_data_dummy, name, user_id, item_id, target, users_to_recommend, n_rec, n_display)

+------------+---------------+-------+------+
| customerId |   productId   | score | rank |
+------------+---------------+-------+------+
|  1104905   | 4718176280992 |  1.0  |  1   |
|  1104905   | 4710088432353 |  1.0  |  2   |
|  1104905   | 8711500638229 |  1.0  |  3   |
|  1104905   |  41419761748  |  1.0  |  4   |
|  1104905   | 4710498123964 |  1.0  |  5   |
|  1104905   | 4710367520054 |  1.0  |  6   |
|  1104905   | 4713627810681 |  1.0  |  7   |
|  1104905   | 8712000900045 |  1.0  |  8   |
|  1104905   | 4715062861105 |  1.0  |  9   |
|  1104905   | 4710515535091 |  1.0  |  10  |
|   418683   | 4718176280992 |  1.0  |  1   |
|   418683   | 4710088432353 |  1.0  |  2   |
|   418683   | 8711500638229 |  1.0  |  3   |
|   418683   |  41419761748  |  1.0  |  4   |
|   418683   | 4710498123964 |  1.0  |  5   |
|   418683   | 4710367520054 |  1.0  |  6   |
|   418683   | 4713627810681 |  1.0  |  7   |
|   418683   | 8712000900045 |  1.0  |  8   |
|   418683   | 4715062861105 |  1.

**1.3 Popularity model based on normalized purchase count.**

In [30]:
# Popularity recommender trained on the min-max scaled purchase counts.
name = 'popularity'
target = 'scaled_purchase_freq'
pop_norm = model(train_data_norm, name, user_id, item_id, target, users_to_recommend, n_rec, n_display)

+------------+---------------+-------+------+
| customerId |   productId   | score | rank |
+------------+---------------+-------+------+
|  1104905   |  614632010285 |  1.0  |  1   |
|  1104905   | 4711703122536 |  1.0  |  2   |
|  1104905   | 4714541070236 |  1.0  |  3   |
|  1104905   | 4710172030106 |  1.0  |  4   |
|  1104905   | 4717673414169 |  1.0  |  5   |
|  1104905   | 4714499363039 |  1.0  |  6   |
|  1104905   | 4714686581451 |  0.5  |  7   |
|  1104905   | 4710424701952 |  0.5  |  8   |
|  1104905   | 4710706932005 |  0.5  |  9   |
|  1104905   | 4715828131510 |  0.5  |  10  |
|   418683   |  614632010285 |  1.0  |  1   |
|   418683   | 4711703122536 |  1.0  |  2   |
|   418683   | 4714541070236 |  1.0  |  3   |
|   418683   | 4710172030106 |  1.0  |  4   |
|   418683   | 4717673414169 |  1.0  |  5   |
|   418683   | 4714499363039 |  1.0  |  6   |
|   418683   | 4714686581451 |  0.5  |  7   |
|   418683   | 4710424701952 |  0.5  |  8   |
|   418683   | 4710706932005 |  0.

**2.1 Collaborative Filtering Model with Cosine similarity based on purchase count**

In [31]:
# Item-similarity recommender (cosine) trained on the raw purchase counts.
name = 'cosine'
target = 'purchase_count'
cos = model(train_data, name, user_id, item_id, target, users_to_recommend, n_rec, n_display)

+------------+---------------+----------------------+------+
| customerId |   productId   |        score         | rank |
+------------+---------------+----------------------+------+
|  1104905   | 4901201906022 | 0.051547002792358396 |  1   |
|  1104905   |    20403515   | 0.04472135901451111  |  2   |
|  1104905   | 4714082260080 | 0.04472135901451111  |  3   |
|  1104905   |    20494803   | 0.04472135901451111  |  4   |
|  1104905   | 4717362900560 | 0.04472135901451111  |  5   |
|  1104905   | 4713792992502 | 0.04472135901451111  |  6   |
|  1104905   |    20513115   | 0.04472135901451111  |  7   |
|  1104905   | 4713045614519 | 0.04472135901451111  |  8   |
|  1104905   | 4710690001022 | 0.04472135901451111  |  9   |
|  1104905   | 4712187000129 | 0.04472135901451111  |  10  |
|   418683   | 4710011409056 | 0.24524259567260742  |  1   |
|   418683   | 4710011406123 | 0.24348507324854532  |  2   |
|   418683   | 4710011405133 | 0.20464152097702026  |  3   |
|   418683   | 471001140

**2.2 Collaborative Filtering Model with Cosine similarity based on purchase dummy**

In [32]:
# Item-similarity recommender (cosine) trained on the purchase_dummy target.
name = 'cosine'
target = 'purchase_dummy'
cos_dummy = model(train_data_dummy, name, user_id, item_id, target, users_to_recommend, n_rec, n_display)

+------------+---------------+----------------------+------+
| customerId |   productId   |        score         | rank |
+------------+---------------+----------------------+------+
|  1104905   | 4711524000471 | 0.061190160838040436 |  1   |
|  1104905   | 4711524000419 | 0.05536611513658003  |  2   |
|  1104905   | 4711524000433 | 0.05387516455216841  |  3   |
|  1104905   | 4711524000495 |  0.0534362251108343  |  4   |
|  1104905   | 4711524000617 | 0.04892170429229736  |  5   |
|  1104905   | 4719111208132 | 0.045454545454545456 |  6   |
|  1104905   |    20494803   | 0.04065578092228283  |  7   |
|  1104905   | 4714082260080 | 0.04065578092228283  |  8   |
|  1104905   | 4714082100942 | 0.04065578092228283  |  9   |
|  1104905   | 4713045614519 | 0.04065578092228283  |  10  |
|   418683   | 4710011406123 |  0.3654307723045349  |  1   |
|   418683   | 4710011409056 | 0.35480377078056335  |  2   |
|   418683   | 4710011405133 |  0.3154452443122864  |  3   |
|   418683   | 471001140

**2.3 Collaborative Filtering Model with Cosine similarity based on normalized purchase count**

In [33]:
# Item-similarity recommender (cosine) trained on the min-max scaled purchase counts.
name = 'cosine'
target = 'scaled_purchase_freq'
cos_norm = model(train_data_norm, name, user_id, item_id, target, users_to_recommend, n_rec, n_display)

+------------+---------------+-------+------+
| customerId |   productId   | score | rank |
+------------+---------------+-------+------+
|  1104905   | 4710008290155 |  0.0  |  1   |
|  1104905   | 4710421090059 |  0.0  |  2   |
|  1104905   | 8801019931536 |  0.0  |  3   |
|  1104905   |    93457552   |  0.0  |  4   |
|  1104905   | 4711634002587 |  0.0  |  5   |
|  1104905   | 4710421090011 |  0.0  |  6   |
|  1104905   | 4712019100607 |  0.0  |  7   |
|  1104905   | 4710088433305 |  0.0  |  8   |
|  1104905   | 4710323168054 |  0.0  |  9   |
|  1104905   | 4711863180070 |  0.0  |  10  |
|   418683   | 4710008290155 |  0.0  |  1   |
|   418683   | 4710421090059 |  0.0  |  2   |
|   418683   | 8801019931536 |  0.0  |  3   |
|   418683   |    93457552   |  0.0  |  4   |
|   418683   | 4711634002587 |  0.0  |  5   |
|   418683   | 4710421090011 |  0.0  |  6   |
|   418683   | 4712019100607 |  0.0  |  7   |
|   418683   | 4710088433305 |  0.0  |  8   |
|   418683   | 4710323168054 |  0.

**3.1 Collaborative Filtering Model with Pearson similarity based on purchase count**

In [34]:
# Item-similarity recommender (pearson) trained on the raw purchase counts.
# `name` selects the recommender type and `target` the column it is fitted on.
name = 'pearson'
target = 'purchase_count'
pear = model(train_data, name, user_id, item_id, target, users_to_recommend, n_rec, n_display)

+------------+---------------+-------+------+
| customerId |   productId   | score | rank |
+------------+---------------+-------+------+
|  1104905   | 4711703122536 |  3.0  |  1   |
|  1104905   |  74570703074  |  3.0  |  2   |
|  1104905   | 4713645410122 |  2.0  |  3   |
|  1104905   | 4713645632036 |  2.0  |  4   |
|  1104905   | 4713045018096 |  2.0  |  5   |
|  1104905   | 2100035002364 |  2.0  |  6   |
|  1104905   |    20538538   |  2.0  |  7   |
|  1104905   | 4710498600847 |  2.0  |  8   |
|  1104905   | 4712172200015 |  2.0  |  9   |
|  1104905   | 8712045003565 |  2.0  |  10  |
|   418683   | 4711703122536 |  3.0  |  1   |
|   418683   |  74570703074  |  3.0  |  2   |
|   418683   | 4713645410122 |  2.0  |  3   |
|   418683   | 4713645632036 |  2.0  |  4   |
|   418683   | 4713045018096 |  2.0  |  5   |
|   418683   | 2100035002364 |  2.0  |  6   |
|   418683   |    20538538   |  2.0  |  7   |
|   418683   | 4710498600847 |  2.0  |  8   |
|   418683   | 4712172200015 |  2.

**3.2 Collaborative Filtering Model with Pearson similarity based on purchase dummy**

In [35]:
# Item-similarity recommender (pearson) trained on the purchase_dummy target.
# `name` selects the recommender type and `target` the column it is fitted on.
name = 'pearson'
target = 'purchase_dummy'
pear_dummy = model(train_data_dummy, name, user_id, item_id, target, users_to_recommend, n_rec, n_display)

+------------+---------------+-------+------+
| customerId |   productId   | score | rank |
+------------+---------------+-------+------+
|  1104905   | 4718176280992 |  0.0  |  1   |
|  1104905   | 4710088432353 |  0.0  |  2   |
|  1104905   | 8711500638229 |  0.0  |  3   |
|  1104905   |  41419761748  |  0.0  |  4   |
|  1104905   | 4710498123964 |  0.0  |  5   |
|  1104905   | 4710367520054 |  0.0  |  6   |
|  1104905   | 4713627810681 |  0.0  |  7   |
|  1104905   | 8712000900045 |  0.0  |  8   |
|  1104905   | 4715062861105 |  0.0  |  9   |
|  1104905   | 4710515535091 |  0.0  |  10  |
|   418683   | 4718176280992 |  0.0  |  1   |
|   418683   | 4710088432353 |  0.0  |  2   |
|   418683   | 8711500638229 |  0.0  |  3   |
|   418683   |  41419761748  |  0.0  |  4   |
|   418683   | 4710498123964 |  0.0  |  5   |
|   418683   | 4710367520054 |  0.0  |  6   |
|   418683   | 4713627810681 |  0.0  |  7   |
|   418683   | 8712000900045 |  0.0  |  8   |
|   418683   | 4715062861105 |  0.

**3.3 Collaborative Filtering Model with Pearson similarity based on normalized purchase count**

In [36]:
# Item-similarity recommender (pearson) trained on the min-max scaled purchase counts.
name = 'pearson'
target = 'scaled_purchase_freq'
pear_norm = model(train_data_norm, name, user_id, item_id, target, users_to_recommend, n_rec, n_display)

+------------+---------------+-------+------+
| customerId |   productId   | score | rank |
+------------+---------------+-------+------+
|  1104905   |  614632010285 |  1.0  |  1   |
|  1104905   | 4711703122536 |  1.0  |  2   |
|  1104905   | 4714541070236 |  1.0  |  3   |
|  1104905   | 4710172030106 |  1.0  |  4   |
|  1104905   | 4717673414169 |  1.0  |  5   |
|  1104905   | 4714499363039 |  1.0  |  6   |
|  1104905   | 4714686581451 |  0.5  |  7   |
|  1104905   | 4710424701952 |  0.5  |  8   |
|  1104905   | 4710706932005 |  0.5  |  9   |
|  1104905   | 4715828131510 |  0.5  |  10  |
|   418683   |  614632010285 |  1.0  |  1   |
|   418683   | 4711703122536 |  1.0  |  2   |
|   418683   | 4714541070236 |  1.0  |  3   |
|   418683   | 4710172030106 |  1.0  |  4   |
|   418683   | 4717673414169 |  1.0  |  5   |
|   418683   | 4714499363039 |  1.0  |  6   |
|   418683   | 4714686581451 |  0.5  |  7   |
|   418683   | 4710424701952 |  0.5  |  8   |
|   418683   | 4710706932005 |  0.

**Сравним полученные результаты моделей**

In [0]:
# Trained recommenders grouped by the target column they were fitted on.
# The order inside each list (popularity, cosine, pearson) matches the order of
# the display names in the corresponding names_w_* list.
models_w_counts = [popularity_model, cos, pear]
models_w_dummy = [pop_dummy, cos_dummy, pear_dummy]
models_w_norm = [pop_norm, cos_norm, pear_norm]

names_w_counts = [
    'Popularity Model on Purchase Counts',
    'Cosine Similarity on Purchase Counts',
    'Pearson Similarity on Purchase Counts',
]
names_w_dummy = [
    'Popularity Model on Purchase Dummy',
    'Cosine Similarity on Purchase Dummy',
    'Pearson Similarity on Purchase Dummy',
]
names_w_norm = [
    'Popularity Model on Scaled Purchase Counts',
    'Cosine Similarity on Scaled Purchase Counts',
    'Pearson Similarity on Scaled Purchase Counts',
]

**Модели, основанные на количестве покупок**

In [38]:
# Evaluate the three recommenders fitted on purchase counts against the
# held-out count data; the recorded output reports precision/recall by cutoff
# and RMSE for each model.
eval_counts = tc.recommender.util.compare_models(test_data, models_w_counts, model_names=names_w_counts)

PROGRESS: Evaluate model Popularity Model on Purchase Counts



Precision and recall summary statistics by cutoff
+--------+------------------------+------------------------+
| cutoff |     mean_precision     |      mean_recall       |
+--------+------------------------+------------------------+
|   1    | 0.00014318442153493665 | 2.8636884306987325e-05 |
|   2    | 0.00014318442153493652 | 3.886434298805441e-05  |
|   3    | 9.545628102329142e-05  | 3.886434298805441e-05  |
|   4    | 7.159221076746826e-05  | 3.886434298805441e-05  |
|   5    | 8.591065292096246e-05  | 5.477372315860274e-05  |
|   6    | 7.159221076746835e-05  | 5.477372315860274e-05  |
|   7    | 6.136475208640137e-05  | 5.477372315860274e-05  |
|   8    |  7.15922107674684e-05  | 9.056982854233742e-05  |
|   9    | 7.954690085274287e-05  | 0.00013829796905398282 |
|   10   | 7.159221076746845e-05  | 0.00013829796905398282 |
+--------+------------------------+------------------------+
[10 rows x 3 columns]


Overall RMSE: 0.14419355268193434

Per User RMSE (best)
+------------+-


Precision and recall summary statistics by cutoff
+--------+----------------------+----------------------+
| cutoff |    mean_precision    |     mean_recall      |
+--------+----------------------+----------------------+
|   1    | 0.04782359679266902  | 0.027942512177907275 |
|   2    | 0.033290378006872845 | 0.03626494138736405  |
|   3    | 0.02706185567010319  | 0.04230525166414334  |
|   4    | 0.02226517754868265  | 0.045050167002317204 |
|   5    | 0.01915807560137455  | 0.04809300641757925  |
|   6    | 0.016871897670866807 | 0.05059157457336369  |
|   7    | 0.015075274095892644 | 0.05238150095719277  |
|   8    | 0.013799398625429534 | 0.05414392453018335  |
|   9    | 0.01283886979763266  |  0.0563090258791334  |
|   10   | 0.011970217640320786 | 0.057928600780495346 |
+--------+----------------------+----------------------+
[10 rows x 3 columns]


Overall RMSE: 1.0195117625892505

Per User RMSE (best)
+------------+---------------------+-------+
| customerId |         rmse


Precision and recall summary statistics by cutoff
+--------+------------------------+------------------------+
| cutoff |     mean_precision     |      mean_recall       |
+--------+------------------------+------------------------+
|   1    | 0.00014318442153493682 | 2.8636884306987352e-05 |
|   2    | 0.00014318442153493668 | 3.8864342988054366e-05 |
|   3    | 9.545628102329138e-05  | 3.8864342988054366e-05 |
|   4    | 0.00010738831615120242 |  8.65924834996998e-05  |
|   5    | 8.591065292096234e-05  |  8.65924834996998e-05  |
|   6    | 7.159221076746845e-05  |  8.65924834996998e-05  |
|   7    |  6.13647520864014e-05  |  8.65924834996998e-05  |
|   8    | 5.369415807560121e-05  |  8.65924834996998e-05  |
|   9    | 4.7728140511645614e-05 |  8.65924834996998e-05  |
|   10   |  5.72737686139749e-05  | 0.00015818469426716835 |
+--------+------------------------+------------------------+
[10 rows x 3 columns]


Overall RMSE: 0.2791879905583871

Per User RMSE (best)
+------------+--

**Модели, основанные на dummy-переменной**

In [39]:
# Evaluate the three recommenders fitted on the purchase_dummy target against
# the held-out dummy data.
eval_dummy = tc.recommender.util.compare_models(test_data_dummy, models_w_dummy, model_names=names_w_dummy)

PROGRESS: Evaluate model Popularity Model on Purchase Dummy



Precision and recall summary statistics by cutoff
+--------+------------------------+------------------------+
| cutoff |     mean_precision     |      mean_recall       |
+--------+------------------------+------------------------+
|   1    | 0.00014283673760891275 | 3.570918440222819e-05  |
|   2    | 7.141836880445637e-05  | 3.570918440222819e-05  |
|   3    |  4.76122458696377e-05  | 3.570918440222819e-05  |
|   4    | 0.00010712755320668477 | 0.00019441667063435343 |
|   5    | 0.00014283673760891332 | 0.0004800901458521823  |
|   6    | 0.00014283673760891348 | 0.0005038962687869983  |
|   7    | 0.00012243148937906824 | 0.0005038962687869983  |
|   8    | 0.00010712755320668447 | 0.0005038962687869983  |
|   9    | 9.522449173927526e-05  | 0.0005038962687869983  |
|   10   | 8.570204256534812e-05  | 0.0005038962687869983  |
+--------+------------------------+------------------------+
[10 rows x 3 columns]


Overall RMSE: 0.0

Per User RMSE (best)
+------------+------+-------+
|


Precision and recall summary statistics by cutoff
+--------+----------------------+----------------------+
| cutoff |    mean_precision    |     mean_recall      |
+--------+----------------------+----------------------+
|   1    | 0.040565633480931264 | 0.019057996868309737 |
|   2    | 0.028281674046564863 | 0.02576639385309575  |
|   3    | 0.023044327000904558 | 0.03080223907248618  |
|   4    | 0.019461505499214313 | 0.03455799505292436  |
|   5    | 0.01674046564776454  | 0.03715932411475906  |
|   6    | 0.015045469694805512 | 0.04004258568963614  |
|   7    | 0.013528679576387061 | 0.041509723037361834 |
|   8    | 0.012319668618768716 | 0.04305887814672275  |
|   9    | 0.01129997301972728  | 0.04451025810831561  |
|   10   | 0.010598485930581365 | 0.046113770531710827 |
+--------+----------------------+----------------------+
[10 rows x 3 columns]


Overall RMSE: 0.9946087139998798

Per User RMSE (best)
+------------+-------------------+-------+
| customerId |        rmse   


Precision and recall summary statistics by cutoff
+--------+------------------------+------------------------+
| cutoff |     mean_precision     |      mean_recall       |
+--------+------------------------+------------------------+
|   1    |          0.0           |          0.0           |
|   2    | 0.00014283673760891272 | 3.825984043095888e-05  |
|   3    | 9.522449173927557e-05  | 3.825984043095888e-05  |
|   4    | 7.141836880445636e-05  | 3.825984043095888e-05  |
|   5    |  8.57020425653483e-05  | 0.00010967820923541574 |
|   6    | 7.141836880445636e-05  | 0.00010967820923541574 |
|   7    | 6.121574468953404e-05  | 0.00010967820923541574 |
|   8    | 5.356377660334247e-05  | 0.00010967820923541574 |
|   9    | 4.761224586963773e-05  | 0.00010967820923541574 |
|   10   | 5.713469504356521e-05  | 0.00014538739363764385 |
+--------+------------------------+------------------------+
[10 rows x 3 columns]


Overall RMSE: 1.0

Per User RMSE (best)
+------------+------+-------+
|

**Модели, основанные на нормированном значении количества покупок**

In [40]:
# Evaluate the three recommenders fitted on the scaled purchase counts against
# the held-out scaled data.
eval_norm = tc.recommender.util.compare_models(test_data_norm, models_w_norm, model_names=names_w_norm)

PROGRESS: Evaluate model Popularity Model on Scaled Purchase Counts



Precision and recall summary statistics by cutoff
+--------+------------------------+------------------------+
| cutoff |     mean_precision     |      mean_recall       |
+--------+------------------------+------------------------+
|   1    |          0.0           |          0.0           |
|   2    | 0.0003512058066026694  | 0.00039803324748302534 |
|   3    | 0.0003902286740029662  | 0.0005541247170842112  |
|   4    | 0.00035120580660266926 | 0.0007882619214859915  |
|   5    | 0.00032779208616249097 | 0.0010223991258877705  |
|   6    | 0.0003512058066026688  | 0.0013345820650901413  |
|   7    | 0.0003344817205739712  |  0.001568719269491923  |
|   8    | 0.0003512058066026706  |  0.002036993678295484  |
|   9    | 0.00031218293920237195 |  0.002036993678295484  |
|   10   | 0.0002809646452821358  | 0.0020369936782954835  |
+--------+------------------------+------------------------+
[10 rows x 3 columns]


Overall RMSE: 0.20297286225380898

Per User RMSE (best)
+------------+-


Precision and recall summary statistics by cutoff
+--------+-----------------------+-----------------------+
| cutoff |     mean_precision    |      mean_recall      |
+--------+-----------------------+-----------------------+
|   1    |  0.002809646452821361 | 0.0014893727724446528 |
|   2    | 0.0022243034418169072 | 0.0022112958193501384 |
|   3    | 0.0022633263092171983 |  0.003273275282172503 |
|   4    | 0.0021657691407164685 |  0.004326892701980501 |
|   5    | 0.0021072348396160177 |  0.005329780394168103 |
|   6    | 0.0017950519004136418 |  0.005376607835048493 |
|   7    | 0.0019399939793290238 |  0.00671118990013864  |
|   8    | 0.0019023647857644547 |  0.007655543291225844 |
|   9    |  0.001795051900413642 |  0.00829942060333069  |
|   10   | 0.0016623741512526377 |  0.00847502350663203  |
+--------+-----------------------+-----------------------+
[10 rows x 3 columns]


Overall RMSE: 0.19530710217026012

Per User RMSE (best)
+------------+------+-------+
| customerId 


Precision and recall summary statistics by cutoff
+--------+------------------------+------------------------+
| cutoff |     mean_precision     |      mean_recall       |
+--------+------------------------+------------------------+
|   1    | 0.0004682744088035587  | 0.0002809646452821357  |
|   2    | 0.00046827440880355874 | 0.00047607898228361844 |
|   3    | 0.0006243658784047439  | 0.0011004448606883638  |
|   4    | 0.00046827440880355933 | 0.0011004448606883638  |
|   5    | 0.00042144696792320314 | 0.0013345820650901429  |
|   6    | 0.0003512058066026698  | 0.0013345820650901429  |
|   7    | 0.00033448172057397123 | 0.0014126277998907325  |
|   8    | 0.00029267150550222416 | 0.0014126277998907325  |
|   9    | 0.0002601524493353104  | 0.0014126277998907325  |
|   10   | 0.0002341372044017795  | 0.0014126277998907325  |
+--------+------------------------+------------------------+
[10 rows x 3 columns]


Overall RMSE: 0.20282305840852813

Per User RMSE (best)
+------------+-