# Data preparation

In [64]:
import pandas as pd
# Load the training table from a CSV file in the working directory.
# The next cell drops 'Unnamed: 0', 't_dat', 'price' and 'sales_channel_id',
# so only the customer_id / article_id pairs are used downstream.
data_train = pd.read_csv('hm_data_croped.csv')

In [65]:
# Keep only the interaction pairs (customer_id, article_id): the saved index
# column, the date, the price and the sales channel are not used by the models below.
unused_columns = ['Unnamed: 0', 't_dat', 'price', 'sales_channel_id']
data_prepared = data_train.drop(unused_columns, axis='columns')
data_prepared

Unnamed: 0,customer_id,article_id
0,000058a12d5b43e67d225668fa1f8d618c13dc232df0ca...,663713001
1,000058a12d5b43e67d225668fa1f8d618c13dc232df0ca...,541518023
2,00007d2de826758b65a93dd24ce629ed66842531df6699...,505221004
3,00007d2de826758b65a93dd24ce629ed66842531df6699...,685687003
4,00007d2de826758b65a93dd24ce629ed66842531df6699...,685687004
...,...,...
31784,a5e8c04f4d6d255e6f3dcaff6faeb8737ae4f2d4fcca59...,557599010
31785,a5e8c04f4d6d255e6f3dcaff6faeb8737ae4f2d4fcca59...,656345002
31786,a5ea10950843695eee0f436d6ab8f1ca185b1667f02ef7...,661023004
31787,a5ea10950843695eee0f436d6ab8f1ca185b1667f02ef7...,554772002


# Re-indexing customer IDs (str) and article IDs to consecutive integers

In [66]:
# Map every distinct raw id to a dense 0-based integer code. pandas' unique()
# returns values in order of first appearance, so codes follow row order.
customer_reindex_dict = {
  user_id: code
  for code, user_id in enumerate(data_prepared['customer_id'].unique())
}

# unique() is computed once and shared by the forward and inverse article maps.
unique_article_ids = data_prepared['article_id'].unique()
article_reindex_dict = {
  item_id: code for code, item_id in enumerate(unique_article_ids)
}

# Inverse lookup (code -> raw article id), derived from the forward map so the
# two dictionaries can never disagree.
article_reindex_dict_inverse = {
  code: item_id for item_id, code in article_reindex_dict.items()
}

In [67]:
# Replace raw ids by their integer codes with a vectorised lookup instead of a
# per-row .loc/.at loop (which builds a Series for every row).
customer_codes = data_prepared['customer_id'].map(customer_reindex_dict)
article_codes = data_prepared['article_id'].map(article_reindex_dict)

# Series.map yields NaN for ids missing from the dictionaries (for example when
# this cell is run a second time on already re-indexed data). Fail loudly, as
# the original dictionary lookup did, and do so before mutating anything.
if customer_codes.isna().any() or article_codes.isna().any():
  raise KeyError(
    'data_prepared contains ids that are missing from the reindex dictionaries; '
    'was this cell already executed?'
  )

# Store the codes as real integer columns (the row loop left customer_id as object dtype).
data_prepared['customer_id'] = customer_codes.astype('int64')
data_prepared['article_id'] = article_codes.astype('int64')

In [68]:
# Every row gets the same constant score of 1; this column later supplies the
# values of the sparse interaction matrix.
data_prepared.loc[:, 'score'] = 1

In [69]:
# Display the frame after re-indexing: customer_id and article_id now hold
# integer codes, and score is the constant column added above.
data_prepared

Unnamed: 0,customer_id,article_id,score
0,0,0,1
1,0,1,1
2,1,2,1
3,1,3,1
4,1,4,1
...,...,...,...
31784,9174,3969,1
31785,9174,9030,1
31786,9175,9427,1
31787,9175,1763,1


# AlternatingLeastSquares

In [70]:
from scipy.sparse import csr_matrix

In [71]:
# Sparse interaction matrix in (article, customer) orientation: row indices are
# article codes, column indices are customer codes, values are the scores.
# Repeated (article, customer) pairs are summed by the constructor.
interaction_values = data_prepared['score'].values
article_rows = data_prepared['article_id']
customer_cols = data_prepared['customer_id']
m = csr_matrix((interaction_values, (article_rows, customer_cols)))

In [72]:
!pip install implicit
from implicit.nearest_neighbours import bm25_weight

# BM25-weight the (article, customer) matrix, both to reduce the impact of customers
# that have bought the same article many times and to reduce the weight given to popular articles
items_user_purchase = bm25_weight(m, B=0.8, K1=100)

# most of the functions in implicit expect (user, item) sparse matrices instead of (item, user), so transpose and convert back to CSR
user_items = items_user_purchase.transpose().tocsr()



In [73]:
from implicit.als import AlternatingLeastSquares

# ALS matrix factorisation with 64 latent factors.
# random_state fixes the random initialisation so that a fresh
# "Restart & Run All" can reproduce the recommendations (multithreading may
# still introduce small differences).
# NOTE(review): the factor 2 scales every weighted interaction before fitting,
# presumably as a confidence multiplier; confirm that it is intentional.
model = AlternatingLeastSquares(factors=64, regularization=0.05, random_state=42)
model.fit(2 * user_items)

  0%|          | 0/15 [00:00<?, ?it/s]

# Getting test users from the submission template and making predictions

In [74]:
# Load the test customers and translate their raw ids to the integer codes
# assigned from the training data.
data_test = pd.read_csv('hm_test_croped.csv')

# Vectorised lookup instead of a per-row .loc/.at loop.
test_customer_codes = data_test['customer_id'].map(customer_reindex_dict)

# Customers that never appear in the training data have no code (and no row in
# user_items). Report them explicitly instead of failing on the first one with
# a bare KeyError.
unknown_customers = data_test.loc[test_customer_codes.isna(), 'customer_id']
if not unknown_customers.empty:
  raise KeyError(
    f"{len(unknown_customers)} test customer_id value(s) are absent from the "
    f"training data, e.g. {unknown_customers.iloc[0]!r}"
  )

data_test['customer_id'] = test_customer_codes.astype('int64')

In [75]:
# For every re-indexed test customer, ask the fitted model for 12 recommendations
# and keep only the returned ids (the scores are discarded). Articles the
# customer already interacted with are not filtered out.
recoms = [
  model.recommend(
    customer_code,
    user_items[customer_code],
    N=12,
    filter_already_liked_items=False,
  )[0]
  for customer_code in data_test['customer_id']
]

In [76]:
# Translate the model's internal article codes back to the original article ids.
# A new list is built for every row, so `recoms` itself is left untouched: the
# cell can be re-run safely, and the raw ids are no longer written into the
# arrays returned by recommend(), whose integer dtype is chosen by the library.
recoms_reindexed = [
  [article_reindex_dict_inverse[rec] for rec in row]
  for row in recoms
]

In [99]:
# Build the submission frame: the original (string) customer ids from the test
# file plus one space-separated string of recommended article ids per customer.
result_csv = pd.read_csv('hm_test_croped.csv').drop(columns=['Unnamed: 0'])

# Assign the whole column at once. Rows of recoms_reindexed are in the same
# order as the rows of the test file, and pandas raises ValueError if the
# lengths differ. This avoids seeding an integer column with 0 and then
# overwriting it with strings row by row; ' '.join also drops the trailing
# space that the manual concatenation left at the end of each prediction.
result_csv['prediction'] = [
  ' '.join(str(rec) for rec in row) for row in recoms_reindexed
]

result_csv

Unnamed: 0,customer_id,prediction
0,000058a12d5b43e67d225668fa1f8d618c13dc232df0ca...,541518023 628813002 629764003 629764002 594987...
1,00007d2de826758b65a93dd24ce629ed66842531df6699...,505221004 505221001 622678003 539293003 570574...
2,00083cda041544b2fbb0e0d2905ad17da7cf1007526fb4...,571092001 680604003 501323011 634196001 662084...
3,0008968c0d451dbc5a9968da03196fe20051965edde741...,529841001 584510003 692401001 589004004 556527...
4,000aa7f0dc06cd7174389e76c9e132a67860c5f65f9706...,640639001 680912006 553139001 664421002 377277...
...,...,...
9171,a5e24a18bbc09c4ad910dd0a2caebec5e8909edac8ab82...,608069005 541286003 198518010 577006002 608069...
9172,a5e4601001fce6b750213e166e641d46ec4b92a75e62aa...,630313005 665235001 647462001 696447003 707664...
9173,a5e68630346b8f766c69b9c6894bc17a36d05f951588b2...,532578020 574117006 640176002 619148002 337991...
9174,a5e8c04f4d6d255e6f3dcaff6faeb8737ae4f2d4fcca59...,654046002 656345002 664133003 638416003 639908...


# LogisticMatrixFactorization

In [100]:
from implicit.lmf import LogisticMatrixFactorization

# Logistic matrix factorisation with 64 latent factors, trained on the same
# weighted (user, item) matrix as the ALS model above.
# random_state fixes the random initialisation so that a fresh
# "Restart & Run All" can reproduce the recommendations (multithreading may
# still introduce small differences).
# NOTE(review): the factor 2 scales every weighted interaction before fitting;
# confirm that it is intentional for this model as well.
model = LogisticMatrixFactorization(factors=64, regularization=0.05, random_state=42)
model.fit(2 * user_items)

  0%|          | 0/30 [00:00<?, ?it/s]

In [101]:
# For every re-indexed test customer, ask the fitted model for 12 recommendations
# and keep only the returned ids (the scores are discarded). Articles the
# customer already interacted with are not filtered out.
recoms = [
  model.recommend(
    customer_code,
    user_items[customer_code],
    N=12,
    filter_already_liked_items=False,
  )[0]
  for customer_code in data_test['customer_id']
]

In [102]:
# Translate the model's internal article codes back to the original article ids.
# A new list is built for every row, so `recoms` itself is left untouched: the
# cell can be re-run safely, and the raw ids are no longer written into the
# arrays returned by recommend(), whose integer dtype is chosen by the library.
recoms_reindexed = [
  [article_reindex_dict_inverse[rec] for rec in row]
  for row in recoms
]

In [103]:
# Build the submission frame for the second model: the original (string)
# customer ids from the test file plus one space-separated string of
# recommended article ids per customer.
result_csv2 = pd.read_csv('hm_test_croped.csv').drop(columns=['Unnamed: 0'])

# Assign the whole column at once. Rows of recoms_reindexed are in the same
# order as the rows of the test file, and pandas raises ValueError if the
# lengths differ. This avoids seeding an integer column with 0 and then
# overwriting it with strings row by row; ' '.join also drops the trailing
# space that the manual concatenation left at the end of each prediction.
result_csv2['prediction'] = [
  ' '.join(str(rec) for rec in row) for row in recoms_reindexed
]

result_csv2

Unnamed: 0,customer_id,prediction
0,000058a12d5b43e67d225668fa1f8d618c13dc232df0ca...,400285006 663713001 491913001 541518023 567479...
1,00007d2de826758b65a93dd24ce629ed66842531df6699...,505221001 505221004 524825010 685687003 562245...
2,00083cda041544b2fbb0e0d2905ad17da7cf1007526fb4...,688873020 500133022 598859003 688873012 688873...
3,0008968c0d451dbc5a9968da03196fe20051965edde741...,531310002 529841001 613456009 574109014 673677...
4,000aa7f0dc06cd7174389e76c9e132a67860c5f65f9706...,640639001 631848002 564311002 664421002 685687...
...,...,...
9171,a5e24a18bbc09c4ad910dd0a2caebec5e8909edac8ab82...,608069010 685687002 529008026 589222005 573085...
9172,a5e4601001fce6b750213e166e641d46ec4b92a75e62aa...,696447003 665235001 399136027 647462001 658298...
9173,a5e68630346b8f766c69b9c6894bc17a36d05f951588b2...,574117006 619148002 685687004 573085001 625819...
9174,a5e8c04f4d6d255e6f3dcaff6faeb8737ae4f2d4fcca59...,639965001 655446001 638416003 580000002 581781...
