In [1]:
import implicit
import pandas as pd
import numpy
from scipy import sparse

In [2]:
%time df = pd.read_csv('orders_last_year.csv', sep='\t', usecols = ['kundennummer','artikelnummer','werkid','autorid','autor','mediumid','medium'], dtype = {'werkid':"category", 'autorid':"category", 'kundennummer':"category", 'artikelnummer':"category"})

CPU times: user 1min 16s, sys: 2.2 s, total: 1min 18s
Wall time: 1min 19s


In [3]:
%time grouped = df.groupby(['kundennummer','autorid']).size().reset_index(name = 'amount')

grouped.head()

CPU times: user 3.81 s, sys: 654 ms, total: 4.46 s
Wall time: 4.52 s


Unnamed: 0,kundennummer,autorid,amount
0,7700160454,10990216,1
1,7700160454,11387441,1
2,7700160454,12058762,2
3,7700160454,137510,4
4,7700160454,4354809,1


In [4]:
rowdata = grouped['autorid'].cat.codes
coldata = grouped['kundennummer'].cat.codes
data = grouped['amount']

orders = sparse.coo_matrix((data, (rowdata, coldata)))
print(repr(orders))

<198112x2415136 sparse matrix of type '<class 'numpy.int64'>'
	with 7251609 stored elements in COOrdinate format>


In [5]:
model = implicit.als.AlternatingLeastSquares(factors=32)
%time model.fit(orders)

100%|██████████| 15.0/15 [02:05<00:00,  7.89s/it]

CPU times: user 5min 46s, sys: 6min 6s, total: 11min 52s
Wall time: 2min 7s





In [8]:
orders_transposed = orders.T.tocsr()

userid = grouped['kundennummer'].cat.categories.get_loc('7703072433')

# 10009571
# 36980482

recommendations = model.recommend(userid, orders_transposed, N=10, filter_already_liked_items=False)

usercategories = grouped['kundennummer'].cat;
itemcategories = grouped['autorid'].cat;

print('Reco items')
for itemid,score in recommendations:
    print("%s / %s" % (itemcategories.categories[itemcategories.codes[itemid]], score))
    
print('Similar users')    
for uid,score in model.similar_users(userid): 
    print("%s / %s" % (usercategories.categories[usercategories.codes[uid]], score))

Reco items
9423657 / 0.14432508
667296 / 0.11732496
337927 / 0.096793205
4933844 / 0.093283
14683409 / 0.08835985
15338320 / 0.08754596
1173422 / 0.08530168
156684 / 0.08435494
13145224 / 0.08259083
5705283 / 0.081578285
Similar users
7703301690 / 0.04869334
7705256524 / 0.048227716
7705373600 / 0.048032787
7714575354 / 0.047891755
7705188890 / 0.04789023
7701201851 / 0.04780662
7705201212 / 0.04779697
7705269790 / 0.04776013
7705655021 / 0.04775347
7702977461 / 0.047731023
