In [1]:
import implicit
import pandas as pd
import numpy
from scipy import sparse
from sklearn.externals import joblib

In [2]:
%time df = pd.read_csv('interactions_nonull.csv', usecols = ['customerid','productid','amount'], dtype = {'customerid':"category", 'productid':"category", "amount":int})

CPU times: user 26.2 s, sys: 3.28 s, total: 29.5 s
Wall time: 30 s


In [3]:
df['amount'].replace(1,5)
df['amount'].replace(0,1)

%time grouped = df.groupby(['customerid','productid'])['amount'].sum().reset_index()

CPU times: user 3.58 s, sys: 2.05 s, total: 5.62 s
Wall time: 5.68 s


In [4]:
rowdata = grouped['productid'].cat.codes
coldata = grouped['customerid'].cat.codes
data = grouped['amount']

orders = sparse.coo_matrix((data, (rowdata, coldata)))
print(repr(orders))

<845208x995441 sparse matrix of type '<class 'numpy.int64'>'
	with 6320265 stored elements in COOrdinate format>


In [5]:
model = implicit.als.AlternatingLeastSquares(factors=32)
%time model.fit(orders)

100%|██████████| 15.0/15 [01:07<00:00,  3.60s/it]

CPU times: user 2min 8s, sys: 1min 30s, total: 3min 38s
Wall time: 1min 8s





In [6]:
orders_transposed = orders.T.tocsr()
userid = grouped['customerid'].cat.categories.get_loc('31460292')

recommendations = model.recommend(userid, orders_transposed, N=10, filter_already_liked_items=True)

usercategories = grouped['customerid'].cat;
itemcategories = grouped['productid'].cat;

print('Reco items')
for itemid,score in recommendations:
    print("%s / %s" % (itemcategories.categories[itemcategories.codes[itemid]], score))
    
print('Similar users')    
for uid,score in model.similar_users(userid): 
    print("%s / %s" % (usercategories.categories[usercategories.codes[uid]], score))

Reco items
48030112 / 2.8293478e-13
95444148 / 1.9978442e-13
64514473 / 1.8362232e-13
87300068 / 1.7123965e-13
142012876 / 1.5539103e-13
42378891 / 1.4982026e-13
91569175 / 1.3870671e-13
73764965 / 1.2925576e-13
115538429 / 1.2534407e-13
62292650 / 1.2519453e-13
Similar users
30663067 / 5.822139e-14
31242483 / 5.821853e-14
17282934 / 5.8218275e-14
34680414 / 5.821699e-14
36579283 / 5.821624e-14
31347510 / 5.821587e-14
36787828 / 5.8214684e-14
34245808 / 5.8214216e-14
34307914 / 5.821364e-14
30715132 / 5.821342e-14
