Python imports

In [1]:
import pandas as pd
from scipy import sparse
import numpy
from lightfm import LightFM
from lightfm.data import Dataset
from lightfm.evaluation import precision_at_k
from lightfm.evaluation import auc_score
from lightfm.cross_validation import random_train_test_split
from sklearn.externals import joblib

Read the CSV file. It has a header row with the columns customerid, productid and amount; only customerid and productid are loaded.

In [2]:
# Load just the two id columns from the interaction log, both typed as str.
df = pd.read_csv(
    'interactions.csv',
    usecols=['customerid', 'productid'],
    dtype=dict(customerid=str, productid=str),
)

Create a new LightFM Dataset containing the customer and product interactions

In [None]:
# Distinct customer and product ids that appear in the interaction log.
uniqueCustomer = df['customerid'].unique()
uniqueProducts = df['productid'].unique()

# Register every customer and product id with the LightFM dataset.
dataset = Dataset()
dataset.fit(uniqueCustomer, uniqueProducts)

# Feed (customerid, productid) pairs straight from the two columns.
# zip() over the columns yields the same pairs in the same order as
# df.iterrows(), without constructing a Series object for every row.
(interactions, weights) = dataset.build_interactions(
    zip(df['customerid'], df['productid'])
)

print(repr(interactions))

# The counts are integers, so format them as such.
print('Total number of unique customers : %d' % len(uniqueCustomer))
print('Total number of unique products  : %d' % len(uniqueProducts))


Train the model

In [None]:
print('Training model')
(train,test) = random_train_test_split(interactions, test_percentage=0.2)

model = LightFM(loss='warp', no_components=15)
%time model.fit(train, epochs=100, num_threads=8)

In [None]:
# Persist the fitted model to 'lightfmmodel.sav'.
# NOTE(review): `joblib` is imported from `sklearn.externals` at the top of
# the notebook; newer scikit-learn releases no longer ship that module —
# confirm the pinned scikit-learn version.
joblib.dump(value=model, filename='lightfmmodel.sav')
print('Model saved')

Evaluate the model

In [None]:
print('Evaluating model')

# AUC over the full interaction matrix (train and test entries together).
aucscore = auc_score(model, interactions, num_threads=8).mean()
print('AUC Score:          %.2f.' % (aucscore))

# AUC on the interactions the model was fitted on.
aucscore_train = auc_score(model, train, num_threads=8).mean()
print('AUC Score train:    %.2f.' % (aucscore_train))

# AUC on the held-out interactions. Passing train_interactions removes the
# already-known training positives from the ranking, so the model is not
# penalised for ranking items it was trained on above the held-out ones.
# NOTE(review): check_intersections=False keeps this cell from raising when
# train and test share a (customer, product) pair, which presumably can
# happen if interactions.csv repeats a pair — confirm, and consider
# de-duplicating the pairs before the split.
aucscore_test = auc_score(model, test, train_interactions=train,
                          num_threads=8, check_intersections=False).mean()
print('AUC Score test:     %.2f.' % (aucscore_test))

train_precision = precision_at_k(model, train, k=5, num_threads=8).mean()
# Same exclusion of training positives for the held-out precision; without
# it the top-5 slots can be taken by training items and the test score is
# understated.
test_precision = precision_at_k(model, test, train_interactions=train, k=5,
                                num_threads=8, check_intersections=False).mean()
print('Precision@k5: train %.2f.' % (train_precision))
print('Precision@k5: test  %.2f.' % (test_precision))