Python imports

In [None]:
import implicit
import pandas as pd
from scipy import sparse
import numpy
from lightfm import LightFM
from lightfm.data import Dataset
from lightfm.evaluation import precision_at_k
from lightfm.evaluation import auc_score
from lightfm.cross_validation import random_train_test_split
from sklearn.externals import joblib
import operator

Loading product data

In [None]:
%time productdf = pd.read_csv('artikel_export.csv')
productdf = productdf[productdf.mandant==2]
productdf.head()

Read the orders CSV file. It has a header row with the columns customerid, productid and amount; only customerid and productid are loaded.

In [None]:
%time df = pd.read_csv('orders.csv', usecols = ['customerid','productid'], dtype = {'customerid':str, 'productid':str})
df.head()

Create a new LightFM Dataset containing the customer and product interactions

In [None]:
uniqueCustomer = df['customerid'].unique()
uniqueProducts = productdf['artikel_id'].unique()

dataset = Dataset()
%time dataset.fit(uniqueCustomer,uniqueProducts)
%time dataset.fit_partial(items = df['productid'].unique(), item_features = ['i_spannung_nervenkitzel','i_liebe_romantik','i_fantasy_sciencefiction','i_fremdsprachiges_anderelaender','i_reiseinvergangenezeiten','i_wissensdurst','i_schoenegeschichten','i_haus_garten','i_hobby_freizeit','i_humorvolles','i_spiel_spass','ag_bis12monate', 'ag_bis12monate', 'ag_1bis3','ag_4bis7','ag_8bis11','ag_ab12'])

def toitemfeatures(row):
    """Return the names of the interest flags that are set on one product row.

    A flag counts as set when the attribute of the same name on ``row`` is
    anything other than the string 'f'. Names are returned in the fixed order
    listed below.
    """
    interest_flags = (
        'i_spannung_nervenkitzel',
        'i_liebe_romantik',
        'i_fantasy_sciencefiction',
        'i_fremdsprachiges_anderelaender',
        'i_reiseinvergangenezeiten',
        'i_wissensdurst',
        'i_schoenegeschichten',
        'i_haus_garten',
        'i_hobby_freizeit',
        'i_humorvolles',
        'i_spiel_spass',
    )
    return [flag for flag in interest_flags if getattr(row, flag) != 'f']

%time item_features = dataset.build_item_features((row.artikel_id, toitemfeatures(row)) for index, row in productdf.iterrows())

%time (interactions, weights) = dataset.build_interactions((row.customerid, row.productid) for index, row in df.iterrows())

print(repr(interactions))
    
print('Total number of unique customers         : %.0f' % len(uniqueCustomer))
print('Total number of unique products overall  : %.0f' % len(uniqueProducts))
print('Total number of unique products orders   : %.0f' % len(df['productid'].unique()))


Create the train/test split of the interactions

In [None]:
# Hold out 20% of the interactions for testing. The fixed seed makes the split,
# and therefore every metric computed from it, the same on each run.
(train, test) = random_train_test_split(interactions, test_percentage=0.2, random_state=numpy.random.RandomState(42))

Train the Implicit model

In [None]:
print('Training Implicit model')
implicitmodel = implicit.als.AlternatingLeastSquares(factors=32,use_gpu=False,num_threads=0,use_native=True)
%time implicitmodel.fit(train)

Train the LightFM model

In [None]:
print('Training LightFM model')
model = LightFM(loss='warp', no_components=32)
%time model.fit(train, item_features=item_features, epochs=100, num_threads=8)

In [None]:
# Write the trained LightFM model to disk.
# NOTE(review): joblib is imported from sklearn.externals in this notebook;
# recent scikit-learn releases no longer ship that module - confirm the pinned version.
joblib.dump(model, 'lightfmmodel.sav')
print('Model saved')

Evaluate the model

In [None]:
print('Evaluating model')

# AUC over every known interaction (training and held-out entries together).
aucscore = auc_score(model, interactions, num_threads=6, item_features=item_features).mean()
print('AUC Score:          %.2f.' % (aucscore))

# AUC on the interactions the model was fitted on.
aucscore_train = auc_score(model, train, num_threads=6, item_features=item_features).mean()
print('AUC Score train:    %.2f.' % (aucscore_train))

# AUC on the held-out interactions only; the two figures above both include
# training data and cannot show how well the model generalises.
aucscore_test = auc_score(model, test, num_threads=6, item_features=item_features).mean()
print('AUC Score test:     %.2f.' % (aucscore_test))

# NOTE(review): the test metrics do not pass train_interactions=train, so items a
# customer already bought in the training set still compete in the ranking.
# Passing it requires train and test to share no entries - confirm that repeated
# orders cannot put the same pair in both splits before enabling it.
train_precision = precision_at_k(model, train, k=5, num_threads=6, item_features=item_features).mean()
test_precision = precision_at_k(model, test, k=5, num_threads=6, item_features=item_features).mean()
print('Precision@k5: train %.2f.' % (train_precision))
print('Precision@k5: test  %.2f.' % (test_precision))

Sample predictions

In [None]:
def sample_recommendation(model, data, user_ids, item_features=None, top_n=10):
    """Print the highest-scoring items for each of the given users.

    Ids are translated through the mappings of the module-level ``dataset``.

    Parameters:
        model: fitted model exposing ``predict(user_index, item_indices, item_features=...)``.
        data: interaction matrix; only its shape is used, to get the number of items.
        user_ids: iterable of external user ids as they were registered in ``dataset``.
        item_features: item feature matrix forwarded to ``model.predict``. Pass the
            same matrix the model was trained with; without it the prediction is
            made without those features.
        top_n: maximum number of items printed per user.

    Raises:
        KeyError: if a user id is not present in the dataset's user mapping.
    """
    user2idx, _, item2idx, _ = dataset.mapping()

    # Reverse lookup: internal item index -> external item id.
    idx2item = {idx: item_id for item_id, idx in item2idx.items()}

    _, n_items = data.shape
    item_indices = numpy.arange(n_items)

    for user_id in user_ids:

        print('Recommentations for user %s' % user_id)

        scores = model.predict(user2idx[user_id], item_indices, item_features=item_features)

        # Stable sort on the negated scores: highest score first, ties keep
        # ascending item-index order. Slicing copes with fewer than top_n items.
        ranked = numpy.argsort(-scores, kind='mergesort')[:top_n]

        for item_idx in ranked:
            print('%s\t%2f' % (idx2item[item_idx], scores[item_idx]))

# Score with the same item features the LightFM model was trained on.
sample_recommendation(model, interactions, ['31460292'], item_features=item_features)