In [1]:
import pandas as pd
import numpy as np
from lightfm.data import Dataset
from lightfm import LightFM
from ast import literal_eval
import itertools
from lightfm.evaluation import auc_score

In [2]:
users = {'user_id': [0, 1, 2, 3], 'name': ['Alice', 'Bob', 'Carol', 'Dave']}
users = pd.DataFrame(data=users)
users

Unnamed: 0,user_id,name
0,0,Alice
1,1,Bob
2,2,Carol
3,3,Dave


In [3]:
movies = {'movie_id': [0, 1, 2, 3, 4], 
          'name': ['Love at last', 'Romance forever', 'Cute puppies of love', 'Nonstop car chases', 'Swords vs. karate'],
          'romance': [0.9, 1.0, 0.99, 0.1, 0], 
          'action': [0, 0.01, 0, 1.0, 0.9]}
movies = pd.DataFrame(data=movies)
movies

Unnamed: 0,movie_id,name,romance,action
0,0,Love at last,0.9,0.0
1,1,Romance forever,1.0,0.01
2,2,Cute puppies of love,0.99,0.0
3,3,Nonstop car chases,0.1,1.0
4,4,Swords vs. karate,0.0,0.9


In [4]:
rating = {'user_id':  [0,0,0,0,0,  1,1,1,1,1,  2,2,2,2,2,  3,3,3,3,3],
          'movie_id': [0,1,2,3,4,  0,1,2,3,4,  0,1,2,3,4,  0,1,2,3,4],
          'rating':   [5,5,np.nan,0,0, 5,np.nan,4,0,0, 0,np.nan,0,5,5, 0,0,np.nan,4,np.nan]}
rating = pd.DataFrame(data=rating)
rating = rating.dropna()
rating

Unnamed: 0,user_id,movie_id,rating
0,0,0,5.0
1,0,1,5.0
3,0,3,0.0
4,0,4,0.0
5,1,0,5.0
7,1,2,4.0
8,1,3,0.0
9,1,4,0.0
10,2,0,0.0
12,2,2,0.0


###### Create Dataset

In [5]:
from lightfm.data import Dataset
dataset = Dataset()
dataset.fit(rating['user_id'].unique(), rating['movie_id'].unique())

num_users, num_items = dataset.interactions_shape()
print('Num users: {}, num_items: {}.'.format(num_users, num_items))

Num users: 4, num_items: 5.


In [6]:
item_fs = []
col_names = ['romance']*len(movies.romance.unique()) + ['action']*len(movies.action.unique())
values = list(movies.romance.unique()) + list(movies.action.unique())
for n,v in zip(col_names, values):
    res = str(n)+ ":" +str(v)
    item_fs.append(res)
    
print(item_fs)
    
dataset.fit_partial(items=rating['movie_id'].unique(), item_features=item_fs)

['romance:0.9', 'romance:1.0', 'romance:0.99', 'romance:0.1', 'romance:0.0', 'action:0.0', 'action:0.01', 'action:1.0', 'action:0.9']


#### Building the interactions matrix

In [7]:
(interactions, weights) = dataset.build_interactions(((r['user_id'], r['movie_id'], r['rating']) for i,r in rating.iterrows()))
print(repr(interactions))

interactions.todense()

<4x5 sparse matrix of type '<class 'numpy.int32'>'
	with 15 stored elements in COOrdinate format>


matrix([[1, 1, 1, 1, 0],
        [1, 0, 1, 1, 1],
        [1, 0, 1, 1, 1],
        [1, 1, 1, 0, 0]], dtype=int32)

In [8]:
weights.todense()

matrix([[5., 5., 0., 0., 0.],
        [5., 0., 0., 0., 4.],
        [0., 0., 5., 5., 0.],
        [0., 0., 4., 0., 0.]], dtype=float32)

In [9]:
f_tuple = [(x['movie_id'], ['romance:'+str(x['romance']), 'action:'+str(x['action'])]) for i, x in movies.iterrows()]

item_features = dataset.build_item_features(f_tuple)
print(repr(item_features))

item_features.todense()

<5x14 sparse matrix of type '<class 'numpy.float32'>'
	with 15 stored elements in Compressed Sparse Row format>


matrix([[0.33333334, 0.        , 0.        , 0.        , 0.        ,
         0.33333334, 0.        , 0.        , 0.        , 0.        ,
         0.33333334, 0.        , 0.        , 0.        ],
        [0.        , 0.33333334, 0.        , 0.        , 0.        ,
         0.        , 0.33333334, 0.        , 0.        , 0.        ,
         0.        , 0.33333334, 0.        , 0.        ],
        [0.        , 0.        , 0.33333334, 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.33333334, 0.        ,
         0.        , 0.        , 0.33333334, 0.        ],
        [0.        , 0.        , 0.        , 0.33333334, 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.33333334,
         0.        , 0.        , 0.        , 0.33333334],
        [0.        , 0.        , 0.        , 0.        , 0.33333334,
         0.        , 0.        , 0.33333334, 0.        , 0.        ,
         0.33333334, 0.        , 0.        , 0.        ]], dtype=float32)

In [10]:
user_id_map, user_feature_map, item_id_map, item_feature_map = dataset.mapping()
item_feature_map

{0: 0,
 1: 1,
 3: 2,
 4: 3,
 2: 4,
 'romance:0.9': 5,
 'romance:1.0': 6,
 'romance:0.99': 7,
 'romance:0.1': 8,
 'romance:0.0': 9,
 'action:0.0': 10,
 'action:0.01': 11,
 'action:1.0': 12,
 'action:0.9': 13}

In [11]:
model = LightFM(loss='warp')
model.fit(interactions,
      item_features=item_features,
      epochs=10)

train_auc = auc_score(model,
                      interactions,
                      item_features=item_features
                     ).mean()
print('Hybrid training set AUC: %s' % train_auc)

Hybrid training set AUC: 0.8541667


##### predict for existing user

In [12]:
movie_names = movies['name'].values

def sample_recommendation(model, data, user_id):
    n_user, n_items = data.shape
    scores = model.predict(user_id, np.arange(n_items))
    print("Scores:")
    for i in scores:
        print(i)
    top_items_for_user = movie_names[np.argsort(-scores)]
    print("Recommended item: %s" % top_items_for_user[:2])
    
sample_recommendation(model, interactions, 2)

Scores:
-0.07903245
-0.6121537
-0.1378657
-0.25254738
-0.4651792
Recommended item: ['Love at last' 'Cute puppies of love']


In [13]:
sample_recommendation(model, interactions, 3)

Scores:
-0.11251749
-0.640653
-0.1696778
-0.29636925
-0.5068698
Recommended item: ['Love at last' 'Cute puppies of love']
