In [22]:
import numpy as np
from lightfm.datasets import fetch_movielens
from lightfm import LightFM

In [23]:
# Fetch the MovieLens dataset.
# NOTE(review): min_rating=4.0 presumably keeps only ratings of 4.0 and above
# as interactions -- confirm against the lightfm fetch_movielens docs.
data = fetch_movielens(min_rating=4.0)

In [24]:
# Inspect the returned dictionary: its keys and a summary of each entry.
print(data)

{'train': <943x1682 sparse matrix of type '<class 'numpy.int32'>'
	with 49906 stored elements in COOrdinate format>, 'test': <943x1682 sparse matrix of type '<class 'numpy.int32'>'
	with 5469 stored elements in COOrdinate format>, 'item_features': <1682x1682 sparse matrix of type '<class 'numpy.float32'>'
	with 1682 stored elements in Compressed Sparse Row format>, 'item_feature_labels': array(['Toy Story (1995)', 'GoldenEye (1995)', 'Four Rooms (1995)', ...,
       'Sliding Doors (1998)', 'You So Crazy (1994)',
       'Scream of Stone (Schrei aus Stein) (1991)'], dtype=object), 'item_labels': array(['Toy Story (1995)', 'GoldenEye (1995)', 'Four Rooms (1995)', ...,
       'Sliding Doors (1998)', 'You So Crazy (1994)',
       'Scream of Stone (Schrei aus Stein) (1991)'], dtype=object)}


In [25]:
# fetch_movielens returns the dataset already split into training and testing
# interaction matrices; show the summary (shape, dtype, stored elements) of each.
print(repr(data['train']))
print(repr(data['test']))
#print(data['train'])

<943x1682 sparse matrix of type '<class 'numpy.int32'>'
	with 49906 stored elements in COOrdinate format>
<943x1682 sparse matrix of type '<class 'numpy.int32'>'
	with 5469 stored elements in COOrdinate format>


In [26]:
#hence we see that our training data stores
#roughly 9 times as many interactions as our testing data (49906 vs 5469)

In [27]:
# Create the model with the WARP (Weighted Approximate-Rank Pairwise) loss.
# NOTE: choosing the loss does not by itself make the model hybrid; the fit
# call in this notebook passes only the training interaction matrix and no
# user or item features.
model = LightFM(loss='warp')

In [28]:
# Train the model on the training interactions for 30 epochs.
# num_threads is the number of parallel computations we want to use.
model.fit(data['train'],epochs=30,num_threads=2)

<lightfm.lightfm.LightFM at 0x7ff113307a90>

In [29]:
# Having fit the model, we will generate recommendations from it.
# Recommendations are reported by title, so first look at the array of
# item labels that the titles are read from.
print(data['item_labels'])

['Toy Story (1995)' 'GoldenEye (1995)' 'Four Rooms (1995)' ...
 'Sliding Doors (1998)' 'You So Crazy (1994)'
 'Scream of Stone (Schrei aus Stein) (1991)']


In [30]:
def sample_rec(model, data, user_ids, n_show=3):
    """Print known positives and top-scored recommendations for each user.

    Parameters
    ----------
    model
        Fitted model exposing ``predict(user_id, item_ids)`` that returns one
        score per requested item.
    data : dict
        Must contain ``'train'`` (a sparse user-by-item interaction matrix
        supporting ``.shape`` and ``.tocsr()``) and ``'item_labels'`` (an
        array of item names that can be indexed by an integer array).
    user_ids : iterable of int
        Row indices (into ``data['train']``) of the users to report on.
    n_show : int, optional
        How many known positives and how many recommendations to print per
        user. Defaults to 3.

    Returns
    -------
    None
        The report is written to standard output.
    """
    n_items = data['train'].shape[1]
    item_ids = np.arange(n_items)
    item_labels = data['item_labels']
    # Convert once up front: the CSR form is the same for every user, so
    # converting inside the loop would repeat the work per user.
    train_csr = data['train'].tocsr()

    for user_id in user_ids:
        # Column indices stored in this user's row are the items the user
        # has an interaction with in the training matrix.
        known_positives = item_labels[train_csr[user_id].indices]

        scores = model.predict(user_id, item_ids)
        # Negate so argsort's ascending order ranks the highest scores first.
        top_items = item_labels[np.argsort(-scores)[:n_show]]

        print("User %s" % user_id)
        print("     Known positives:")
        for label in known_positives[:n_show]:
            print("        %s" % label)

        print("     Recommended:")
        for label in top_items:
            print("        %s" % label)

In [31]:
# Print sample recommendations for the users at row indices 4, 20 and 390
# of the training interaction matrix.
sample_rec(model, data, [4, 20, 390])

User 4
     Known positives:
        Rumble in the Bronx (1995)
        Batman Forever (1995)
        To Wong Foo, Thanks for Everything! Julie Newmar (1995)
     Recommended:
        Empire Strikes Back, The (1980)
        Raiders of the Lost Ark (1981)
        Princess Bride, The (1987)
User 20
     Known positives:
        Toy Story (1995)
        Twelve Monkeys (1995)
        Dead Man Walking (1995)
     Recommended:
        Scream (1996)
        Godfather, The (1972)
        Heat (1995)
User 390
     Known positives:
        Dead Man Walking (1995)
        Usual Suspects, The (1995)
        Braveheart (1995)
     Recommended:
        One Flew Over the Cuckoo's Nest (1975)
        Fargo (1996)
        Silence of the Lambs, The (1991)
