In [22]:
import numpy as np
import pandas as pd
import matplotlib
# Force matplotlib to not use any Xwindows backend.
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import math
import cPickle
import os
import json
import pickle
from lightfm import LightFM
from lightfm.datasets import fetch_movielens
from lightfm.evaluation import precision_at_k
%matplotlib inline
import random
random.seed(0)
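# NOTE(review): random.seed(0) seeds only Python's built-in `random` module;
# NumPy keeps a separate RNG, and LightFM presumably draws from its own
# `random_state` -- confirm whether those need seeding for reproducibility.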

In [23]:
# Fetch the MovieLens 100k dataset. min_rating=4.0 keeps ratings of
# 4 stars and above as positive interactions (not only 5-star ratings).
data = fetch_movielens(min_rating=4.0)
# Show the sparse-matrix summary of each split, train first, then test.
print("\n".join(repr(data[split_name]) for split_name in ('train', 'test')))

<943x1682 sparse matrix of type '<type 'numpy.int32'>'
	with 49906 stored elements in COOrdinate format>
<943x1682 sparse matrix of type '<type 'numpy.int32'>'
	with 5469 stored elements in COOrdinate format>


In [25]:
# Instantiate and train the model
model = LightFM(loss='warp')
%time model.fit(data['train'], epochs=40, num_threads=2)

# Evaluate the trained model
train_precision = precision_at_k(model, data['train'], k=5).mean()
test_precision = precision_at_k(model, data['test'], k=5).mean()
print test_precision

# save the classifier
stats = {"train_precision": str(train_precision),"test_precision":str(test_precision)}
print stats
model_filename = os.path.join(os.environ['OUTPUT_DIR'],'model.dat')
pickle.dump(model, open(model_filename, 'wb'))
stats_filename = os.path.join(os.environ['OUTPUT_DIR'],'stats.json')
with open(stats_filename, 'wb') as f:
    f.write(json.dumps(stats))


CPU times: user 2.24 s, sys: 4 ms, total: 2.25 s
Wall time: 1.14 s
0.0835118
{'train_precision': '0.573036', 'test_precision': '0.0835118'}


In [21]:
def sample_recommendation(model, data, user_ids):
    """Print up to three known positives and the three top-scored items per user.

    Parameters
    ----------
    model
        Object exposing ``predict(user_id, item_ids)`` that returns one score
        per requested item id (in this notebook, the fitted LightFM model).
    data
        Mapping with a sparse ``'train'`` interaction matrix (users x items)
        and an ``'item_labels'`` array indexable by item index.
    user_ids
        Iterable of row indices into ``data['train']``.

    Notes
    -----
    Items the user already interacted with are not filtered out, so a
    recommendation may repeat one of the known positives.
    """
    n_items = data['train'].shape[1]

    # Loop-invariant work, done once instead of once per user: CSR conversion
    # (needed for row slicing) and the full range of item ids to score.
    train_csr = data['train'].tocsr()
    all_item_ids = np.arange(n_items)
    item_labels = data['item_labels']

    for user_id in user_ids:
        # Column indices of the stored entries in this user's training row.
        known_positives = item_labels[train_csr[user_id].indices]

        scores = model.predict(user_id, all_item_ids)
        # Negate so argsort's ascending order puts the highest score first.
        top_items = item_labels[np.argsort(-scores)]

        print("User %s" % user_id)
        print("     Known positives:")

        for x in known_positives[:3]:
            print("        %s" % x)

        print("     Recommended:")

        for x in top_items[:3]:
            print("        %s" % x)

# Show sample recommendations for three arbitrary users.
# NOTE(review): this cell's execution count (21) precedes the training
# cell's (25); re-run it after training so the output reflects that model.
sample_recommendation(model, data, user_ids=[3, 25, 450])

User 3
     Known positives:
        Seven (Se7en) (1995)
        Contact (1997)
        Starship Troopers (1997)
     Recommended:
        Scream (1996)
        Starship Troopers (1997)
        Contact (1997)
User 25
     Known positives:
        Dead Man Walking (1995)
        Star Wars (1977)
        Fargo (1996)
     Recommended:
        English Patient, The (1996)
        Fargo (1996)
        Contact (1997)
User 450
     Known positives:
        Contact (1997)
        George of the Jungle (1997)
        Event Horizon (1997)
     Recommended:
        Scream (1996)
        Kiss the Girls (1997)
        I Know What You Did Last Summer (1997)
