Recommendation system of the movies for the user id we give based on the input library. The details for the library used and code are provided at the bottom.

In [3]:
import numpy as np
from lightfm import LightFM


In [4]:
from lightfm.datasets import fetch_movielens

In [5]:
#Let's grab the data
data = fetch_movielens(min_rating = 3.5)
#3.5 rating not bad movie!!!

In [6]:
#The rating enables us to collect data only above 3.5 rating and stores it
model = LightFM(loss='warp')
#WARP stands for Weighted Approximate Rank Pairwise
#Its a hybrid system which uses a combination of collabrative and content based recommendation

In [7]:
model.fit(data['train'], epochs=30, num_threads=2)
#epochs is number of runs
#num_threads is number of parallel computations

<lightfm.lightfm.LightFM at 0xb09ee48>

In [9]:
#evaluate the model
from lightfm.evaluation import precision_at_k

In [10]:
test_precision = precision_at_k(model, data['test'], k=3.5).mean()

In [11]:
test_precision

0.099724688

In [18]:
def samp_rec(model,data,user_ids):
    #number of users and movies in training data
    n_users,n_movies = data['train'].shape
    
    for user_id in user_ids:
        #movies they like
        positives = data['item_labels'][data['train'].tocsr()[user_id].indices]
        
        #movies they may like
        score = model.predict(user_id,np.arange(n_movies))
        
        #rank them in order
        top_movies = data['item_labels'][np.argsort(-score)]
        
        #lets print them
        print("User %s" %user_id)
        print("    positives:")
        
        #only top 3
        for x in positives[:3]:
            print("     %s" % x )
            
        print("      Recommended:")
        
        for x in top_movies[:3]:
            print("     %s" % x)
        
        

In [21]:
#let's try printing them
samp_rec(model,data,[18])

User 18
    positives:
     Babe (1995)
     Groundhog Day (1993)
     M*A*S*H (1970)
      Recommended:
     Fargo (1996)
     Star Wars (1977)
     Godfather, The (1972)


In [22]:
samp_rec(model,data,[18,200,45])

User 18
    positives:
     Babe (1995)
     Groundhog Day (1993)
     M*A*S*H (1970)
      Recommended:
     Fargo (1996)
     Star Wars (1977)
     Godfather, The (1972)
User 200
    positives:
     Get Shorty (1995)
     Seven (Se7en) (1995)
     Usual Suspects, The (1995)
      Recommended:
     Fargo (1996)
     Godfather, The (1972)
     L.A. Confidential (1997)
User 45
    positives:
     Twelve Monkeys (1995)
     Star Wars (1977)
     Welcome to the Dollhouse (1995)
      Recommended:
     Contact (1997)
     English Patient, The (1996)
     Air Force One (1997)


In [23]:
print(repr(data['train']))

<943x1682 sparse matrix of type '<type 'numpy.float32'>'
	with 49906 stored elements in COOrdinate format>


The project is based on the lightFM mentioned below:
https://github.com/lyst/lightfm

It also uses parts of code from Siraj Raval!!!