In [1]:
import pandas as pd
import numpy as np
from lightfm import LightFM
from scipy import sparse
import warnings
warnings.filterwarnings('ignore')



In [2]:
# Read the two input tables from CSV files in the current working directory.
# `movies` supplies the movieId and title columns used for the title lookup below;
# `ratings` is joined to it on movieId in the next cell.
movies = pd.read_csv('movies.csv')
ratings = pd.read_csv('ratings.csv')

In [3]:
# Attach every rating to its movie row (DataFrame.join is a left join by default),
# renumber the rows, then discard rows with missing values -- e.g. movies that
# have no rating at all. Built as one chain so re-running the cell is idempotent.
data = (
    movies
    .join(ratings.set_index('movieId'), on='movieId')
    .reset_index(drop=True)
    .dropna()
)

data.head()

Unnamed: 0,movieId,title,genres,userId,rating,timestamp
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,1.0,4.0,964982700.0
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,5.0,4.0,847435000.0
2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,7.0,4.5,1106636000.0
3,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,15.0,2.5,1510578000.0
4,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,17.0,4.5,1305696000.0


In [4]:
def interaction_matrix(df, user_col, item_col, rating_col, norm= False, threshold = None):
    """Pivot a long ratings table into a user x item interaction matrix.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format table with one row per rating.
    user_col, item_col, rating_col : str
        Names of the columns holding the user id, the item id and the rating.
    norm : bool, default False
        When true, binarise the matrix: 1 where the summed rating is strictly
        greater than ``threshold``, 0 otherwise.
    threshold : number, optional
        Cut-off used when ``norm`` is true. Ignored when ``norm`` is false.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``user_col`` with one column per distinct item. Each cell is
        the sum of the ratings for that (user, item) pair; pairs that never
        occur are 0. With ``norm=True`` the cells are int64 zeros and ones.

    Raises
    ------
    TypeError
        If ``norm`` is true but no ``threshold`` was supplied.
    """
    # Fail before doing any work: comparing numbers with None raises TypeError
    # anyway, but from deep inside pandas and with an unhelpful message.
    if norm and threshold is None:
        raise TypeError("threshold must be a number when norm=True")

    # unstack() already leaves user_col as the index, so no
    # reset_index()/set_index() round trip is needed.
    interactions = (
        df.groupby([user_col, item_col])[rating_col]
        .sum()
        .unstack()
        .fillna(0)
    )

    if norm:
        # Vectorised comparison instead of a per-cell Python lambda through the
        # deprecated DataFrame.applymap.
        interactions = (interactions > threshold).astype('int64')
    return interactions

In [5]:
def func_user_dict(interactions):
    """Map every index label of ``interactions`` to its 0-based row position.

    Parameters
    ----------
    interactions : pandas.DataFrame
        Interaction matrix whose index holds the user ids.

    Returns
    -------
    dict
        ``{index label: row position}``. If a label occurs more than once, the
        position of its last occurrence is kept.
    """
    return {label: position for position, label in enumerate(interactions.index)}

In [6]:
def func_item_dict(df, id_col, name_col):
    """Build an ``{item id: item name}`` lookup from two columns of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing the id and name columns.
    id_col : str
        Column whose values become the dictionary keys.
    name_col : str
        Column whose values become the dictionary values.

    Returns
    -------
    dict
        One entry per distinct id; if an id is repeated, the last row wins.
    """
    # Pair the two columns positionally rather than via df.loc[i, ...] with
    # i in range(len(df)): label-based lookup only works when the frame still
    # has the default 0..n-1 index and fails (or reads the wrong row) on a
    # filtered or re-indexed frame.
    return dict(zip(df[id_col], df[name_col]))

In [7]:
def func_model(interactions, n_components=30, loss='warp', k=15, epoch=30, random_state=None):
    """Fit a LightFM model on a dense user x item interaction matrix.

    Parameters
    ----------
    interactions : pandas.DataFrame
        Interaction matrix; its ``.values`` are converted to a CSR sparse matrix.
    n_components : int, default 30
        Passed to LightFM as ``no_components``.
    loss : str, default 'warp'
        Passed to LightFM as ``loss``.
    k : int, default 15
        Passed to LightFM as ``k``.
    epoch : int, default 30
        Number of epochs passed to ``fit``.
    random_state : int, numpy.random.RandomState or None, default None
        Seed forwarded to LightFM. Training is stochastic, so pass a fixed
        value to get the same model on every run; ``None`` keeps the previous
        unseeded behaviour.

    Returns
    -------
    lightfm.LightFM
        The fitted model.
    """
    x = sparse.csr_matrix(interactions.values)
    model = LightFM(no_components=n_components, loss=loss, k=k,
                    random_state=random_state)
    model.fit(x, epochs=epoch)
    return model

In [8]:
def recommendation_for_user(model, interactions, user_id, user_dict,
                            item_dict, threshold = 0, nrec_items = 10, show = True):
    """Rank the items a user has not interacted with and optionally print them.

    Parameters
    ----------
    model : object
        Fitted model exposing ``predict(user_index, item_indices)``.
    interactions : pandas.DataFrame
        User x item matrix; its index holds user ids, its columns item ids.
    user_id : hashable
        Index label of the user in ``interactions``.
    user_dict : dict
        Maps user ids to the row positions the model was trained with.
    item_dict : dict
        Maps item ids to display names. Only consulted when ``show`` is true.
    threshold : number, default 0
        Items whose interaction value for this user is strictly greater than
        ``threshold`` count as known and are excluded from the result; items
        at or below it stay eligible.
    nrec_items : int, default 10
        Maximum number of recommendations to return.
    show : bool, default True
        When true, print up to 10 known items and the recommendations.

    Returns
    -------
    list
        Up to ``nrec_items`` item ids (column labels), best score first.

    Raises
    ------
    KeyError
        If ``user_id`` is missing from ``user_dict`` or ``interactions``, or if
        an item that has to be printed is missing from ``item_dict``.
    """
    n_items = interactions.shape[1]
    user_x = user_dict[user_id]

    # Score every item column for this user and order the column labels by
    # descending predicted score.
    predicted = pd.Series(model.predict(user_x, np.arange(n_items)),
                          index=interactions.columns)
    ranked_items = predicted.sort_values(ascending=False).index.tolist()

    # Items the user already rated above the threshold, highest item id first.
    user_row = interactions.loc[user_id, :]
    known_items = user_row[user_row > threshold].index \
        .sort_values(ascending=False).tolist()

    # Set membership keeps the filter linear in the number of items instead of
    # scanning the known-items list once per candidate.
    known_lookup = set(known_items)
    return_score_list = [item for item in ranked_items
                         if item not in known_lookup][:nrec_items]

    if show:
        print('You like:')
        for rank, item in enumerate(known_items[:10], start=1):
            print('{}- {}'.format(rank, item_dict[item]))

        print('\nRecommendations:')
        for rank, item in enumerate(return_score_list, start=1):
            print('{}- {}'.format(rank, item_dict[item]))
    return return_score_list

In [9]:
# Build the user x movie matrix from the joined table: each cell is the summed
# rating for that (userId, movieId) pair, 0 where the user never rated the movie.
# norm is left at its default, so the values are not binarised.
interactions = interaction_matrix(df = data, user_col = 'userId', 
                                         item_col = 'movieId', rating_col = 'rating')

interactions.head()

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1.0,4.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [10]:
# user_dict: user id -> row position in `interactions`, i.e. the user index the model sees.
# movies_dict: movieId -> title, used to print readable names for item ids.
user_dict = func_user_dict(interactions=interactions)
movies_dict = func_item_dict(df = movies, id_col = 'movieId', name_col = 'title')

In [11]:
# Fit the model on the full interaction matrix; k is left at func_model's default.
model = func_model(interactions = interactions, n_components = 30, loss = 'warp', epoch = 30)

In [12]:
# User to generate recommendations for. The interactions index shows float
# labels (1.0, 2.0, ...); the integer 2 still matches because 2 == 2.0 and both
# hash the same, so the dict and index lookups succeed.
USER_ID = 2

In [13]:
# threshold = 4: only movies this user rated strictly above 4 are listed under
# "You like" and excluded from the recommendations; movies rated 4 or lower
# remain eligible. rec_list receives the recommended movieIds.
rec_list = recommendation_for_user(model = model, interactions = interactions, user_id = USER_ID, 
                                      user_dict = user_dict, item_dict = movies_dict, threshold = 4,
                                      nrec_items = 10, show = True)

You like:
1- The Jinx: The Life and Deaths of Robert Durst (2015)
2- Mad Max: Fury Road (2015)
3- Wolf of Wall Street, The (2013)
4- Warrior (2011)
5- Inside Job (2010)
6- Town, The (2010)
7- Inglourious Basterds (2009)
8- Step Brothers (2008)
9- Dark Knight, The (2008)
10- Good Will Hunting (1997)

Recommendations:
1- Inception (2010)
2- Django Unchained (2012)
3- Interstellar (2014)
4- Dark Knight Rises, The (2012)
5- Shutter Island (2010)
6- Fight Club (1999)
7- Hangover, The (2009)
8- Shawshank Redemption, The (1994)
9- Up (2009)
10- Avengers, The (2012)
