In [2]:
import pandas as pd
from sklearn.decomposition import NMF
import numpy as np
import pickle

In [3]:
# import clean dataset
df = pd.read_csv('./data/ml-latest-small/dev_ds_ratings_names_uniqueids.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,userId,movieId_x,rating,timestamp,movieId_unique,movieId_y,title,genres
0,0,1,1,4.0,964982703,1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,1,1,3,4.0,964981247,3,3,Grumpier Old Men (1995),Comedy|Romance
2,2,1,6,4.0,964982224,6,6,Heat (1995),Action|Crime|Thriller
3,3,1,47,5.0,964983815,47,47,Seven (a.k.a. Se7en) (1995),Mystery|Thriller
4,4,1,50,5.0,964982931,50,50,"Usual Suspects, The (1995)",Crime|Mystery|Thriller


In [4]:
# import model
# load the model from disk
m = pickle.load(open('NMF_model.sav', 'rb'))

In [5]:
# To map movie names to movie ID’s, construct a {name: id} dictionary
title_dict = dict(zip(df.title, df.movieId_unique))
len(title_dict)

9719

#### Create user input

In [7]:
title_list = df['title'].to_list()

In [10]:
# vals = []
# user_mov_1 = input('Enter a movie that you liked:')
# user_mov_2 = input('Enter a 2nd movie that you liked:')
# user_mov_3 = input('Enter a 3rd movie that you liked:')

# vals = [user_mov_1, user_mov_2, user_mov_3]


userinput_titles = ['Titanic', 'Toy Story', 'Star Wars']

from fuzzywuzzy import process

choices = []
for name in userinput:
    title_list = df['title'].to_list()
    selection = process.extractOne(name, title_list)
    choices.append(selection[0])

### use fuzzywuzzy to allow not exact movie title input from user
#from fuzzywuzzy import process
#choices = df['title'].to_list()
#process.extractOne("Toy Story", choices)

In [11]:
choices

['Titanic (1997)',
 'Toy Story (1995)',
 'Star Wars: Episode IV - A New Hope (1977)']

In [13]:
keys = [title_dict.get(key) for key in choices]
keys, choices

([1721, 1, 260],
 ['Titanic (1997)',
  'Toy Story (1995)',
  'Star Wars: Episode IV - A New Hope (1977)'])

In [35]:
userinput_ratings = [5, 3, 2]

In [36]:
# create ratings_list
#ratings_5 = [5 for y in keys]
# create small dict
d_fill = dict(zip(keys, userinput_ratings))
d_fill

{1721: 5, 1: 3, 260: 2}

In [15]:
### transform user input to user movie vector (for model input)

In [7]:
# create vector for user with zeros (missing values = 0 in this case)
dict_new_user = (dict.fromkeys(df.movieId_unique,0))

NameError: name 'df' is not defined

In [49]:
# update the values with the rated movie ids 
dict_new_user.update(d_fill)

# look at the first entries
first5pairs = {k: dict_new_user[k] for k in list(dict_new_user)[:6]}
first5pairs

{1: 3, 3: 0, 6: 0, 47: 0, 50: 0, 70: 0}

In [43]:
# transform into an array/ vector of the values
user_arr = (np.array(list(dict_new_user.values()))).reshape(1,9719)
# reshape
#user_arr = user_arr.reshape(1,9719)
user_arr

array([[5, 0, 0, ..., 0, 0, 0]])

### Making a prediction based on new user input

Use the NMF model to produce recommendations for one user.

- To map movie names to movie ID’s, construct a {name: id} dictionary
- To deal with small differences in the names, the fuzzywuzzy package is quite useful
- Create an vector of three movies the user likes. Set these to 5 and all others to zero.

#### Prediction

In [22]:
Q = m.components_

In [23]:
#Prediction step 1 - generate extra a user_P
user_P = m.transform(user_arr)
user_P

array([[0.        , 0.        , 0.        , 0.        , 0.00067616,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.00523679, 0.01188896,
        0.        , 0.        , 0.        , 0.        , 0.        ]])

In [37]:
#new user R - reconstruct R but for this new user only
# (column of first matrix = row of second matrix)
# get rid of one dimension of the array
user_R = (np.dot(user_P,Q))[0]
user_R

array([0.02116956, 0.00881316, 0.00496184, ..., 0.00125942, 0.00125942,
       0.00118928])

In [27]:
# get the corresponding array of movie titles
df.title.values

array(['Toy Story (1995)', 'Grumpier Old Men (1995)', 'Heat (1995)', ...,
       'Get Out (2017)', 'Logan (2017)', 'The Fate of the Furious (2017)'],
      dtype=object)

In [41]:
# zip into tuples of rating and film title 
# remove the first three ones (the films that the user has already seen):
rec = (list(zip(user_R,df.title.values)))[3:]

In [33]:
sorted_top_3_rec = sorted(rec, key = lambda x: x[0], reverse=True)[:3]
sorted_top_3_rec

[(0.060925453237519196, 'Exit to Eden (1994)'),
 (0.058325588824099654, 'Ghost World (2001)'),
 (0.05747658047827649, 'Payback (1999)')]

In [34]:
# list comprehension to get the top 5 film titles for the current user
[x[1] for x in sorted_top_3_rec]

['Exit to Eden (1994)', 'Ghost World (2001)', 'Payback (1999)']

---

### Function:

In [8]:
import pandas as pd
from sklearn.decomposition import NMF
import numpy as np
from fuzzywuzzy import process
import pickle



In [11]:
def get_NMF_recommendations(title1, title2, title3, rat1, rat2, rat3):
    
    """ Function that outputs 3 movie recommendations based on the user input 
    of 3 films that they have watched and the user's rating of these films"""
    
    # load df containing the movie names 
    df = pd.read_csv('./data/ml-latest-small/dev_ds_ratings_names_uniqueids.csv')
    
    # load pretrained model from disk
    m = pickle.load(open('NMF_model.sav', 'rb'))
    
    # construct title dict 
    title_dict = dict(zip(df.title, df.movieId_unique))
    
    # get title list
    title_list = df['title'].to_list()

    # USER INPUT PROCESSING:
    choices = []
    for title_fuzz in [title1, title2, title3]:
        selection = process.extractOne(title_fuzz, title_list)
        choices.append(selection[0])
    
    keys = [title_dict.get(key) for key in choices]
    
    # create ratings dict
    d_fill = dict(zip(keys, [rat1, rat2, rat3]))
    
    # create dictionary for user
    dict_new_user = dict.fromkeys(df.movieId_unique,0)
    dict_new_user.update(d_fill)
    
    # transform into an array/ vector of the values and reshape
    user_arr = (np.array(list(dict_new_user.values()))).reshape(1,9719)
        
    #generate user_profile via nmf.transform(user_array)
    user_P = m.transform(user_arr)
    
    
    # PREDICTION:
    Q = m.components_
    #constract ratings matrix for this user
    # np.dot(user_profile, nmf.components_)
    user_R = (np.dot(user_P,Q))[0]
    
    # zip into tuples of rating and film title 
    # remove the first three ones (the films that the user has already seen):
    recs = list(zip(user_R,df.title.values))
    
    # remove the first three ones (the films that the user has already seen):
    rec = (list(zip(user_R,df.title.values)))[3:]
    
    # sort by rating
    sorted_top_3_rec = sorted(rec, key = lambda x: x[0], reverse=True)[:3]

    # return only movie names of the tuples
    return [x[1] for x in sorted_top_3_rec]

In [12]:
result = get_NMF_recommendations('Titanic', 'Toy Story', 'Star Wars', 3, 4, 5)

In [13]:
result

['Payback (1999)', 'Exit to Eden (1994)', 'The Drop (2014)']

---