In [3]:
import graphlab as gl

# Load Data

This notebook uses the MovieLens 20M dataset (`ml-20m`), collected by the GroupLens Research Project at the University of Minnesota.
For more information, see http://grouplens.org/datasets/movielens/

In [4]:
# Both tables are read from CSV with the same SFrame reader.
load_csv = gl.SFrame.read_csv

# Catalogue of movies being recommended. Later cells use its movieId, title
# and genres columns, and (presumably) a 'path' column for the explore view.
items = load_csv('../../datasets/ml-20m/items.csv')
# User actions on those movies: userId, movieId, rating, timestamp
actions = load_csv('../../datasets/ml-20m/ratings.csv')

------------------------------------------------------
Inferred types from first line of file as 
column_type_hints=[int,str,str,int,int,str]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------
------------------------------------------------------


Inferred types from first line of file as 
column_type_hints=[int,int,float,int]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


# Prepare Data

In [5]:
# Drop rarely rated movies from both tables.
# Step 1: number of actions recorded for each movie, sorted by that count.
rating_counts = actions.groupby('movieId', gl.aggregate.COUNT).sort('Count')
# Step 2: a movie counts as "rare" when it has 5 or fewer actions.
rare_items = rating_counts[rating_counts['Count'] <= 5]
rare_movie_ids = rare_items['movieId']
# Step 3: exclude those movie ids from the catalogue and from the actions.
items = items.filter_by(rare_movie_ids, 'movieId', exclude=True)
actions = actions.filter_by(rare_movie_ids, 'movieId', exclude=True)

In [6]:
# Extract year, title, and genre
def _split_title_year(raw_title):
    """Split a 'Name (YYYY)' title into its name and year parts.

    Returns ``(name, 'YYYY')`` when the whitespace-stripped title ends in a
    parenthesised four-digit year. Otherwise returns ``(stripped title, None)``
    so that titles without a year are kept whole instead of being truncated
    by fixed-position slicing.
    """
    text = raw_title.strip()
    suffix = text[-6:]
    has_year = (
        len(suffix) == 6
        and suffix[0] == '('
        and suffix[-1] == ')'
        and suffix[1:5].isdigit()
    )
    if not has_year:
        return text, None
    return text[:-6].rstrip(), suffix[1:5]


# Capture the original column first so that both derived columns are computed
# from the unmodified titles, regardless of assignment order.
raw_titles = items['title']
# Year stays a string (as before); dtype is given explicitly because some
# rows may yield None when the title carries no year.
items['year'] = raw_titles.apply(lambda t: _split_title_year(t)[1], dtype=str)
items['title'] = raw_titles.apply(lambda t: _split_title_year(t)[0], dtype=str)
# Genres arrive as a single '|'-separated string; turn them into a list.
items['genres'] = items['genres'].apply(lambda g: g.split('|'))

# Train Recommender Model

In [7]:
# Column names identifying the user and the item in `actions`; shared by the
# split and by model creation so the two calls cannot drift apart.
id_columns = dict(user_id='userId', item_id='movieId')

# Split the actions into training and validation sets by user, then let
# GraphLab create a recommender from the training set. No target column is
# passed to either call.
train, valid = gl.recommender.util.random_split_by_user(actions, **id_columns)
model = gl.recommender.create(train, **id_columns)

# Explore the Model

In [8]:
# Interactively explore recommendations
# Keyword arguments naming the `items` columns that hold each movie's title
# and path (the 'path' column is assumed to exist in items.csv).
explore_columns = {'item_title_column': 'title', 'item_path_column': 'path'}
v1 = model.views.explore(train, items, **explore_columns)
v1.show()

View object

URI: 		http://localhost:32212/view/8c0680ed-a838-40c3-88e6-c9dba07bdb81
HTML: 		
<gl-recommender-explore
    uri="http://localhost:32212/view/1894b4ab-fe20-4b5a-8fe3-919a97aa99d6"
    api_key=""
/>
        

In [9]:
# Visualize model performance
# A popularity recommender created from the same training set is the
# baseline passed to the performance view alongside the train/valid split.
baseline_columns = dict(user_id='userId', item_id='movieId')
baseline = gl.popularity_recommender.create(train, **baseline_columns)
v2 = model.views.performance(train, valid, baseline)
v2.show()

View object

URI: 		http://localhost:32212/view/89151254-640c-44f2-9d50-8891b716b2a5
HTML: 		
<gl-recommender-performance
    uri="http://localhost:32212/view/47ec17da-668e-47e3-bb23-2bddeb68c0ec"
    api_key=""
/>
        

