In [1]:
from os import path
import graphlab as gl
from datetime import datetime

# Load Data

This notebook uses the MovieLens 20M dataset (`ml-20m`), collected by the GroupLens Research Project at the University of Minnesota.
For more information, see http://grouplens.org/datasets/movielens/

In [2]:
# Directory holding the MovieLens CSV files, and the two files read below
data_dir = './dataset/ml-20m'
movies_csv = path.join(data_dir, 'movies.csv')
ratings_csv = path.join(data_dir, 'ratings.csv')

# Catalogue of movies to recommend (columns: movieId, title, genres)
items = gl.SFrame.read_csv(movies_csv)

# User/movie interactions (columns: userId, movieId, rating, timestamp)
actions = gl.SFrame.read_csv(ratings_csv)

[INFO] graphlab.cython.cy_server: GraphLab Create v2.1 started. Logging: /tmp/graphlab_server_1541519359.log
INFO:graphlab.cython.cy_server:GraphLab Create v2.1 started. Logging: /tmp/graphlab_server_1541519359.log


This non-commercial license of GraphLab Create for academic use is assigned to bernardo.ronquillo@gmail.com and will expire on May 11, 2019.
------------------------------------------------------


Inferred types from first 100 line(s) of file as 
column_type_hints=[int,str,str]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------
------------------------------------------------------


Inferred types from first 100 line(s) of file as 
column_type_hints=[int,int,float,int]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


# Prepare Data

In [3]:
# Count the ratings each movie received, ordered by that count
rating_counts = actions.groupby('movieId', gl.aggregate.COUNT)
rating_counts = rating_counts.sort('Count')

# Movies with five or fewer ratings are considered rare
rare_items = rating_counts[rating_counts['Count'] <= 5]
rare_movie_ids = rare_items['movieId']

# Drop the rare movies from both the catalogue and the interactions
items = items.filter_by(rare_movie_ids, 'movieId', exclude=True)
actions = actions.filter_by(rare_movie_ids, 'movieId', exclude=True)

# Convert the integer timestamp column to datetime values
# (presumably Unix epoch seconds -- confirm against the dataset README)
rating_times = actions['timestamp']
actions['timestamp'] = rating_times.astype(datetime)

In [4]:
# Extract year, title, and genre
def split_title(raw_title):
    """Split a raw movie title into ``(name, year)``.

    Titles are expected to look like ``'Toy Story (1995)'``. The previous
    fixed slices (``x[-5:-1]`` and ``x[:-7]``) produced a garbage year and
    chopped seven characters off the name whenever a title had trailing
    whitespace or no year suffix at all.

    Args:
        raw_title: Title string as read from the ``title`` column.

    Returns:
        A ``(name, year)`` tuple. ``year`` is the four-digit year as a string
        (the same representation the fixed slicing produced), or ``None``
        when the title does not end in ``(YYYY)``; in that case ``name`` is
        the whole title with surrounding whitespace removed.
    """
    text = raw_title.strip()
    has_year = (len(text) >= 6
                and text[-6] == '('
                and text[-1] == ')'
                and text[-5:-1].isdigit())
    if not has_year:
        return text, None
    # Drop the "(YYYY)" suffix and the whitespace separating it from the name.
    return text[:-6].rstrip(), text[-5:-1]


# The year must be derived before 'title' is overwritten with the bare name.
# dtype=str is given explicitly so that missing years (None) among the rows
# sampled for type inference cannot affect the column type.
items['year'] = items['title'].apply(lambda t: split_title(t)[1], dtype=str)
items['title'] = items['title'].apply(lambda t: split_title(t)[0], dtype=str)

# Genres arrive as one pipe-delimited string; turn them into a list
items['genres'] = items['genres'].apply(lambda g: g.split('|'))

# Train Recommender Model

In [5]:
# Hold out part of the interactions for validation, splitting by user
split = gl.recommender.util.random_split_by_user(actions,
                                                 user_id='userId',
                                                 item_id='movieId')
training_data, validation_data = split

# Fit a recommender on the training portion; no target column is passed
model = gl.recommender.create(training_data,
                              user_id='userId',
                              item_id='movieId')

# Explore and Evaluate the Model

In [6]:
# Load the extra metadata used by the interactive view
urls_csv = path.join(data_dir, 'movie_urls.csv')
urls = gl.SFrame.read_csv(urls_csv)

# Inner join on movieId: movies without a row in movie_urls.csv are dropped
items = items.join(urls, on='movieId', how='inner')

user_names_csv = path.join(data_dir, 'user_names.csv')
users = gl.SFrame.read_csv(user_names_csv)

------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[int,str]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------
------------------------------------------------------


Inferred types from first 100 line(s) of file as 
column_type_hints=[int,str]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


In [7]:
# Send GraphLab Canvas output to the browser target, using port 35000
gl.canvas.set_target('browser', port=35000)

In [8]:
# Build the interactive overview used to explore and evaluate recommendations.
# The options are gathered first so each one is easy to spot and tweak.
overview_options = {
    'validation_set': validation_data,  # held-out interactions
    'user_data': users,                 # per-user metadata
    'item_data': items,                 # per-movie metadata
    'item_name_column': 'title',
    'item_url_column': 'url',
}
view = model.views.overview(**overview_options)

In [10]:
# Bare last expression: the notebook renders the view object's repr as output
view

View object

URI: 		http://localhost:32212/view/297bdae4-407f-4911-8ec5-058f6e63d0e1
HTML: 		
<gl-model-overview 
headers="[&quot;Explore&quot;, &quot;Evaluate&quot;]" title="Recommender View"
            >
    <!--
        
<gl-model-description
    uri="http://localhost:32212/view/f0e912a8-dc8f-4b98-9dcb-f365c00e047c"
    api_key=""
/>
        
<gl-recommender-explore
    uri="http://localhost:32212/view/74f9eeba-48d1-448e-a993-9d32f3e54d44"
    api_key=""
/>
        
<gl-recommender-evaluate
    uri="http://localhost:32212/view/2a63bc20-5dc3-4301-8e33-28c3806fca24"
    api_key=""
/>
        
    -->
</gl-model-overview>
        

In [None]:
# Presumably opens the view in the configured Canvas target -- TODO confirm
view.show()