In [2]:
! pip install turicreate

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting turicreate
  Downloading turicreate-6.4.1-cp37-cp37m-manylinux1_x86_64.whl (92.0 MB)
[K     |████████████████████████████████| 92.0 MB 1.1 MB/s 
Collecting tensorflow<2.1.0,>=2.0.0
  Downloading tensorflow-2.0.4-cp37-cp37m-manylinux2010_x86_64.whl (86.4 MB)
[K     |████████████████████████████████| 86.4 MB 1.1 MB/s 
Collecting coremltools==3.3
  Downloading coremltools-3.3-cp37-none-manylinux1_x86_64.whl (3.5 MB)
[K     |████████████████████████████████| 3.5 MB 40.9 MB/s 
Collecting prettytable==0.7.2
  Downloading prettytable-0.7.2.zip (28 kB)
Collecting resampy==0.2.1
  Downloading resampy-0.2.1.tar.gz (322 kB)
[K     |████████████████████████████████| 322 kB 76.4 MB/s 
Collecting numba<0.51.0
  Downloading numba-0.50.1-cp37-cp37m-manylinux2014_x86_64.whl (3.6 MB)
[K     |████████████████████████████████| 3.6 MB 50.8 MB/s 
Collecting llvmlite<0.34,>=0.33.0.dev0
  Downloa

In [1]:
import pandas as pd

movies = pd.read_csv('./movie.csv')
ratings = pd.read_csv('./rating.csv')
full_data = ratings.merge(movies, how='left', on='movieId')
full_data.to_csv('./clean_ratings.csv')

In [2]:
import turicreate as tc

data = tc.SFrame.read_csv('clean_ratings.csv')
train, test = tc.recommender.util.random_split_by_user(data, 'userId', 'title')

------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[int,int,int,float,str,str,str]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


In [3]:

model = tc.recommender.create(train, 'userId', 'title', target='rating')

# Recommendations for all users
results = model.recommend()

In [4]:
recommendations = model.recommend(users=[1, 30, 138491])
recommendations.print_rows(num_rows=30, num_columns=4)

+--------+-------------------------------+--------------------+------+
| userId |             title             |       score        | rank |
+--------+-------------------------------+--------------------+------+
|   1    |   Princess Bride, The (1987)  | 4.181172024264837  |  1   |
|   1    |    Wizard of Oz, The (1939)   | 4.157594498053099  |  2   |
|   1    |   Blues Brothers, The (1980)  |  4.08572193588307  |  3   |
|   1    |         Brazil (1985)         | 4.0813398918061505 |  4   |
|   1    |   Clockwork Orange, A (1971)  | 4.079682390943075  |  5   |
|   1    |      Trainspotting (1996)     | 4.078340228453184  |  6   |
|   1    |        Outbreak (1995)        | 4.072718497171904  |  7   |
|   1    |     Thelma & Louise (1991)    | 4.0295099725871335 |  8   |
|   1    |  Being John Malkovich (1999)  | 4.023838992968107  |  9   |
|   1    |      Citizen Kane (1941)      | 4.021632905855681  |  10  |
|   30   | Twelve Monkeys (a.k.a. 12 ... | 4.539844851747061  |  1   |
|   30

In [5]:
# Recommendations for new users
new_user = model.recommend([139000])

# New user and limiting number of recommendations
lim_3 = model.recommend(users=[139000], k=3)

# Include items seen during training
with_seen = model.recommend(users=[1], exclude_known=False)

In [6]:
# Save model
model.save('movie_recs.model')

# Reload model
model = tc.load_model('movie_recs.model')

In [7]:
# Predict the rating a user would give an item
f_model = tc.factorization_recommender.create(train, user_id='userId', item_id='title', target='rating')
f_results = f_model.recommend(users=[1, 30, 138491], k=3)


# Predict how likely it is that a user would rate the movie highly
# Absolute values of predicted scores may not match actual ratings
s_model = tc.item_similarity_recommender.create(train, 'userId', 'title', target='rating')
s_results = s_model.recommend(users=[1, 30, 138491], k=3)

In [10]:
s_results

userId,title,score,rank
1,"Matrix, The (1999)",0.0846668144206067,1
1,Indiana Jones and the Temple of Doom (1984) ...,0.0841276979112958,2
1,Groundhog Day (1993),0.0796128382215966,3
30,"Fifth Element, The (1997)",0.089690129603109,1
30,Aliens (1986),0.0877993683661184,2
30,Star Wars: Episode I - The Phantom Menace (1 ...,0.0873225004442276,3
138491,"Silence of the Lambs, The (1991) ...",0.3210084784030914,1
138491,Terminator 2: Judgment Day (1991) ...,0.3124572312831878,2
138491,Star Wars: Episode V - The Empire Strikes Back ...,0.30881143450737,3


In [8]:

rf_model = tc.ranking_factorization_recommender.create(train, 'userId', 'title', target='rating')
rf_results = rf_model.recommend(users=[1, 30, 138491], k=3)

In [11]:
rf_results

userId,title,score,rank
1,Groundhog Day (1993),4.417391614794634,1
1,True Lies (1994),4.411330321609877,2
1,Outbreak (1995),4.390012563943766,3
30,Donnie Darko (2001),4.537588331654571,1
30,Harry Potter and the Prisoner of Azkaban ...,4.498054399892591,2
30,Groundhog Day (1993),4.460989709749721,3
138491,Donnie Darko (2001),4.5211877932696565,1
138491,Groundhog Day (1993),4.468249808803104,2
138491,Outbreak (1995),4.411800693526767,3


In [9]:
models = [f_model, s_model, rf_model]
model_names = ['Factorization', 'Item similarity', 'Ranking factorization']
tc.recommender.util.compare_models(test, models, model_names=model_names, metric='rmse', target='rating')


PROGRESS: Evaluate model Factorization

Overall RMSE: 0.9220283478766871

Per User RMSE (best)
+--------+---------------------+-------+
| userId |         rmse        | count |
+--------+---------------------+-------+
|  2266  | 0.17270567447578025 |   3   |
+--------+---------------------+-------+
[1 rows x 3 columns]


Per User RMSE (worst)
+--------+--------------------+-------+
| userId |        rmse        | count |
+--------+--------------------+-------+
|  3453  | 3.6999108653853248 |   6   |
+--------+--------------------+-------+
[1 rows x 3 columns]


Per Item RMSE (best)
+-----------------------+----------------------+-------+
|         title         |         rmse         | count |
+-----------------------+----------------------+-------+
| Faces of Death (1978) | 0.001157165503054891 |   1   |
+-----------------------+----------------------+-------+
[1 rows x 3 columns]


Per Item RMSE (worst)
+-------------------------------+-------------------+-------+
|             title

[{'rmse_by_user': Columns:
  	userId	int
  	rmse	float
  	count	int
  
  Rows: 998
  
  Data:
  +--------+--------------------+-------+
  | userId |        rmse        | count |
  +--------+--------------------+-------+
  |  2779  | 1.2100913906400592 |   5   |
  |  118   | 1.4133347510279122 |   6   |
  |  3365  | 0.8107400383926773 |   42  |
  |  839   | 0.5936498439083114 |   8   |
  |  2807  | 1.1131878374303268 |   11  |
  |  2043  | 1.095789414801873  |   18  |
  |  3159  | 2.1339540707228006 |   6   |
  |  699   | 0.7629859166930952 |   57  |
  |  2465  | 1.005205111135264  |   22  |
  |  2445  | 0.4285714963978198 |   5   |
  +--------+--------------------+-------+
  [998 rows x 3 columns]
  Note: Only the head of the SFrame is printed.
  You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.,
  'rmse_by_item': Columns:
  	title	str
  	rmse	float
  	count	int
  
  Rows: 5712
  
  Data:
  +--------------------------------+---------------------+-------+