# Effective movie recommendation system 
## (also solves the cold-start problem)

In [1]:
%load_ext lab_black

In [2]:
import turicreate
import pandas as pd
import numpy as np

In [3]:
# Load the ratings table; the previewed columns are Cust_Id, Rating and Movie_Id
# (plus an unnamed index column that the next cell removes).
df = pd.read_csv(filepath_or_buffer="df.csv")

In [4]:
# Remove the leftover "Unnamed: 0" index column that came in from the CSV.
# `errors="ignore"` keeps this cell idempotent: re-running it after the column
# has already been removed no longer raises KeyError (the previous
# `del df["Unnamed: 0"]` did).
df = df.drop(columns=["Unnamed: 0"], errors="ignore")
df.head(5)

Unnamed: 0,Cust_Id,Rating,Movie_Id
0,712664,5.0,3
1,1331154,4.0,3
2,2632461,3.0,3
3,44937,5.0,3
4,656399,4.0,3


In [5]:
# Recommendation with collaborative filtering (a memory-based approach: items are recommended based on previously observed ratings)
# User-User collaborative filtering: users whose ratings are highly correlated are treated as similar, and each user is recommended what similar users liked.
# Item-Item collaborative filtering: movies that are similar to the ones a user has already rated are recommended.

In [6]:
# Convert the pandas DataFrame into a turicreate SFrame, the input type used by
# the recommender models below.
df_sf = turicreate.SFrame(data=df)

In [7]:
# Popularity baseline (non-personalized, i.e. not user-user collaborative
# filtering): movies are ranked by overall popularity, so users receive
# essentially the same top-5 list.
# Training the model
popularity_model = turicreate.popularity_recommender.create(
    observation_data=df_sf,
    user_id="Cust_Id",
    item_id="Movie_Id",
    target="Rating",
)

In [8]:
# Example: top-5 popularity recommendations for the customers found in the
# first 3 rating rows.
# NOTE(review): the lists can differ slightly between users - presumably
# because recommend() skips movies a user has already rated; confirm.
sample_users = list(df_sf["Cust_Id"][:3])
popularity_recomm = popularity_model.recommend(users=sample_users, k=5)
# 3 users x 5 recommendations each = 15 rows to display
popularity_recomm.print_rows(num_rows=15)

+---------+----------+-------------------+------+
| Cust_Id | Movie_Id |       score       | rank |
+---------+----------+-------------------+------+
|  712664 |   3456   | 4.665432098765432 |  1   |
|  712664 |   2102   | 4.589824034920202 |  2   |
|  712664 |   3444   | 4.520766378244747 |  3   |
|  712664 |   1476   | 4.461601211979955 |  4   |
|  712664 |   4238   | 4.457773512476008 |  5   |
| 1331154 |   3456   | 4.665432098765432 |  1   |
| 1331154 |   2102   | 4.589824034920202 |  2   |
| 1331154 |   3444   | 4.520766378244747 |  3   |
| 1331154 |   4238   | 4.457773512476008 |  4   |
| 1331154 |   2568   | 4.456026058631922 |  5   |
| 2632461 |   3456   | 4.665432098765432 |  1   |
| 2632461 |   2102   | 4.589824034920202 |  2   |
| 2632461 |   3444   | 4.520766378244747 |  3   |
| 2632461 |   1476   | 4.461601211979955 |  4   |
| 2632461 |   4238   | 4.457773512476008 |  5   |
+---------+----------+-------------------+------+
[15 rows x 4 columns]



In [9]:
# Item-Item collaborative filtering: recommendations are derived from each
# user's own past ratings, so different users get different (personalized)
# lists. Item-to-item similarity is measured with cosine similarity.
# Training the model
item_sim_model = turicreate.item_similarity_recommender.create(
    observation_data=df_sf,
    user_id="Cust_Id",
    item_id="Movie_Id",
    target="Rating",
    similarity_type="cosine",
)

In [10]:
# Example: top-5 item-similarity recommendations for the customers found in
# the first 3 rating rows.
sample_users = list(df_sf["Cust_Id"][:3])
item_sim_recomm = item_sim_model.recommend(users=sample_users, k=5)
# 3 users x 5 recommendations each = 15 rows to display
item_sim_recomm.print_rows(num_rows=15)

+---------+----------+---------------------+------+
| Cust_Id | Movie_Id |        score        | rank |
+---------+----------+---------------------+------+
|  712664 |   3962   |  0.6944930362896841 |  1   |
|  712664 |   3938   |  0.6064882626950415 |  2   |
|  712664 |   3427   |  0.556006958575848  |  3   |
|  712664 |   2152   |  0.5285343614106621 |  4   |
|  712664 |   2660   | 0.43074481734812586 |  5   |
| 1331154 |   3860   |  1.2493619265326534 |  1   |
| 1331154 |   1962   |  1.0075475355707497 |  2   |
| 1331154 |   1202   |  0.6959467910858522 |  3   |
| 1331154 |   2391   |  0.6806661773876972 |  4   |
| 1331154 |   4330   |  0.5497861219697209 |  5   |
| 2632461 |   4306   |  1.7303073085271394 |  1   |
| 2632461 |   2862   |  1.7176445653805366 |  2   |
| 2632461 |   2452   |  1.675116940186574  |  3   |
| 2632461 |   2782   |  1.614187824038359  |  4   |
| 2632461 |   3938   |  1.5115973284611335 |  5   |
+---------+----------+---------------------+------+
[15 rows x 4 columns]

In [11]:
# User cold start - making recommendations for a new user
# (it is not possible to provide personalized recommendations for a new user)

# If the model has never seen the user,
# then it defaults to recommending popular items
new_user_id = 12

# Vectorized membership check: SArray.any() is evaluated by the SFrame engine,
# whereas the built-in sum(...) iterated over every rating row in Python.
if not (df_sf["Cust_Id"] == new_user_id).any():
    print(f"The user {new_user_id} is new")
popularity_model.recommend(users=[new_user_id], k=5)

The user 12 is new


Cust_Id,Movie_Id,score,rank
12,3456,4.665432098765432,1
12,2102,4.589824034920202,2
12,3444,4.520766378244747,3
12,1476,4.461601211979955,4
12,4238,4.457773512476008,5


In [12]:
# Item cold start - making recommendations for a new item

# If the model has never seen the item,
# then it defaults to score = 0 (which is the minimum)
new_item_id = 12

# Vectorized membership check: SArray.any() is evaluated by the SFrame engine,
# whereas the built-in sum(...) iterated over every rating row in Python.
if not (df_sf["Movie_Id"] == new_item_id).any():
    print(f"The item {new_item_id} is new")

# Score the unseen item for the customers found in the first 3 rating rows.
sample_users = list(df_sf["Cust_Id"][:3])
item_sim_model.recommend(users=sample_users, items=[new_item_id])

The item 12 is new


Cust_Id,Movie_Id,score,rank
712664,12,0.0,1
1331154,12,0.0,1
2632461,12,0.0,1
