## Part 3: Building a Recommender System with Implicit Feedback

### Step 1: Import Dependencies

> Note: this notebook uses implicit v0.3.6

In [1]:
import numpy as np
import pandas as pd
import implicit

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

### Step 2: Load the Data

In [66]:
# Source CSVs for the tutorial: one row per rating, and one row per movie.
RATINGS_URL = "https://s3-us-west-2.amazonaws.com/recommender-tutorial/ratings.csv"
MOVIES_URL = "https://s3-us-west-2.amazonaws.com/recommender-tutorial/movies.csv"

ratings = pd.read_csv(RATINGS_URL)
movies = pd.read_csv(MOVIES_URL)

### Step 3: Transforming the Data

In [15]:
from scipy.sparse import csr_matrix

def create_X(df):
    """
    Generates a sparse item-user matrix from a ratings dataframe.

    Rows of the matrix are movies and columns are users: X[m, u] holds the
    rating that the user with index u gave to the movie with index m.
    Indices follow the sorted order of the unique ids in each column.

    Args:
        df: pandas dataframe with 'userId', 'movieId' and 'rating' columns

    Returns (in this order):
        X: scipy.sparse.csr_matrix of shape (n_movies, n_users)
        user_mapper: dict that maps user id's to user indices
        movie_mapper: dict that maps movie id's to movie indices
        user_inv_mapper: dict that maps user indices to user id's
        movie_inv_mapper: dict that maps movie indices to movie id's
    """
    # np.unique returns the sorted unique ids; with return_inverse it also
    # returns, for every row of df, the position of that row's id in the
    # sorted array -- which is exactly the matrix index for that row.
    user_ids, user_index = np.unique(df["userId"], return_inverse=True)
    movie_ids, movie_index = np.unique(df["movieId"], return_inverse=True)

    N = len(user_ids)
    M = len(movie_ids)

    user_mapper = dict(zip(user_ids, range(N)))
    movie_mapper = dict(zip(movie_ids, range(M)))

    user_inv_mapper = dict(zip(range(N), user_ids))
    movie_inv_mapper = dict(zip(range(M), movie_ids))

    # Movies index the rows and users index the columns (shape M x N).
    X = csr_matrix((df["rating"], (movie_index, user_index)), shape=(M, N))

    return X, user_mapper, movie_mapper, user_inv_mapper, movie_inv_mapper

In [16]:
# Build the movies-by-users rating matrix and the id <-> index lookups.
(
    X,
    user_mapper,
    movie_mapper,
    user_inv_mapper,
    movie_inv_mapper,
) = create_X(ratings)

#### Creating Movie Title Mappers

Create helper mappings and functions to easily translate between movie titles and movie indices.

In [67]:
# Two-way lookup between movie titles and movie ids.
# If two rows share a title (or an id), the later row wins in the dict.
title_col, movie_id_col = movies['title'], movies['movieId']
movie_title_mapper = dict(zip(title_col, movie_id_col))
movie_title_inv_mapper = dict(zip(movie_id_col, title_col))

In [68]:
def get_movie_index(title):
    """Return the matrix index of the movie with exactly this title.

    The title is resolved to a movie id through `movie_title_mapper`, and the
    movie id to an index through `movie_mapper`. A KeyError is raised if
    either dictionary does not contain the key.
    """
    return movie_mapper[movie_title_mapper[title]]

def get_movie_title(movie_idx):
    """Return the title of the movie stored at matrix index `movie_idx`.

    The index is resolved to a movie id through `movie_inv_mapper`, and the
    movie id to a title through `movie_title_inv_mapper`. A KeyError is
    raised if either dictionary does not contain the key.
    """
    return movie_title_inv_mapper[movie_inv_mapper[movie_idx]]

In [73]:
# Sanity check: look up the matrix index for an exact title.
get_movie_index('Forrest Gump (1994)')

314

In [74]:
# Round trip: 314 is the index the previous cell returned for 'Forrest Gump (1994)'.
get_movie_title(314)

'Forrest Gump (1994)'

### Step 4: Generating Recommendations

In [19]:
# Fit an alternating-least-squares model with 50 latent factors.
# X has movies as rows and users as columns (see create_X).
# NOTE(review): presumably this is the item-user layout that the implicit
# v0.3.6 `fit` expects -- confirm the expected orientation before upgrading
# implicit.
model = implicit.als.AlternatingLeastSquares(factors=50)
model.fit(X)

100%|██████████| 15.0/15 [00:06<00:00,  2.26it/s]


In [47]:
from fuzzywuzzy import process
def movie_finder(title):
    """Return the entry of movies['title'] that best matches `title`.

    Matching is delegated to fuzzywuzzy's `process.extractOne`; only the
    first element of its result is returned.
    """
    return process.extractOne(title, movies['title'].tolist())[0]

In [62]:
# Resolve a loosely typed title to the exact title in `movies`, then to its
# index in the matrix the model was fitted on.
movie_of_interest = movie_finder('forrest gump')
movie_index = get_movie_index(movie_of_interest)

# Ask the fitted model for the items most similar to that movie.
related = model.similar_items(movie_index)

In [65]:
print(f"Because you watched {movie_of_interest}...")
for neighbour in related:
    # The first element of each result is the movie's matrix index.
    recommended_title = get_movie_title(neighbour[0])
    # Skip the query movie itself if it appears in its own neighbour list.
    if recommended_title == movie_of_interest:
        continue
    print(recommended_title)

Because you watched Forrest Gump (1994)...
Shawshank Redemption, The (1994)
Pulp Fiction (1994)
Silence of the Lambs, The (1991)
Schindler's List (1993)
Braveheart (1995)
Jurassic Park (1993)
Apollo 13 (1995)
Seven (a.k.a. Se7en) (1995)
Terminator 2: Judgment Day (1991)


### Step 5: Generating User-Item Recommendations

In [140]:
user_id = 22

# All ratings by this user, joined with movie titles, highest rating first.
user_ratings = (
    ratings.loc[ratings['userId'] == user_id]
    .merge(movies[['movieId', 'title']])
    .sort_values('rating', ascending=False)
)
top_5 = user_ratings.head(5)
top_5

Unnamed: 0,userId,movieId,rating,timestamp,title
22,22,3489,5.0,1268726106,Hook (1991)
86,22,49272,5.0,1268727326,Casino Royale (2006)
60,22,7438,5.0,1268726785,Kill Bill: Vol. 2 (2004)
3,22,318,5.0,1268726193,"Shawshank Redemption, The (1994)"
4,22,356,5.0,1268726309,Forrest Gump (1994)


In [141]:
# user_ratings is sorted by rating descending, so the tail of the
# below-3 subset holds the lowest-rated movies.
bottom_5 = user_ratings.loc[user_ratings['rating'] < 3].tail(5)
bottom_5

Unnamed: 0,userId,movieId,rating,timestamp,title
78,22,38038,0.5,1268727279,Wallace & Gromit in The Curse of the Were-Rabb...
81,22,44191,0.5,1268727414,V for Vendetta (2006)
90,22,53519,0.5,1268727137,Death Proof (2007)
89,22,52281,0.5,1268726845,Grindhouse (2007)
118,22,74789,0.5,1268726132,Alice in Wonderland (2010)


In [142]:
# Translate the raw user id into the index assigned by create_X's user_mapper.
user_idx = user_mapper[user_id]

In [143]:
# X is movies x users, so its transpose has one row per user;
# recommend() is given that transposed matrix together with the user's index.
X_t = X.transpose().tocsr()
recommendations = model.recommend(user_idx, X_t)

recommendations

[(4131, 0.9661345),
 (3617, 0.8320355),
 (1938, 0.73343456),
 (5324, 0.72751635),
 (4170, 0.7152841),
 (4153, 0.69513565),
 (7355, 0.6877072),
 (1733, 0.6860013),
 (257, 0.65937334),
 (3609, 0.6487046)]

In [144]:
# Each recommendation is an (index, score) pair; show the title for each index.
for rec in recommendations:
    recommended_title = get_movie_title(rec[0])
    print(recommended_title)

Lord of the Rings: The Two Towers, The (2002)
Amelie (Fabuleux destin d'Amélie Poulain, Le) (2001)
Matrix, The (1999)
Shaun of the Dead (2004)
City of God (Cidade de Deus) (2002)
Catch Me If You Can (2002)
Inception (2010)
American History X (1998)
Pulp Fiction (1994)
Ocean's Eleven (2001)
