# 9.2 Exercises: Recommender System
# Rahul Rajeev

In [2]:
# libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.neighbors import NearestNeighbors

In [3]:
# Read the four CSV tables from the working directory, in the same order
# as the names on the left-hand side.
links, movies, ratings, tags = (
    pd.read_csv(csv_name)
    for csv_name in ('links.csv', 'movies.csv', 'ratings.csv', 'tags.csv')
)

In [4]:
# preview the links table: one row per movieId with its imdbId and tmdbId
links

Unnamed: 0,movieId,imdbId,tmdbId
0,1,114709,862.0
1,2,113497,8844.0
2,3,113228,15602.0
3,4,114885,31357.0
4,5,113041,11862.0
...,...,...,...
9737,193581,5476944,432131.0
9738,193583,5914996,445030.0
9739,193585,6397426,479308.0
9740,193587,8391976,483455.0


In [5]:
# preview the movies table: movieId, title (with release year) and
# pipe-separated genres
movies

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
9737,193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy
9738,193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy
9739,193585,Flint (2017),Drama
9740,193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation


In [6]:
# preview the ratings table: one row per (userId, movieId) rating with a timestamp
ratings

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931
...,...,...,...,...
100831,610,166534,4.0,1493848402
100832,610,168248,5.0,1493850091
100833,610,168250,5.0,1494273047
100834,610,168252,5.0,1493846352


In [7]:
# preview the tags table: free-text tags applied by users to movies
tags

Unnamed: 0,userId,movieId,tag,timestamp
0,2,60756,funny,1445714994
1,2,60756,Highly quotable,1445714996
2,2,60756,will ferrell,1445714992
3,2,89774,Boxing story,1445715207
4,2,89774,MMA,1445715200
...,...,...,...,...
3678,606,7382,for katie,1171234019
3679,606,7936,austere,1173392334
3680,610,3265,gun fu,1493843984
3681,610,3265,heroic bloodshed,1493843978


I will first try a collaborative-filtering recommendation system, modeled on a similar project published on Kaggle. 
The model uses nearest neighbors to find the movies whose rating vectors are closest to a given movie's rating vector, measured by cosine distance (which depends on the angle between the two vectors).
The sources are cited below.

In [8]:
# Build the movie-by-user rating matrix: one row per movieId, one column
# per userId, each cell holding that user's rating for that movie.
# Movies a user has not rated are filled with 0.

rating_pivot = (
    ratings
    .pivot_table(index='movieId', columns='userId', values='rating')
    .fillna(0)
)
print('Shape of this pivot table :', rating_pivot.shape)
rating_pivot.head()

Shape of this pivot table : (9724, 610)


userId,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,0.0,0.0,4.0,0.0,4.5,0.0,0.0,0.0,...,4.0,0.0,4.0,3.0,4.0,2.5,4.0,2.5,3.0,5.0
2,0.0,0.0,0.0,0.0,0.0,4.0,0.0,4.0,0.0,0.0,...,0.0,4.0,0.0,5.0,3.5,0.0,0.0,2.0,0.0,0.0
3,4.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0


In [9]:
# Fit a nearest-neighbours model on the movie-by-user rating matrix so that
# each movie's row of ratings can be compared with every other movie's row.
# After some research, cosine was chosen as the metric for movie
# recommendations.
# NearestNeighbors is imported in the imports cell at the top of the notebook.

# Cosine is only supported by scikit-learn's brute-force search, so the
# algorithm is requested explicitly instead of relying on 'auto'.
nn_algo = NearestNeighbors(metric='cosine', algorithm='brute')
nn_algo.fit(rating_pivot)

In [10]:
# now I need to develop a class for the recommender using the model to 
# predict the neighbors with the smallest cosine metric 

class Recommender:
    """Item-based collaborative-filtering recommender with a watch history.

    Every movie is represented by its row of user ratings in the rating
    matrix. Recommendations are the movies whose rows are nearest to a query
    row according to the fitted nearest-neighbours model (fitted with the
    cosine metric in this notebook).

    By default the notebook-level ``movies``, ``rating_pivot`` and
    ``nn_algo`` objects are looked up each time a method is called. Any of
    them can be overridden per instance, which lets the class be used and
    tested without notebook globals.

    Parameters
    ----------
    movies_df : pandas.DataFrame, optional
        Table with ``movieId`` and ``title`` columns. Defaults to the
        notebook's ``movies``.
    ratings_matrix : pandas.DataFrame, optional
        Rating matrix indexed by movieId with one column per user. Defaults
        to the notebook's ``rating_pivot``.
    model : object, optional
        Fitted model exposing ``kneighbors(X, n_neighbors=...)`` and
        returning ``(distances, row_positions)``; the row positions must
        refer to rows of ``ratings_matrix``. Defaults to the notebook's
        ``nn_algo``.

    Attributes
    ----------
    hist : list of int
        movieIds passed to ``recommend_on_movie`` so far, in order.
    ishist : bool
        True once at least one movie has been recorded in ``hist``.
    """

    def __init__(self, movies_df=None, ratings_matrix=None, model=None):
        # the history list of movies watched
        self.hist = []
        # kept for backward compatibility; mirrors "hist is not empty"
        self.ishist = False
        # optional per-instance overrides of the notebook-level objects
        self._movies_df = movies_df
        self._ratings_matrix = ratings_matrix
        self._model = model

    def _resources(self):
        """Return (movies table, rating matrix, model), using notebook globals as fallback."""
        return (
            movies if self._movies_df is None else self._movies_df,
            rating_pivot if self._ratings_matrix is None else self._ratings_matrix,
            nn_algo if self._model is None else self._model,
        )

    def _nearest_titles(self, query_vector, n_neighbors, exclude_ids, n_recommend):
        """Return up to ``n_recommend`` titles nearest to ``query_vector``, skipping ``exclude_ids``."""
        movies_df, ratings_matrix, model = self._resources()

        # never ask the model for more neighbours than there are movies
        k = min(n_neighbors, len(ratings_matrix))
        if n_recommend <= 0 or k <= 0:
            return []

        # kneighbors expects a 2-D array: one row per query
        query = np.asarray(query_vector, dtype=float).reshape(1, -1)
        _, neighbor_rows = model.kneighbors(query, n_neighbors=k)

        # row positions in the rating matrix -> movieId labels
        candidate_ids = [ratings_matrix.index[pos] for pos in neighbor_rows[0]]

        # Look the titles up directly instead of parsing the printed form of
        # a Series, which cuts titles containing double spaces and depends
        # on pandas display settings. If a movieId occurs more than once in
        # the movies table, the first row wins.
        known = movies_df.loc[movies_df['movieId'].isin(candidate_ids),
                              ['movieId', 'title']].drop_duplicates('movieId')
        title_by_id = dict(zip(known['movieId'], known['title']))

        # neighbours come back nearest-first; keep that order, drop the
        # excluded movies and any id that has no title in the movies table
        recommended = [title_by_id[m_id] for m_id in candidate_ids
                       if m_id not in exclude_ids and m_id in title_by_id]
        return recommended[:n_recommend]

    # this method recommends movies based on the movie entered
    # the example project started with 5 neighbors, but 10 is used here
    # because we want it to recommend 10 movies
    def recommend_on_movie(self, movie, n_recommend = 10):
        """Record ``movie`` as watched and return the titles most similar to it.

        Parameters
        ----------
        movie : str
            Exact title as it appears in the movies table, e.g.
            ``'Toy Story (1995)'``. If several rows share the title, the
            first one is used.
        n_recommend : int, default 10
            Maximum number of titles to return.

        Returns
        -------
        list of str
            Up to ``n_recommend`` titles, nearest first, never including
            ``movie`` itself.

        Raises
        ------
        KeyError
            If the title is not in the movies table, or the movie has no row
            in the rating matrix. The history is left untouched in both cases.
        """
        movies_df, ratings_matrix, _ = self._resources()

        # finding the movie id from the movies dataframe by exact title
        matches = movies_df.loc[movies_df['title'] == movie, 'movieId']
        if matches.empty:
            raise KeyError(f'Movie title not found: {movie!r}')
        movieid = int(matches.iloc[0])

        if movieid not in ratings_matrix.index:
            raise KeyError(f'Movie has no ratings to compare on: {movie!r}')

        # only record the movie once the lookups have succeeded, so a bad
        # title cannot leave the history flag set with an empty history
        self.hist.append(movieid)
        self.ishist = True

        # one extra neighbour because the movie itself is its own nearest
        # neighbour and gets filtered out
        return self._nearest_titles(ratings_matrix.loc[movieid].to_numpy(),
                                    n_neighbors=n_recommend + 1,
                                    exclude_ids={movieid},
                                    n_recommend=n_recommend)

    # This method will recommend movies based on history stored
    # in self.hist list
    def recommend_on_history(self, n_recommend = 10):
        """Return titles most similar to the average of all watched movies.

        Parameters
        ----------
        n_recommend : int, default 10
            Maximum number of titles to return.

        Returns
        -------
        list of str or None
            Up to ``n_recommend`` titles, nearest first, excluding every
            movie in the history. When the history is empty,
            ``'No history found!'`` is printed and None is returned.
        """
        # no history found
        if not self.hist:
            print('No history found!')
            return None

        _, ratings_matrix, _ = self._resources()

        # one rating vector per watched movie (repeats included), averaged
        # into a single query vector
        history = ratings_matrix.loc[self.hist].to_numpy()
        mean_vector = np.average(history, axis=0)

        # ask for enough neighbours that n_recommend remain after the
        # watched movies are filtered out
        return self._nearest_titles(mean_vector,
                                    n_neighbors=n_recommend + len(self.hist),
                                    exclude_ids=set(self.hist),
                                    n_recommend=n_recommend)

In [11]:
# create a recommender; its watch history starts out empty
recommender = Recommender()  

In [12]:
# checking history before any movie has been watched:
# this only prints the 'No history found!' notice
recommender.recommend_on_history()

No history found!


In [13]:
# getting the first set of movies: Toy Story is added to the history and
# the 10 movies nearest to its rating vector are returned
recommender.recommend_on_movie('Toy Story (1995)')

['Toy Story 2 (1999)',
 'Jurassic Park (1993)',
 'Independence Day (a.k.a. ID4) (1996)',
 'Star Wars: Episode IV - A New Hope (1977)',
 'Forrest Gump (1994)',
 'Lion King, The (1994)',
 'Star Wars: Episode VI - Return of the Jedi (1983)',
 'Mission: Impossible (1996)',
 'Groundhog Day (1993)',
 'Back to the Future (1985)']

In [14]:
# checking history after watching a movie: with only Toy Story in the
# history, the averaged rating vector is Toy Story's own vector, so the
# list matches the previous cell
recommender.recommend_on_history()

['Toy Story 2 (1999)',
 'Jurassic Park (1993)',
 'Independence Day (a.k.a. ID4) (1996)',
 'Star Wars: Episode IV - A New Hope (1977)',
 'Forrest Gump (1994)',
 'Lion King, The (1994)',
 'Star Wars: Episode VI - Return of the Jedi (1983)',
 'Mission: Impossible (1996)',
 'Groundhog Day (1993)',
 'Back to the Future (1985)']

Double-checking the output above: Toy Story itself does not appear in the recommended list now that it is in the history list.

In [15]:
# getting the second set of movies (Jurassic Park is added to the history)
recommender.recommend_on_movie('Jurassic Park (1993)')

['Terminator 2: Judgment Day (1991)',
 'Forrest Gump (1994)',
 'Braveheart (1995)',
 'Fugitive, The (1993)',
 'Speed (1994)',
 'Batman (1989)',
 'Independence Day (a.k.a. ID4) (1996)',
 'Apollo 13 (1995)',
 'True Lies (1994)',
 'Lion King, The (1994)']

In [16]:
# getting the third set of movies (Batman is added to the history)
recommender.recommend_on_movie('Batman (1989)')

['Batman Forever (1995)',
 'True Lies (1994)',
 'Terminator 2: Judgment Day (1991)',
 'Fugitive, The (1993)',
 'Jurassic Park (1993)',
 'Dances with Wolves (1990)',
 'Stargate (1994)',
 'Mask, The (1994)',
 'Aladdin (1992)',
 'Braveheart (1995)']

In [17]:
# getting a fourth set of movies (Aladdin is added to the history)
recommender.recommend_on_movie('Aladdin (1992)')

['Beauty and the Beast (1991)',
 'Lion King, The (1994)',
 'Jurassic Park (1993)',
 'True Lies (1994)',
 'Batman (1989)',
 'Ace Ventura: Pet Detective (1994)',
 'Mrs. Doubtfire (1993)',
 'Die Hard: With a Vengeance (1995)',
 'Batman Forever (1995)',
 'Apollo 13 (1995)']

In [18]:
# checking history after four movies: the query is now the average of the
# four watched movies' rating vectors
recommender.recommend_on_history()

['Lion King, The (1994)',
 'Terminator 2: Judgment Day (1991)',
 'True Lies (1994)',
 'Forrest Gump (1994)',
 'Beauty and the Beast (1991)',
 'Fugitive, The (1993)',
 'Apollo 13 (1995)',
 'Mask, The (1994)',
 'Braveheart (1995)',
 'Speed (1994)']

None of the four watched movies in the history list (Toy Story, Jurassic Park, Batman, Aladdin) appear in the recommended list above, confirming that watched movies are excluded.

**Sources:**

M, D. (2022, July 7). What is cosine similarity and how is it used in machine learning? 
Analytics India Magazine. https://analyticsindiamag.com/cosine-similarity-in-machine-learning/#:~:text=Cosine%20similarity%20is%20used%20as,of%20texts%20in%20the%20document. 

Sachinsarkar. (2021, November 6). Movielens Movie Recommendation System. Kaggle. https://www.kaggle.com/code/sachinsarkar/movielens-movie-recommendation-system/notebook 

WilliamVorhies. (2017, January 17). 5 types of recommenders. Data Science Central. 
https://www.datasciencecentral.com/5-types-of-recommenders/ 








__
__
_
