In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

## Datasets

In [2]:
movies = pd.read_csv('D:\DML\ml-25m\ml-25m\movies.csv')
ratings = pd.read_csv('D:\\DML\\ml-25m\\ml-25m\\ratings.csv')
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [3]:
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,296,5.0,1147880044
1,1,306,3.5,1147868817
2,1,307,5.0,1147868828
3,1,665,5.0,1147878820
4,1,899,3.5,1147868510


# Recommendation System

In [4]:
rating = pd.pivot_table(ratings,values = ["rating","userId"],index = "movieId", aggfunc = max)
rating.head()

Unnamed: 0_level_0,rating,userId
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1
1,5.0,162538
2,5.0,162533
3,5.0,162529
4,5.0,162516
5,5.0,162529


## Machine Learning Model Training for Recommending  Movies Based on Users Ratings

In [6]:
from sklearn.neighbors import NearestNeighbors
nn = NearestNeighbors(metric = 'cosine')
nn.fit(rating)

NearestNeighbors(metric='cosine')

## Collaborative Filtering Recommendation

In [7]:
class Recommender:
    def __init__(self):
        self.hist = [] 
        self.ishist = False 
        
    def recommend_on_movie(self,movie,n_reccomend = 5):
        self.ishist = True
        movieid = int(movies[movies['title']==movie]['movieId'])
        self.hist.append(movieid)
        distance,neighbors = nn.kneighbors([rating.loc[movieid]],n_neighbors=n_reccomend+1)
        movieids = [rating.iloc[i].name for i in neighbors[0]]
        recommeds = [str(movies[movies['movieId']==mid]['title']).split('\n')[0].split('  ')[-1] for mid in movieids if mid not in [movieid]]
        return recommeds[:n_reccomend]
    
    def recommend_on_history(self,n_reccomend = 5):
        if self.ishist == False:
            return print('No history found')
        history = np.array([list(rating.loc[mid]) for mid in self.hist])
        distance,neighbors = nn.kneighbors([np.average(history,axis=0)],n_neighbors=n_reccomend + len(self.hist))
        movieids = [rating.iloc[i].name for i in neighbors[0]]
        recommeds = [str(movies[movies['movieId']==mid]['title']).split('\n')[0].split('  ')[-1] for mid in movieids if mid not in self.hist]
        return recommeds[:n_reccomend]

In [8]:
# linitializing the Recommender Object
recommender = Recommender()  

In [9]:
# Recommendation based on past watched movies, but the object just initialized. So, therefore no history found
recommender.recommend_on_history()    

No history found


In [10]:
# Recommendation based on this movie 
bride = recommender.recommend_on_movie('Tigerland (2000)')
bride



["Vegas Vacation (National Lampoon's Las Vegas V...",
 'Angels with Dirty Faces (1938)',
 'Biutiful (2010)',
 'Twister (1996)',
 'Moulin Rouge (2001)']

In [137]:
# Recommendation based on past watched movies, and this time a movie is there in the history.
recommender.recommend_on_history()



["Vegas Vacation (National Lampoon's Las Vegas V...",
 'Angels with Dirty Faces (1938)',
 'Biutiful (2010)',
 'Twister (1996)',
 'Moulin Rouge (2001)']

In [138]:
# Recommendation based on this movie
recommender.recommend_on_movie('Tigerland (2000)')



["Vegas Vacation (National Lampoon's Las Vegas V...",
 'Angels with Dirty Faces (1938)',
 'Biutiful (2010)',
 'Twister (1996)',
 'Moulin Rouge (2001)']

In [139]:
# Recommendation based on past watched movies, and this time two movies is there in the history.
recommender.recommend_on_history()




['Biutiful (2010)',
 'Moulin Rouge (2001)',
 'Angels with Dirty Faces (1938)',
 'Twister (1996)',
 "Vegas Vacation (National Lampoon's Las Vegas V..."]

In [140]:
# Recommendation based on this movie
recommender.recommend_on_movie('Grumpier Old Men (1995)')



['My Girl (1991)',
 'Kiss Before Dying, A (1956)',
 'War and Peace (1956)',
 'My Girl 2 (1994)',
 'Control (2007)']

In [141]:
# Recommendation based on this movie
recommender.recommend_on_movie('Money Train (1995)')



['Omen, The (2006)',
 'Birdman: Or (The Unexpected Virtue of Ignoranc...',
 'Southern Comfort (1981)',
 'Sweet Hereafter, The (1997)',
 'Mouse Hunt (1997)']

## Content Based Filtering

In [142]:
from sklearn.feature_extraction.text import CountVectorizer
vectorizer = CountVectorizer(stop_words = 'english')
genres  =  vectorizer.fit_transform(movies.genres).toarray()
contents = pd.DataFrame(genres, columns = vectorizer.get_feature_names())
contents.head()



Unnamed: 0,action,adventure,animation,children,comedy,crime,documentary,drama,fantasy,fi,film,genres,horror,imax,listed,musical,mystery,noir,romance,sci,thriller,war,western
0,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
3,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
4,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


### Machine Learning Model training for Recommending movies based on movies contents.

In [143]:
from sklearn.neighbors import NearestNeighbors
nn = NearestNeighbors(metric='cosine')
nn.fit(contents)

NearestNeighbors(metric='cosine')

In [144]:
class Recommender:
    def __init__(self):
        # This list will stored movies that called atleast ones using recommend_on_movie method
        self.hist = [] 
        self.ishist = False # Check if history is empty
    
    # This method will recommend movies based on a movie that passed as the parameter
    def recommend_on_movie(self,movie,n_reccomend = 5):
        self.ishist = True
        iloc = movies[movies['title']==movie].index[0]
        self.hist.append(iloc)
        distance,neighbors = nn.kneighbors([contents.iloc[iloc]],n_neighbors=n_reccomend+1)
        recommeds = [movies.iloc[i]['title'] for i in neighbors[0] if i not in [iloc]]
        return recommeds[:n_reccomend]
    
    def recommend_on_history(self,n_reccomend = 5):
        if self.ishist == False:
            return print('No history found')
        history = np.array([list(contents.iloc[iloc]) for iloc in self.hist])
        distance,neighbors = nn.kneighbors([np.average(history,axis=0)],n_neighbors=n_reccomend + len(self.hist))
        recommeds = [movies.iloc[i]['title'] for i in neighbors[0] if i not in self.hist]
        return recommeds[:n_reccomend]

In [145]:
# linitializing the Recommender Object
recommender = Recommender()  

In [146]:
recommender.recommend_on_history()   

No history found


In [147]:
# Recommendation based on this movie 
recommender.recommend_on_movie('Sudden Death (1995)')



['Bloodfisted Brothers (1978)',
 'Border (1997)',
 'Dragon Lee Vs. The 5 Brothers (1978)',
 'American Ninja 4: The Annihilation (1990)',
 'Bruce Lee Fights Back from the Grave (1976)']

In [148]:
# Recommendation based on past watched movies, and this time a movie is there in the history.
recommender.recommend_on_history()



['Bloodfisted Brothers (1978)',
 'Border (1997)',
 'Dragon Lee Vs. The 5 Brothers (1978)',
 'American Ninja 4: The Annihilation (1990)',
 'Bruce Lee Fights Back from the Grave (1976)']

In [149]:
# Recommendation based on this movie
recommender.recommend_on_movie('Waiting to Exhale (1995)')



['Hope Floats (1998)',
 'Souls for Sale (1923)',
 '3 Idiots (2009)',
 'The Mystery of Happiness (2014)',
 'Badhaai Ho (2018)']

In [110]:
# Recommendation based on this movie
recommender.recommend_on_movie('GoldenEye (1995)')



['Quantum of Solace (2008)',
 'The Devil-Ship Pirates (1964)',
 'Night Train to Mundo Fine (1966)',
 'A Cry in the Wild (1990)',
 'Diamonds Are Forever (1971)']

In [111]:
recommender.recommend_on_history()



['Unstoppable (2004)',
 'Hunting Party, The (2007)',
 'Bail Out (1990)',
 'Chill Factor (1999)',
 'Mummy Returns, The (2001)']