<a href="https://colab.research.google.com/github/premkumar25/Movie-Recommendation-KNN/blob/main/Movie_Recommendation_KNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Import Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
%matplotlib inline
warnings.filterwarnings('ignore')

In [3]:
movies_df = pd.read_csv('movies.txt')
movies_df.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [4]:
rating_df = pd.read_csv('rating.txt')
rating_df.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [5]:
df = pd.merge(movies_df, rating_df, on='movieId')
df.drop(['timestamp','genres'],axis=1, inplace=True)
df.head()

Unnamed: 0,movieId,title,userId,rating
0,1,Toy Story (1995),1,4.0
1,1,Toy Story (1995),5,4.0
2,1,Toy Story (1995),7,4.5
3,1,Toy Story (1995),15,2.5
4,1,Toy Story (1995),17,4.5


In [6]:
df.isnull().sum()

movieId    0
title      0
userId     0
rating     0
dtype: int64

### Grouping by the title and the count of review for each movie

In [7]:
movie_rating_count = (df.groupby(by=['title'])['rating'].count().reset_index().rename(columns={'rating':'total_rating_count'}))
movie_rating_count.head()

Unnamed: 0,title,total_rating_count
0,'Hellboy': The Seeds of Creation (2004),1
1,'Round Midnight (1986),2
2,'Salem's Lot (2004),1
3,'Til There Was You (1997),2
4,'Tis the Season for Love (2015),1


In [8]:
total_movie_rating_count_df = pd.merge(df, movie_rating_count, on='title')
total_movie_rating_count_df.head()

Unnamed: 0,movieId,title,userId,rating,total_rating_count
0,1,Toy Story (1995),1,4.0,205
1,1,Toy Story (1995),5,4.0,205
2,1,Toy Story (1995),7,4.5,205
3,1,Toy Story (1995),15,2.5,205
4,1,Toy Story (1995),17,4.5,205


In [9]:
total_movie_rating_count_df.describe()

Unnamed: 0,movieId,userId,rating,total_rating_count
count,93228.0,93228.0,93228.0,93228.0
mean,19318.053771,303.447108,3.521603,57.761788
std,35491.159175,171.031434,1.04234,60.677294
min,1.0,1.0,0.5,1.0
25%,1193.0,160.0,3.0,13.0
50%,2947.0,305.0,3.5,38.0
75%,7701.0,448.0,4.0,83.0
max,193609.0,599.0,5.0,321.0


## Set the popularity value

Movies with rating higher than popularity value will only be considered. This helps in reducing the wrong recommendation like if one movie has only one review that wont be considered

In [10]:
popularity_level =  50
popular_movie_rating = total_movie_rating_count_df.query('total_rating_count>=@popularity_level')
popular_movie_rating.head()

Unnamed: 0,movieId,title,userId,rating,total_rating_count
0,1,Toy Story (1995),1,4.0,205
1,1,Toy Story (1995),5,4.0,205
2,1,Toy Story (1995),7,4.5,205
3,1,Toy Story (1995),15,2.5,205
4,1,Toy Story (1995),17,4.5,205


### Convert to pivot table

In [11]:
movie_pivot_df = popular_movie_rating.pivot_table(values='rating',index='title',columns='userId').fillna(0)
movie_pivot_df.head()

userId,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,...,559,560,561,562,563,564,565,566,567,568,569,570,571,572,573,574,575,577,578,579,580,581,582,583,584,585,586,587,588,589,590,591,592,593,594,595,596,597,598,599
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
10 Things I Hate About You (1999),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,3.5,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0
12 Angry Men (1957),0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,5.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2001: A Space Odyssey (1968),0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,3.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,2.0,3.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,...,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,3.0,0.0,0.0,3.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,5.0
28 Days Later (2002),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.5,0.0,0.0,0.0,0.0,0.0,0.0,1.5,0.0,0.0,0.0,0.0,0.0,3.0,4.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,4.0,4.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.5,0.0,0.0,0.0,0.0,0.0,4.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0
300 (2007),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.5,0.0,0.0,3.5,0.0,0.0,4.0,0.0,0.0,0.0,4.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,3.5,3.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.5,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Convert the pivot table to np array

In [12]:
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors 

Here we are using cosine similarity to identify the distance between two movies

In [14]:
movie_pivot_df_matrix = csr_matrix(movie_pivot_df.values)

knn_model = NearestNeighbors(metric='cosine', algorithm='brute')


In [19]:
knn_model.fit(movie_pivot_df_matrix)

NearestNeighbors(algorithm='brute', leaf_size=30, metric='cosine',
                 metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                 radius=1.0)

In [55]:
movie_index = np.random.choice(movie_pivot_df_matrix.shape[0])
distance, indeces = knn_model.kneighbors(movie_pivot_df.iloc[movie_index,:].values.reshape(1,-1), n_neighbors=6)

In [56]:
print(len(distance.flatten()))
print(indeces)
print(movie_index)

6
[[108 195 203 107  42 233]]
108


### Display the recommended movies

In [65]:
for i in range(0, len(distance.flatten())):
  if i == 0:
    index_value = [j[i] for j in indeces]
    print('selected movie is: ',movie_pivot_df.index[index_value[0]])
  else:
    index_value = [j[i] for j in indeces]
    print('Related movies: ',movie_pivot_df.index[index_value[0]])

selected movie is:  Dark Knight, The (2008)
Related movies:  Inception (2010)
Related movies:  Iron Man (2008)
Related movies:  Dark Knight Rises, The (2012)
Related movies:  Batman Begins (2005)
Related movies:  Lord of the Rings: The Return of the King, The (2003)
