In [96]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

In [97]:
ratings = pd.read_csv("drive/MyDrive/Dataset2/ratings.csv")
movies = pd.read_csv("drive/MyDrive/Dataset2/movies.csv")

In [98]:
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [99]:
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [100]:
ratings = ratings[['userId', 'movieId', 'rating']]
ratings.head()

Unnamed: 0,userId,movieId,rating
0,1,1,4.0
1,1,3,4.0
2,1,6,4.0
3,1,47,5.0
4,1,50,5.0


In [101]:
movies.shape


(9742, 3)

In [102]:
ratings.shape

(100836, 3)

In [103]:
x=ratings['userId'].value_counts()>200

In [104]:
y = x[x].index

In [105]:
y

Int64Index([414, 599, 474, 448, 274, 610,  68, 380, 606, 288,
            ...
            119, 563, 263,  73, 586, 220, 246, 234, 452, 385],
           dtype='int64', length=133)

In [106]:
ratings = ratings[ratings['userId'].isin(y)]

In [107]:
ratings.shape

(68284, 3)

In [108]:
ratings.head()

Unnamed: 0,userId,movieId,rating
0,1,1,4.0
1,1,3,4.0
2,1,6,4.0
3,1,47,5.0
4,1,50,5.0


In [109]:
movies_ratings = pd.merge(ratings, movies, on="movieId")
movie_ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,1,4.0,964982703,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,5,1,4.0,847434962,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
2,7,1,4.5,1106635946,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
3,15,1,2.5,1510577970,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
4,17,1,4.5,1305696483,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy


In [110]:
movie_ratings.shape

(100836, 6)

In [111]:
numbrer_rating = movie_ratings.groupby('title')['rating'].count().reset_index()

In [112]:
numbrer_rating.rename(columns={'rating':'No.Of Rating'}, inplace=True)

In [113]:
numbrer_rating.head()

Unnamed: 0,title,No.Of Rating
0,'71 (2014),1
1,'Hellboy': The Seeds of Creation (2004),1
2,'Round Midnight (1986),2
3,'Salem's Lot (2004),1
4,'Til There Was You (1997),2


In [114]:
final_rating = movie_ratings.merge(numbrer_rating, on='title')

In [115]:
final_rating.head()

Unnamed: 0,userId,movieId,rating,timestamp,title,genres,No.Of Rating
0,1,1,4.0,964982703,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,215
1,5,1,4.0,847434962,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,215
2,7,1,4.5,1106635946,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,215
3,15,1,2.5,1510577970,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,215
4,17,1,4.5,1305696483,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,215


In [116]:
final_rating.shape

(100836, 7)

In [117]:
final_rating = final_rating[final_rating['No.Of Rating'] >= 50]

In [118]:
final_rating.shape

(41362, 7)

In [119]:
movie_pivot = final_rating.pivot_table(columns='userId', index='title', values='rating')

In [120]:
movie_pivot.head()

userId,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10 Things I Hate About You (1999),,,,,,,,,,,...,,,3.0,,5.0,,,,,
12 Angry Men (1957),,,,5.0,,,,,,,...,5.0,,,,,,,,,
2001: A Space Odyssey (1968),,,,,,,4.0,,,,...,,,5.0,,,5.0,,3.0,,4.5
28 Days Later (2002),,,,,,,,,,,...,,,,,,,,3.5,,5.0
300 (2007),,,,,,,,,,3.0,...,,,,,3.0,,,5.0,,4.0


In [121]:
movie_pivot.shape

(450, 606)

In [122]:
movie_pivot.fillna(0, inplace=True)

In [123]:
movie_sparse = csr_matrix(movie_pivot)

In [124]:
model = NearestNeighbors(algorithm='brute')

In [125]:
model.fit(movie_sparse)

In [126]:
np.where(movie_pivot.index == 200)

(array([], dtype=int64),)

In [149]:
movie_pivot.index[449]

'Zoolander (2001)'

In [144]:
def based_content_filtering(movie_name):
    movie_id = np.where(movie_pivot.index==movie_name)[0][0]
    distances, suggestions = model.kneighbors(movie_pivot.iloc[movie_id, :].values.reshape(1, -1), n_neighbors=6)

    for i in range(len(suggestions)):
        if i==0:
            print('The Suggestions For', movie_name,"Are : ")
        if not i:
            print(movie_pivot.index[suggestions[i]])

In [150]:
based_content_filtering('Zoolander (2001)')

The Suggestions For Zoolander (2001) Are : 
Index(['Zoolander (2001)', 'Anchorman: The Legend of Ron Burgundy (2004)',
       'Wild Wild West (1999)', 'Wedding Crashers (2005)',
       'Miss Congeniality (2000)', 'Knocked Up (2007)'],
      dtype='object', name='title')
