<h4>Importing Libraries</h4>

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns

<h4>Importing data set</h4>

In [2]:
movies=pd.read_csv("movies.csv")
ratings=pd.read_csv("ratings.csv")

In [3]:
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [4]:
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


<h4> Data Cleaning</h4>

In [5]:
#We only use movie id and title from movies data so we will drop genres
movies=movies.drop("genres",axis=1)

In [6]:
movies.head()

Unnamed: 0,movieId,title
0,1,Toy Story (1995)
1,2,Jumanji (1995)
2,3,Grumpier Old Men (1995)
3,4,Waiting to Exhale (1995)
4,5,Father of the Bride Part II (1995)


In [7]:
#From ratings we only use user Id , movie id and rating 
ratings=ratings.drop("timestamp",axis=1)

In [8]:
ratings.head()

Unnamed: 0,userId,movieId,rating
0,1,1,4.0
1,1,3,4.0
2,1,6,4.0
3,1,47,5.0
4,1,50,5.0


In [9]:
#We will look for shape of data set
print(ratings.shape)
print(movies.shape)

(100836, 3)
(9742, 2)


In [10]:
#I want to convert this data set in such a way that my movie id sholud be the row and user id will be the column 
#And column should be filled by the ratings
movies_users=ratings.pivot(index="movieId",columns="userId",values="rating").fillna(0)

In [11]:
movies_users.head()

userId,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,0.0,0.0,4.0,0.0,4.5,0.0,0.0,0.0,...,4.0,0.0,4.0,3.0,4.0,2.5,4.0,2.5,3.0,5.0
2,0.0,0.0,0.0,0.0,0.0,4.0,0.0,4.0,0.0,0.0,...,0.0,4.0,0.0,5.0,3.5,0.0,0.0,2.0,0.0,0.0
3,4.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0


In [12]:
from scipy.sparse import csr_matrix

In [13]:
#We are converting our dataframe into a sparse matrix
movies_sparse=csr_matrix(movies_users.values)
movies_sparse

<9724x610 sparse matrix of type '<class 'numpy.float64'>'
	with 100836 stored elements in Compressed Sparse Row format>

<h4>Model Fitting</h4>

In [14]:
from sklearn.neighbors import NearestNeighbors
model=NearestNeighbors(metric='cosine',algorithm='brute',n_neighbors=20)
#We are looking each cell in matrix so it is bruteforce 
model.fit(movies_sparse)

In [15]:
pip install fuzzywuzzy

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


from fuzzywuzzy import process
#To increase our searching process speed

In [42]:
def recommender(movie_name,data,n):
    idx=process.extractOne(movie_name,movies['title'])[2]
    print("movie selected:",movies['title'][idx],"index:",idx)
    print("Searching for recommondation.........")
    distance,indices=model.kneighbors(data[idx],n_neighbors=n)
    for i in indices:
        print(movies['title'][i].where(i!=idx))

<h4>Prediction</h4>

In [43]:
recommender("Jumanji",movies_sparse,10)

movie selected: Jumanji (1995) index: 1
Searching for recommondation.........
1                                         NaN
322                     Lion King, The (1994)
436                     Mrs. Doubtfire (1993)
325                          Mask, The (1994)
418                      Jurassic Park (1993)
504                         Home Alone (1990)
483    Nightmare Before Christmas, The (1993)
506                            Aladdin (1992)
512               Beauty and the Beast (1991)
18      Ace Ventura: When Nature Calls (1995)
Name: title, dtype: object
