In [1]:
!pip install fuzzywuzzy

Collecting fuzzywuzzy
  Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl (18 kB)
Installing collected packages: fuzzywuzzy
Successfully installed fuzzywuzzy-0.18.0


In [6]:
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from scipy.sparse import csr_matrix
from fuzzywuzzy import process

In [7]:
movies_df = pd.read_csv('movies.csv', usecols=['movieId','title'],dtype={'movieId':'int32','title':'str'})
ratings_df = pd.read_csv('ratings.csv',usecols=['userId','movieId','rating'],dtype={'userId':'int32','movieId':'int32','rating':'float32'})


In [8]:
movies_df.head()

Unnamed: 0,movieId,title
0,1,Toy Story (1995)
1,2,Jumanji (1995)
2,3,Grumpier Old Men (1995)
3,4,Waiting to Exhale (1995)
4,5,Father of the Bride Part II (1995)


In [9]:
ratings_df.head()

Unnamed: 0,userId,movieId,rating
0,1,1,4.0
1,1,3,4.0
2,1,6,4.0
3,1,47,5.0
4,1,50,5.0


In [10]:
#converting ratings_df into pivot matrix
matrix = ratings_df.pivot(index='movieId',columns='userId',values='rating').fillna(0)
matrix

userId,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,0.0,0.0,4.0,0.0,4.5,0.0,0.0,0.0,...,4.0,0.0,4.0,3.0,4.0,2.5,4.0,2.5,3.0,5.0
2,0.0,0.0,0.0,0.0,0.0,4.0,0.0,4.0,0.0,0.0,...,0.0,4.0,0.0,5.0,3.5,0.0,0.0,2.0,0.0,0.0
3,4.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193581,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
193583,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
193585,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
193587,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [11]:
matrix_csr = csr_matrix(matrix.values)

In [12]:
#defining model
#using cosine similarity as metric to find similarity between two vectors
#using brute algorithm to traverse entire matrix and find similarity
model_knn = NearestNeighbors(metric='cosine',algorithm='brute',n_neighbors=20)


In [25]:
def recommender(name,model,data,n_recommendations):
    model.fit(data)
    #for greater matching accuracy
    idx = process.extractOne(name,movies_df['title'])[2]
    print('Movie Selected : {} MovieId:{}'.format(movies_df['title'][idx],idx))
    print('Searching for recommendations...')
    #we need to pass the entire row of the selected movie
    distances, indices = model.kneighbors(data[idx],n_neighbors=n_recommendations)
    for item in indices:
        print(movies_df['title'][item].where(item!=idx))

In [28]:
name1 = input("enter the movie name: ")
recommender(name1,model_knn,matrix_csr,20)

enter the movie name: jurassic park
Movie Selected : Jurassic Park (1993) MovieId:418
Searching for recommendations...
418                                                  NaN
507                    Terminator 2: Judgment Day (1991)
314                                  Forrest Gump (1994)
97                                     Braveheart (1995)
398                                 Fugitive, The (1993)
334                                         Speed (1994)
509                                        Batman (1989)
615                 Independence Day (a.k.a. ID4) (1996)
123                                     Apollo 13 (1995)
337                                     True Lies (1994)
322                                Lion King, The (1994)
506                                       Aladdin (1992)
325                                     Mask, The (1994)
436                                Mrs. Doubtfire (1993)
508                            Dances with Wolves (1990)
257                       