###**Import Library**

In [6]:
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

###**Read Data**

In [7]:
df_movies = pd.read_csv('movies.csv', usecols=['movieId','title'], dtype={'movieId':'int32','title':'str'})
df_ratings = pd.read_csv('ratings.csv', usecols=['userId','movieId','rating'],dtype={'userId':'int32','movieId':'int32','rating':'float32'})

###**EDA**

In [8]:
df_movies.head()

Unnamed: 0,movieId,title
0,1,Toy Story (1995)
1,2,Jumanji (1995)
2,3,Grumpier Old Men (1995)
3,4,Waiting to Exhale (1995)
4,5,Father of the Bride Part II (1995)


In [9]:
df_ratings.head()

Unnamed: 0,userId,movieId,rating
0,1,1,4.0
1,1,3,4.0
2,1,6,4.0
3,1,47,5.0
4,1,50,5.0


In [10]:
df_movies.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9742 entries, 0 to 9741
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   movieId  9742 non-null   int32 
 1   title    9742 non-null   object
dtypes: int32(1), object(1)
memory usage: 114.3+ KB


In [11]:
df_ratings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100836 entries, 0 to 100835
Data columns (total 3 columns):
 #   Column   Non-Null Count   Dtype  
---  ------   --------------   -----  
 0   userId   100836 non-null  int32  
 1   movieId  100836 non-null  int32  
 2   rating   100836 non-null  float32
dtypes: float32(1), int32(2)
memory usage: 1.2 MB


In [12]:
# sparse matrix
movies_users = df_ratings.pivot(index='movieId', columns='userId',values='rating').fillna(0)
mat_movies_users = csr_matrix(movies_users.values)

###**Model Building**

In [13]:
# KNN
model_knn= NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=20)
model_knn.fit(mat_movies_users)

NearestNeighbors(algorithm='brute', leaf_size=30, metric='cosine',
                 metric_params=None, n_jobs=None, n_neighbors=20, p=2,
                 radius=1.0)

###**Recommendation System**

In [18]:
from fuzzywuzzy import process
def recommender(movie_name, data,model, n_recommendations ):
    model.fit(data)
    idx=process.extractOne(movie_name, df_movies['title'])[2]
    print('Movie Selected: ',df_movies['title'][idx], 'Index: ',idx)
    print('Searching for recommendations.....')
    distances, indices=model.kneighbors(data[idx], n_neighbors=n_recommendations)
    for i in indices:
        print(df_movies['title'][i].where(i!=idx))



In [19]:
recommender('iron man', mat_movies_users, model_knn,20)

Movie Selected:  Iron Man (2008) Index:  6743
Searching for recommendations.....
6743                                            NaN
7197                                  Garage (2007)
7195                        Merry Madagascar (2009)
7354                             A-Team, The (2010)
6726                         Superhero Movie (2008)
7137                         Thirst (Bakjwi) (2009)
7026                                 Scorpio (1973)
7571                                 Win Win (2011)
3880                  Look Who's Talking Now (1993)
6388    After the Wedding (Efter brylluppet) (2006)
7601                       Idiots and Angels (2008)
6755                Nina's Heavenly Delights (2006)
7022                                   Earth (2007)
7338                          Blue Valentine (2010)
4421                         What's Up, Doc? (1972)
6195                             Silent Hill (2006)
5885                                   Crash (2004)
3740                              M