In [25]:
import numpy as np 
import pandas as pd 
import os
from scipy.sparse import csr_matrix

In [26]:
pip install fuzzywuzzy



In [27]:
pip install python-Levenshtein



In [28]:
from fuzzywuzzy import fuzz

In [29]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [30]:
path = '/content/drive/MyDrive/Datasets/movies.csv'
movies = pd.read_csv(path)

In [31]:
path1 = '/content/drive/MyDrive/Datasets/ratings.csv'
ratings = pd.read_csv(path1)

In [32]:
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [33]:
ratings.drop('timestamp',axis=1)

Unnamed: 0,userId,movieId,rating
0,1,1,4.0
1,1,3,4.0
2,1,6,4.0
3,1,47,5.0
4,1,50,5.0
...,...,...,...
100831,610,166534,4.0
100832,610,168248,5.0
100833,610,168250,5.0
100834,610,168252,5.0


In [34]:
df_movie = ratings.pivot(index='movieId',columns='userId',values='rating').fillna(0)

In [35]:
df_movie.head(5)

userId,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,0.0,0.0,4.0,0.0,4.5,0.0,0.0,0.0,...,4.0,0.0,4.0,3.0,4.0,2.5,4.0,2.5,3.0,5.0
2,0.0,0.0,0.0,0.0,0.0,4.0,0.0,4.0,0.0,0.0,...,0.0,4.0,0.0,5.0,3.5,0.0,0.0,2.0,0.0,0.0
3,4.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0


In [36]:
movie_mat_sparse = csr_matrix(df_movie.values)

In [37]:
from sklearn.neighbors import NearestNeighbors
knn_model = NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=20, n_jobs=-1)

In [38]:
df_ratings_cnt = pd.DataFrame(ratings.groupby('rating').size(), columns=['count'])
df_ratings_cnt

Unnamed: 0_level_0,count
rating,Unnamed: 1_level_1
0.5,1370
1.0,2811
1.5,1791
2.0,7551
2.5,5550
3.0,20047
3.5,13136
4.0,26818
4.5,8551
5.0,13211


In [39]:
df_movies_cnt_1 = pd.DataFrame(ratings.groupby('movieId').size(), columns=['count'])
df_movies_cnt_1.head()

Unnamed: 0_level_0,count
movieId,Unnamed: 1_level_1
1,215
2,110
3,52
4,7
5,49


In [40]:
movie_to_idx = {
    movie: i for i, movie in 
    enumerate(list(movies.set_index('movieId').loc[df_movie.index].title))
}

In [41]:
knn_model.fit(movie_mat_sparse)

NearestNeighbors(algorithm='brute', metric='cosine', n_jobs=-1, n_neighbors=20)

In [42]:
def matching(map, movie, verbose=True):
    match_tuple = []
    for title, idx in map.items():
        ratio = fuzz.ratio(title.lower(), movie.lower())
        if ratio >= 60:
            match_tuple.append((title, idx, ratio))

    match_tuple = sorted(match_tuple, key=lambda x: x[2])[::-1]
    if not match_tuple:
        print('Oops! No match')
        return
    if verbose:
        print('Possible matches: {0}\n'.format([x[0] for x in match_tuple]))
    return match_tuple[0][1]

In [43]:
def make_recommendation(model, data, map, movie, n_recommend):
    model.fit(data)
    print('Your movie:', movie)
    idx = matching(map, movie, verbose=True)
    
    distances, indices = model.kneighbors(data[idx], n_neighbors=n_recommend+1)
    
    recommends = \
        sorted(list(zip(indices.squeeze().tolist(), distances.squeeze().tolist())), key=lambda x: x[1])[:0:-1]
    rev_map = {v: k for k, v in map.items()}
   
    print('Recommendations for {}:'.format(movie))
    for i, (idx, dist) in enumerate(recommends):
        print('{0}: {1}, with distance of {2}'.format(i+1, rev_map[idx], dist))

In [46]:
my_movie = 'Happy Gilmore'

make_recommendation(
    model=knn_model,
    data=movie_mat_sparse,
    movie=my_movie,
    map=movie_to_idx,
    n_recommend=5)

Your movie: Happy Gilmore
Possible matches: ['Happy Gilmore (1996)']

Recommendations for Happy Gilmore:
1: Wayne's World (1992), with distance of 0.5176091794637476
2: Dumb & Dumber (Dumb and Dumber) (1994), with distance of 0.5011943873328781
3: Nutty Professor, The (1996), with distance of 0.4985457733989128
4: American Pie (1999), with distance of 0.48721511308283727
5: Billy Madison (1995), with distance of 0.45205007558397514
