In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Load the movie catalogue from the working directory.
df_movie = pd.read_csv(filepath_or_buffer="movie.csv")
# A bare name on the last line renders the frame as the cell output.
df_movie

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
27273,131254,Kein Bund für's Leben (2007),Comedy
27274,131256,"Feuer, Eis & Dosenbier (2002)",Comedy
27275,131258,The Pirates (2014),Adventure
27276,131260,Rentun Ruusu (2001),(no genres listed)


In [3]:
# Load the user ratings from the working directory.
df_rating = pd.read_csv(filepath_or_buffer="rating.csv")
# A bare name on the last line renders the frame as the cell output.
df_rating

Unnamed: 0,userId,movieId,rating,timestamp
0,1,2,3.5,2005-04-02 23:53:47
1,1,29,3.5,2005-04-02 23:31:16
2,1,32,3.5,2005-04-02 23:33:39
3,1,47,3.5,2005-04-02 23:32:07
4,1,50,3.5,2005-04-02 23:29:40
...,...,...,...,...
20000258,138493,68954,4.5,2009-11-13 15:42:00
20000259,138493,69526,4.5,2009-12-03 18:31:48
20000260,138493,69644,3.0,2009-12-07 18:10:57
20000261,138493,70286,5.0,2009-11-13 15:42:24


In [4]:
# Keep only the columns used by the recommender below.
# The labels are passed as lists rather than sets: a set has no defined
# order, so the resulting column order was arbitrary, and pandas 2.x raises
# TypeError when a set is used as an indexer.
movie = df_movie.loc[:, ["movieId", "title"]]
rating = df_rating.loc[:, ["userId", "movieId", "rating"]]

In [5]:
# Number of merged rating rows fed into the pivot table.
# NOTE(review): presumably chosen to keep the pivot small enough for memory -- confirm.
MAX_MERGED_ROWS = 1000000

# Join ratings to titles on movieId. The key is named explicitly so the join
# does not silently change if the two frames ever share another column name.
data = pd.merge(movie, rating, on="movieId")
# Positional slice: keeps the first rows in merge order, not a random sample.
data = data.iloc[:MAX_MERGED_ROWS, :]
# One row per title, one column per user. pivot_table averages duplicate
# (title, userId) pairs by default; movies a user has not rated become 0.
user_movie_table = data.pivot_table(index=["title"], columns=["userId"], values="rating").fillna(0)
user_movie_table

userId,1,2,3,4,5,6,7,8,10,11,...,138484,138485,138486,138487,138488,138489,138490,138491,138492,138493
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Ace Ventura: When Nature Calls (1995),0.0,0.0,0.0,3.0,0.0,0.0,0.0,1.0,0.0,3.5,...,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0
Across the Sea of Time (1995),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Amazing Panda Adventure, The (1995)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"American President, The (1995)",0.0,0.0,0.0,0.0,5.0,0.0,4.0,0.0,4.0,0.0,...,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Angela (1995),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
When Night Is Falling (1995),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"White Balloon, The (Badkonake sefid) (1995)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
White Squall (1996),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Wings of Courage (1995),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
# Pick one movie (one row of user_movie_table) to use as the query.
# A seeded generator makes "Restart & Run All" select the same movie every
# time; set QUERY_SEED to None to get a different movie on each run.
QUERY_SEED = 42
query_rng = np.random.default_rng(QUERY_SEED)
query_index = int(query_rng.integers(user_movie_table.shape[0]))
# query_index is a positional row number in the pivot table, not a movieId,
# so it is labelled as such.
print("row index:", query_index)
print("Chosen Movie is: ", user_movie_table.index[query_index])

movie id: 111
Choosen Movie is:  Race the Sun (1996)


In [7]:
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

# Sparse copy of the title-by-user rating table used to fit the model.
user_movie_table_matrix = csr_matrix(user_movie_table.values)

# Brute-force nearest-neighbour search over movie rows using cosine distance.
# fit() returns the estimator itself, so construction and fitting are chained.
model_knn = NearestNeighbors(algorithm='brute', metric='cosine').fit(user_movie_table_matrix)

# kneighbors needs a 2-D query, so the chosen movie is selected as a one-row
# frame. Six neighbours are requested for that single query row.
query_vector = user_movie_table.iloc[[query_index]].values
distances, indices = model_knn.kneighbors(query_vector, n_neighbors=6)

In [8]:
# Show the raw kneighbors result: the distances first, then the matching
# row positions in user_movie_table, each on its own line.
print(distances, indices, sep="\n")

[[1.11022302e-15 8.43329281e-01 8.84002523e-01 8.95768624e-01
  9.03765385e-01 9.05033131e-01]]
[[111  74  17 125 119 103]]


In [9]:
# Print the recommended titles for the query movie, closest first.
neighbor_rows = indices.flatten()
neighbor_dists = distances.flatten()

print('Recommendations for {0}:\n'.format(user_movie_table.index[query_index]))

# Remove the query movie by its row index instead of assuming it is the first
# neighbour: when another movie ties with it on distance, the query can land
# at a later position and would otherwise be listed as its own recommendation.
candidates = [
    (row, dist)
    for row, dist in zip(neighbor_rows, neighbor_dists)
    if row != query_index
]

# Keep the list length at "neighbours requested minus one" even in the rare
# case where the query itself was not among the returned neighbours.
for rank, (row, dist) in enumerate(candidates[:len(neighbor_rows) - 1], start=1):
    print('{0}: {1}, with distance of {2}:'.format(rank, user_movie_table.index[row], dist))

Recommendations for Race the Sun (1996):

1: It Takes Two (1995), with distance of 0.8433292813516989:
2: Big Green, The (1995), with distance of 0.884002523194305:
3: Steal Big, Steal Little (1995), with distance of 0.895768623956341:
4: Shadows (Cienie) (1988), with distance of 0.9037653851226727:
5: Nueba Yol (1995), with distance of 0.9050331311078893:
