In [51]:
import pandas as pd


In [52]:
# Read the data set. The first CSV column (the user labels) becomes the row
# index, so rows are users and columns are movies; blank cells load as NaN.
# NOTE(review): '/content/...' is an absolute path (looks Colab-style) — confirm
# the file location before running this anywhere else.
ratings=pd.read_csv('/content/toy_dataset.csv', index_col=0)
ratings

Unnamed: 0,action1,action2,action3,romantic1,romantic2,romantic3
user 1,4.0,5.0,3.0,,2.0,1.0
user 2,5.0,3.0,3.0,2.0,2.0,
user 3,1.0,,,4.0,5.0,4.0
user 4,,2.0,1.0,4.0,,3.0
user 5,1.0,,2.0,3.0,3.0,4.0


In [53]:
# Replace NaN values with 0. From here on a rating of 0 means "not rated";
# the prediction loop further down tests `== 0` to find the gaps to fill.
ratings=ratings.fillna(0)
ratings

Unnamed: 0,action1,action2,action3,romantic1,romantic2,romantic3
user 1,4.0,5.0,3.0,0.0,2.0,1.0
user 2,5.0,3.0,3.0,2.0,2.0,0.0
user 3,1.0,0.0,0.0,4.0,5.0,4.0
user 4,0.0,2.0,1.0,4.0,0.0,3.0
user 5,1.0,0.0,2.0,3.0,3.0,4.0


In [54]:
# Transpose the matrix so that rows are movies and columns are users: the
# nearest-neighbour search below compares rows, i.e. movie-to-movie similarity.
# NOTE: this overwrites `ratings`, so re-running this cell on its own flips the
# table back to users x movies. Run the notebook top to bottom.
ratings=ratings.T
ratings

Unnamed: 0,user 1,user 2,user 3,user 4,user 5
action1,4.0,5.0,1.0,0.0,1.0
action2,5.0,3.0,0.0,2.0,0.0
action3,3.0,3.0,0.0,1.0,2.0
romantic1,0.0,2.0,4.0,4.0,3.0
romantic2,2.0,2.0,5.0,0.0,3.0
romantic3,1.0,0.0,4.0,3.0,4.0


In [55]:
# Use K-Nearest Neighbors (KNN) algorithm with cosine distance measurement.
# Each row of `ratings` (one movie's ratings across all users) is one point.
from sklearn.neighbors import NearestNeighbors
knn = NearestNeighbors(metric='cosine', algorithm='brute')
knn.fit(ratings.values)
# The query points are the training rows themselves, so each movie normally
# comes back as its own first neighbour (distance ~0). n_neighbors=3 therefore
# yields at most 2 *other* movies per row. Keep this 3 in sync with the
# `n_neighbors` variable used by the prediction loop.
distances, indices = knn.kneighbors(ratings.values, n_neighbors=3)

In [56]:
# Show distances: row i holds the cosine distances from movie i to its 3
# nearest neighbours, smallest first (values near 0 mean very similar).
distances

array([[0.00000000e+00, 7.78536489e-02, 1.34151282e-01],
       [0.00000000e+00, 1.20536881e-01, 1.34151282e-01],
       [0.00000000e+00, 7.78536489e-02, 1.20536881e-01],
       [0.00000000e+00, 7.99125875e-02, 2.40927885e-01],
       [1.11022302e-16, 1.90476190e-01, 2.40927885e-01],
       [0.00000000e+00, 7.99125875e-02, 1.90476190e-01]])

In [57]:
# Show indices: row i holds the row positions (in `ratings`) of movie i's
# nearest neighbours, aligned element-by-element with `distances`.
indices

array([[0, 2, 1],
       [1, 2, 0],
       [2, 0, 1],
       [3, 5, 4],
       [4, 5, 3],
       [5, 3, 4]])

In [58]:
# Predict the missing ratings of one user from the movies most similar to each
# unrated movie (item-based collaborative filtering). Predictions are written
# into `ratings1`, a copy, so the observed ratings in `ratings` stay untouched.
ratings1 = ratings.copy()

# Must equal the n_neighbors passed to knn.kneighbors() when `distances` and
# `indices` were computed.
n_neighbors=3

# The user whose unrated movies get a predicted rating.
target_user = 'user 3'
user_index = ratings.columns.tolist().index(target_user)

# m is the row position of the movie, n its label (not needed below).
for m, n in enumerate(ratings.index):

  # Only movies the user has not rated (stored as 0) need a prediction.
  if ratings.iloc[m, user_index] != 0:
    continue

  sim_movies = indices[m].tolist()
  movie_distances = distances[m].tolist()

  # The nearest-neighbours list usually contains movie m itself (distance ~0).
  # Drop it, together with its distance, so only the *other* movies remain.
  if m in sim_movies:
    id_movie = sim_movies.index(m)
    sim_movies.pop(id_movie)
    movie_distances.pop(id_movie)

  # When several movies tie at distance 0, movie m may be missing from its own
  # list. Drop the farthest entry instead so the neighbour count is the same.
  else:
    sim_movies = sim_movies[:n_neighbors-1]
    movie_distances = movie_distances[:n_neighbors-1]

  # Similarity-weighted average of the user's ratings of the neighbouring
  # movies. KNN reports cosine *distance*, which is 1 - cosine similarity, so
  # the similarity weight is recovered as 1 - distance.
  #
  # Neighbours the user has not rated are skipped. Both sums are accumulated
  # in one pass: the previous version removed weights from a copied list using
  # indices of the original list, which raised IndexError when both neighbours
  # were unrated and could drop the wrong weight with more neighbours.
  sum_ratings = 0
  sum_similarity = 0
  for neighbor, distance in zip(sim_movies, movie_distances):
    neighbor_rating = ratings.iloc[neighbor, user_index]
    if neighbor_rating == 0:
      continue
    similarity = 1 - distance
    sum_ratings = sum_ratings + similarity*neighbor_rating
    sum_similarity = sum_similarity + similarity

  # No rated neighbour, or no positive total weight: fall back to 0, which the
  # rest of the notebook treats as "no prediction".
  if sum_similarity > 0:
    predicted_rate = sum_ratings/sum_similarity
  else:
    predicted_rate = 0

  ratings1.iloc[m,user_index] = predicted_rate

In [59]:
# Show the ratings, with user 3's unrated movies replaced by predicted values
# (all other cells are copied unchanged from `ratings`).
ratings1

Unnamed: 0,user 1,user 2,user 3,user 4,user 5
action1,4.0,5.0,1.0,0.0,1.0
action2,5.0,3.0,1.0,2.0,0.0
action3,3.0,3.0,1.0,1.0,2.0
romantic1,0.0,2.0,4.0,4.0,3.0
romantic2,2.0,2.0,5.0,0.0,3.0
romantic3,1.0,0.0,4.0,3.0,4.0


In [69]:
# Create the movie recommender
def recommend_movies(user, num_movies):
  """Print what `user` has already rated and the best-scoring unrated movies.

  Reads two module-level frames laid out as movies (rows) x users (columns):
  `ratings`, where 0 marks a movie the user has not rated, and `ratings1`,
  which supplies the score used to rank those unrated movies.

  Args:
    user: column label of the user in both frames.
    num_movies: maximum number of recommendations to print.

  Returns:
    None. Everything is written to standard output.
  """
  print('The list of the movies that {} has watched:'.format(user))

  # Movies with a positive rating count as watched.
  for title in ratings.index[ratings[user] > 0]:
    print(title)

  # Pair every unrated movie with its value in ratings1, looked up by position.
  row_labels = ratings.index.tolist()
  candidates = [
    (title, ratings1.iloc[row_labels.index(title), ratings1.columns.tolist().index(user)])
    for title in ratings.index[ratings[user] == 0]
  ]

  # Highest score first; the sort is stable, so ties keep table order.
  candidates.sort(key=lambda pair: pair[1], reverse=True)

  print('')
  print('The list of the recommended movies:')
  for position, (title, score) in enumerate(candidates[:num_movies], start=1):
    print('Recommendation {}: {} (Predicted Rating: {})'.format(position, title, score))

In [70]:
# Recommend 2 movies to user 3 (the user whose unrated movies were given
# predicted ratings in `ratings1`).
recommend_movies('user 3', 2)

The list of the movies that user 3 has watched:
action1
romantic1
romantic2
romantic3

The list of the recommended movies:
Recommendation 1: action2 (Predicted Rating: 1.0)
Recommendation 2: action3 (Predicted Rating: 1.0)
