## **Movie Recommendation: Neighborhood-Based Collaborative Filtering**

### **Import necessary libraries**

In [1]:
import pandas as pd 
import numpy as np 
from utilities import *
from collaborative_filtering import NeighborhoodCF, build_utility_matrix, eval

In [2]:
# The ratings CSVs are read with header=None, so column names are supplied here.
_RATING_COLUMNS = ['UserID', 'MovieID', 'Rating']


def _read_ratings(csv_path):
    """Read one headerless ratings CSV into a DataFrame with _RATING_COLUMNS."""
    return pd.read_csv(csv_path, header=None, names=_RATING_COLUMNS)


# Load the three pre-made splits (train / test / validation).
train_data = _read_ratings('resources/data/train_val_test/ratings_train.csv')
test_data = _read_ratings('resources/data/train_val_test/ratings_test.csv')
val_data = _read_ratings('resources/data/train_val_test/ratings_val.csv')

In [3]:
# Append the validation ratings to the training ratings.
# ignore_index=True already gives the result a fresh 0..n-1 index, so the
# former reset_index() + drop(columns=['index']) round trip was a no-op and
# has been removed.
# NOTE: this rebinds train_data, so re-running this cell without first
# re-running the loading cell would append val_data a second time.
train_data = pd.concat([train_data, val_data], ignore_index=True)

In [4]:
# Preview the first rows of train_data (at this point it also contains the validation ratings).
train_data.head()

Unnamed: 0,UserID,MovieID,Rating
0,36527471,1659337,8
1,65089494,105695,10
2,23249666,1628064,9
3,7776823,110413,10
4,122607281,2948372,6


In [5]:
# Preview the last rows of train_data; the final index label shows the total row count minus one.
train_data.tail()

Unnamed: 0,UserID,MovieID,Rating
4161861,77954315,113870,7
4161862,4580541,91877,6
4161863,25506300,1877832,9
4161864,27367887,435761,8
4161865,364025,185014,9


In [6]:
# Number of distinct users and distinct movies present in the training ratings.
num_users = len(train_data['UserID'].unique())
num_movies = len(train_data['MovieID'].unique())
(num_users, num_movies)

(9130, 9814)

In [7]:
# Preview the first rows of the test ratings.
test_data.head()

Unnamed: 0,UserID,MovieID,Rating
0,45430544,83798,7
1,34807113,1250777,9
2,67646271,80120,4
3,51265424,1490017,8
4,51235794,11813216,8


In [8]:
# Preview the last rows of the test ratings; the final index label shows the total row count minus one.
test_data.tail()

Unnamed: 0,UserID,MovieID,Rating
462425,30899304,1136608,9
462426,22328566,108783,10
462427,26159893,333780,7
462428,35725947,3569230,6
462429,23018536,1232829,8


In [9]:
# Number of distinct users and distinct movies present in the test ratings.
# NOTE: this rebinds num_users and num_movies, which the earlier cell set to
# the training-set counts.
num_users = len(test_data['UserID'].unique())
num_movies = len(test_data['MovieID'].unique())
(num_users, num_movies)

(9130, 9814)

In [10]:
# Lookup table of the distinct MovieIDs seen in training, sorted ascending.
# reset_index() is called without drop=True, so the original row label of each
# movie's first occurrence is kept as an 'index' column next to 'MovieID'.
# NOTE(review): confirm whether the helpers that receive `movies` rely on that column.
movies = (
    train_data[['MovieID']]
    .drop_duplicates()
    .sort_values('MovieID', ascending=True)
    .reset_index()
)

In [11]:
# Lookup table of the distinct UserIDs seen in training, sorted ascending.
# reset_index() is called without drop=True, so the original row label of each
# user's first occurrence is kept as an 'index' column next to 'UserID'.
# NOTE(review): confirm whether the helpers that receive `users` rely on that column.
users = (
    train_data[['UserID']]
    .drop_duplicates()
    .sort_values('UserID', ascending=True)
    .reset_index()
)

### **Build Utility Matrix**

In [12]:
# Build the utility matrix from the training ratings using the project helper.
# The saved output shape (9130, 9814) equals the (users, movies) counts shown earlier,
# so rows are presumably users and columns movies — TODO confirm in build_utility_matrix.
utility_matrix = build_utility_matrix(train_data)
utility_matrix.shape

(9130, 9814)

## **User-User CF**

In [13]:
# Neighborhood model with 50 neighbours. uu_cf is left at its default here,
# which is presumably the user-user mode this section is about — TODO confirm.
UUCF_cosine = NeighborhoodCF(
    utility_matrix=utility_matrix,
    k_neighbors=50,
)

In [14]:
# predict_ratings() returns a pair: the predicted ratings and a length value.
# The length is displayed as a quick check; the saved output (9130) equals the
# number of distinct users — presumably one prediction row per user, TODO confirm.
cosine_uu_predicted_ratings, length = UUCF_cosine.predict_ratings()
length

9130

In [15]:
# Score the user-user predictions against the held-out test ratings.
# NOTE: `eval` is the function imported from collaborative_filtering in the
# first cell; it shadows Python's built-in eval() for the rest of the notebook.
eval(
    cosine_uu_predicted_ratings,
    test_data=test_data, users=users, movies=movies,
)

{'RMSE': 1.670311975492313,
 'Precision': 0.9069937963895703,
 'Recall': 0.6937897725803243,
 'F1': 0.7861937515185968}

In [16]:
# Top-25 recommendations for one user, using the predictions computed earlier
# (passed in as predicted_ratings).
# The ID is bound once so the call and the printed message cannot drift apart.
query_user = 685
recommendation = UUCF_cosine.recommend_using_predicted_ratings(
    id=query_user,
    predicted_ratings=cosine_uu_predicted_ratings,
    movies=movies,
    users=users,
    top_n=25,
)
print(f'Recommend movies for user {query_user} and predicted ratings:')
recommendation

Recommend movies for user 685 and predicted ratings:


[(50083, 9),
 (54215, 9),
 (60196, 9),
 (62622, 9),
 (73486, 9),
 (75314, 8),
 (76759, 8),
 (78748, 8),
 (78788, 8),
 (80684, 8),
 (81398, 8),
 (99685, 8),
 (111161, 8),
 (114709, 8),
 (120737, 8),
 (133093, 8),
 (167261, 8),
 (338013, 8),
 (425112, 8),
 (469494, 8),
 (1856101, 8),
 (2024544, 8),
 (2267998, 8),
 (2278388, 8),
 (2562232, 8)]

In [17]:
# Same query through recommend(), which is not handed the precomputed
# predictions; the saved output matches the recommend_using_predicted_ratings cell.
# The ID is bound once so the call and the printed message cannot drift apart.
query_user = 685
recommendation = UUCF_cosine.recommend(
    id=query_user,
    movies=movies,
    users=users,
    top_n=25,
)
print(f'Recommend movies for user {query_user} and predicted ratings:')
recommendation

Recommend movies for user 685 and predicted ratings:


[(50083, 9),
 (54215, 9),
 (60196, 9),
 (62622, 9),
 (73486, 9),
 (75314, 8),
 (76759, 8),
 (78748, 8),
 (78788, 8),
 (80684, 8),
 (81398, 8),
 (99685, 8),
 (111161, 8),
 (114709, 8),
 (120737, 8),
 (133093, 8),
 (167261, 8),
 (338013, 8),
 (425112, 8),
 (469494, 8),
 (1856101, 8),
 (2024544, 8),
 (2267998, 8),
 (2278388, 8),
 (2562232, 8)]

In [18]:
# NOTE(review): this cell contains only a disabled call — re-enable it or delete the cell.
# UUCF_cosine.print_recommendation(predicted_ratings=cosine_uu_predicted_ratings, movies=movies, users=users)

## **Item-Item CF**

In [19]:
# Neighborhood model with 30 neighbours and uu_cf=False, which presumably
# switches the class to the item-item mode this section is about — TODO confirm.
IICF_cosine = NeighborhoodCF(
    utility_matrix=utility_matrix,
    k_neighbors=30,
    uu_cf=False,
)

In [20]:
# predict_ratings() returns a pair: the predicted ratings and a length value.
# The length is displayed as a quick check; the saved output (9814) equals the
# number of distinct movies — presumably one prediction row per movie, TODO confirm.
# NOTE: this rebinds `length`, which the user-user section also assigned.
cosine_ii_predicted_ratings, length = IICF_cosine.predict_ratings()
length

9814

In [21]:
# Score the item-item predictions against the held-out test ratings.
# NOTE: `eval` is the function imported from collaborative_filtering in the
# first cell; it shadows Python's built-in eval() for the rest of the notebook.
eval(
    cosine_ii_predicted_ratings,
    test_data=test_data, users=users, movies=movies,
)

{'RMSE': 1.6331672788697844,
 'Precision': 0.9099554346344821,
 'Recall': 0.6994871012219047,
 'F1': 0.7909596957121661}

In [22]:
# Top-25 users for one movie, using the item-item predictions computed earlier
# (passed in as predicted_ratings). In this mode `id` is a MovieID.
# The ID is bound once so the call and the printed message cannot drift apart.
query_movie = 105695
recommendation = IICF_cosine.recommend_using_predicted_ratings(
    id=query_movie,
    predicted_ratings=cosine_ii_predicted_ratings,
    movies=movies,
    users=users,
    top_n=25,
)
print(f'Recommend users for movie {query_movie} and predicted ratings:')
recommendation

Recommend users for movie 105695 and predicted ratings:


[(278097, 10),
 (1799755, 10),
 (7288474, 10),
 (17732486, 10),
 (22862821, 10),
 (28257047, 10),
 (34767049, 10),
 (35589838, 10),
 (37989341, 10),
 (38428110, 10),
 (47584221, 10),
 (52234690, 10),
 (61246379, 10),
 (62100914, 10),
 (78181837, 10),
 (79319221, 10),
 (82574739, 10),
 (85470785, 10),
 (88179967, 10),
 (119155254, 10),
 (123990191, 10),
 (134418584, 10),
 (160672695, 10),
 (168830426, 10),
 (170780184, 10)]

In [23]:
# Same query through recommend(), which is not handed the precomputed
# predictions; the saved output matches the recommend_using_predicted_ratings cell.
# In this mode `id` is a MovieID. The ID is bound once so the call and the
# printed message cannot drift apart.
query_movie = 105695
recommendation = IICF_cosine.recommend(
    id=query_movie,
    movies=movies,
    users=users,
    top_n=25,
)
print(f'Recommend users for movie {query_movie} and predicted ratings:')
recommendation

Recommend users for movie 105695 and predicted ratings:


[(278097, 10),
 (1799755, 10),
 (7288474, 10),
 (17732486, 10),
 (22862821, 10),
 (28257047, 10),
 (34767049, 10),
 (35589838, 10),
 (37989341, 10),
 (38428110, 10),
 (47584221, 10),
 (52234690, 10),
 (61246379, 10),
 (62100914, 10),
 (78181837, 10),
 (79319221, 10),
 (82574739, 10),
 (85470785, 10),
 (88179967, 10),
 (119155254, 10),
 (123990191, 10),
 (134418584, 10),
 (160672695, 10),
 (168830426, 10),
 (170780184, 10)]

In [24]:
# NOTE(review): this cell contains only a disabled call — re-enable it or delete the cell.
# IICF_cosine.print_recommendation(predicted_ratings=cosine_ii_predicted_ratings, movies=movies, users=users)