## **Hyperparameter Tuning: Neighborhood-Based Collaborative Filtering**

### **Import necessary libraries**

In [3]:
import math
from itertools import product

import numpy as np
import pandas as pd

from collaborative_filtering import build_utility_matrix, neighborhood_hyperparameter_tuning

In [4]:
# Load the train / test / validation rating splits.
# header=None makes pandas read the first line of each file as data, so the
# column names are supplied explicitly.
rating_columns = ['UserID', 'MovieID', 'Rating']
train_data = pd.read_csv(
    'resources/data/train_val_test/ratings_train.csv',
    header=None,
    names=rating_columns,
)
test_data = pd.read_csv(
    'resources/data/train_val_test/ratings_test.csv',
    header=None,
    names=rating_columns,
)
val_data = pd.read_csv(
    'resources/data/train_val_test/ratings_val.csv',
    header=None,
    names=rating_columns,
)

In [5]:
# Preview the first five rows of the training split.
train_data.head(n=5)

Unnamed: 0,UserID,MovieID,Rating
0,36527471,1659337,8
1,65089494,105695,10
2,23249666,1628064,9
3,7776823,110413,10
4,122607281,2948372,6


In [6]:
# Preview the last five rows of the training split (also shows the row count via the index).
train_data.tail(n=5)

Unnamed: 0,UserID,MovieID,Rating
3699431,58251797,4729430,8
3699432,93006392,2545428,8
3699433,18815933,2537176,6
3699434,28443655,414982,3
3699435,1067456,108399,10


In [7]:
# Count the distinct users and movies that appear in the training split.
num_users = len(train_data['UserID'].unique())
num_movies = len(train_data['MovieID'].unique())
(num_users, num_movies)

(9130, 9814)

In [8]:
# Preview the first five rows of the test split.
test_data.head(n=5)

Unnamed: 0,UserID,MovieID,Rating
0,45430544,83798,7
1,34807113,1250777,9
2,67646271,80120,4
3,51265424,1490017,8
4,51235794,11813216,8


In [9]:
# Preview the last five rows of the test split.
test_data.tail(n=5)

Unnamed: 0,UserID,MovieID,Rating
462425,30899304,1136608,9
462426,22328566,108783,10
462427,26159893,333780,7
462428,35725947,3569230,6
462429,23018536,1232829,8


In [10]:
# Count the distinct users and movies that appear in the test split.
# Note: this reassigns num_users / num_movies, which now describe test_data.
num_users = len(test_data['UserID'].unique())
num_movies = len(test_data['MovieID'].unique())
(num_users, num_movies)

(9130, 9814)

In [11]:
# Preview the first five rows of the validation split.
val_data.head(n=5)

Unnamed: 0,UserID,MovieID,Rating
0,131612777,891592,2
1,23320225,2382320,7
2,28071275,45758,3
3,23856336,68699,10
4,47196478,95327,7


In [12]:
# Preview the last five rows of the validation split.
val_data.tail(n=5)

Unnamed: 0,UserID,MovieID,Rating
462425,77954315,113870,7
462426,4580541,91877,6
462427,25506300,1877832,9
462428,27367887,435761,8
462429,364025,185014,9


In [13]:
# Count the distinct users and movies that appear in the validation split.
# Note: this reassigns num_users / num_movies, which now describe val_data.
num_users = len(val_data['UserID'].unique())
num_movies = len(val_data['MovieID'].unique())
(num_users, num_movies)

(9130, 9814)

In [14]:
# Sorted table of the distinct MovieIDs seen in the training split.
# reset_index() is called without drop=True, so the train_data row label of
# each movie's first occurrence is kept in an 'index' column next to 'MovieID'.
movies = (
    train_data['MovieID']
    .drop_duplicates()
    .to_frame()
    .sort_values(by='MovieID')
    .reset_index()
)

In [15]:
# Sorted table of the distinct UserIDs seen in the training split.
# reset_index() is called without drop=True, so the train_data row label of
# each user's first occurrence is kept in an 'index' column next to 'UserID'.
users = (
    train_data['UserID']
    .drop_duplicates()
    .to_frame()
    .sort_values(by='UserID')
    .reset_index()
)

### **Build Utility Matrix**

In [16]:
# Turn the long-format training ratings into the matrix that is passed as R to
# the tuning helper below.
# NOTE(review): the recorded output is a float array that is mostly 0. — presumably
# 0 marks "no rating"; the row/column orientation (users vs. movies) is not
# visible here, confirm in collaborative_filtering.build_utility_matrix.
utility_matrix = build_utility_matrix(train_data)
utility_matrix

array([[0., 0., 7., ..., 0., 0., 0.],
       [0., 0., 0., ..., 7., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

### **Defining the Hyperparameter Grid**

In [17]:
# Hyperparameter grid for User-User CF.
# `product` comes from the notebook's import cell. Each combination is a
# (k_neighbors, uu_cf, cosine) tuple, in that order.
k_neighbors = [10, 15, 20, 25, 30, 35, 40, 45, 50]  # candidate neighbourhood sizes
uu_cf = [True]  # True selects the user-user variant
cosine = [True, False]  # True = cosine similarity; False = the helper's other similarity measure

# Cartesian product of the grid: 9 * 1 * 2 = 18 combinations.
uu_hyperparameter_combinations = list(product(k_neighbors, uu_cf, cosine))


In [18]:
# Hyperparameter grid for Item-Item CF (uu_cf=False switches off the user-user variant).
# `product` comes from the notebook's import cell. Each combination is a
# (k_neighbors, uu_cf, cosine) tuple, in that order.
# Note: k_neighbors, uu_cf and cosine are reassigned here; after this cell
# uu_cf no longer holds the user-user value.
k_neighbors = [10, 15, 20, 25, 30, 35, 40, 45, 50]  # candidate neighbourhood sizes
uu_cf = [False]  # False selects the item-item variant
cosine = [True, False]  # True = cosine similarity; False = the helper's other similarity measure

# Cartesian product of the grid: 9 * 1 * 2 = 18 combinations.
ii_hyperparameter_combinations = list(product(k_neighbors, uu_cf, cosine))

### **Evaluating each combination**

In [19]:
# Score every User-User combination with the project helper; the returned
# table is later searched for the row with the smallest 'rmse'.
# NOTE(review): val_data loaded earlier is not passed in — only
# validation_ratio=0.2 is given, so the helper presumably holds out part of R
# itself; confirm this is the intended validation set.
# NOTE(review): in the recorded output each cosine=False run emits numpy
# "c /= stddev" warnings (likely a zero-variance row in a correlation
# computation) — worth checking inside the helper.
uu_results_df = neighborhood_hyperparameter_tuning(
    R=utility_matrix,
    hyperparameter_combinations=uu_hyperparameter_combinations,
    validation_ratio=0.2
)

Evaluating combination 1/18: k_neighbors=10, uu_cf=True, cosine=True
Evaluating combination 2/18: k_neighbors=10, uu_cf=True, cosine=False


  c /= stddev[:, None]
  c /= stddev[None, :]


Evaluating combination 3/18: k_neighbors=15, uu_cf=True, cosine=True
Evaluating combination 4/18: k_neighbors=15, uu_cf=True, cosine=False


  c /= stddev[:, None]
  c /= stddev[None, :]


Evaluating combination 5/18: k_neighbors=20, uu_cf=True, cosine=True
Evaluating combination 6/18: k_neighbors=20, uu_cf=True, cosine=False


  c /= stddev[:, None]
  c /= stddev[None, :]


Evaluating combination 7/18: k_neighbors=25, uu_cf=True, cosine=True
Evaluating combination 8/18: k_neighbors=25, uu_cf=True, cosine=False


  c /= stddev[:, None]
  c /= stddev[None, :]


Evaluating combination 9/18: k_neighbors=30, uu_cf=True, cosine=True
Evaluating combination 10/18: k_neighbors=30, uu_cf=True, cosine=False


  c /= stddev[:, None]
  c /= stddev[None, :]


Evaluating combination 11/18: k_neighbors=35, uu_cf=True, cosine=True
Evaluating combination 12/18: k_neighbors=35, uu_cf=True, cosine=False


  c /= stddev[:, None]
  c /= stddev[None, :]


Evaluating combination 13/18: k_neighbors=40, uu_cf=True, cosine=True
Evaluating combination 14/18: k_neighbors=40, uu_cf=True, cosine=False


  c /= stddev[:, None]
  c /= stddev[None, :]


Evaluating combination 15/18: k_neighbors=45, uu_cf=True, cosine=True
Evaluating combination 16/18: k_neighbors=45, uu_cf=True, cosine=False


  c /= stddev[:, None]
  c /= stddev[None, :]


Evaluating combination 17/18: k_neighbors=50, uu_cf=True, cosine=True
Evaluating combination 18/18: k_neighbors=50, uu_cf=True, cosine=False


  c /= stddev[:, None]
  c /= stddev[None, :]


In [20]:
# Find the combination with the lowest validation RMSE
best_result_uu = uu_results_df.loc[uu_results_df['rmse'].idxmin()]
print("Best Hyperparameters for User-User CF:")
print(best_result_uu)

Best Hyperparameters for User-User CF:
k_neighbors          50
uu_cf              True
cosine             True
rmse           1.720172
Name: 16, dtype: object


In [21]:
# Score every Item-Item combination with the project helper; the returned
# table is later searched for the row with the smallest 'rmse'.
# NOTE(review): val_data loaded earlier is not passed in — only
# validation_ratio=0.2 is given, so the helper presumably holds out part of R
# itself; confirm this is the intended validation set.
ii_results_df = neighborhood_hyperparameter_tuning(
    R=utility_matrix,
    hyperparameter_combinations=ii_hyperparameter_combinations,
    validation_ratio=0.2
)

Evaluating combination 1/18: k_neighbors=10, uu_cf=False, cosine=True
Evaluating combination 2/18: k_neighbors=10, uu_cf=False, cosine=False
Evaluating combination 3/18: k_neighbors=15, uu_cf=False, cosine=True
Evaluating combination 4/18: k_neighbors=15, uu_cf=False, cosine=False
Evaluating combination 5/18: k_neighbors=20, uu_cf=False, cosine=True
Evaluating combination 6/18: k_neighbors=20, uu_cf=False, cosine=False
Evaluating combination 7/18: k_neighbors=25, uu_cf=False, cosine=True
Evaluating combination 8/18: k_neighbors=25, uu_cf=False, cosine=False
Evaluating combination 9/18: k_neighbors=30, uu_cf=False, cosine=True
Evaluating combination 10/18: k_neighbors=30, uu_cf=False, cosine=False
Evaluating combination 11/18: k_neighbors=35, uu_cf=False, cosine=True
Evaluating combination 12/18: k_neighbors=35, uu_cf=False, cosine=False
Evaluating combination 13/18: k_neighbors=40, uu_cf=False, cosine=True
Evaluating combination 14/18: k_neighbors=40, uu_cf=False, cosine=False
Evaluati

In [22]:
# Find the combination with the lowest validation RMSE
best_result_ii = ii_results_df.loc[ii_results_df['rmse'].idxmin()]
print("Best Hyperparameters for Item-Item CF:")
print(best_result_ii)

Best Hyperparameters for Item-Item CF:
k_neighbors          30
uu_cf             False
cosine             True
rmse           1.695939
Name: 8, dtype: object
