## **Hyperparameter Tuning: Matrix Factorization Collaborative Filtering**

### **Import necessary libraries**

In [3]:
import math
from itertools import product

import numpy as np
import pandas as pd

from collaborative_filtering import build_utility_matrix, mf_hyperparameter_tuning

In [4]:
# Load the train / test / validation rating splits. The CSV files carry no
# header row, so the three column names are supplied explicitly.
train_data, test_data, val_data = [
    pd.read_csv(csv_path, header=None, names=['UserID', 'MovieID', 'Rating'])
    for csv_path in (
        'resources/data/train_val_test/ratings_train.csv',
        'resources/data/train_val_test/ratings_test.csv',
        'resources/data/train_val_test/ratings_val.csv',
    )
]

In [5]:
# Preview the first rows of the training split to confirm the columns parsed as expected.
train_data.head()

Unnamed: 0,UserID,MovieID,Rating
0,36527471,1659337,8
1,65089494,105695,10
2,23249666,1628064,9
3,7776823,110413,10
4,122607281,2948372,6


In [6]:
# Preview the last rows of the training split; the final index label also shows how many rows were read.
train_data.tail()

Unnamed: 0,UserID,MovieID,Rating
3699431,58251797,4729430,8
3699432,93006392,2545428,8
3699433,18815933,2537176,6
3699434,28443655,414982,3
3699435,1067456,108399,10


In [7]:
# Count the distinct users and movies that appear in the training split
# and display both counts as a tuple.
num_users = len(train_data['UserID'].unique())
num_movies = len(train_data['MovieID'].unique())
num_users, num_movies

(9130, 9814)

In [8]:
# Preview the first rows of the test split.
test_data.head()

Unnamed: 0,UserID,MovieID,Rating
0,45430544,83798,7
1,34807113,1250777,9
2,67646271,80120,4
3,51265424,1490017,8
4,51235794,11813216,8


In [9]:
# Preview the last rows of the test split.
test_data.tail()

Unnamed: 0,UserID,MovieID,Rating
462425,30899304,1136608,9
462426,22328566,108783,10
462427,26159893,333780,7
462428,35725947,3569230,6
462429,23018536,1232829,8


In [10]:
# Count the distinct users and movies in the test split.
# Note: this rebinds num_users / num_movies, which the training-split cell
# also assigns, so afterwards they hold the test-split counts.
num_users = len(test_data['UserID'].unique())
num_movies = len(test_data['MovieID'].unique())
num_users, num_movies

(9130, 9814)

In [11]:
# Preview the first rows of the validation split.
val_data.head()

Unnamed: 0,UserID,MovieID,Rating
0,131612777,891592,2
1,23320225,2382320,7
2,28071275,45758,3
3,23856336,68699,10
4,47196478,95327,7


In [12]:
# Preview the last rows of the validation split.
val_data.tail()

Unnamed: 0,UserID,MovieID,Rating
462425,77954315,113870,7
462426,4580541,91877,6
462427,25506300,1877832,9
462428,27367887,435761,8
462429,364025,185014,9


In [13]:
# Count the distinct users and movies in the validation split.
# Note: this rebinds num_users / num_movies once more, so from here on they
# hold the validation-split counts.
num_users = len(val_data['UserID'].unique())
num_movies = len(val_data['MovieID'].unique())
num_users, num_movies

(9130, 9814)

In [14]:
# Distinct movie IDs seen in the training split, sorted ascending.
# reset_index() is called without drop=True, so the pre-sort row labels are
# retained in an extra 'index' column next to 'MovieID'.
movies = (
    pd.DataFrame(train_data['MovieID'].drop_duplicates(), columns=['MovieID'])
    .sort_values('MovieID', ascending=True)
    .loc[:, ['MovieID']]
    .reset_index()
)

In [15]:
# Sorted movie IDs as a plain Python list.
movie_ids = movies['MovieID'].tolist()

In [16]:
# Distinct user IDs seen in the training split, sorted ascending.
# reset_index() is called without drop=True, so the pre-sort row labels are
# retained in an extra 'index' column next to 'UserID'.
users = (
    pd.DataFrame(train_data['UserID'].drop_duplicates(), columns=['UserID'])
    .sort_values('UserID', ascending=True)
    .loc[:, ['UserID']]
    .reset_index()
)

In [17]:
# Sorted user IDs as a plain Python list.
user_ids = users['UserID'].tolist()

### **Build Utility Matrix**

In [18]:
# Convert the training ratings frame into the utility matrix used by the
# tuning step. build_utility_matrix is a project helper; the displayed result
# is a 2-D float array whose visible entries are mostly 0.
# NOTE(review): presumably rows are users, columns are movies, and 0 marks
# "no rating" — confirm against build_utility_matrix.
train_R = build_utility_matrix(train_data)
train_R

array([[0., 0., 7., ..., 0., 0., 0.],
       [0., 0., 0., ..., 7., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [19]:
# Build the validation utility matrix with the same helper used for training.
# NOTE(review): train_R and val_R are built independently from different frames;
# this assumes the helper lays out users/movies identically for both splits —
# confirm, since both matrices are passed together to mf_hyperparameter_tuning.
val_R = build_utility_matrix(val_data)

### **Defining the Hyperparameter Grid**

In [20]:
# Hyperparameter grid for the matrix-factorization model.
# `product` is imported with the other imports in the notebook's first code
# cell rather than here, so all dependencies are visible in one place.
K_values = [30, 35, 40]                   # logged as "K=" (presumably the latent-factor count)
learning_rates = [0.0005, 0.001, 0.005]   # logged as "lr="
regularization_values = [0.05, 0.1]       # logged as "reg="
epochs_values = [10]                      # logged as "epochs="

# Cartesian product of the grid: 3 * 3 * 2 * 1 = 18 tuples, each ordered as
# (K, learning_rate, regularization, epochs). The order of the arguments to
# product() fixes the order of the fields in every tuple.
hyperparameter_combinations = list(
    product(K_values, learning_rates, regularization_values, epochs_values)
)


### **Evaluating each combination**

In [21]:
# Run the grid search: each tuple in hyperparameter_combinations is passed to
# the project helper together with the training and validation utility
# matrices. The helper logs per-epoch train/validation RMSE for every
# combination and returns a frame that includes a 'validation_RMSE' column
# (presumably one row per combination — confirm against the helper).
results_df = mf_hyperparameter_tuning(
    hyperparameter_combinations=hyperparameter_combinations,
    train_R=train_R,
    val_R=val_R,
)

Evaluating combination 1/18: K=30, lr=0.0005, reg=0.05, epochs=10
Epoch: 1 - Train RMSE: 1.9154, Validation RMSE: 1.8240
Epoch: 2 - Train RMSE: 1.7925, Validation RMSE: 1.7511
Epoch: 3 - Train RMSE: 1.7415, Validation RMSE: 1.7120
Epoch: 4 - Train RMSE: 1.7115, Validation RMSE: 1.6867
Epoch: 5 - Train RMSE: 1.6910, Validation RMSE: 1.6688
Epoch: 6 - Train RMSE: 1.6761, Validation RMSE: 1.6554
Epoch: 7 - Train RMSE: 1.6647, Validation RMSE: 1.6449
Epoch: 8 - Train RMSE: 1.6558, Validation RMSE: 1.6366
Epoch: 9 - Train RMSE: 1.6485, Validation RMSE: 1.6297
Epoch: 10 - Train RMSE: 1.6426, Validation RMSE: 1.6241
Evaluating combination 2/18: K=30, lr=0.0005, reg=0.1, epochs=10
Epoch: 1 - Train RMSE: 1.9190, Validation RMSE: 1.8260
Epoch: 2 - Train RMSE: 1.8014, Validation RMSE: 1.7540
Epoch: 3 - Train RMSE: 1.7536, Validation RMSE: 1.7153
Epoch: 4 - Train RMSE: 1.7254, Validation RMSE: 1.6903
Epoch: 5 - Train RMSE: 1.7064, Validation RMSE: 1.6725
Epoch: 6 - Train RMSE: 1.6926, Validation R

In [22]:
# Select the row of results_df whose validation RMSE is smallest and print it
# under a heading.
lowest_rmse_label = results_df['validation_RMSE'].idxmin()
best_result = results_df.loc[lowest_rmse_label]
print("Best Hyperparameters:", best_result, sep="\n")

Best Hyperparameters:
K                  30.000000
learning_rate       0.005000
regularization      0.050000
epochs             10.000000
validation_RMSE     1.475371
Name: 4, dtype: float64
