## **Hyperparemeter Tuning: Matrix Factorized Collaborative Filtering**

### **Import necessary library**

In [1]:
import pandas as pd 
import numpy as np 
import math 
from collaborative_filtering import MatrixFactorizationCF, build_utility_matrix, hyperparameter_tuning, train_validation_split

In [2]:
# import pandas as pd

# part_ids = [2, 3, 4, 5, 6, 7, 8, 9, 10, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71]

# # Read the first file
# data = pd.read_csv('resources/ratings/users_ratings_part1.csv', header=None)

# # Loop through other files
# for id in part_ids:
#     file_path = f'resources/ratings/users_ratings_part{id}.csv'
#     try:
#         tmp = pd.read_csv(file_path, on_bad_lines="skip", header=None)  # Skip problematic rows
#         data = pd.concat([data, tmp], ignore_index=True)
#     except Exception as e:
#         print(f"Error reading file {file_path}: {e}")

# data.columns =['UserID', 'MovieID', 'Rating']

data = pd.read_csv('resources/ratings/users_ratings_part1.csv', header=None)
data.columns =['UserID', 'MovieID', 'Rating']

In [3]:
data = data.sort_values(by=['UserID', 'MovieID'])
data = data.drop_duplicates()
data = data.reset_index()
data = data.drop(columns=['index'])

In [4]:
data.head()

Unnamed: 0,UserID,MovieID,Rating
0,9423,27977,8
1,9423,31381,9
2,9423,32138,10
3,9423,33467,10
4,9423,38650,10


In [5]:
data.tail()

Unnamed: 0,UserID,MovieID,Rating
54972,186180127,468569,10
54973,186180127,1255953,10
54974,186180127,9362722,10
54975,186180127,11329280,10
54976,186180127,15239678,10


In [6]:
# Count the number of interactions for each movie
movie_counts =data['MovieID'].value_counts()

# Filter out movies appear < 10 times 
movies_to_keep = movie_counts[movie_counts >= 10].index
data = data[data['MovieID'].isin(movies_to_keep)]

# Count the number of interactions for each user
user_counts = data['UserID'].value_counts()

# Filter out users appear < 10 times 
users_to_keep = user_counts[user_counts >= 10].index 
data = data[data['UserID'].isin(users_to_keep)]

In [7]:
num_users = data.UserID.unique().shape[0]
num_movies = data.MovieID.unique().shape[0]
num_users, num_movies

(267, 910)

### **Split data**

In [8]:
def split_data(df, test_size=0.2, random_state=42):
    np.random.seed(random_state)
    
    # Preprocessing Step: Remove movies with frequency = 1
    movie_counts = df['MovieID'].value_counts()
    movies_to_keep = movie_counts[movie_counts > 1].index
    df = df[df['MovieID'].isin(movies_to_keep)].reset_index(drop=True)
    
    # Initialize sets to keep track of test indices
    test_indices = set()
    
    # Ensure each user has at least one entry in test set
    for user_id, group in df.groupby('UserID'):
        if len(group) >= 1:
            test_count = max(1, int(len(group) * test_size))
            test_sample = np.random.choice(group.index, size=test_count, replace=False)
            test_indices.update(test_sample)
    
    # Ensure each movie has at least one entry in test set
    for movie_id, group in df.groupby('MovieID'):
        # Find indices not already in test set
        remaining_indices = list(set(group.index) - test_indices)
        if remaining_indices:
            test_sample = np.random.choice(remaining_indices, size=1, replace=False)
            test_indices.update(test_sample)
    
    # Create test and train datasets
    test_df = df.loc[list(test_indices)].reset_index(drop=True)
    train_df = df.drop(list(test_indices)).reset_index(drop=True)
    
    # Optional: Verify that both train and test have the same users and movies
    common_users = set(train_df['UserID']).intersection(set(test_df['UserID']))
    common_movies = set(train_df['MovieID']).intersection(set(test_df['MovieID']))
    
    train_df = train_df[train_df['UserID'].isin(common_users) & train_df['MovieID'].isin(common_movies)].reset_index(drop=True)
    test_df = test_df[test_df['UserID'].isin(common_users) & test_df['MovieID'].isin(common_movies)].reset_index(drop=True)
    
    return train_df, test_df

# Example usage:
# Assuming your dataframe is named `df` and has columns 'UserID' and 'MovieID'
# train, test = split_data(df, test_size=0.2, random_state=42)


In [9]:
train_data, test_data = split_data(data)

In [10]:
train_data.head()

Unnamed: 0,UserID,MovieID,Rating
0,9423,27977,8
1,9423,31381,9
2,9423,32138,10
3,9423,33467,10
4,9423,38650,10


In [11]:
train_data.tail()

Unnamed: 0,UserID,MovieID,Rating
28376,186180127,172495,10
28377,186180127,407887,10
28378,186180127,468569,10
28379,186180127,9362722,10
28380,186180127,15239678,10


In [12]:
num_users = train_data.UserID.unique().shape[0]
num_movies = train_data.MovieID.unique().shape[0]
num_users, num_movies

(267, 910)

In [13]:
test_data.head()

Unnamed: 0,UserID,MovieID,Rating
0,106825954,365748,1
1,9423,53125,9
2,9423,52618,8
3,9423,53291,7
4,106825954,372784,8


In [14]:
test_data.tail()

Unnamed: 0,UserID,MovieID,Rating
8134,106825954,209144,9
8135,106825954,234215,7
8136,106825954,245429,9
8137,106825954,268380,8
8138,106825954,290334,7


In [15]:
num_users = test_data.UserID.unique().shape[0]
num_movies = test_data.MovieID.unique().shape[0]
num_users, num_movies

(267, 910)

In [16]:
movies = train_data.MovieID.drop_duplicates()
movies = pd.DataFrame(movies, columns=['MovieID'])
movies = movies.sort_values('MovieID', ascending=True)
movies = movies[['MovieID']]
movies = movies.reset_index()

In [17]:
movie_ids = movies.MovieID.tolist()

In [18]:
users = train_data.UserID.drop_duplicates()
users = pd.DataFrame(users, columns=['UserID'])
users = users.sort_values('UserID', ascending=True)
users = users[['UserID']]
users = users.reset_index()

In [19]:
user_ids = users.UserID.tolist()

### **Build Utility Matrix**

In [20]:
utility_matrix = build_utility_matrix(train_data)
utility_matrix

array([[ 8.,  9., 10., ...,  0.,  0.,  0.],
       [10.,  7.,  9., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       ...,
       [ 0.,  0.,  0., ...,  7.,  8.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ..., 10.,  0.,  0.]])

### **Train, validation split utility_matrix**

In [21]:
def train_validation_split(R: np.ndarray, validation_ratio: float = 0.2, seed: int = 42) -> tuple[np.ndarray, np.ndarray]:
    """
    Split the rating matrix R into training and validation sets.
    
    Parameters:
    - R (np.ndarray): User-item rating matrix.
    - validation_ratio (float): Proportion of ratings to include in the validation set.
    - seed (int): Random seed for reproducibility.
    
    Returns:
    - train_R (np.ndarray): Training rating matrix.
    - validation_R (np.ndarray): Validation rating matrix.
    """
    np.random.seed(seed)
    train_R = R.copy()
    validation_R = np.zeros(R.shape)
    
    for user in range(R.shape[0]):
        rated_items = np.where(R[user, :] > 0)[0]
        if len(rated_items) == 0:
            continue  # Skip users with no ratings
        n_validation = max(1, int(len(rated_items) * validation_ratio))
        validation_items = np.random.choice(rated_items, size=n_validation, replace=False)
        train_R[user, validation_items] = 0
        validation_R[user, validation_items] = R[user, validation_items]
    
    return train_R, validation_R

### **Defining the Hyperparameter Grid**

In [22]:
from itertools import product

# Define the hyperparameter grid
K_values = [30, 35, 40, 45, 50]
learning_rates = [0.0005, 0.001, 0.005, 0.01, 0.05, 0.1]
regularization_values = [0.005, 0.01, 0.02, 0.05, 0.1]
epochs_values = [50]

# Create all possible combinations of hyperparameters
hyperparameter_combinations = list(product(K_values, learning_rates, regularization_values, epochs_values))


### **Evaluating each combination**

In [23]:
from sklearn.metrics import root_mean_squared_error

def hyperparameter_tuning(
    R: np.ndarray,
    user_ids: list[int],
    movie_ids: list[int],
    hyperparameter_combinations: list[tuple[int, float, float, int]],
    validation_ratio: float = 0.2,
    top_n: int = 5
) -> pd.DataFrame:
    """
    Perform hyperparameter tuning for the MatrixFactorizationCF model.
    
    Parameters:
    - R (np.ndarray): Original user-item rating matrix.
    - user_ids (List[int]): List of unique UserIDs.
    - movie_ids (List[int]): List of unique MovieIDs.
    - hyperparameter_combinations (List[Tuple[int, float, float, int]]): List of hyperparameter tuples.
    - validation_ratio (float): Proportion of ratings to include in the validation set.
    - top_n (int): Number of top recommendations to consider during evaluation.
    
    Returns:
    - results_df (pd.DataFrame): DataFrame containing hyperparameters and corresponding validation RMSE.
    """
    results = []
    
    # Split the data once to ensure consistency across hyperparameter evaluations
    train_R, validation_R = train_validation_split(R, validation_ratio=validation_ratio)
    
    for idx, (K, lr, reg, epochs) in enumerate(hyperparameter_combinations):
        print(f"Evaluating combination {idx + 1}/{len(hyperparameter_combinations)}: K={K}, lr={lr}, reg={reg}, epochs={epochs}")
        
        # Initialize and train the model
        mf = MatrixFactorizationCF(
            R=train_R,
            K=K,
            learning_rate=lr,
            epochs=epochs,
            regularization=reg,
            uu_mf=True,  # or False, depending on your focus
            min_rating=1.0,
            max_rating=10.0
        )
        mf.train()
        
        # Generate predictions
        predicted_R = mf.full_prediction()
        
        # Evaluate on validation set
        # Only consider non-zero entries in validation_R
        val_users, val_items = np.where(validation_R > 0)
        val_true = validation_R[val_users, val_items]
        val_pred = predicted_R[val_users, val_items]
        
        # Filter out NaN values
        valid_indices = ~np.isnan(val_true) & ~np.isnan(val_pred)
        val_true = val_true[valid_indices]
        val_pred = val_pred[valid_indices]
        
        # Calculate RMSE
        if len(val_true) > 0:  # Ensure no empty array
            rmse = root_mean_squared_error(val_true, val_pred) 
        else:
            rmse = float('inf')  # Handle edge case of no valid comparisons
        
        # Record the results
        results.append({
            'K': K,
            'learning_rate': lr,
            'regularization': reg,
            'epochs': epochs,
            'validation_RMSE': rmse
        })
    
    # Convert results to DataFrame
    results_df = pd.DataFrame(results)
    return results_df

In [24]:
results_df = hyperparameter_tuning(
    R=utility_matrix,
    user_ids=user_ids,
    movie_ids=movie_ids,
    hyperparameter_combinations=hyperparameter_combinations,
    validation_ratio=0.2,
    top_n=5
)

Evaluating combination 1/150: K=30, lr=0.0005, reg=0.005, epochs=50
Epoch: 1 - RMSE: 1.9218
Epoch: 2 - RMSE: 1.8995
Epoch: 3 - RMSE: 1.8793
Epoch: 4 - RMSE: 1.8610
Epoch: 5 - RMSE: 1.8444
Epoch: 6 - RMSE: 1.8294
Epoch: 7 - RMSE: 1.8156
Epoch: 8 - RMSE: 1.8031
Epoch: 9 - RMSE: 1.7916
Epoch: 10 - RMSE: 1.7810
Epoch: 11 - RMSE: 1.7713
Epoch: 12 - RMSE: 1.7624
Epoch: 13 - RMSE: 1.7541
Epoch: 14 - RMSE: 1.7465
Epoch: 15 - RMSE: 1.7394
Epoch: 16 - RMSE: 1.7328
Epoch: 17 - RMSE: 1.7266
Epoch: 18 - RMSE: 1.7208
Epoch: 19 - RMSE: 1.7154
Epoch: 20 - RMSE: 1.7104
Epoch: 21 - RMSE: 1.7056
Epoch: 22 - RMSE: 1.7011
Epoch: 23 - RMSE: 1.6969
Epoch: 24 - RMSE: 1.6929
Epoch: 25 - RMSE: 1.6891
Epoch: 26 - RMSE: 1.6855
Epoch: 27 - RMSE: 1.6821
Epoch: 28 - RMSE: 1.6788
Epoch: 29 - RMSE: 1.6757
Epoch: 30 - RMSE: 1.6727
Epoch: 31 - RMSE: 1.6699
Epoch: 32 - RMSE: 1.6672
Epoch: 33 - RMSE: 1.6646
Epoch: 34 - RMSE: 1.6621
Epoch: 35 - RMSE: 1.6597
Epoch: 36 - RMSE: 1.6574
Epoch: 37 - RMSE: 1.6552
Epoch: 38 - RMSE

  np.sum(self.P[i, :] ** 2) +
  self.Q[j, :] += self.lr * (e * P_i_old - self.regularization * self.Q[j, :])
  total_loss += e**2 + self.regularization * (
  self.b_u[i] ** 2 +
  self.b_m[j] ** 2
  np.sum(self.Q[j, :] ** 2) +
  self.P[i, :] += self.lr * (e * self.Q[j, :] - self.regularization * self.P[i, :])
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


Epoch: 7 - RMSE: nan
Epoch: 8 - RMSE: nan
Epoch: 9 - RMSE: nan
Epoch: 10 - RMSE: nan
Epoch: 11 - RMSE: nan
Epoch: 12 - RMSE: nan
Epoch: 13 - RMSE: nan
Epoch: 14 - RMSE: nan
Epoch: 15 - RMSE: nan
Epoch: 16 - RMSE: nan
Epoch: 17 - RMSE: nan
Epoch: 18 - RMSE: nan
Epoch: 19 - RMSE: nan
Epoch: 20 - RMSE: nan
Epoch: 21 - RMSE: nan
Epoch: 22 - RMSE: nan
Epoch: 23 - RMSE: nan
Epoch: 24 - RMSE: nan
Epoch: 25 - RMSE: nan
Epoch: 26 - RMSE: nan
Epoch: 27 - RMSE: nan
Epoch: 28 - RMSE: nan
Epoch: 29 - RMSE: nan
Epoch: 30 - RMSE: nan
Epoch: 31 - RMSE: nan
Epoch: 32 - RMSE: nan
Epoch: 33 - RMSE: nan
Epoch: 34 - RMSE: nan
Epoch: 35 - RMSE: nan
Epoch: 36 - RMSE: nan
Epoch: 37 - RMSE: nan
Epoch: 38 - RMSE: nan
Epoch: 39 - RMSE: nan
Epoch: 40 - RMSE: nan
Epoch: 41 - RMSE: nan
Epoch: 42 - RMSE: nan
Epoch: 43 - RMSE: nan
Epoch: 44 - RMSE: nan
Epoch: 45 - RMSE: nan
Epoch: 46 - RMSE: nan
Epoch: 47 - RMSE: nan
Epoch: 48 - RMSE: nan
Epoch: 49 - RMSE: nan
Epoch: 50 - RMSE: nan
Evaluating combination 27/150: K=30

  np.sum(self.P[i, :] ** 2) +
  np.sum(self.Q[j, :] ** 2) +
  self.Q[j, :] += self.lr * (e * P_i_old - self.regularization * self.Q[j, :])
  total_loss += e**2 + self.regularization * (
  self.b_u[i] ** 2 +
  self.b_m[j] ** 2
  self.P[i, :] += self.lr * (e * self.Q[j, :] - self.regularization * self.P[i, :])


Epoch: 6 - RMSE: nan
Epoch: 7 - RMSE: nan
Epoch: 8 - RMSE: nan
Epoch: 9 - RMSE: nan
Epoch: 10 - RMSE: nan
Epoch: 11 - RMSE: nan
Epoch: 12 - RMSE: nan
Epoch: 13 - RMSE: nan
Epoch: 14 - RMSE: nan
Epoch: 15 - RMSE: nan
Epoch: 16 - RMSE: nan
Epoch: 17 - RMSE: nan
Epoch: 18 - RMSE: nan
Epoch: 19 - RMSE: nan
Epoch: 20 - RMSE: nan
Epoch: 21 - RMSE: nan
Epoch: 22 - RMSE: nan
Epoch: 23 - RMSE: nan
Epoch: 24 - RMSE: nan
Epoch: 25 - RMSE: nan
Epoch: 26 - RMSE: nan
Epoch: 27 - RMSE: nan
Epoch: 28 - RMSE: nan
Epoch: 29 - RMSE: nan
Epoch: 30 - RMSE: nan
Epoch: 31 - RMSE: nan
Epoch: 32 - RMSE: nan
Epoch: 33 - RMSE: nan
Epoch: 34 - RMSE: nan
Epoch: 35 - RMSE: nan
Epoch: 36 - RMSE: nan
Epoch: 37 - RMSE: nan
Epoch: 38 - RMSE: nan
Epoch: 39 - RMSE: nan
Epoch: 40 - RMSE: nan
Epoch: 41 - RMSE: nan
Epoch: 42 - RMSE: nan
Epoch: 43 - RMSE: nan
Epoch: 44 - RMSE: nan
Epoch: 45 - RMSE: nan
Epoch: 46 - RMSE: nan
Epoch: 47 - RMSE: nan
Epoch: 48 - RMSE: nan
Epoch: 49 - RMSE: nan
Epoch: 50 - RMSE: nan
Evaluating com

  np.sum(self.P[i, :] ** 2) +
  self.Q[j, :] += self.lr * (e * P_i_old - self.regularization * self.Q[j, :])
  total_loss += e**2 + self.regularization * (
  self.b_u[i] ** 2 +
  self.b_m[j] ** 2
  np.sum(self.Q[j, :] ** 2) +
  self.P[i, :] += self.lr * (e * self.Q[j, :] - self.regularization * self.P[i, :])


Epoch: 6 - RMSE: nan
Epoch: 7 - RMSE: nan
Epoch: 8 - RMSE: nan
Epoch: 9 - RMSE: nan
Epoch: 10 - RMSE: nan
Epoch: 11 - RMSE: nan
Epoch: 12 - RMSE: nan
Epoch: 13 - RMSE: nan
Epoch: 14 - RMSE: nan
Epoch: 15 - RMSE: nan
Epoch: 16 - RMSE: nan
Epoch: 17 - RMSE: nan
Epoch: 18 - RMSE: nan
Epoch: 19 - RMSE: nan
Epoch: 20 - RMSE: nan
Epoch: 21 - RMSE: nan
Epoch: 22 - RMSE: nan
Epoch: 23 - RMSE: nan
Epoch: 24 - RMSE: nan
Epoch: 25 - RMSE: nan
Epoch: 26 - RMSE: nan
Epoch: 27 - RMSE: nan
Epoch: 28 - RMSE: nan
Epoch: 29 - RMSE: nan
Epoch: 30 - RMSE: nan
Epoch: 31 - RMSE: nan
Epoch: 32 - RMSE: nan
Epoch: 33 - RMSE: nan
Epoch: 34 - RMSE: nan
Epoch: 35 - RMSE: nan
Epoch: 36 - RMSE: nan
Epoch: 37 - RMSE: nan
Epoch: 38 - RMSE: nan
Epoch: 39 - RMSE: nan
Epoch: 40 - RMSE: nan
Epoch: 41 - RMSE: nan
Epoch: 42 - RMSE: nan
Epoch: 43 - RMSE: nan
Epoch: 44 - RMSE: nan
Epoch: 45 - RMSE: nan
Epoch: 46 - RMSE: nan
Epoch: 47 - RMSE: nan
Epoch: 48 - RMSE: nan
Epoch: 49 - RMSE: nan
Epoch: 50 - RMSE: nan
Evaluating com

  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


Epoch: 7 - RMSE: nan
Epoch: 8 - RMSE: nan
Epoch: 9 - RMSE: nan
Epoch: 10 - RMSE: nan
Epoch: 11 - RMSE: nan
Epoch: 12 - RMSE: nan
Epoch: 13 - RMSE: nan
Epoch: 14 - RMSE: nan
Epoch: 15 - RMSE: nan
Epoch: 16 - RMSE: nan
Epoch: 17 - RMSE: nan
Epoch: 18 - RMSE: nan
Epoch: 19 - RMSE: nan
Epoch: 20 - RMSE: nan
Epoch: 21 - RMSE: nan
Epoch: 22 - RMSE: nan
Epoch: 23 - RMSE: nan
Epoch: 24 - RMSE: nan
Epoch: 25 - RMSE: nan
Epoch: 26 - RMSE: nan
Epoch: 27 - RMSE: nan
Epoch: 28 - RMSE: nan
Epoch: 29 - RMSE: nan
Epoch: 30 - RMSE: nan
Epoch: 31 - RMSE: nan
Epoch: 32 - RMSE: nan
Epoch: 33 - RMSE: nan
Epoch: 34 - RMSE: nan
Epoch: 35 - RMSE: nan
Epoch: 36 - RMSE: nan
Epoch: 37 - RMSE: nan
Epoch: 38 - RMSE: nan
Epoch: 39 - RMSE: nan
Epoch: 40 - RMSE: nan
Epoch: 41 - RMSE: nan
Epoch: 42 - RMSE: nan
Epoch: 43 - RMSE: nan
Epoch: 44 - RMSE: nan
Epoch: 45 - RMSE: nan
Epoch: 46 - RMSE: nan
Epoch: 47 - RMSE: nan
Epoch: 48 - RMSE: nan
Epoch: 49 - RMSE: nan
Epoch: 50 - RMSE: nan
Evaluating combination 88/150: K=40

  np.sum(self.Q[j, :] ** 2) +
  self.P[i, :] += self.lr * (e * self.Q[j, :] - self.regularization * self.P[i, :])
  total_loss += e**2 + self.regularization * (
  self.b_u[i] ** 2 +
  self.b_m[j] ** 2
  np.sum(self.P[i, :] ** 2) +
  self.Q[j, :] += self.lr * (e * P_i_old - self.regularization * self.Q[j, :])
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


Epoch: 5 - RMSE: nan
Epoch: 6 - RMSE: nan
Epoch: 7 - RMSE: nan
Epoch: 8 - RMSE: nan
Epoch: 9 - RMSE: nan
Epoch: 10 - RMSE: nan
Epoch: 11 - RMSE: nan
Epoch: 12 - RMSE: nan
Epoch: 13 - RMSE: nan
Epoch: 14 - RMSE: nan
Epoch: 15 - RMSE: nan
Epoch: 16 - RMSE: nan
Epoch: 17 - RMSE: nan
Epoch: 18 - RMSE: nan
Epoch: 19 - RMSE: nan
Epoch: 20 - RMSE: nan
Epoch: 21 - RMSE: nan
Epoch: 22 - RMSE: nan
Epoch: 23 - RMSE: nan
Epoch: 24 - RMSE: nan
Epoch: 25 - RMSE: nan
Epoch: 26 - RMSE: nan
Epoch: 27 - RMSE: nan
Epoch: 28 - RMSE: nan
Epoch: 29 - RMSE: nan
Epoch: 30 - RMSE: nan
Epoch: 31 - RMSE: nan
Epoch: 32 - RMSE: nan
Epoch: 33 - RMSE: nan
Epoch: 34 - RMSE: nan
Epoch: 35 - RMSE: nan
Epoch: 36 - RMSE: nan
Epoch: 37 - RMSE: nan
Epoch: 38 - RMSE: nan
Epoch: 39 - RMSE: nan
Epoch: 40 - RMSE: nan
Epoch: 41 - RMSE: nan
Epoch: 42 - RMSE: nan
Epoch: 43 - RMSE: nan
Epoch: 44 - RMSE: nan
Epoch: 45 - RMSE: nan
Epoch: 46 - RMSE: nan
Epoch: 47 - RMSE: nan
Epoch: 48 - RMSE: nan
Epoch: 49 - RMSE: nan
Epoch: 50 - RMS

  np.sum(self.Q[j, :] ** 2) +
  np.sum(self.P[i, :] ** 2) +
  self.Q[j, :] += self.lr * (e * P_i_old - self.regularization * self.Q[j, :])
  total_loss += e**2 + self.regularization * (
  self.b_u[i] ** 2 +
  self.b_m[j] ** 2
  self.P[i, :] += self.lr * (e * self.Q[j, :] - self.regularization * self.P[i, :])
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


Epoch: 4 - RMSE: nan
Epoch: 5 - RMSE: nan
Epoch: 6 - RMSE: nan
Epoch: 7 - RMSE: nan
Epoch: 8 - RMSE: nan
Epoch: 9 - RMSE: nan
Epoch: 10 - RMSE: nan
Epoch: 11 - RMSE: nan
Epoch: 12 - RMSE: nan
Epoch: 13 - RMSE: nan
Epoch: 14 - RMSE: nan
Epoch: 15 - RMSE: nan
Epoch: 16 - RMSE: nan
Epoch: 17 - RMSE: nan
Epoch: 18 - RMSE: nan
Epoch: 19 - RMSE: nan
Epoch: 20 - RMSE: nan
Epoch: 21 - RMSE: nan
Epoch: 22 - RMSE: nan
Epoch: 23 - RMSE: nan
Epoch: 24 - RMSE: nan
Epoch: 25 - RMSE: nan
Epoch: 26 - RMSE: nan
Epoch: 27 - RMSE: nan
Epoch: 28 - RMSE: nan
Epoch: 29 - RMSE: nan
Epoch: 30 - RMSE: nan
Epoch: 31 - RMSE: nan
Epoch: 32 - RMSE: nan
Epoch: 33 - RMSE: nan
Epoch: 34 - RMSE: nan
Epoch: 35 - RMSE: nan
Epoch: 36 - RMSE: nan
Epoch: 37 - RMSE: nan
Epoch: 38 - RMSE: nan
Epoch: 39 - RMSE: nan
Epoch: 40 - RMSE: nan
Epoch: 41 - RMSE: nan
Epoch: 42 - RMSE: nan
Epoch: 43 - RMSE: nan
Epoch: 44 - RMSE: nan
Epoch: 45 - RMSE: nan
Epoch: 46 - RMSE: nan
Epoch: 47 - RMSE: nan
Epoch: 48 - RMSE: nan
Epoch: 49 - RMSE

In [25]:
# Find the combination with the lowest validation RMSE
best_result = results_df.loc[results_df['validation_RMSE'].idxmin()]
print("Best Hyperparameters:")
print(best_result)

Best Hyperparameters:
K                  45.000000
learning_rate       0.005000
regularization      0.100000
epochs             50.000000
validation_RMSE     1.613005
Name: 104, dtype: float64
