# Model Evaluation
In this work, we evaluate the performance of our model using Root Mean Square Error (RMSE). 

In [1]:
import numpy as np
from numpy import ndarray

## Data Preparation

In [2]:
# Load 5 folds of the dataset
# Each fold contains a training set and a test set
import pandas as pd
training_ratings: list[ndarray] = []
test_ratings: list[ndarray] = []
for i in range(0, 5):
    training_ratings.append(pd.read_csv(f'../data/ml-100k/u{i+1}.base', sep='\t', 
                                        usecols=[0, 1, 2],
                                        names=['user_id', 'item_id', 'rating']).to_numpy())
    test_ratings.append(pd.read_csv(f'../data/ml-100k/u{i+1}.test', sep='\t',
                                    usecols=[0, 1, 2],
                                    names=['user_id', 'item_id', 'rating']).to_numpy())
training_ratings[0][0:10, :] # user_id, item_id, rating
len(training_ratings)

5

In [3]:
data_info = pd.read_csv('../data/ml-100k/u.info', sep=' ', header=None)
n_users = data_info.iloc[0, 0]
n_items = data_info.iloc[1, 0]
print('Number of users:', n_users)
print('Number of items:', n_items)

Number of users: 943
Number of items: 1682


In [4]:
# For content-based filtering, we need the genres info of the movies
# Genres name
genres = pd.read_csv('../data/ml-100k/u.genre', sep='|', encoding='latin-1',
                           usecols=[0],names=['genre'])
genres.head(5)

Unnamed: 0,genre
0,unknown
1,Action
2,Adventure
3,Animation
4,Children's


In [5]:
# Genres info of the movies
movies_genres = pd.read_csv('../data/ml-100k/u.item', sep='|', encoding='latin-1',
                     index_col=0, header=None, 
                     names=genres['genre'].to_list(), 
                     usecols=list(range(5, 24)))
movies_genres.head(5) 

Unnamed: 0_level_0,Action,Adventure,Animation,Children's,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
unknown,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0
0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0


## Content-Based Filtering

### Data Preprocessing
Now each movie feature is represented as a vector, where each dimension corresponds to a genre. We transform the genres vector of each movie into a TF-IDF vector. 

TF-IDF (Term Frequency-Inverse Document Frequency) is a statistical measure used to evaluate the importance of a word in a document relative to a collection of documents (corpus). It is commonly used in text mining and information retrieval to transform text data into numerical vectors that can be used for machine learning algorithms. Here we use TF-IDF to make the genre representation of movies more meaningful for similarity calculations.

- Term Frequency (TF): Measures how often a genre appears for a movie. For example, if a movie belongs to multiple genres, the frequency of each genre is considered.
- Inverse Document Frequency (IDF): Reduces the weight of genres that are common across many movies (e.g., "Drama") and increases the weight of rare genres (e.g., "Sci-Fi").

In [6]:
from sklearn.feature_extraction.text import TfidfTransformer
tfidf_transformer = TfidfTransformer()
tfidf_item_matrix = tfidf_transformer.fit_transform(movies_genres)
item_features = pd.DataFrame(tfidf_item_matrix.toarray(),
                           columns=tfidf_transformer.get_feature_names_out(),
                           index=movies_genres.index)
item_features.head(5) # item_id, genre1, genre2, ...

Unnamed: 0_level_0,Action,Adventure,Animation,Children's,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
unknown,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
0,0.0,0.0,0.74066,0.573872,0.349419,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,0.536767,0.65097,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.536767,0.0,0.0
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
0,0.710652,0.0,0.0,0.0,0.539759,0.0,0.0,0.451259,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,0.0,0.0,0.0,0.0,0.0,0.735504,0.0,0.363186,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.571953,0.0,0.0


### Evaluate Content-Based Filtering

In [7]:
movies_genres.to_numpy().shape

(1682, 18)

In [8]:
# Run content-based filtering on the dataset without trasforming the item features by TF-IDF
from content_based_filtering import ContentBasedFiltering
total_rmse_train = 0
total_rmse_test = 0
for i in range(0, 5):
    recommender = ContentBasedFiltering(n_users, n_items, movies_genres.to_numpy()) # Use the original item features
    recommender.fit(training_ratings[i])
    
    rmse_training = recommender.evaluateRMSE(training_ratings[i])
    rmse_test = recommender.evaluateRMSE(test_ratings[i])
    print(f"Fold {i+1}: RMSE on Training Dataset: {rmse_training}, RMSE on Testing Dataset: {rmse_test}")
    total_rmse_train += rmse_training
    total_rmse_test += rmse_test
print(f"Average RMSE on Training Dataset: {total_rmse_train/5}, Average RMSE on Testing Dataset: {total_rmse_test/5}")

Fold 1: RMSE on Training Dataset: [0.89295831], RMSE on Testing Dataset: [1.17266072]
Fold 2: RMSE on Training Dataset: [0.89446298], RMSE on Testing Dataset: [1.14484388]
Fold 3: RMSE on Training Dataset: [0.8982215], RMSE on Testing Dataset: [1.14019007]
Fold 4: RMSE on Training Dataset: [0.8984773], RMSE on Testing Dataset: [1.1497412]
Fold 5: RMSE on Training Dataset: [0.8979186], RMSE on Testing Dataset: [1.16950065]
Average RMSE on Training Dataset: [0.89640774], Average RMSE on Testing Dataset: [1.1553873]


In [9]:
# Run content-based filtering on the dataset with TF-IDF item features
total_rmse_train = 0
total_rmse_test = 0
for i in range(0, 5):
    # Use the TF-IDF transformed item features
    recommender = ContentBasedFiltering(n_users, n_items, item_features.to_numpy())
    recommender.fit(training_ratings[i])
    
    rmse_training = recommender.evaluateRMSE(training_ratings[i])
    rmse_test = recommender.evaluateRMSE(test_ratings[i])
    print(f"Fold {i+1}: RMSE on Training Dataset: {rmse_training}, RMSE on Testing Dataset: {rmse_test}")
    total_rmse_train += rmse_training
    total_rmse_test += rmse_test
print(f"Average RMSE on Training Dataset: {total_rmse_train/5}, Average RMSE on Testing Dataset: {total_rmse_test/5}")

Fold 1: RMSE on Training Dataset: [0.89194995], RMSE on Testing Dataset: [1.16719429]
Fold 2: RMSE on Training Dataset: [0.89299088], RMSE on Testing Dataset: [1.13735167]
Fold 3: RMSE on Training Dataset: [0.89711895], RMSE on Testing Dataset: [1.12976507]
Fold 4: RMSE on Training Dataset: [0.89728205], RMSE on Testing Dataset: [1.14549288]
Fold 5: RMSE on Training Dataset: [0.89712948], RMSE on Testing Dataset: [1.14884867]
Average RMSE on Training Dataset: [0.89529426], Average RMSE on Testing Dataset: [1.14573052]


## Neighborhood-Based Collaborative Filtering

### User-Based Collaborative Filtering
The avarage RMSE on test set is 1.0595, which is a bit better than the results of the content-based filtering. 

In [10]:
from neighbourhood_based_CF import NeighborhoodCF

total_rmse_train = 0
total_rmse_test = 0

for i in range(0, 5):
    recommender = NeighborhoodCF(n_users, n_items, k=30, uuCF=True)
    recommender.fit(training_ratings[i])
    
    rmse_training = recommender.evaluateRMSE(training_ratings[i])
    rmse_test = recommender.evaluateRMSE(test_ratings[i])
    print(f"Fold {i+1}: RMSE on Training Dataset: {rmse_training}, RMSE on Testing Dataset: {rmse_test}")
    total_rmse_train += rmse_training
    total_rmse_test += rmse_test
print(f"Average RMSE on Training Dataset: {total_rmse_train/5}, Average RMSE on Testing Dataset: {total_rmse_test/5}")

Fold 1: RMSE on Training Dataset: 1.0330093580940434, RMSE on Testing Dataset: 1.0690665424742682
Fold 2: RMSE on Training Dataset: 1.0567169655907669, RMSE on Testing Dataset: 1.0704344072354874
Fold 3: RMSE on Training Dataset: 1.0555516469135295, RMSE on Testing Dataset: 1.055567648406658
Fold 4: RMSE on Training Dataset: 1.0556828803321838, RMSE on Testing Dataset: 1.0554910489039748
Fold 5: RMSE on Training Dataset: 1.0361981927209163, RMSE on Testing Dataset: 1.0470508556484694
Average RMSE on Training Dataset: 1.047431808730288, Average RMSE on Testing Dataset: 1.0595221005337716


### Item-based CF: 
- Compute the similarity between items (movies) based on user ratings.
- Use the similarity scores to recommend items to users based on their past ratings.

In [11]:
from neighbourhood_based_CF import NeighborhoodCF

total_rmse_train = 0
total_rmse_test = 0
# Item-based collaborative filtering
for i in range(0, 5):
    recommender = NeighborhoodCF(n_users, n_items, k=30, uuCF=False)
    recommender.fit(training_ratings[i])
    
    rmse_training = recommender.evaluateRMSE(training_ratings[i])
    rmse_test = recommender.evaluateRMSE(test_ratings[i])
    print(f"Fold {i+1}: RMSE on Training Dataset: {rmse_training}, RMSE on Testing Dataset: {rmse_test}")
    total_rmse_train += rmse_training
    total_rmse_test += rmse_test
print(f"Average RMSE on Training Dataset: {total_rmse_train/5}, Average RMSE on Testing Dataset: {total_rmse_test/5}")

Fold 1: RMSE on Training Dataset: 1.014389622363792, RMSE on Testing Dataset: 1.0483106587889226
Fold 2: RMSE on Training Dataset: 1.021522394554863, RMSE on Testing Dataset: 1.0518583152549124
Fold 3: RMSE on Training Dataset: 1.0165509811303082, RMSE on Testing Dataset: 1.0400619217902314
Fold 4: RMSE on Training Dataset: 1.0151171558842027, RMSE on Testing Dataset: 1.0371367224622197
Fold 5: RMSE on Training Dataset: 1.0096257912395818, RMSE on Testing Dataset: 1.0388607220524215
Average RMSE on Training Dataset: 1.0154411890345496, Average RMSE on Testing Dataset: 1.0432456680697413
