## **Movie Recommendation: Matrix Factorization Collaborative Filtering**

### **Import necessary libraries**

In [3]:
import pandas as pd 
import numpy as np 
import math
from collaborative_filtering import MatrixFactorizationCF, build_utility_matrix, eval

In [4]:
# Load the pre-split rating files (train, test, validation — read in that order).
# The CSVs have no header row, so the three column names are supplied explicitly.
train_data, test_data, val_data = (
    pd.read_csv(csv_path, header=None, names=['UserID', 'MovieID', 'Rating'])
    for csv_path in (
        'resources/data/train_val_test/ratings_train.csv',
        'resources/data/train_val_test/ratings_test.csv',
        'resources/data/train_val_test/ratings_val.csv',
    )
)

In [5]:
# Preview the first rows of the training ratings.
train_data.head()

Unnamed: 0,UserID,MovieID,Rating
0,36527471,1659337,8
1,65089494,105695,10
2,23249666,1628064,9
3,7776823,110413,10
4,122607281,2948372,6


In [6]:
# Preview the last rows of the training ratings (the final row label also shows the row count).
train_data.tail()

Unnamed: 0,UserID,MovieID,Rating
3699431,58251797,4729430,8
3699432,93006392,2545428,8
3699433,18815933,2537176,6
3699434,28443655,414982,3
3699435,1067456,108399,10


In [7]:
# Number of distinct users and movies that appear in the training split.
num_users = len(train_data['UserID'].unique())
num_movies = len(train_data['MovieID'].unique())
(num_users, num_movies)

(9130, 9814)

In [8]:
# Preview the first rows of the test ratings.
test_data.head()

Unnamed: 0,UserID,MovieID,Rating
0,45430544,83798,7
1,34807113,1250777,9
2,67646271,80120,4
3,51265424,1490017,8
4,51235794,11813216,8


In [9]:
# Preview the last rows of the test ratings.
test_data.tail()

Unnamed: 0,UserID,MovieID,Rating
462425,30899304,1136608,9
462426,22328566,108783,10
462427,26159893,333780,7
462428,35725947,3569230,6
462429,23018536,1232829,8


In [10]:
# Number of distinct users and movies that appear in the test split.
# NOTE(review): this reuses num_users / num_movies, overwriting the values
# computed from the training split earlier in the notebook.
num_users = len(test_data['UserID'].unique())
num_movies = len(test_data['MovieID'].unique())
(num_users, num_movies)

(9130, 9814)

In [11]:
# Preview the first rows of the validation ratings.
val_data.head()

Unnamed: 0,UserID,MovieID,Rating
0,131612777,891592,2
1,23320225,2382320,7
2,28071275,45758,3
3,23856336,68699,10
4,47196478,95327,7


In [12]:
# Preview the last rows of the validation ratings.
val_data.tail()

Unnamed: 0,UserID,MovieID,Rating
462425,77954315,113870,7
462426,4580541,91877,6
462427,25506300,1877832,9
462428,27367887,435761,8
462429,364025,185014,9


In [13]:
# Number of distinct users and movies that appear in the validation split.
# NOTE(review): this reuses num_users / num_movies, overwriting the values
# assigned by the earlier count cells.
num_users = len(val_data['UserID'].unique())
num_movies = len(val_data['MovieID'].unique())
(num_users, num_movies)

(9130, 9814)

In [14]:
# Lookup table of the distinct MovieIDs seen in training, sorted ascending.
# reset_index() (without drop=True) gives a fresh 0..n-1 row index and keeps
# the original train_data row label of each ID in an 'index' column.
# NOTE(review): confirm whether downstream helpers need that 'index' column.
movies = (
    train_data[['MovieID']]
    .drop_duplicates()
    .sort_values('MovieID', ascending=True)
    .reset_index()
)

In [15]:
# Lookup table of the distinct UserIDs seen in training, sorted ascending.
# reset_index() (without drop=True) gives a fresh 0..n-1 row index and keeps
# the original train_data row label of each ID in an 'index' column.
# NOTE(review): confirm whether downstream helpers need that 'index' column.
users = (
    train_data[['UserID']]
    .drop_duplicates()
    .sort_values('UserID', ascending=True)
    .reset_index()
)

### **Build Utility Matrix**

In [16]:
# Build the utility matrix for the training ratings with the project helper
# and display it; the recorded output is a dense float array that is mostly 0.
# NOTE(review): presumably rows are users and columns are movies — confirm.
# If so, 9130 x 9814 float64 values is roughly 717 MB held in memory.
train_R = build_utility_matrix(train_data)
train_R

array([[0., 0., 7., ..., 0., 0., 0.],
       [0., 0., 0., ..., 7., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [17]:
# Build the utility matrix for the validation ratings and display it.
# NOTE(review): this matrix is built independently of train_R; validation
# RMSE is only meaningful if build_utility_matrix assigns the same row/column
# positions to the same IDs for both splits — confirm against the helper.
val_R = build_utility_matrix(val_data)
val_R

array([[8., 8., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

## **User-based MF**

In [18]:
# Configure the user-based model (K is presumably the latent-factor count — confirm).
# NOTE(review): no random seed is set in the cells shown; if the model
# initialises its factors randomly, the logged RMSE values will vary between
# runs — confirm and seed if so.
UUMF = MatrixFactorizationCF(
    R=train_R,
    K=30,
    learning_rate=0.005,
    epochs=50,
    regularization=0.05,
)
# Train against the validation matrix; the log reports train/validation RMSE
# per epoch and a save to best_uumf_model.joblib. The value returned by
# train() is displayed as the cell result.
UUMF.train(val_R)

Epoch: 1 - Train RMSE: 1.7212, Validation RMSE: 1.6257
Model saved to best_uumf_model.joblib.
Epoch: 2 - Train RMSE: 1.6297, Validation RMSE: 1.5990
Model saved to best_uumf_model.joblib.
Epoch: 3 - Train RMSE: 1.6136, Validation RMSE: 1.5834
Model saved to best_uumf_model.joblib.
Epoch: 4 - Train RMSE: 1.5935, Validation RMSE: 1.5539
Model saved to best_uumf_model.joblib.
Epoch: 5 - Train RMSE: 1.5724, Validation RMSE: 1.5348
Model saved to best_uumf_model.joblib.
Epoch: 6 - Train RMSE: 1.5556, Validation RMSE: 1.5171
Model saved to best_uumf_model.joblib.
Epoch: 7 - Train RMSE: 1.5407, Validation RMSE: 1.5038
Model saved to best_uumf_model.joblib.
Epoch: 8 - Train RMSE: 1.5281, Validation RMSE: 1.4930
Model saved to best_uumf_model.joblib.
Epoch: 9 - Train RMSE: 1.5170, Validation RMSE: 1.4840
Model saved to best_uumf_model.joblib.
Epoch: 10 - Train RMSE: 1.5069, Validation RMSE: 1.4759
Model saved to best_uumf_model.joblib.
Epoch: 11 - Train RMSE: 1.4977, Validation RMSE: 1.4695
Mod

1.4503506346479929

In [19]:
# Reload the user-based checkpoint; the training log in this notebook reports
# saving to this same file name. train_R is passed alongside the file path.
uumf_model = MatrixFactorizationCF.load_model('best_uumf_model.joblib', train_R)

Model loaded from best_uumf_model.joblib.


In [20]:
# Compute the model's predictions once so they can be reused for evaluation
# and recommendation below.
# NOTE(review): presumably a dense users x movies rating matrix — confirm.
uu_predicted_R = uumf_model.full_prediction()

In [21]:
# Score the user-based predictions on the held-out test ratings.
# `eval` is the project's helper imported from collaborative_filtering; it
# shadows Python's built-in eval() for the rest of the notebook.
eval(uu_predicted_R, test_data=test_data, users=users, movies=movies)

{'RMSE': 1.4517803300300467,
 'Precision': 0.9202185319823873,
 'Recall': 0.6982773428496173,
 'F1-Score': 0.7940307557723417}

In [22]:
# Top-25 movie recommendations for one user, read from the precomputed
# prediction matrix. The ID is defined once so the call and the printed
# caption cannot drift apart.
target_user_id = 36527471
recommendation = uumf_model.recommend_using_predicted_ratings(
    id=target_user_id,
    predicted_ratings=uu_predicted_R,
    movies=movies,
    users=users,
    top_n=25,
)
print(f'Recommend movies for user {target_user_id} and predicted ratings:')
recommendation

Recommend movies for user 36527471 and predicted ratings:


[(1000252, 10),
 (1615553, 10),
 (1615944, 10),
 (1683085, 10),
 (1683086, 10),
 (1683088, 10),
 (2084342, 10),
 (2140553, 10),
 (2301449, 10),
 (2301451, 10),
 (2301455, 10),
 (2832378, 10),
 (3060860, 10),
 (3060910, 10),
 (3866850, 10),
 (4283088, 10),
 (4283094, 10),
 (4832268, 10),
 (5229638, 10),
 (7074030, 10),
 (9166696, 10),
 (11172022, 10),
 (12187032, 10),
 (12187040, 10),
 (12187044, 10)]

In [23]:
# Same request through recommend(), which is not given the precomputed
# prediction matrix. The ID is defined once so the call and the printed
# caption cannot drift apart.
target_user_id = 36527471
recommendation = uumf_model.recommend(
    id=target_user_id,
    movies=movies,
    users=users,
    top_n=25,
)
print(f'Recommend movies for user {target_user_id} and predicted ratings:')
recommendation

Recommend movies for user 36527471 and predicted ratings:


[(1000252, 10),
 (1615553, 10),
 (1615944, 10),
 (1683085, 10),
 (1683086, 10),
 (1683088, 10),
 (2084342, 10),
 (2140553, 10),
 (2301449, 10),
 (2301451, 10),
 (2301455, 10),
 (2832378, 10),
 (3060860, 10),
 (3060910, 10),
 (3866850, 10),
 (4283088, 10),
 (4283094, 10),
 (4832268, 10),
 (5229638, 10),
 (7074030, 10),
 (9166696, 10),
 (11172022, 10),
 (12187032, 10),
 (12187040, 10),
 (12187044, 10)]

In [24]:
# Disabled call kept for reference.
# NOTE(review): `predicted_R` is not defined in the cells shown (the user-based
# prediction matrix is `uu_predicted_R`), so this line would raise NameError
# if re-enabled as written.
# UUMF.print_recommendation(predicted_R=predicted_R, movies=movies, users=users)

## **Item-based MF**

In [25]:
# Configure the item-based model: same hyperparameters as the user-based run,
# with uu_mf=False selecting the item-based variant.
# NOTE(review): no random seed is set in the cells shown; if the model
# initialises its factors randomly, the logged RMSE values will vary between
# runs — confirm and seed if so.
IIMF = MatrixFactorizationCF(
    R=train_R,
    K=30,
    learning_rate=0.005,
    epochs=50,
    regularization=0.05,
    uu_mf=False,
)
# Train against the validation matrix; the log reports train/validation RMSE
# per epoch and a save to best_iimf_model.joblib. The value returned by
# train() is displayed as the cell result.
IIMF.train(val_R)

Epoch: 1 - Train RMSE: 1.7211, Validation RMSE: 1.6256
Model saved to best_iimf_model.joblib.
Epoch: 2 - Train RMSE: 1.6295, Validation RMSE: 1.5984
Model saved to best_iimf_model.joblib.
Epoch: 3 - Train RMSE: 1.6115, Validation RMSE: 1.5772
Model saved to best_iimf_model.joblib.
Epoch: 4 - Train RMSE: 1.5887, Validation RMSE: 1.5503
Model saved to best_iimf_model.joblib.
Epoch: 5 - Train RMSE: 1.5707, Validation RMSE: 1.5341
Model saved to best_iimf_model.joblib.
Epoch: 6 - Train RMSE: 1.5558, Validation RMSE: 1.5181
Model saved to best_iimf_model.joblib.
Epoch: 7 - Train RMSE: 1.5414, Validation RMSE: 1.5041
Model saved to best_iimf_model.joblib.
Epoch: 8 - Train RMSE: 1.5288, Validation RMSE: 1.4932
Model saved to best_iimf_model.joblib.
Epoch: 9 - Train RMSE: 1.5176, Validation RMSE: 1.4845
Model saved to best_iimf_model.joblib.
Epoch: 10 - Train RMSE: 1.5076, Validation RMSE: 1.4766
Model saved to best_iimf_model.joblib.
Epoch: 11 - Train RMSE: 1.4983, Validation RMSE: 1.4701
Mod

1.4496348120276787

In [26]:
# Reload the item-based checkpoint; the training log in this notebook reports
# saving to this same file name. train_R is passed alongside the file path.
iimf_model = MatrixFactorizationCF.load_model(
    'best_iimf_model.joblib',
    train_R,
)

Model loaded from best_iimf_model.joblib.


In [27]:
# Compute the item-based model's predictions once so they can be reused for
# evaluation and recommendation below.
# NOTE(review): presumably a dense rating matrix like the user-based one — confirm.
ii_predicted_R = iimf_model.full_prediction()

In [28]:
# Score the item-based predictions on the held-out test ratings.
# `eval` is the project's helper imported from collaborative_filtering; it
# shadows Python's built-in eval() for the rest of the notebook.
eval(ii_predicted_R, test_data=test_data, users=users, movies=movies)

{'RMSE': 1.4517127892572503,
 'Precision': 0.920375505419648,
 'Recall': 0.6984736913478218,
 'F1-Score': 0.7942161357342519}

In [29]:
# Sanity check: select any predicted ratings above 10 (the recorded result is
# an empty array). Only the upper bound is checked here.
ii_predicted_R[ii_predicted_R > 10]

array([], dtype=float64)

In [30]:
# Top-25 users for one movie, read from the precomputed item-based prediction
# matrix. The ID is defined once so the call and the printed caption cannot
# drift apart.
target_movie_id = 105695
recommendation = iimf_model.recommend_using_predicted_ratings(
    id=target_movie_id,
    predicted_ratings=ii_predicted_R,
    movies=movies,
    users=users,
    top_n=25,
)
print(f'Recommend users for movie {target_movie_id} and predicted ratings:')
recommendation

Recommend users for movie 105695 and predicted ratings:


[(1690952, 10),
 (1819213, 10),
 (1887253, 10),
 (1906746, 10),
 (1937893, 10),
 (4496495, 10),
 (5426623, 10),
 (18246891, 10),
 (19556601, 10),
 (25228468, 10),
 (29812060, 10),
 (30198461, 10),
 (32796562, 10),
 (33279366, 10),
 (36181843, 10),
 (36284954, 10),
 (36672583, 10),
 (43105756, 10),
 (49952068, 10),
 (61036395, 10),
 (79319221, 10),
 (97480288, 10),
 (131218083, 10),
 (160672695, 10),
 (168830426, 10)]

In [31]:
# Same request through recommend(), which is not given the precomputed
# prediction matrix. The ID is defined once so the call and the printed
# caption cannot drift apart.
target_movie_id = 105695
recommendation = iimf_model.recommend(
    id=target_movie_id,
    movies=movies,
    users=users,
    top_n=25,
)
print(f'Recommend users for movie {target_movie_id} and predicted ratings:')
recommendation

Recommend users for movie 105695 and predicted ratings:


[(1690952, 10),
 (1819213, 10),
 (1887253, 10),
 (1906746, 10),
 (1937893, 10),
 (4496495, 10),
 (5426623, 10),
 (18246891, 10),
 (19556601, 10),
 (25228468, 10),
 (29812060, 10),
 (30198461, 10),
 (32796562, 10),
 (33279366, 10),
 (36181843, 10),
 (36284954, 10),
 (36672583, 10),
 (43105756, 10),
 (49952068, 10),
 (61036395, 10),
 (79319221, 10),
 (97480288, 10),
 (131218083, 10),
 (160672695, 10),
 (168830426, 10)]

In [32]:
# Disabled call kept for reference.
# NOTE(review): `predicted_R` is not defined in the cells shown (the item-based
# prediction matrix is `ii_predicted_R`), so this line would raise NameError
# if re-enabled as written.
# IIMF.print_recommendation(predicted_R=predicted_R, movies=movies, users=users)