In [1]:
import numpy as np
import pandas as pd

In [2]:
import utilities # codeTimer context manager.
import data_preparation # Load dataset and build required matrices.
import factorisation # WALS factorisation.
import recommender # Recommender system.

### Loading dataset and creating recommender system

In [3]:
# Seed NumPy's global RNG before anything else in this cell runs.
np.random.seed(seed=17)

# importDataset() returns two objects; both are passed straight to the recommender.
dataset = data_preparation.importDataset()
mov, rat = dataset
rec = recommender.recommenderSystem(mov, rat)

# Report the error returned by predictionError().
mae = rec.predictionError()
print("Prediction MAE: {}".format(mae))

The dataframe contains 610 users and 2999 items.
Prediction MAE: 46.288121662982874


### Selecting the best regularisation coefficient with cross-validation

In [None]:
np.random.seed(17)

# Candidate values handed to bestLambdaCV, which requires them as a list.
# They get their own name because `reg_lambda` is used by later cells for the
# single chosen value; one name should not hold a list here and a scalar there.
lambda_candidates = [0.10, 0.15, 0.20]
n_folds = 5
n_iter = 4

# Time the whole search; the selected value is kept in `best_lambda`.
with utilities.codeTimer("Best regression lambda CV"):
    best_lambda = rec.bestLambdaCV(n_folds, n_iter, lambda_candidates)

Performing 5 fold CV...
Performing WALS algoritm...
Train error: 1.0741867303575912
Test error: 3.9090256020741205
Train error: 0.6374463502300293
Test error: 3.913290559845525
Train error: 0.5455225611364052
Test error: 3.9051336593811867
Train error: 0.5117137358313536
Test error: 3.9011769978241038
...Done!
Performing WALS algoritm...
Train error: 0.7899390111772768
Test error: 3.7927653994602557
Train error: 0.5795203519027392
Test error: 3.888116908868742
Train error: 0.5247132326834631
Test error: 3.90041546149647
Train error: 0.5008661247419636
Test error: 3.9029892220073483
...Done!
Performing WALS algoritm...
Train error: 0.780173761329322
Test error: 3.797027213874125
Train error: 0.5760359290540016
Test error: 3.886884902572187


### Factorisation

In [None]:
# Use the value selected by bestLambdaCV; `reg_lambda` is read again by the
# new-user cells further down.
n_iter = 5
reg_lambda = best_lambda

with utilities.codeTimer("WALS factorisation"):
    factorisation_result = rec.performFactorisation(reg_lambda, n_iter)
    train, test = factorisation_result

### Recommendation

In [None]:
def recommend(rec_system, user_id):
    """Return the result of ``rec_system.answerQuery`` for ``user_id``.

    Thin convenience wrapper: the value produced by ``answerQuery`` is passed
    back to the caller unchanged.
    """
    recommendations = rec_system.answerQuery(user_id)
    return recommendations
        
def bestRated(rec_system, user_id):
    """Return the user's entries ordered by rating, highest first.

    Fetches ``rec_system.getUserMovies(user_id)`` and sorts the result on its
    "Rating" column in descending order.
    """
    rated = rec_system.getUserMovies(user_id)
    ranked = rated.sort_values("Rating", ascending=False)
    return ranked

In [None]:
# User to inspect; the next cell reuses `user_id`.
user_id = 4

# Display only the first ten rows of the result.
user_recs = recommend(rec, user_id)
user_recs.head(10)

In [None]:
# First ten rows of this user's entries, sorted by "Rating" from highest to lowest.
user_top_rated = bestRated(rec, user_id)
user_top_rated.head(10)

### Similar items
Some movie ids to try with `rec.suggestSimilar`:
* 911: Star Wars Episode VI
* 786: Dumbo
* 957: The Shining
* 474: Blade Runner

In [None]:
# 474 is listed as Blade Runner in the suggested ids above.
item_id = 474
rec.suggestSimilar(item_id)

### New user recommendation

In [None]:
# Fixed seed so this cell is repeatable
# (presumably generateNewUser draws from NumPy's global RNG -- TODO confirm).
np.random.seed(seed=17)

generated_user = rec.generateNewUser(8)
new_user, new_user_id = generated_user

# Shape of R, displayed for comparison with the value shown after addNewUser.
np.shape(rec.R)

In [None]:
# Time the addNewUser call only.
with utilities.codeTimer("New user factorisation"):
    rec.addNewUser(new_user, reg_lambda)

# Display the shape of R again now that addNewUser has run.
shape_after_add = np.shape(rec.R)
shape_after_add

In [None]:
# First ten rows of the recommend() result for the user added above.
new_user_recs = recommend(rec, new_user_id)
new_user_recs.head(10)

In [None]:
# First ten rows of the new user's entries, sorted by "Rating" from highest to lowest.
new_user_top_rated = bestRated(rec, new_user_id)
new_user_top_rated.head(10)

### Cold start problem
If a new user has rated fewer than 10 movies, the most popular movies that the user has not yet seen are recommended instead.

In [None]:
np.random.seed(17)

new_user, new_user_id = rec.generateNewUser(8)
# A bare expression in the middle of a cell is evaluated and then discarded
# (only a cell's last expression is displayed), so the shape is printed.
print("Shape of R before addNewUser: {}".format(np.shape(rec.R)))

# Time the addNewUser call only.
with utilities.codeTimer("New user factorisation"):
    rec.addNewUser(new_user, reg_lambda)

# Printed as well so the two shapes can be compared directly.
print("Shape of R after addNewUser: {}".format(np.shape(rec.R)))

In [None]:
# First ten rows of the recommend() result for the user generated in the previous cell.
cold_start_recs = recommend(rec, new_user_id)
cold_start_recs.head(10)