# COMM7380 Recommender Systems for Digital Media

In [None]:
# Install Pandas and NumPy with pip into the environment of the current Jupyter kernel
import sys
!{sys.executable} -m pip install pandas
!{sys.executable} -m pip install numpy

# Matrix Factorization

In [None]:
import pandas as pd
import numpy as np

Let's recreate the example in the slides.

![Ratings](../img/L9-ratings.png)

We have 6 movies and 6 users and the respective ratings.
Note: a zero marks an item that the user has not rated.

In [None]:
# Column labels of the ratings table: one short code per movie
movies = [
    'mib',
    'st',
    'av',
    'b',
    'ss',
    'lm',
]
# Row labels of the ratings table: one name per user
users = [
    'Sara',
    'Jesper',
    'Therese',
    'Helle',
    'Pietro',
    'Ekaterina',
]

In [None]:
# Ratings table: one row per user, one column per movie.
# A 0.0 entry stands for a movie the user has not rated.
M = pd.DataFrame(
    data=[
        [5.0, 3.0, 0.0, 2.0, 2.0, 2.0],
        [4.0, 3.0, 4.0, 0.0, 3.0, 3.0],
        [5.0, 2.0, 5.0, 2.0, 1.0, 1.0],
        [3.0, 5.0, 3.0, 0.0, 1.0, 1.0],
        [3.0, 3.0, 3.0, 2.0, 4.0, 5.0],
        [2.0, 3.0, 2.0, 3.0, 5.0, 5.0],
    ],
    index=users,
    columns=movies,
)
M

Let's check one rating

In [None]:
# Single label-based lookup: row 'Sara', column 'mib'
M.loc['Sara', 'mib']

## Compute the matrix factorization

The easy way, use `numpy` library.

In [None]:
from numpy import linalg

# Decompose the ratings matrix M; the three results are unpacked as U, Sigma and Vt
U, Sigma, Vt = linalg.svd(M)

Let's print the matrices (a bit prettified considering only a certain number of `decimals`).

In [None]:
def prettify_matrix(matrix, decimals):
    """Print ``matrix`` rounded for display, followed by a separator line.

    Parameters
    ----------
    matrix : array-like accepted by ``np.array_str``
        The values to print.
    decimals : int
        Floating point precision passed to ``np.array_str``.

    Returns
    -------
    None
        The text is written to standard output only.
    """
    # suppress_small=True prints values very close to zero as 0
    rendered = np.array_str(matrix, precision=decimals, suppress_small=True)
    print(rendered, '==========', sep='\n')

In [None]:
# Show the three SVD results in the order U, Sigma, Vt, each with 3 decimals
prettify_matrix(U, decimals=3)
prettify_matrix(Sigma, decimals=3)
prettify_matrix(Vt, decimals=3)

Note that the numpy function returns Sigma ($\Sigma$) as a vector rather than as a matrix.

## Reducing the dimensionality 

Create a `rank_k` function to reduce the rank of the matrices to `k`.

This value is used to reduce the number of columns of `U` and the number of rows of `Vt`.
`Sigma_reduced` is a matrix with the first `k` values of the `Sigma` vector on its main diagonal (built by multiplying the identity matrix returned by the `eye` function by the `Sigma` values).

In [None]:
def rank_k(U, Sigma, Vt, k):
    """Truncate the results of an SVD to rank ``k``.

    Parameters
    ----------
    U : 2-D array
        Left factor; only its first ``k`` columns are kept.
    Sigma : 1-D array
        Vector of values placed on the diagonal; only the first ``k`` are kept.
    Vt : 2-D array
        Right factor; only its first ``k`` rows are kept.
    k : int
        Number of components to keep.

    Returns
    -------
    tuple
        ``(U_reduced, Sigma_reduced, Vt_reduced)`` where ``U_reduced`` and
        ``Vt_reduced`` are ``np.matrix`` objects (so ``*`` between the three
        results is a matrix product) and ``Sigma_reduced`` is a ``k x k``
        array with ``Sigma[:k]`` on its main diagonal.
    """
    # np.asmatrix is used instead of np.mat: np.mat was removed in NumPy 2.0
    U_reduced = np.asmatrix(U[:, :k])
    Vt_reduced = np.asmatrix(Vt[:k, :])
    # Broadcasting scales column j of the identity by Sigma[j], which leaves
    # the kept values on the main diagonal and zeros elsewhere
    Sigma_reduced = np.eye(k) * Sigma[:k]

    return U_reduced, Sigma_reduced, Vt_reduced

In [None]:
# Keep only the first 4 components of the decomposition
U_reduced, Sigma_reduced, Vt_reduced = rank_k(U, Sigma, Vt, k=4)

In [None]:
# Show the reduced matrices in the order U, Sigma, Vt, each with 3 decimals
prettify_matrix(U_reduced, decimals=3)
prettify_matrix(Sigma_reduced, decimals=3)
prettify_matrix(Vt_reduced, decimals=3)

Let's create the approximated rating matrix `M_hat` ($\hat M$) by multiplying the three reduced matrices

In [None]:
# Matrix product of the three reduced factors (explicit matrix-multiplication operator)
M_hat = U_reduced @ Sigma_reduced @ Vt_reduced

Comparing the results with M matrix

In [None]:
# Approximation first, then the original ratings as a plain array
prettify_matrix(M_hat, decimals=3)
prettify_matrix(M.to_numpy(), decimals=3)

If we want to save only `U` and `Vt`, we need to multiply the values of those matrices by the square root of the values of `Sigma_reduced`. The function `rank_k2` takes care of it.

In [None]:
def rank_k2(U, Sigma, Vt, k):
    """Truncate an SVD to rank ``k`` and fold the diagonal values into both factors.

    Parameters
    ----------
    U : 2-D array
        Left factor; only its first ``k`` columns are kept.
    Sigma : 1-D array
        Vector of values placed on the diagonal; only the first ``k`` are kept.
    Vt : 2-D array
        Right factor; only its first ``k`` rows are kept.
    k : int
        Number of components to keep.

    Returns
    -------
    tuple
        ``(U_reduced * Sigma_sqrt, Sigma_sqrt * Vt_reduced)`` as ``np.matrix``
        objects, where ``Sigma_sqrt`` is the element-wise square root of the
        ``k x k`` diagonal matrix built from ``Sigma[:k]``. Multiplying the two
        results gives the same product as ``U_reduced * Sigma_reduced * Vt_reduced``.
    """
    # np.asmatrix is used instead of np.mat: np.mat was removed in NumPy 2.0
    U_reduced = np.asmatrix(U[:, :k])
    Vt_reduced = np.asmatrix(Vt[:k, :])
    # Diagonal matrix holding the first k values of Sigma
    Sigma_reduced = np.eye(k) * Sigma[:k]
    # Off-diagonal entries are zero, so the element-wise root is still diagonal
    Sigma_sqrt = np.sqrt(Sigma_reduced)

    # One operand of each product is an np.matrix, so `*` is a matrix product
    return U_reduced * Sigma_sqrt, Sigma_sqrt * Vt_reduced

In [None]:
# Rank-4 factors with the square-rooted Sigma values already folded in
U_sqrt, Vt_sqrt = rank_k2(U, Sigma, Vt, k=4)

Let's compute the resulting matrix and compare to the previous one and the original ratings

In [None]:
# Matrix product of the two scaled factors (explicit matrix-multiplication operator)
M_hat2 = U_sqrt @ Vt_sqrt

In [None]:
# Two-factor approximation, three-factor approximation, then the original ratings
prettify_matrix(M_hat2, decimals=3)
prettify_matrix(M_hat, decimals=3)
prettify_matrix(M.to_numpy(), decimals=3)

## Predicting a rating

By using the M_hat matrix we can directly access the predicted rating. Using a Pandas dataframe makes it easier.

In [None]:
# Label the approximated ratings with user rows and movie columns, rounded to 2 decimals
dfM_hat = pd.DataFrame(data=M_hat, index=users, columns=movies).round(decimals=2)
# Predicted rating: row 'Sara', column 'av'
dfM_hat.loc['Sara', 'av']

Predicting using U and Vt matrices is a bit more tricky, we can create a function for it 

In [None]:
def uvt_rating(U, Vt, user_index, item_index):
    """Multiply one row of ``U`` by one column of ``Vt``.

    Parameters
    ----------
    U : 2-D matrix
        Factor indexed by ``user_index`` along its rows.
    Vt : 2-D matrix
        Factor indexed by ``item_index`` along its columns.
    user_index : int
        Row of ``U`` to use.
    item_index : int
        Column of ``Vt`` to use.

    Returns
    -------
    The result of ``U[user_index] * Vt[:, item_index]``; for ``np.matrix``
    inputs this is a 1 x 1 matrix holding the dot product.
    """
    user_factors = U[user_index]
    item_factors = Vt[:, item_index]
    return user_factors * item_factors

In [None]:
# Positions of the user and the movie inside the label lists
jesper = users.index('Jesper')
av = movies.index('av')

# Predicted rating computed from the two scaled factors
jav_rating = uvt_rating(U_sqrt, Vt_sqrt, user_index=jesper, item_index=av)
print(jav_rating)

Check the real rating

In [None]:
# Original rating: row 'Jesper', column 'av'
M.loc['Jesper', 'av']

# SVD with implicit ratings
## Reading the User-Item Matrix

In the previous lecture we produced a user-item matrix based on the behaviour of the users. Starting from observations, we were able to generate implicit ratings for various items in our movie catalog.

We read this information from a csv file, without the need to go through all the code again (it was saved by using the Pandas dataframe method `.to_csv()`).

In [None]:
# Load the saved user-item matrix of implicit ratings from its csv file
uiMatrix = pd.read_csv(filepath_or_buffer='../data/ui_implicit_ratings.csv')
uiMatrix

The first column is the index of the original dataset and holds our users' `user_id`. We need to turn it back into the index of our dataframe.

In [None]:
# Name the first column we just read 'user_id' and make it the index.
# The guard keeps the cell safe to re-run: without it a second run would rename
# the first item column to 'user_id' and move it into the index.
# rename() builds new column labels instead of writing into the array behind
# uiMatrix.columns.
if uiMatrix.index.name != 'user_id':
    uiMatrix = (
        uiMatrix
        .rename(columns={uiMatrix.columns[0]: 'user_id'})
        .set_index('user_id')
    )
uiMatrix

This should not be done in real cases, but it is a quick way to get things done: let's fill the `na` values with the user's average rating.

In [None]:
# axis=1 hands each row to the lambda; the row's missing values are replaced by that row's own mean.
# The result is bound back to the same name, so the unfilled frame is no longer available after this cell.
uiMatrix = uiMatrix.apply(lambda row: row.fillna(row.mean()), axis=1)
uiMatrix

Apply SVD to our dataset 

In [None]:
# Decompose the filled user-item matrix; this rebinds U, Sigma and Vt from the earlier example
U, Sigma, Vt = linalg.svd(uiMatrix)

Reduce the rank to 2

In [None]:
# Keep only the first 2 components of the decomposition
U_reduced, Sigma_reduced, Vt_reduced = rank_k(U, Sigma, Vt, k=2)

Compute the approximated rating matrix

In [None]:
# Matrix product of the three reduced factors (explicit matrix-multiplication operator)
uiMatrix_hat = U_reduced @ Sigma_reduced @ Vt_reduced

In [None]:
# Show the approximated user-item matrix with 3 decimals
prettify_matrix(uiMatrix_hat, decimals=3)
#prettify_matrix(uiMatrix.to_numpy(), 3)

## Predicting a rating

Predict a new rating for a missing rating in the original matrix (user 400004 and item 4501244)

In [None]:
# Reuse the labels of uiMatrix for the approximated matrix, rounded to 3 decimals
dfM_hat = pd.DataFrame(
    data=uiMatrix_hat,
    index=uiMatrix.index,
    columns=uiMatrix.columns,
).round(decimals=3)
# Predicted rating: row 400004, column '4501244'
dfM_hat.loc[400004, '4501244']

Let's check the precision of the prediction for an existing rating

In [None]:
# Same row (400003) and column ('4501244') read from the filled matrix and from the approximation
print('existing rating:', uiMatrix.loc[400003, '4501244'].round(3))
print('predicted rating:', dfM_hat.loc[400003, '4501244'])

- Course Instructor: Dr. Paolo Mengoni (Visiting Scholar, School of Communication, Hong Kong Baptist University) 
  - pmengoni@hkbu.edu.hk

- The code in this notebook takes inspiration from various sources. All code is for educational purposes only and released under the CC1.0. 