In [None]:
# default_exp models.svd

# SVD
> Implementation of SVD-based recommender model.

A very popular technique for recommendation systems is matrix factorization. The idea is to reduce the dimensionality of the data before calculating similar movies/users. We factorize the user-item matrix to obtain the user factors and item factors, which are low-dimensional embeddings such that 'similar' users/items are mapped to 'nearby' points.

This kind of analysis can generate matches that are impossible to find with the techniques discussed above, as the latent factors can capture attributes that are hard to decipher from the raw data, e.g. a latent factor can correspond to the degree to which a movie is female-oriented or the degree to which there is a slow development of the characters.

Moreover, the users and the movies are embedded in the same space, which provides a direct way to compute user-movie similarity.

We will use Singular Value Decomposition (SVD) for factorizing the matrix.

In [None]:
#export
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse.linalg import svds

In [None]:
# Load the ratings table (comma-separated, first row is the header) from the
# public rec-data repository.
rating_df = pd.read_csv(
    'https://raw.githubusercontent.com/sparsh-ai/rec-data-public/master/ml-other/ml100k_ratings.csv',
    sep=',',
    header=0,
)

In [None]:
#export
class SVDSimilarity:
    """Item-to-item recommender based on a truncated SVD of the rating matrix.

    `fit` builds a dense item x user rating matrix, mean-centres every item
    row, and keeps the right singular vectors of the (user x item) matrix as
    per-item latent factors. `recommend` ranks items by cosine similarity of
    those factors.

    User and item ids are treated as 1-based positive integers: id ``i`` is
    stored at row/column ``i - 1``.
    """

    def fit(self, df, user_col='user_id', item_col='item_id', rating_col='rating', n_factors=50):
        """Compute item latent factors from a ratings table.

        Parameters
        ----------
        df : pandas.DataFrame
            One row per rating, with the columns named below.
        user_col, item_col : str
            Columns holding 1-based integer user / item ids.
        rating_col : str
            Column holding the numeric rating.
        n_factors : int
            Number of singular vectors requested from `svds`; `svds` itself
            rejects values that are too large for the matrix.

        Side effects
        ------------
        Sets ``self.item_factors`` to an array of shape (n_items, n_factors),
        where n_items is the largest item id in `df`.
        """
        item_ids = df[item_col].values
        user_ids = df[user_col].values
        n_items = int(np.max(item_ids))
        n_users = int(np.max(user_ids))

        # Zero-initialised float matrix: unrated cells must be 0 (np.ndarray
        # would leave them as uninitialised memory) and fractional ratings
        # must not be truncated to integers.
        ratings_mat = np.zeros((n_items, n_users), dtype=np.float64)
        ratings_mat[item_ids - 1, user_ids - 1] = df[rating_col].values

        # Centre each item row on its own mean, then scale; done in place so
        # only one dense matrix is ever held in memory.
        ratings_mat -= ratings_mat.mean(axis=1, keepdims=True)
        ratings_mat /= np.sqrt(n_items - 1)

        # Rows of V span item space for the (user x item) matrix, so V.T has
        # one latent-factor row per item.
        _, _, V = svds(ratings_mat.T, k=n_factors)
        self.item_factors = V.T

    def recommend(self, itemid, top_k=5):
        """Return the ids of the `top_k` items most similar to `itemid`.

        Parameters
        ----------
        itemid : int
            1-based item id, using the same numbering as the data given to `fit`.
        top_k : int
            Maximum number of neighbours to return.

        Returns
        -------
        list of int
            1-based item ids ordered by decreasing cosine similarity; the
            query item itself is never included.

        Raises
        ------
        IndexError
            If `itemid` is outside ``1 .. n_items``.
        """
        n_items = self.item_factors.shape[0]
        if not 1 <= itemid <= n_items:
            raise IndexError(
                'itemid %r is out of range; expected 1..%d' % (itemid, n_items))

        # fit() stores item id i at row i - 1.
        row = itemid - 1
        item_row = self.item_factors[row].reshape(1, -1)
        similarity = cosine_similarity(item_row, self.item_factors)[0]

        # Stable sort keeps tie order deterministic. The query row is removed
        # explicitly because it is not guaranteed to sort first (e.g. for an
        # all-zero factor vector or exact ties).
        ranked = np.argsort(-similarity, kind='stable')
        ranked = ranked[ranked != row][:top_k]

        # Convert row indices back to 1-based item ids.
        return (ranked + 1).tolist()

In [None]:
# Fit the model on the loaded ratings (this dataset names its id columns
# 'userId' / 'movieId'), then show the recommendations for item 10.
model = SVDSimilarity()
model.fit(rating_df, user_col='userId', item_col='movieId')

model.recommend(10)

[106886, 57330, 34214, 26962, 97394]