In [28]:
import numpy as np
import pandas as pd
import tensorflow as tf
from scipy import spatial
from scipy.sparse import csc_matrix
from scipy.sparse.linalg import svds
from sklearn.metrics.pairwise import cosine_similarity

from lib.extractor import svd_extractor

# Step 1: Construct rating matrix

In [2]:
# Load the ratings file and keep only the three columns used downstream.
df = pd.read_csv("data/rating.csv").loc[:, ['userId','tmdbId','rating']]

In [3]:
# Build the rating matrix from the ratings frame with the project-local SVD helper.
# NOTE(review): the matrix layout (presumably users x items) is defined in
# lib.extractor — confirm there.
myExtractor = svd_extractor()
rating_matrix = myExtractor.build_rating_matrix(df)

In [8]:
# Factorise the rating matrix, keeping 20 latent features.
# Naming caveat: per the shapes printed in the next cell, `s` is the (610, 20)
# matrix, `u` is the length-20 vector and `v` is the (9715, 20) matrix, i.e. the
# names do not follow the usual (u, s, vt) convention.
# Later cells treat rows of `s` as users and rows of `v` as items.
s,u,v = myExtractor.svd_scipy(rating_matrix=rating_matrix,num_features=20)

In [9]:
# Sanity check: report the dimensions of the three outputs of the factorisation.
print(f"Shape of u: {u.shape}, shape of s: {s.shape}, shape of v: {v.shape} ")

Shape of u: (20,), shape of s: (610, 20), shape of v: (9715, 20) 


# Step 2: Construct Cosine similarity matrix

## 2.1 Cosine similarity <br>
#### User as example

In [24]:
"""Cosine distance"""
# Cosine distance (1 - cosine similarity) between the first two rows of `s`.
# `spatial` is provided by `from scipy import spatial` in the import cell; without
# that import this cell raises NameError on a fresh kernel.
cosine_dis = spatial.distance.cosine(s[0,:],s[1,:])
print(f"Cosine distance: {cosine_dis}")

Cosine distance: 1.0987159487002258


In [27]:
"""Cosine similarity"""
# Stored as `cosine_sim`, not `cosine_similarity`: the latter is the function
# imported from sklearn, and rebinding it to a float would break every later
# cell that calls it (TypeError: 'float' object is not callable).
cosine_sim = 1 - cosine_dis
print(f"Cosine similarity: {cosine_sim}")

Cosine similarity: -0.09871594870022582


In [32]:
# Pairwise cosine similarity of the first two rows of `s` (a 2 x 2 matrix).
cosine_matrix = cosine_similarity(s[:2, :])
print(cosine_matrix)

[[ 1.         -0.09871595]
 [-0.09871595  1.        ]]


## 2.2 Cosine similarity matrix

In [37]:
"""User similarity matrix"""
# Pairwise cosine similarity between all rows of `s`.
user_similarity_matrix = cosine_similarity(X=s)

In [36]:
# Shape check: the similarity matrix has one row and one column per row of `s`.
user_similarity_matrix.shape

(610, 610)

## 2.3 Most similar item based on similarity matrix

In [45]:
"""user example"""
# Recomputed here so the cell can run on its own.
user_similarity_matrix = cosine_similarity(X=s)

# Similarity of every user to user 0.
user_0 = user_similarity_matrix[0]

# Indices of the four highest scores, best first; the first entry is user 0 itself.
np.argsort(user_0)[::-1][:4]

array([  0,  45, 252,  56])

In [46]:
"""Item example"""
# Pairwise cosine similarity between all rows of `v`.
item_similarity_matrix = cosine_similarity(X=v)

# Similarity of every item to item 0.
item_0 = item_similarity_matrix[0]

# Indices of the four highest scores, best first; the first entry is item 0 itself.
np.argsort(item_0)[::-1][:4]

array([  0, 321, 826,  60])

## 2.4.1 Cosine similarity formula with numpy

In [83]:
# Work on the transpose so that each column of `m` is one row of `s`.
m = s.T

In [84]:
# Gram matrix: dot product between every pair of columns of `m`.
d = np.dot(m.T, m)

# Euclidean length of each column, kept as a 1 x n row vector for broadcasting.
norm = np.sqrt(np.square(m).sum(axis=0, keepdims=True))

# Divide entry (i, j) by |column i| * |column j| to obtain the cosine.
similarity_matrix_numpy = d / norm / norm.T

In [87]:
# Shape check for the hand-rolled NumPy similarity matrix.
similarity_matrix_numpy.shape

(610, 610)

In [88]:
# Display the matrix; the [0, 1] entry can be compared with the 2 x 2 sklearn result in section 2.1.
similarity_matrix_numpy

array([[ 1.        , -0.09871595, -0.18608504, ..., -0.39393495,
        -0.19938605, -0.09287304],
       [-0.09871595,  1.        , -0.02530091, ...,  0.0610331 ,
        -0.12946863, -0.10349019],
       [-0.18608504, -0.02530091,  1.        , ...,  0.39536761,
         0.11254893, -0.27953488],
       ...,
       [-0.39393495,  0.0610331 ,  0.39536761, ...,  1.        ,
         0.00669988,  0.16781229],
       [-0.19938605, -0.12946863,  0.11254893, ...,  0.00669988,
         1.        ,  0.57688158],
       [-0.09287304, -0.10349019, -0.27953488, ...,  0.16781229,
         0.57688158,  1.        ]])

## 2.4.2 Cosine similarity formula with tensorflow

In [89]:
# Use the matrix `s` as the input of the TensorFlow computation in the next cell.
features = s

In [90]:
dim = features.shape

# Small constant added to the Gram matrix so the norms taken from its diagonal
# stay strictly positive (avoids a division by zero for all-zero rows).
constant = tf.constant(1e-9,dtype=tf.float32)

# Named `features_ph`, not `df`: `df` is the ratings DataFrame loaded at the top
# of the notebook, and overwriting it would break re-running the earlier cells.
features_ph = tf.placeholder(shape=[dim[0],dim[1]],dtype=tf.float32)

# Row-by-row dot products.
similar_user = tf.matmul(features_ph,tf.transpose(features_ph)) + constant

# Row norms as a column vector (square root of the diagonal).
norm_user = tf.reshape(tf.sqrt(tf.diag_part(similar_user)),[-1,1])

# Outer product of the norms: entry (i, j) is |row i| * |row j|.
norm_user_matrix = tf.matmul(norm_user,tf.transpose(norm_user))

similar_user = similar_user/norm_user_matrix

with tf.Session() as sess:
    similarity_matrix_tf = sess.run(similar_user,feed_dict={features_ph:features})

In [92]:
# Shape check for the TensorFlow result. `cosine_matrix` is the 2 x 2 array from
# section 2.1, not a function, so it must not be called here.
similarity_matrix_tf.shape

(610, 610)

# Step 3: Modularize

In [94]:
class cosine_recommender:
    """Cosine-similarity utilities for a nearest-neighbour recommender.

    Three interchangeable ways of building a row-by-row cosine similarity
    matrix are provided (scikit-learn, NumPy, TensorFlow), plus `recommend`,
    which ranks the entries most similar to a given row.
    """

    def __init__(self):
        pass

    def construct_cosine_matrix(self,features):
        """Return the pairwise cosine similarity between the rows of `features`.

        Delegates to `sklearn.metrics.pairwise.cosine_similarity`.
        """
        return cosine_similarity(features)

    def recommend(self,rating_matrix,ID,top_n=5):
        """Return the indices of the `top_n` entries most similar to `ID`.

        Parameters
        ----------
        rating_matrix : 2-D array
            Despite its name this is used as a similarity matrix (for example
            the output of `construct_cosine_matrix`); row `ID` holds the
            scores of every entry against `ID`.
        ID : int
            Row index of the entry to find neighbours for.
        top_n : int, default 5
            Number of neighbours to return.

        Returns
        -------
        numpy.ndarray
            Indices ordered from most to least similar, never containing `ID`.
        """
        scores = rating_matrix[ID, :]
        # Normalise a negative index so it can be compared with argsort output.
        self_index = ID % scores.shape[0]
        ranked = scores.argsort()[::-1]
        # Drop `ID` explicitly instead of assuming it sorts first: with ties or
        # floating-point rounding another entry can outrank the self-similarity.
        ranked = ranked[ranked != self_index]
        return ranked[:top_n]

    def cosine_matrix_numpy(self,features):
        """Compute the row-by-row cosine similarity matrix with plain NumPy.

        Rows with zero norm get a similarity of 0 to everything (including
        themselves) instead of NaN, which matches what scikit-learn's
        `cosine_similarity` returns for all-zero rows.
        """
        m = np.asarray(features).T
        gram = m.T @ m
        norms = np.sqrt((m * m).sum(axis=0, keepdims=True))
        # A zero norm would give 0/0 = NaN; dividing by 1 leaves the 0 dot product.
        norms[norms == 0] = 1
        return gram / norms / norms.T

    def cosine_matrix_tf(self,features):
        '''
        Row-based similarity computed with a TensorFlow graph.

        default: user is in row, item in column

        The graph is built in a private `tf.Graph` so repeated calls do not
        keep adding nodes to the global default graph.
        '''
        features = np.array(features)

        dim = features.shape

        # Graph/session API: lives under tf.compat.v1 on newer TensorFlow
        # releases and directly under `tf` on older 1.x releases.
        tf1 = getattr(getattr(tf, "compat", None), "v1", tf)

        graph = tf.Graph()
        with graph.as_default():
            # Added to the Gram matrix so the norms taken from its diagonal
            # stay strictly positive (no division by zero for all-zero rows).
            constant = tf1.constant(1e-9,dtype=tf.float32)

            features_ph = tf1.placeholder(shape=[dim[0],dim[1]],dtype=tf.float32)

            similar_user = tf1.matmul(features_ph,tf1.transpose(features_ph)) + constant

            # Row norms as a column vector (square root of the diagonal).
            norm_user = tf1.reshape(tf1.sqrt(tf1.diag_part(similar_user)),[-1,1])

            # Entry (i, j) is |row i| * |row j|.
            norm_user_matrix = tf1.matmul(norm_user,tf1.transpose(norm_user))

            similar_user = similar_user/norm_user_matrix

        with tf1.Session(graph=graph) as sess:
            similarity_matrix_tf = sess.run(similar_user,feed_dict={features_ph:features})

        return similarity_matrix_tf

In [95]:
# Instantiate the recommender helper defined in the previous cell.
myRecommender = cosine_recommender()

In [96]:
# Similarity matrix over the rows of `s`, built through the sklearn-backed method.
similarityMatrix = myRecommender.construct_cosine_matrix(features=s)

In [97]:
# Neighbours of row 5; `top_n` is left at its default of 5.
myRecommender.recommend(rating_matrix=similarityMatrix, ID=5)

array([239, 355, 250, 178,  26])