In [89]:
from scipy.sparse.linalg import svds
from scipy.sparse import csc_matrix
import pandas as pd
import numpy as np
import tensorflow as tf

  from ._conv import register_converters as _register_converters


# Step 1: Process rating data

## 1.1 Read raw data

In [44]:
df = pd.read_csv("data/rating.csv")[['userId','tmdbId','rating']]

df.head()

In [46]:
print(len(df.tmdbId.unique()))

9715


In [60]:
print(len(df.userId.unique()))

610


In [64]:
print(max(df.userId))

610


## 1.2 Construct rating matrix

In [66]:
id_2_tmdb = {i:j for i,j in enumerate(df.tmdbId.unique())}
tmdb_2_id = {j:i for i,j in id_2_tmdb.items()}
print(f"Number of unique movie : {len(id_2_tmdb)}")

Number of unique movie : 9715


In [68]:
id_2_user = {i:j for i,j in enumerate(df.userId.unique())}
user_2_id = {j:i for i,j in id_2_user.items()}
print(f"Number of unique users : {len(id_2_user)}")

Number of unique users : 610


In [124]:
rating_matrix = np.zeros([len(id_2_user),len(id_2_tmdb)])

In [125]:
rating_matrix.shape

(610, 9715)

In [72]:
df[:10].

Unnamed: 0,userId,tmdbId,rating
0,1,862,4.0
1,5,862,4.0
2,7,862,4.5
3,15,862,2.5
4,17,862,4.5
5,18,862,3.5
6,19,862,4.0
7,21,862,3.5
8,27,862,3.0
9,31,862,5.0


In [78]:
for row in df.itertuples():
    rawUser, rawMovie, rating = row[1], row[2], row[3]
    user, movie = user_2_id[rawUser], tmdb_2_id[rawMovie]
    
    rating_matrix[user][movie] = rating

# Step 2: SVD modeling

## 2.1 SVD from Scipy

In [84]:
num_features = 10
u, s, vt = svds(rating_matrix, k=num_features) # k is the number of factors

In [111]:
def svd_scipy(rating_matrix,num_features=10):
    u, s, vt = svds(rating_matrix, k=num_features) # k is the number of factors
    return u,s,vt.transpose()

In [112]:
u,s,v = svd_scipy(rating_matrix,10)

In [114]:
print(f"Shape of u: {u.shape}, shape of s: {s.shape}, shape of v: {v.shape} ")

Shape of u: (610, 10), shape of s: (10,), shape of v: (9715, 10) 


## 2.2 SVD from Tensorflow

In [104]:
tf.reset_default_graph()
nb_users, nb_products = len(id_2_user), len(id_2_tmdb)
with tf.Session() as sess:
    rating_matrix_tf = tf.placeholder(tf.float32, shape=(nb_users, nb_products))
    S, U, V = tf.svd(rating_matrix_tf)
    
    s,u,v = sess.run([S,U,V],feed_dict={rating_matrix_tf:rating_matrix})

In [107]:
def svd_tf(rating_matrix):
    tf.reset_default_graph()
    nb_users, nb_products = rating_matrix.shape
    with tf.Session() as sess:
        rating_matrix_tf = tf.placeholder(tf.float32, shape=(nb_users, nb_products))
        S, U, V = tf.svd(rating_matrix_tf)

        s,u,v = sess.run([S,U,V],feed_dict={rating_matrix_tf:rating_matrix})
    return s,u,v

In [108]:
s,u,v = svd_tf(rating_matrix)

In [109]:
print(f"Shape of u: {u.shape}, shape of s: {s.shape}, shape of v: {v.shape} ")

Shape of u: (610, 610), shape of s: (610,), shape of v: (9715, 610) 


## 2.3 Calculate rating score

In [121]:
"""Reverse of svd decompose"""
pred_rating_matrix = u@np.diag(s)@v.transpose()

pred_rating_matrix.shape

(610, 9715)

In [123]:
pred_rating_matrix[0,:]

array([ 2.86156093,  0.97620424,  1.70280382, ..., -0.02350501,
       -0.02742251, -0.02742251])

In [122]:
np.argsort(pred_rating_matrix[0,:][-3:])

array([1, 2, 0])

# Step 3 Modulize

In [1]:
from scipy.sparse.linalg import svds
from scipy.sparse import csc_matrix
import pandas as pd
import numpy as np
import tensorflow as tf

class svd_extractor:
    def build_rating_matrix(self,df,userCol="userId",productCol="tmdbId",ratingCol="rating"):
        self.id_2_product = {i:j for i,j in enumerate(df[productCol].unique())}
        self.product_2_id = {j:i for i,j in self.id_2_product.items()}
        
        self.id_2_user = {i:j for i,j in enumerate(df[userCol].unique())}
        self.user_2_id = {j:i for i,j in self.id_2_user.items()}

        self.rating_matrix = np.zeros([len(self.id_2_user),len(self.id_2_product)])
        
        for row in df.itertuples():
            rawUser, rawProduct, rating = row[1], row[2], row[3]
            user, product = self.user_2_id[rawUser], self.product_2_id[rawProduct]

            self.rating_matrix[user][product] = rating
        return self.rating_matrix
    

    def svd_scipy(self,rating_matrix,num_features=10):
        u, s, vt = svds(rating_matrix, k=num_features) # k is the number of factors
        return u,s,vt.transpose()
    
        
    def svd_tf(self,rating_matrix):
        tf.reset_default_graph()
        nb_users, nb_products = rating_matrix.shape
        with tf.Session() as sess:
            rating_matrix_tf = tf.placeholder(tf.float32, shape=(nb_users, nb_products))
            S, U, V = tf.svd(rating_matrix_tf)

            s,u,v = sess.run([S,U,V],feed_dict={rating_matrix_tf:rating_matrix})
        return s,u,v    


  from ._conv import register_converters as _register_converters


In [2]:
myExtractor = svd_extractor()

In [130]:
rating_matrix = myExtractor.build_rating_matrix(df_rating=df,productCol="tmdbId",userCol="userId",ratingCol="rating")

In [132]:
s,u,v = myExtractor.svd_scipy(rating_matrix,num_features=10)

In [133]:
s,u,v = myExtractor.svd_tf(rating_matrix)