In [1]:
import numpy as np
import glob, time
from astropy.table import Table
from astropy.io import ascii

In [8]:
# Read the ratings table from disk.
t = ascii.read('movie_ratings.csv')

# The largest id found in each column is used as the extent of that axis,
# so ids are presumably 1-based -- TODO confirm against the CSV.
nUser, nMovie = (np.max(t[id_col]) for id_col in ('userId', 'movieId'))

In [52]:
# Dense user x movie rating matrix; an entry of 0 means "no rating recorded".
R = np.zeros(shape=(nUser, nMovie))

# Columns are addressed by position (0: userId, 1: movieId, 2: rating).
# Ids are shifted by one to become 0-based row/column indices.
user_idx = np.asarray(t.columns[0]) - 1
movie_idx = np.asarray(t.columns[1]) - 1

# One vectorized assignment instead of a Python loop over table rows.
# NOTE(review): if a (user, movie) pair occurs more than once, the last
# occurrence is expected to win, as with a sequential loop -- confirm.
R[user_idx, movie_idx] = np.asarray(t.columns[2])

In [36]:
# Display the rating matrix as a quick sanity check (numpy abbreviates large arrays).
R

array([[4. , 0. , 4. , ..., 0. , 0. , 0. ],
       [0. , 0. , 0. , ..., 0. , 0. , 0. ],
       [0. , 0. , 0. , ..., 0. , 0. , 0. ],
       ...,
       [2.5, 2. , 2. , ..., 0. , 0. , 0. ],
       [3. , 0. , 0. , ..., 0. , 0. , 0. ],
       [5. , 0. , 0. , ..., 0. , 0. , 0. ]])

In [6]:
class MF:
    """
    Biased matrix factorization trained with stochastic gradient descent.

    A rating of user i for item j is modelled as
    ``b + b_u[i] + b_i[j] + P[i] . Q[j]``. Entries of ``R`` equal to 0 are
    treated as missing (not rated).
    """

    def __init__(self, R, K, alpha, beta, iterations):
        """
        Perform matrix factorization to predict empty
        entries in a matrix.

        Only stores the inputs; call train() to fit the model.

        Arguments
        - R (ndarray)      : user-item rating matrix (0 marks a missing rating)
        - K (int)          : number of latent dimensions
        - alpha (float)    : learning rate
        - beta (float)     : regularization parameter
        - iterations (int) : number of SGD passes over the training samples
        """

        self.R = R
        self.num_users, self.num_items = R.shape
        self.K = K
        self.alpha = alpha
        self.beta = beta
        self.iterations = iterations

    def train(self):
        """
        Fit the biases and latent factors with SGD.

        Returns
        - list of (iteration_index, error) tuples, one per iteration, where
          error is the value returned by mse() after that iteration.

        Raises
        - ValueError if R has no non-zero entry (the global bias would be NaN).
        """
        observed = self.R != 0
        if not observed.any():
            raise ValueError("R contains no ratings (all entries are zero)")

        # Initialize user and item latent feature matrices
        self.P = np.random.normal(scale=1./self.K, size=(self.num_users, self.K))
        self.Q = np.random.normal(scale=1./self.K, size=(self.num_items, self.K))

        # Initialize the biases; the global bias is the mean observed rating
        self.b_u = np.zeros(self.num_users)
        self.b_i = np.zeros(self.num_items)
        self.b = np.mean(self.R[observed])

        # Create a list of training samples (user, item, rating) for every
        # strictly positive entry. np.nonzero scans in row-major order, i.e.
        # the same order as nested loops over users then items.
        rows, cols = np.nonzero(self.R > 0)
        self.samples = [
            (i, j, self.R[i, j])
            for i, j in zip(rows.tolist(), cols.tolist())
        ]

        # Perform stochastic gradient descent for number of iterations
        training_process = []
        for i in range(self.iterations):
            np.random.shuffle(self.samples)
            self.sgd()
            mse = self.mse()
            training_process.append((i, mse))
            if (i+1) % 10 == 0:
                print("Iteration: %d ; error = %.4f" % (i+1, mse))

        return training_process

    def mse(self):
        """
        Compute the training error over the non-zero entries of R.

        Despite the name, the value returned is the square root of the
        *sum* of squared errors (not divided by the number of ratings).
        """
        observed = self.R != 0
        residuals = self.R[observed] - self.full_matrix()[observed]
        return np.sqrt(np.sum(residuals ** 2))

    def sgd(self):
        """
        Perform one pass of stochastic gradient descent over self.samples.
        """
        for i, j, r in self.samples:
            # Compute prediction error for this sample
            e = r - self.get_rating(i, j)

            # Update biases
            self.b_u[i] += self.alpha * (e - self.beta * self.b_u[i])
            self.b_i[j] += self.alpha * (e - self.beta * self.b_i[j])

            # Snapshot P[i] so that the Q[j] step uses the user factors from
            # before this update rather than the freshly modified ones.
            p_i = self.P[i, :].copy()

            # Update user and item latent feature matrices
            self.P[i, :] += self.alpha * (e * self.Q[j, :] - self.beta * p_i)
            self.Q[j, :] += self.alpha * (e * p_i - self.beta * self.Q[j, :])

    def get_rating(self, i, j):
        """
        Get the predicted rating of user i and item j
        """
        return self.b + self.b_u[i] + self.b_i[j] + self.P[i, :].dot(self.Q[j, :])

    def full_matrix(self):
        """
        Compute the full prediction matrix using the resultant biases, P and Q.

        Returns an array of shape (num_users, num_items).
        """
        return (
            self.b
            + self.b_u[:, np.newaxis]   # user bias, broadcast along columns
            + self.b_i[np.newaxis, :]   # item bias, broadcast along rows
            + self.P.dot(self.Q.T)
        )

In [59]:
# Build the factorization model on the rating matrix; nothing is trained yet.
mf = MF(
    R,
    K=300,          # latent dimensions
    alpha=0.01,     # learning rate
    beta=0.01,      # regularization strength
    iterations=20,  # SGD passes
)

In [60]:
# Run SGD; the cell output is the per-iteration (index, error) history returned by train().
mf.train()

Iteration: 10 ; error = 261.5489
Iteration: 20 ; error = 215.0382


[(0, 284.00316014770704),
 (1, 276.77888601196764),
 (2, 272.9919012130982),
 (3, 270.3220464196065),
 (4, 268.29267385220834),
 (5, 266.7709052660004),
 (6, 265.3946081439484),
 (7, 264.09441822639957),
 (8, 262.8773206364828),
 (9, 261.54887487087865),
 (10, 259.9952580985771),
 (11, 257.8973438253403),
 (12, 255.17964069593202),
 (13, 251.6590862865269),
 (14, 247.25303778058108),
 (15, 242.02834938384228),
 (16, 236.05171264646168),
 (17, 229.4688822150217),
 (18, 222.373574601636),
 (19, 215.0381789642451)]

In [62]:
# Predicted rating for every (user, movie) pair, including pairs with no recorded rating.
mf.full_matrix()

array([[4.55632261, 4.24605829, 4.01070011, ..., 4.17678598, 4.17764689,
        4.28310055],
       [3.84273147, 3.57529302, 3.27674442, ..., 3.49880458, 3.49872307,
        3.60656456],
       [2.54324209, 2.46035415, 2.04991022, ..., 2.36189515, 2.36780208,
        2.4678317 ],
       ...,
       [2.27984674, 2.49306459, 2.48837399, ..., 3.12837111, 3.13525803,
        3.25887597],
       [3.53580738, 3.2525453 , 2.98208487, ..., 3.20593264, 3.20628257,
        3.31551976],
       [4.29580169, 3.47544356, 3.47682362, ..., 3.56869852, 3.56573355,
        3.70795854]])

In [61]:
# Final training error. Note: mse() returns sqrt(sum of squared errors), not a mean.
mf.mse()

215.0381789642451