In [1]:
class Normalize:
    """Mean-normalization of a ratings matrix over its observed entries."""

    @staticmethod
    def normalize(Y, R, YMean=None):
        """Subtract a per-row mean from the observed entries of ``Y``.

        Only entries where ``R == 1`` are touched; every other entry of the
        result stays 0.

        Args:
            Y: 2-D array of ratings.
            R: 2-D array of the same shape as ``Y``; an entry equal to 1
                marks the matching entry of ``Y`` as observed.
            YMean: Optional per-row means, indexed as ``YMean[i]``. When
                ``None`` the means are computed from the observed entries
                of each row.

        Returns:
            A tuple ``(YNorm, means)``. ``YNorm`` is a float array shaped
            like ``Y``. ``means`` is the ``YMean`` object that was passed
            in, or a freshly computed ``(m, 1)`` array when ``YMean`` was
            ``None``.
        """
        m, _ = Y.shape
        observed = (R == 1)
        YNorm = np.zeros(Y.shape)

        if YMean is not None:
            for i in range(m):
                row = observed[i, :]
                YNorm[i, row] = Y[i, row] - YMean[i]
            return YNorm, YMean

        means = np.zeros((m, 1))
        for i in range(m):
            row = observed[i, :]
            # A row without any observed entry keeps a mean of 0; taking
            # np.mean of the empty selection would produce NaN.
            if not row.any():
                continue
            means[i] = np.mean(Y[i, row])
            YNorm[i, row] = Y[i, row] - means[i]

        return YNorm, means

In [None]:
class CollaborativeFilter():
    """Cost and gradient routines for collaborative filtering.

    Every public method takes a flat parameter vector laid out as the
    movie-feature matrix ``X`` (``numMovies x numFeature``) followed by the
    user-feature matrix ``Theta`` (``numUser x numFeature``), both in
    row-major order. Gradient methods return a vector in the same layout.

    ``Y`` and ``R`` are combined element-wise with ``X . Theta^T``, so they
    are expected to be ``numMovies x numUser``; ``R`` acts as the 0/1 mask
    of observed ratings.
    """

    def __init__(self, *args):
        """Store the problem definition.

        Args:
            *args: Exactly eleven positional values, in this order:
                ``Y, R, numUser, numMovies, numFeature, lam, batchSize,
                learner, anim, costs, counts``.

                * ``lam`` is the regularization weight.
                * ``learner`` selects which block is learned: ``'movie'``,
                  ``'user'``, or anything else for both.
                * ``anim`` is ``None`` or an object exposing
                  ``plotScores(counts, costs)``.
                * ``costs`` / ``counts`` are lists that ``cost`` appends to.

        Raises:
            ValueError: If the number of arguments is not eleven.
        """
        (self.Y,
         self.R,
         self.numUser,
         self.numMovies,
         self.numFeature,
         self.lam,
         self.batchSize,
         self.learner,
         self.anim,
         self.costs,
         self.counts) = args

    def _unpack(self, params):
        """Split a flat parameter vector into the ``(X, Theta)`` matrices."""
        split = self.numMovies * self.numFeature
        X = np.reshape(params[:split], (self.numMovies, self.numFeature))
        Theta = np.reshape(params[split:], (self.numUser, self.numFeature))
        return X, Theta

    @staticmethod
    def _pack(X_part, Theta_part):
        """Flatten an ``(X, Theta)``-shaped pair back into one vector."""
        return np.concatenate((X_part, Theta_part)).ravel()

    def cost(self, params):
        """Return the regularized squared-error cost for ``params``.

        Side effects: appends ``J / 1000`` to ``self.costs``, appends the new
        length of ``self.costs`` to ``self.counts``, and, when ``self.anim``
        is not ``None``, calls ``self.anim.plotScores`` with both histories.
        """
        X, Theta = self._unpack(params)

        residual = np.dot(X, Theta.T) - self.Y
        J = (np.sum(self.R * np.square(residual)) / 2
             + (self.lam * np.sum(np.square(Theta))) / 2
             + (self.lam * np.sum(np.square(X))) / 2)

        self.costs.append(np.divide(J, 1000))
        self.counts.append(len(self.costs))

        if self.anim is not None:
            self.anim.plotScores(np.array(self.counts), np.array(self.costs))

        return J

    def gradient(self, params):
        """Return the full gradient of ``cost`` as a flat vector.

        When ``self.learner`` is ``'movie'`` or ``'user'`` only that block
        receives a real gradient.
        """
        X, Theta = self._unpack(params)

        masked_err = np.multiply(np.dot(X, Theta.T) - self.Y, self.R)

        # NOTE(review): for the block that is not being learned the current
        # parameter values are returned in place of a gradient (not zeros).
        # Kept as-is; confirm this is what the optimizer expects.
        if self.learner == 'movie':
            X_grad = np.dot(masked_err, Theta) + self.lam * X
            Theta_grad = Theta
        elif self.learner == 'user':
            X_grad = X
            Theta_grad = np.dot(masked_err.T, X) + self.lam * Theta
        else:
            X_grad = np.dot(masked_err, Theta) + self.lam * X
            Theta_grad = np.dot(masked_err.T, X) + self.lam * Theta

        return self._pack(X_grad, Theta_grad)

    def stochasticGradient(self, params):
        """Return a mini-batch gradient estimate as a flat vector.

        Delegates to ``stochasticGradientMovie`` / ``stochasticGradientUser``
        for the block(s) selected by ``self.learner``; the other block is
        passed through unchanged, mirroring ``gradient``.
        """
        X, Theta = self._unpack(params)

        if self.learner == 'movie':
            X_grad = self.stochasticGradientMovie(X, Theta)
            Theta_grad = Theta
        elif self.learner == 'user':
            X_grad = X
            Theta_grad = self.stochasticGradientUser(X, Theta)
        else:
            X_grad = self.stochasticGradientMovie(X, Theta)
            Theta_grad = self.stochasticGradientUser(X, Theta)

        return self._pack(X_grad, Theta_grad)

    def stochasticGradientMovie(self, X, Theta):
        """Return a gradient for ``X`` computed from batches of users.

        Users are visited in consecutive slices of ``self.batchSize``; each
        slice yields a gradient for every movie at once.
        """
        X_grad = np.zeros(X.shape)

        # NOTE(review): each iteration overwrites X_grad, so the value
        # returned comes from the last user batch only. Behavior kept;
        # confirm whether accumulation across batches was intended.
        for start in range(0, self.numUser, self.batchSize):
            stop = start + self.batchSize
            ThetaTemp = Theta[start:stop, :]
            err = np.multiply(np.dot(X, ThetaTemp.T) - self.Y[:, start:stop],
                              self.R[:, start:stop])
            X_grad = np.dot(err, ThetaTemp) + self.lam * X

        return X_grad

    def stochasticGradientUser(self, X, Theta):
        """Return a gradient for ``Theta`` computed from batches of movies.

        Movies are visited in consecutive slices of ``self.batchSize``; each
        slice yields a gradient for every user at once.
        """
        Theta_grad = np.zeros(Theta.shape)

        # NOTE(review): as in stochasticGradientMovie, only the last movie
        # batch determines the returned value.
        for start in range(0, self.numMovies, self.batchSize):
            stop = start + self.batchSize
            XTemp = X[start:stop, :]
            err = np.multiply(np.dot(XTemp, Theta.T) - self.Y[start:stop, :],
                              self.R[start:stop, :])
            Theta_grad = np.dot(err.T, XTemp) + self.lam * Theta

        return Theta_grad

    def batchGradient(self, params):
        """Return the gradient of ``cost`` for both blocks, built batch-wise.

        Rows of ``X`` and rows of ``Theta`` are processed in slices of
        ``self.batchSize``. Only entries with ``R == 1`` contribute to the
        error term, so for a 0/1 ``R`` the result matches ``gradient`` with
        both blocks learned, whatever the batch size.
        """
        X, Theta = self._unpack(params)

        X_grad = np.zeros(X.shape)
        Theta_grad = np.zeros(Theta.shape)
        step = self.batchSize

        for start in range(0, self.numMovies, step):
            stop = start + step
            X_rows = X[start:stop, :]
            rated = (self.R[start:stop, :] == 1)
            # Mask per entry: gathering the rated columns of a whole batch
            # would mix users that only some movies in the batch have rated.
            err = np.where(rated,
                           np.dot(X_rows, Theta.T) - self.Y[start:stop, :],
                           0.0)
            X_grad[start:stop, :] = np.dot(err, Theta) + self.lam * X_rows

        for start in range(0, self.numUser, step):
            stop = start + step
            Theta_rows = Theta[start:stop, :]
            rated = (self.R[:, start:stop] == 1)
            err = np.where(rated,
                           np.dot(X, Theta_rows.T) - self.Y[:, start:stop],
                           0.0)
            Theta_grad[start:stop, :] = np.dot(err.T, X) + self.lam * Theta_rows

        return self._pack(X_grad, Theta_grad)

In [None]:
# NOTE: commented-out static-method variant of CollaborativeFilter (not executed).
# class CollaborativeFilter:

#     @staticmethod
#     def cost(params, *args):
#         # Args
#         #learning_rate, itr
#         Y, R, numUser, numMovies, numFeature, lam, batchSize, learner,anim, costs, counts = args
#         X = np.reshape(params[0:numMovies * numFeature], (numMovies, numFeature))
#         Theta = np.reshape(params[numMovies * numFeature:], (numUser, numFeature))

#         J = sum(sum(R * np.square((Theta.dot(X.T)).T - Y))) / 2 + (lam * sum(sum(np.square(Theta)))) / 2 + (
#                 lam * sum(sum(np.square(X)))) / 2
        
#         costs.append(np.divide(J, 1000))
#         counts.append(len(costs))
        
#         if anim is not None:
#             anim.plotScores(np.array(counts), np.array(costs))
        
#         return J

#     @staticmethod
#     def gradient(params, *args):
#         # Args
#         #learning_rate, itr
#         Y, R, numUser, numMovies, numFeature, lam, batchSize,learner, anim, costs, counts = args
#         X = np.reshape(params[0:numMovies * numFeature], (numMovies, numFeature))
#         Theta = np.reshape(params[numMovies * numFeature:], (numUser, numFeature))

#         grad = np.multiply((np.dot(X, Theta.T) - Y), R)
#         # gradients
#         if learner is 'movie':
#             X_grad = np.dot(grad, Theta) + lam * X
#             Theta_grad = Theta
#         elif learner is 'user':
#             X_grad = X
#             Theta_grad = np.dot(grad.T, X) + lam * Theta
#         else:
#             X_grad = np.dot(grad, Theta) + lam * X
#             Theta_grad = np.dot(grad.T, X) + lam * Theta

#         params = np.concatenate((X_grad, Theta_grad)).ravel()

#         return params
    
#     @staticmethod
#     def stochasticGradient(params, *args):
#         #learning_rate, itr
#         Y, R, numUser, numMovies, numFeature, lam, batchSize, learner, anim, costs, counts = args
#         X = np.reshape(params[0:numMovies * numFeature], (numMovies, numFeature))
#         Theta = np.reshape(params[numMovies * numFeature:], (numUser, numFeature))
        
#         # There is eps option to pass the learning rate but somehow
#         # I don't trust that function.
# #         itr.append(1)
# #         lr = (1. / (1. + 0.01 * len(itr)))
        
#         if learner is 'movie':
#             X_grad = \
#                 CollaborativeFilter.stochasticGradientMovie(Y, R, numUser, numMovies, numFeature, lam, batchSize, X, Theta)
#             Theta_grad = Theta_grad
#         elif learner is 'user':
#             X_grad = X
#             Theta_grad = \
#                 CollaborativeFilter.stochasticGradientUser(Y, R, numUser, numMovies, numFeature, lam, batchSize, X, Theta)
#         else:
#             X_grad = \
#                 CollaborativeFilter.stochasticGradientMovie(Y, R, numUser, numMovies, numFeature, lam, batchSize, X, Theta)
#             Theta_grad = \
#                 CollaborativeFilter.stochasticGradientUser(Y, R, numUser, numMovies, numFeature, lam, batchSize, X, Theta)

#         params = np.concatenate((X_grad, Theta_grad)).ravel()

#         return params
    
#     @staticmethod
#     def stochasticGradientMovie(Y, R, numUser, numMovies, numFeature, lam, batchSize, X, Theta):
        
#         # (1628 * 10)
#         X_grad = np.zeros(X.shape)
        
#         # This is stochastic gradient implementation, k is number of features
#         # for i in n_m:
#         #     for j in n_u:
#         #        x_i_k = ((x_i_k * theta_j_k) - y_i_j) * theta_j_k + lam * x_i_k
#         # Since we are calculating n_m gradients for n_m movies separately, there is no point
#         # looping over number of movies. And so vectorizing the x_i_k gives below implementation.
#         # So, to summarize (we are calculating the gradient of each movie for one user at a time)
#         for i in range(0, numUser, batchSize):

#             ThetaTemp = Theta[i:i+batchSize,:]
#             grad = np.multiply((np.dot(X, ThetaTemp.T) - Y[:, i:i+batchSize]), R[:, i:i+batchSize])
#             #lr * 
#             X_grad = np.dot(grad, ThetaTemp) + lam * X
            
#         return X_grad
    
#     @staticmethod
#     def stochasticGradientUser(Y, R, numUser, numMovies, numFeature, lam, batchSize, X, Theta):
#         # (944 * 10)
#         Theta_grad = np.zeros(Theta.shape)
        
#         # we are calculating the gradient of each user for one movie at a time
#         for i in range(0, numMovies, batchSize):

#             XTemp = X[i:i+batchSize,:]
#             grad = np.multiply((np.dot(XTemp, Theta.T) - Y[i:i+batchSize, :]), R[i:i+batchSize, :])
#             #lr * 
#             Theta_grad = np.dot(grad.T, XTemp) + lam * Theta
            
#         return Theta_grad
    
#     @staticmethod
#     def batchGradient(params, *args):
#         #learning_rate, itr
#         Y, R, numUser, numMovies, numFeature, lam, batchSize, anim, costs, counts = args
#         X = np.reshape(params[0:numMovies * numFeature], (numMovies, numFeature))
#         Theta = np.reshape(params[numMovies * numFeature:], (numUser, numFeature))
        
#         # There is eps option to pass the learning rate but somehow
#         # I don't trust that function.
# #         itr.append(1)
# #         lr = (1. / (1. + 0.01 * len(itr)))

#         X_grad = np.zeros(X.shape)
#         Theta_grad = np.zeros(Theta.shape)
        
#         for i in range(0, numMovies, batchSize):
#             idx = np.where(R[i:i+batchSize, :] == 1)
#             ThetaTemp = Theta[idx[1]]
#             YTemp = Y[i:i+batchSize, idx[1]]
            
#             #lr * 
#             X_grad[i:i+batchSize, :] = ((np.dot(X[i:i+batchSize, :], ThetaTemp.T) - YTemp).dot(ThetaTemp) \
#                                                 + lam * X[i:i+batchSize, :])

#         for i in range(0, numUser, batchSize):
#             idx = np.where(R[:, i:i+batchSize] == 1)
#             XTemp = X[idx[0]]
#             YTemp = Y[idx[0], i:i+batchSize]
            
#             print("Shapes: ", X.shape , XTemp.shape, YTemp.shape, Theta_grad.shape, Theta_grad[i:i+batchSize, :].shape)
#             #lr * 
#             Theta_grad[i:i+batchSize, :] = ((np.dot(XTemp,Theta[i:i+batchSize, :].T) - YTemp).T.dot(XTemp) \
#                                                 + lam * Theta[i:i+batchSize, :])

#         params = np.concatenate((X_grad, Theta_grad)).ravel()

#         return params