# Movie Recommender System

In [1]:
import numpy as np
import numexpr as ne
import scipy.io
from scipy import optimize
import matplotlib.pyplot as plt

%matplotlib inline

In [2]:
# Read the ratings dataset from disk and pull out the two matrices used
# throughout the notebook: Y and its companion mask R.
ex8_movies = scipy.io.loadmat('data/ex8_movies.mat')
Y, R = ex8_movies['Y'], ex8_movies['R']

In [3]:
# Y and R are expected to have matching shapes (axis 0 = movies, axis 1 = users).
print('Y.shape = {0}'.format(Y.shape))
print('R.shape = {0}'.format(R.shape))

Y.shape = (1682, 943)
R.shape = (1682, 943)


In [4]:
# Problem dimensions: n latent features, n_m movies (rows of Y), n_u users (columns of Y).
n = 100
n_m, n_u = Y.shape

## Collaborative filtering cost function

In [5]:
def cofi_cost(params, Y, R, num_users, num_movies, num_features, lamb):
    """Return the regularized collaborative-filtering cost.

    Parameters
    ----------
    params : 1-D array
        Flattened movie features followed by flattened user parameters.
        The first ``num_movies * num_features`` entries are reshaped to
        ``(num_movies, num_features)``; the remainder is reshaped to
        ``(num_users, num_features)``.
    Y : array, num_movies x num_users
        Ratings matrix.
    R : array, same shape as Y
        Element-wise mask applied to both Y and the prediction error, so
        entries where R is 0 contribute nothing to the cost.
    num_users, num_movies, num_features : int
        Dimensions used to unpack ``params``.
    lamb : scalar
        Regularization strength applied to both parameter matrices.

    Returns
    -------
    scalar
        Half the sum of squared masked errors plus ``lamb / 2`` times the
        sum of squares of each parameter matrix.
    """
    split = num_movies * num_features
    movie_feats = params[:split].reshape(num_movies, num_features)
    user_params = params[split:].reshape(num_users, num_features)

    # Prediction error, masked by R so unmasked entries are the only ones counted.
    masked_err = (movie_feats @ user_params.T - Y * R) * R

    squared_err_term = 0.5 * np.sum(np.square(masked_err))
    movie_reg_term = lamb / 2 * np.sum(np.square(movie_feats))
    user_reg_term = lamb / 2 * np.sum(np.square(user_params))
    return squared_err_term + movie_reg_term + user_reg_term

In [6]:
def cofi_grad(params, Y, R, num_users, num_movies, num_features, lamb):
    """Return the gradient of the regularized collaborative-filtering cost.

    Parameters
    ----------
    params : 1-D array
        Flattened movie features followed by flattened user parameters,
        unpacked as ``(num_movies, num_features)`` and
        ``(num_users, num_features)`` respectively.
    Y : array, num_movies x num_users
        Ratings matrix.
    R : array, same shape as Y
        Element-wise mask applied to both Y and the prediction error.
    num_users, num_movies, num_features : int
        Dimensions used to unpack ``params``.
    lamb : scalar
        Regularization strength.

    Returns
    -------
    1-D array
        Gradient with the same layout as ``params``: movie-feature gradient
        first, then user-parameter gradient, each flattened row-major.
    """
    split = num_movies * num_features
    movie_feats = params[:split].reshape(num_movies, num_features)
    user_params = params[split:].reshape(num_users, num_features)

    # Masked prediction error, num_movies x num_users.
    masked_err = (movie_feats @ user_params.T - Y * R) * R

    # Gradients with respect to the movie features and the user parameters.
    movie_grad = masked_err @ user_params + lamb * movie_feats
    user_grad = masked_err.T @ movie_feats + lamb * user_params

    # Pack both gradients back into a single flat vector.
    return np.concatenate((movie_grad.ravel(), user_grad.ravel()))

### Test cost function

In [7]:
# Load the stored X and Theta matrices used to check the cost function below.
ex8_movieParams = scipy.io.loadmat('data/ex8_movieParams.mat')
X, Theta = ex8_movieParams['X'], ex8_movieParams['Theta']

In [8]:
# Show the dimensions of the loaded parameter matrices.
print('X.shape = {0}'.format(X.shape))
print('Theta.shape = {0}'.format(Theta.shape))

X.shape = (1682, 10)
Theta.shape = (943, 10)


In [9]:
# Build a tiny sub-problem (4 users, 5 movies, 3 features) for checking cofi_cost.
# NOTE: this overwrites the global n_u, n_m and n; they are reset to the
# full-size values in a later cell before training.
n_u, n_m, n = 4, 5, 3
X_test = X[:n_m, :n]
Theta_test = Theta[:n_u, :n]
Y_test = Y[:n_m, :n_u]
R_test = R[:n_m, :n_u]
# Pack the parameters the way cofi_cost expects: X first, then Theta.
params = np.concatenate((X_test.ravel(), Theta_test.ravel()))

In [10]:
# Evaluate the unregularized cost on the reduced problem and compare it,
# rounded to two decimals, with the reference value 22.22.
J = cofi_cost(params, Y_test, R_test, n_u, n_m, n, 0)
actual_str = format(J, '.2f')
expected_str = format(22.22, '.2f')
print('Actual:', actual_str)
print('Expected:', expected_str)
assert actual_str == expected_str

Actual: 22.22
Expected: 22.22


In [11]:
# Switch back to the full-size problem after the reduced cost check.
n = 100
n_m, n_u = Y.shape
lamb = 0  # zero regularization strength: both penalty terms in cofi_cost vanish

# Summary statistics; non-zero entries of R are counted as ratings.
num_ratings = np.count_nonzero(R)
print('Number of users:', n_u)
print('Number of movies:', n_m)
print('Number of movie * user:', n_u * n_m)
print('Number of ratings:', num_ratings)
print('Rated ratio: {0:.2f}%'.format(num_ratings / (n_u * n_m) * 100))

Number of users: 943
Number of movies: 1682
Number of movie * user: 1586126
Number of ratings: 100000
Rated ratio: 6.30%


In [12]:
# Random starting point, uniform in [-0.5, 0.5), for the movie features and
# user parameters. A seeded local generator is used so that Restart & Run All
# reproduces the same initialization (and therefore the same optimizer run)
# without touching NumPy's global random state.
rng = np.random.RandomState(0)
X_init = rng.rand(n_m, n) - 0.5
Theta_init = rng.rand(n_u, n) - 0.5
# Pack as a flat vector: X first, then Theta (the layout cofi_cost/cofi_grad unpack).
params = np.concatenate((X_init.ravel(), Theta_init.ravel()))

In [13]:
# Minimize cofi_cost with conjugate gradient, supplying the analytic gradient.
# The extra arguments are forwarded unchanged to both cofi_cost and cofi_grad.
args = (Y, R, n_u, n_m, n, lamb)
res = optimize.fmin_cg(cofi_cost, params, fprime=cofi_grad, args=args)
print(res)

Optimization terminated successfully.
         Current function value: 0.000000
         Iterations: 4662
         Function evaluations: 7006
         Gradient evaluations: 7006
[ 0.85764535  0.09093657 -0.14825556 ...,  0.15758502  0.27025324
  0.52137769]


In [17]:
# Unpack the optimized flat vector back into the two parameter matrices.
split = n_m * n
X = res[:split].reshape(n_m, n)
Theta = res[split:].reshape(n_u, n)