In [18]:
import scipy.stats as stats
import matplotlib.pyplot as plt
import matplotlib 
import pandas as pd
import numpy as np
from numpy import random
import pickle
import timeit
from sklearn.model_selection import GridSearchCV
from scipy.io import loadmat
import scipy.optimize as opt
from scipy.sparse.linalg import svds
%matplotlib inline

In [19]:
# Read the MATLAB-format dataset from disk; its "Y" and "R" entries are used below.
data=loadmat("data/Lab9/ex9_movies.mat")

In [20]:
# Y: rating matrix, R: matching mask that is multiplied element-wise with the
# prediction error later on (presumably 1 = rated, 0 = not rated).
Y, R = data["Y"], data["R"]

In [21]:
def flatten_params(X, theta):
    """Pack two arrays into one flat 1-D vector.

    Both inputs are flattened in row-major order; the entries of ``X`` come
    first, followed by the entries of ``theta``.
    """
    flat_x = X.reshape(-1)
    flat_theta = theta.reshape(-1)
    return np.concatenate([flat_x, flat_theta])

def recover_params(params, n_movie, n_user, factor_num):
    """Split a flat parameter vector back into its two matrices.

    The first ``n_movie * factor_num`` entries are reshaped to
    ``(n_movie, factor_num)``; the remaining entries are reshaped to
    ``(n_user, factor_num)``.

    Returns:
        A ``(X, theta)`` tuple of the two reshaped arrays.
    """
    split_at = n_movie * factor_num
    movie_part = params[:split_at].reshape(n_movie, factor_num)
    user_part = params[split_at:].reshape(n_user, factor_num)
    return movie_part, user_part

def cost_function(params, Y, R, factor_num, lambda_reg):
    """Regularised squared-error cost of the factorisation.

    Args:
        params: flat vector holding X followed by theta (see recover_params).
        Y: target matrix; its shape gives (n_movie, n_user).
        R: mask multiplied element-wise with the error, so entries where
            R is 0 do not contribute.
        factor_num: number of latent factors per movie / user.
        lambda_reg: regularisation strength.

    Returns:
        Half the sum of squared masked errors plus
        ``lambda_reg / 2`` times the sum of squared parameters.
    """
    movie_count, user_count = Y.shape
    X, theta = recover_params(params, movie_count, user_count, factor_num)
    masked_error = (X @ theta.T - Y) * R
    penalty = np.square(params).sum() * (lambda_reg / 2)
    data_term = np.square(masked_error).sum() / 2
    return data_term + penalty

def run_gd_step(params, Y, R, factor_num, lambda_reg):
    """Gradient of the regularised cost with respect to ``params``.

    Despite the name, no parameter update is performed here: the function
    only returns the gradient, laid out like ``params`` (X part first, then
    the theta part).
    """
    movie_count, user_count = Y.shape
    X, theta = recover_params(params, movie_count, user_count, factor_num)
    masked_error = (X @ theta.T - Y) * R
    data_grad = flatten_params(masked_error @ theta, masked_error.T @ X)
    return data_grad + lambda_reg * params

In [22]:
# Hand-entered ratings for one extra user: one slot per row of Y, 0 = no rating.
ratings = np.zeros(Y.shape[0])
ratings[[16, 32, 48, 98, 128, 256, 512, 753, 999, 1001]] = [4, 3, 5, 4, 5, 3, 5, 2, 4, 5]

In [23]:
# Prepend the hand-entered `ratings` vector as column 0 of Y, and the matching
# "has a rating" flags (`ratings != 0`) as column 0 of R.
# NOTE: Y and R are rebound here, so re-running this cell inserts yet another column.
Y = np.insert(Y, 0, ratings, axis=1)
R = np.insert(R, 0, ratings != 0, axis=1)

In [24]:
number_of_movies, number_of_users = Y.shape
factor_num = 20   # number of latent factors per movie / user
lambda_reg = 10   # regularisation strength passed to the cost and gradient

# Seed the global NumPy RNG so the random starting point (and therefore the
# optimiser result and the printed recommendations) is reproducible on
# Restart & Run All.
np.random.seed(0)
theta = 3 * np.random.rand(number_of_users, factor_num)
X = 3 * np.random.rand(number_of_movies, factor_num)

In [25]:
# Pack the initial factor matrices into a single 1-D vector (used as x0 by the optimizer).
params = flatten_params(X, theta)
# Centre the ratings on the mean of *every* entry of Y (no mask is applied, so
# entries that R marks as unrated contribute to this mean as well).
Y_norm = Y - Y.mean()

In [26]:
# Minimise cost_function over the flattened parameters with SciPy's TNC method;
# run_gd_step is passed as `jac`, so it supplies the gradient of the cost.
res = opt.minimize(fun=cost_function,
                   x0=params,
                   args=(Y_norm, R, factor_num, lambda_reg),
                   method='TNC',
                   jac=run_gd_step)

In [27]:
# Show the optimizer's result object (final cost, status message, solution vector).
res

     fun: 67178.77066239035
     jac: array([ 4.55567057e-06, -1.82166305e-06,  7.66621617e-06, ...,
        1.22958213e-07,  8.23059875e-07,  1.58767786e-06])
 message: 'Converged (|f_n-f_(n-1)| ~= 0)'
    nfev: 1800
     nit: 59
  status: 1
 success: True
       x: array([ 0.51013287,  0.86636894,  0.29504827, ...,  0.24678119,
       -0.24482926,  0.49397157])

In [28]:
# Split the optimised parameter vector back into the two factor matrices.
X_trained, theta_trained = recover_params(res.x, number_of_movies, number_of_users, factor_num)

In [51]:
# Predicted (mean-centred) score for every movie/user pair.
prediction = X_trained @ theta_trained.T
# Column 0 is the user that was inserted in front of Y; add the global mean back.
my_preds = Y.mean() + prediction[:, 0]
# Movie indices ordered from highest to lowest predicted score.
idx = my_preds.argsort()[::-1]

In [52]:
# Drop the first space-separated token of every line (presumably a numeric id)
# and keep the remainder of the line as the movie title.
with open('data/Lab9/movie_ids.txt', encoding='latin-1') as f:
    movie_list = [line.strip().partition(' ')[2] for line in f]

In [53]:
# Convert to a NumPy array so the titles can be indexed with the `idx` array.
movie_list = np.array(movie_list)

In [54]:
# Print the 20 titles with the highest predicted scores, best first.
for title in movie_list[idx[:20]]:
    print(title)

Schindler's List (1993)
Shawshank Redemption, The (1994)
Star Wars (1977)
Raiders of the Lost Ark (1981)
Pulp Fiction (1994)
Usual Suspects, The (1995)
Good Will Hunting (1997)
Empire Strikes Back, The (1980)
Casablanca (1942)
Titanic (1997)
Wrong Trousers, The (1993)
Braveheart (1995)
Great Escape, The (1963)
Henry V (1989)
Silence of the Lambs, The (1991)
Wallace & Gromit: The Best of Aardman Animation (1996)
Princess Bride, The (1987)
Bridge on the River Kwai, The (1957)
As Good As It Gets (1997)
Manchurian Candidate, The (1962)


In [74]:
# Baseline: fill the unrated entries and take a truncated SVD of the result.
# Row means are taken over the whole row of Y_norm.
# NOTE(review): this mean also includes the entries R marks as unrated —
# confirm whether a mean over rated entries only was intended.
means = np.mean(Y_norm, axis=1)
# Wherever R == 0, substitute that row's mean; keep the centred rating otherwise.
Y_filled = np.where(R == 0, means[:, np.newaxis], Y_norm)
# Use the builtin `float`: the `np.float` alias was removed in NumPy 1.24.
U, sigma, Vt = svds(Y_filled.astype(float), k=factor_num)
sigma = np.diag(sigma)
# Rank-`factor_num` reconstruction of the filled matrix.
all_user_predicted_ratings = np.dot(np.dot(U, sigma), Vt)

In [75]:
# Scores for the user in column 0 from the SVD reconstruction, with the global mean added back.
my_preds = Y.mean() + all_user_predicted_ratings[:, 0]
# Movie indices ordered from highest to lowest predicted score.
idx = my_preds.argsort()[::-1]

In [76]:
# Print the 20 titles ranked highest by the SVD-based scores, best first.
movie_list = np.array(movie_list)
for title in movie_list[idx[:20]]:
    print(title)

Star Wars (1977)
Fargo (1996)
Return of the Jedi (1983)
Contact (1997)
Raiders of the Lost Ark (1981)
Godfather, The (1972)
Toy Story (1995)
English Patient, The (1996)
Scream (1996)
Pulp Fiction (1994)
Silence of the Lambs, The (1991)
Empire Strikes Back, The (1980)
Liar Liar (1997)
Air Force One (1997)
Twelve Monkeys (1995)
Jerry Maguire (1996)
Rock, The (1996)
Titanic (1997)
Fugitive, The (1993)
Princess Bride, The (1987)
