In [1]:
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import clear_output
import scipy.io as sio
import scipy.optimize as opt

In [2]:
#Utilities
def fold(*arg):
    """
    Flatten every given array and join the pieces into one 1-D vector.

    The arrays are flattened in the order they are passed, so the result can
    be split back apart as long as the original shapes are known.
    """
    flattened = []
    for arr in arg:
        flattened.append(arr.ravel())
    return np.concatenate(flattened)

def unfold(params, num_users, num_movies, num_features):
    """
    Split a flat parameter vector back into the X and Theta matrices.

    The first num_movies * num_features entries become X with shape
    (num_movies, num_features); everything after that becomes Theta with
    shape (num_users, num_features).
    """
    split_at = num_movies * num_features
    movie_part, user_part = params[:split_at], params[split_at:]
    X = movie_part.reshape(num_movies, num_features)
    Theta = user_part.reshape(num_users, num_features)
    return X, Theta

![Collaborative filtering cost function](static/img/collaborative_filtering_cost.png)

In [3]:
def cost_fn(params, Y, R, num_users, num_movies, num_features, lmbda=0.0):
    """
    Regularized collaborative filtering cost.

    Parameters
    ----------
    params : ndarray
        Flat 1-D vector holding the movie features X followed by the user
        parameters Theta, i.e. num_movies * num_features +
        num_users * num_features values in total.

    Y : array_like
        A matrix of shape (num_movies x num_users) of user ratings of movies.

    R : array_like
        A (num_movies x num_users) matrix, where R[i, j] = 1 if the
        i-th movie was rated by the j-th user.

    num_users : int
        Total number of users.

    num_movies : int
        Total number of movies.

    num_features : int
        Number of features to learn.

    lmbda : float, optional
        The regularization coefficient.

    Returns
    -------
    J : float
        The value of the cost function at the given params.
    """
    X, Theta = unfold(params, num_users, num_movies, num_features)

    # Multiplying by R zeroes the residual of every (movie, user) pair that
    # has no rating, so only rated entries add to the squared error.
    masked_residuals = R * (X @ Theta.T - Y)
    squared_error = .5 * np.sum(masked_residuals ** 2)

    # Penalty on the size of both parameter matrices.
    penalty = (.5 * lmbda) * (np.sum(Theta ** 2) + np.sum(X ** 2))
    return squared_error + penalty

![Collaborative filtering gradient](static/img/collaborative_filtering_gradient.png)

In [4]:
def gradient(params, Y, R, num_users, num_movies, num_features, lmbda=0.0):
    """
    Gradient of the regularized collaborative filtering cost.

    Parameters
    ----------
    params : ndarray
        Flat 1-D vector holding the movie features X followed by the user
        parameters Theta, i.e. num_movies * num_features +
        num_users * num_features values in total.

    Y : array_like
        A matrix of shape (num_movies x num_users) of user ratings of movies.

    R : array_like
        A (num_movies x num_users) matrix, where R[i, j] = 1 if the
        i-th movie was rated by the j-th user.

    num_users : int
        Total number of users.

    num_movies : int
        Total number of movies.

    num_features : int
        Number of features to learn.

    lmbda : float, optional
        The regularization coefficient.

    Returns
    -------
    grad : ndarray
        Flat 1-D gradient vector laid out like params: the gradient with
        respect to X first, then the gradient with respect to Theta.
    """
    X, Theta = unfold(params, num_users, num_movies, num_features)

    # Prediction residuals, zeroed wherever R marks the entry as unrated.
    residual = R * (X @ Theta.T - Y)

    X_grad = lmbda * X + residual @ Theta
    Theta_grad = lmbda * Theta + residual.T @ X
    return fold(X_grad, Theta_grad)

In [5]:
def normalizeRatings(Y, R):
    """
    Preprocess data by subtracting mean rating for every movie (every row).

    Only entries flagged as rated (R[i, j] == 1) contribute to a movie's mean
    and only those entries are mean-centred; every other entry of Ynorm is 0.
    A movie without a single rating gets a mean of 0 rather than the NaN that
    averaging an empty selection would produce, so adding Ymean back onto
    predictions stays finite.

    Parameters
    ----------
    Y : array_like
        The user ratings for all movies. A matrix of shape (num_movies x num_users).
    R : array_like
        Indicator matrix for movies rated by users. A matrix of shape (num_movies x num_users).

    Returns
    -------
    Ynorm : array_like
        A matrix of same shape as Y, after mean normalization.
    Ymean : array_like
        A vector of shape (num_movies, ) containing the mean rating for each movie.
    """
    Y = np.asarray(Y, dtype=float)
    rated = np.asarray(R) == 1

    # Per-movie number of ratings and sum of the rated entries only.
    counts = rated.sum(axis=1)
    totals = np.where(rated, Y, 0.0).sum(axis=1)

    # Divide only where at least one rating exists; other rows keep the 0
    # they were initialised with.
    Ymean = np.zeros(Y.shape[0])
    np.divide(totals, counts, out=Ymean, where=counts > 0)

    Ynorm = np.where(rated, Y - Ymean[:, None], 0.0)
    return Ynorm, Ymean

In [6]:
# Data preparation: read the MATLAB file and pull out the 'Y' and 'R' matrices
# (ratings and rated-indicator, as described in the cost function docstring).
data = sio.loadmat('static/datasets/anomaly/ex8_movies.mat')
Y = data['Y']
R = data['R']

In [8]:
# creating movies list
# NOTE(review): the file is opened with the platform default encoding --
# confirm the file's actual encoding and pass `encoding=` explicitly.
with open('static/datasets/anomaly/movie_ids.txt') as f:
    movies = f.readlines()

# Drop the first whitespace-separated token of every line (presumably the
# movie id) and re-join the rest with single spaces.  split() already discards
# the trailing newline, so nothing is sliced off by hand: a final line that
# lacks a newline keeps its last character.
movie_names = [' '.join(line.split()[1:]) for line in movies]

In [9]:
# Ratings given by the new user, as (position in movie_names, stars) pairs.
# Every movie not listed stays at 0, which the loop below treats as unrated.
my_ratings = np.zeros(len(movie_names))
for movie_idx, stars in (
    (0, 4), (97, 2), (6, 3), (11, 5), (53, 4), (63, 5),
    (65, 3), (68, 5), (182, 4), (225, 5), (354, 5),
):
    my_ratings[movie_idx] = stars

print('New user ratings:')
print('-----------------')
for title, stars in zip(movie_names, my_ratings):
    if stars > 0:
        print(f'Rated {stars} stars: {title}')

New user ratings:
-----------------
Rated 4.0 stars: Toy Story (1995)
Rated 3.0 stars: Twelve Monkeys (1995)
Rated 5.0 stars: Usual Suspects, The (1995)
Rated 4.0 stars: Outbreak (1995)
Rated 5.0 stars: Shawshank Redemption, The (1994)
Rated 3.0 stars: While You Were Sleeping (1995)
Rated 5.0 stars: Forrest Gump (1994)
Rated 2.0 stars: Silence of the Lambs, The (1991)
Rated 4.0 stars: Alien (1979)
Rated 5.0 stars: Die Hard 2 (1990)
Rated 5.0 stars: Sphere (1998)


In [10]:
# Add the new user's ratings as column 0.  The matrices are rebuilt from the
# loaded `data` rather than stacked onto the current Y / R, so re-running this
# cell never prepends the same column twice.
Y = np.hstack([my_ratings[:, None], data['Y']])
R = np.hstack([(my_ratings > 0)[:, None], data['R']])

#  Normalize Ratings
Ynorm, Ymean = normalizeRatings(Y, R)

#  Useful Values
num_movies, num_users = Y.shape
num_features = 10

# Set Initial Parameters (Theta, X)
# A seeded generator makes the random starting point, and therefore the fitted
# parameters and the recommendations, reproducible across kernel restarts.
rng = np.random.default_rng(0)
X = rng.standard_normal((num_movies, num_features))
Theta = rng.standard_normal((num_users, num_features))

initial_parameters = fold(X, Theta)

# Set Regularization
lmdba = 10
res = opt.minimize(
    fun=cost_fn,
    x0=initial_parameters,
    jac=gradient,
    args=(Ynorm, R, num_users, num_movies, num_features, lmdba),
    method='TNC',
)
print(res)
X, Theta = unfold(res.x, num_users, num_movies, num_features)

     fun: 38964.467859979704
     jac: array([-1.26009187e-05, -1.69577337e-07, -5.41634800e-06, ...,
       -6.88409481e-08,  6.59058097e-07,  1.24555160e-07])
 message: 'Converged (|f_n-f_(n-1)| ~= 0)'
    nfev: 490
     nit: 39
  status: 1
 success: True
       x: array([-0.92239008,  0.67312521, -0.06237038, ...,  0.45189284,
       -0.82026653, -0.07700912])


In [11]:
# Predicted (mean-normalized) rating of every movie by every user.  Column 0
# is read as the new user's predictions; the per-movie means are added back.
p = X.dot(Theta.T)
my_predictions = Ymean + p[:, 0]

# Movie indices ordered from highest to lowest predicted rating.
ix = np.flip(np.argsort(my_predictions))

print('Top recommendations for you:')
print('----------------------------')
for rank in range(10):
    movie_idx = ix[rank]
    predicted = round(my_predictions[movie_idx], 2)
    print(f'Predicting rating {predicted} for movie {movie_names[movie_idx]}')

print('\nOriginal ratings provided:')
print('--------------------------')
for stars, title in zip(my_ratings, movie_names):
    if stars > 0:
        print(f'Rated {stars} for {title}')

Top recommendations for you:
----------------------------
Predicting rating 5.0 for movie Prefontaine (1997)
Predicting rating 5.0 for movie Someone Else's America (1995)
Predicting rating 5.0 for movie They Made Me a Criminal (1939)
Predicting rating 5.0 for movie Santa with Muscles (1996)
Predicting rating 5.0 for movie Entertaining Angels: The Dorothy Day Story (1996)
Predicting rating 5.0 for movie Marlene Dietrich: Shadow and Light (1996)
Predicting rating 5.0 for movie Aiqing wansui (1994)
Predicting rating 5.0 for movie Great Day in Harlem, A (1994)
Predicting rating 5.0 for movie Saint of Fort Washington, The (1993)
Predicting rating 5.0 for movie Star Kid (1997)

Original ratings provided:
--------------------------
Rated 4.0 for Toy Story (1995)
Rated 3.0 for Twelve Monkeys (1995)
Rated 5.0 for Usual Suspects, The (1995)
Rated 4.0 for Outbreak (1995)
Rated 5.0 for Shawshank Redemption, The (1994)
Rated 3.0 for While You Were Sleeping (1995)
Rated 5.0 for Forrest Gump (1994)
R

In [39]:
# Page numbers 0-9 walked by the retry experiment in the next cell.
rang = range(0, 10)

In [42]:
try_counter = 0

# Keep restarting the page walk until it completes without an error or four
# attempts have failed.
while try_counter != 4:
    try:
        for page in rang:
            if page == 2:
                raise ValueError('sda')
            print(page)
    except ValueError:
        # Count the failed attempt; the next iteration starts from the first
        # page again (no delay between attempts).
        try_counter += 1
        print('Oops!')
    else:
        # The whole range was processed without an error.
        break

0
1
Oops!
0
1
Oops!
0
1
Oops!
0
1
Oops!
