# Collaborative Filtering: From Loops to Latent Space
Flow: load the provided data, implement the loop-based cost function, vectorize it, add my own ratings, normalize, train with TensorFlow, and print recommendations. The cool bit: the model learns its own axes (latent factors) without ever being told the genres.

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from recsys_utils import *  # provided helpers

# Pull in the small ratings set and the pre-computed parameters through the
# provided helper functions.
Y, R = load_ratings_small()
X, W, b, num_movies, num_features, num_users = load_precalc_params_small()

# Quick shape check before any cost is computed.
print('Y', Y.shape, 'R', R.shape)
print('X', X.shape, 'W', W.shape, 'b', b.shape)


## Cost function (loops) + regularization

In [None]:
def cofi_cost_func(X, W, b, Y, R, lambda_):
    """
    Returns the regularized cost for collaborative filtering (loop version).

    The cost is half the sum of squared prediction errors over the rated
    entries, plus an L2 penalty on W and X. The bias b is not penalized.

    Args:
      X (ndarray (num_movies,num_features)): matrix of item features
      W (ndarray (num_users,num_features)) : matrix of user parameters
      b (ndarray (1, num_users))           : vector of user bias parameters
      Y (ndarray (num_movies,num_users))   : matrix of user ratings of movies
      R (ndarray (num_movies,num_users))   : matrix, where R(i, j) = 1 if the
                                             i-th movie was rated by the j-th user
      lambda_ (float): regularization parameter
    Returns:
      J (float) : Cost
    """
    nm, nu = Y.shape
    n = W.shape[1]

    # Squared prediction error; multiplying by R[i, j] zeroes out every
    # (movie, user) pair that has no rating.
    J = 0
    for j in range(nu):
        w = W[j, :]
        b_j = b[0, j]
        for i in range(nm):
            J += np.square(R[i, j] * (np.dot(w, X[i, :]) + b_j - Y[i, j]))
    J = J / 2

    # L2 penalty: sum of squares of every entry of W and of X.
    sum_sq_w = 0
    for j in range(nu):
        for k in range(n):
            sum_sq_w += W[j, k] ** 2
    sum_sq_x = 0
    for i in range(nm):
        for k in range(n):
            sum_sq_x += X[i, k] ** 2

    return J + (lambda_ * (sum_sq_w + sum_sq_x)) / 2

# Evaluate the loop-based cost on the loaded parameters with lambda = 1.
lambda_ = 1.0
J_loop = cofi_cost_func(X=X, W=W, b=b, Y=Y, R=R, lambda_=lambda_)
print('Cost (loops):', float(J_loop))


## Vectorized cost function (NumPy)

In [None]:
def cofi_cost_func_vec(X, W, b, Y, R, lambda_):
    """Compute the same cost as cofi_cost_func using NumPy array operations.

    Returns half the sum of squared errors over rated entries plus
    0.5 * lambda_ times the sum of squares of W and X.
    """
    # (num_movies x num_users) residuals; b is (1, num_users) and broadcasts
    # across rows. R masks out the unrated entries.
    residual = R * (np.matmul(X, W.T) + b - Y)
    squared_error = np.sum(np.square(residual))
    penalty = np.sum(np.square(X)) + np.sum(np.square(W))
    return 0.5 * squared_error + 0.5 * lambda_ * penalty

# Cross-check the vectorized cost against the loop-based result.
J_vec = cofi_cost_func_vec(X=X, W=W, b=b, Y=Y, R=R, lambda_=lambda_)
print('Cost (vectorized):', float(J_vec))
print('abs diff:', abs(float(J_loop) - float(J_vec)))


## Add my ratings and rebuild matrices

In [None]:
movieList, movieList_df = load_Movie_List_pd()

# One slot per movie; 0 means "not rated by me".
my_ratings = np.zeros(num_movies)

# (movie index, rating) pairs; indices come from small_movie_list.csv.
for movie_idx, stars in (
    (2700, 5),  # Toy Story 3 (2010)
    (2609, 2),  # Persuasion (2007)
    (929, 5),   # LOTR: Return of the King
    (246, 5),   # Shrek (2001)
    (2716, 3),  # Inception
    (1150, 5),  # The Incredibles (2004)
    (382, 2),   # Amelie
    (366, 5),   # Harry Potter 1
    (622, 5),   # Harry Potter 2
    (988, 3),   # Eternal Sunshine
    (2925, 1),  # Louis Theroux: Law & Disorder
    (2937, 1),  # Nothing to Declare
    (793, 5),   # Pirates of the Caribbean 1
):
    my_ratings[movie_idx] = stars

# Indices of the movies I rated, in ascending order.
my_rated = [idx for idx, rating in enumerate(my_ratings) if rating > 0]

print('\nNew user ratings:\n')
for i in my_rated:
    print(f'Rated {my_ratings[i]} for  {movieList_df.loc[i,"title"]}')


## Reload ratings, append my user, and normalize

In [None]:
# Start again from the unmodified ratings.
Y, R = load_ratings_small()

# My ratings become column 0, ahead of the existing users. The matching R
# column is 1 wherever I entered a non-zero rating.
my_rated_mask = (my_ratings != 0).astype(int)
Y = np.column_stack((my_ratings, Y))
R = np.column_stack((my_rated_mask, R))

# Normalize the ratings with the provided helper; Ymean is added back to the
# predictions at the end.
Ynorm, Ymean = normalizeRatings(Y, R)
print('Ynorm', Ynorm.shape, 'Ymean', Ymean.shape, 'R', R.shape)


## TensorFlow setup and cost (vectorized TF)

In [None]:
# Dimensions now come from the augmented Y (which includes my column), and
# num_features is overridden to 100 for the model trained below.
num_movies, num_users = Y.shape
num_features = 100

# Fix the seed so the random initial values below are reproducible.
# NOTE: the order of the three draws determines which values each variable
# gets, so keep W, X, b in this order.
tf.random.set_seed(1234)
W = tf.Variable(tf.random.normal((num_users,  num_features), dtype=tf.float64), name='W')
X = tf.Variable(tf.random.normal((num_movies, num_features), dtype=tf.float64), name='X')
b = tf.Variable(tf.random.normal((1,          num_users),    dtype=tf.float64), name='b')

# Adam optimizer with learning rate 0.1, used by the training loop.
optimizer = keras.optimizers.Adam(learning_rate=1e-1)

def cofi_cost_func_v(X, W, b, Y, R, lambda_):
    """TensorFlow version of the regularized collaborative filtering cost.

    Computes the same quantity as cofi_cost_func / cofi_cost_func_vec: half
    the sum of squared errors over rated entries plus an L2 penalty on X and
    W. The bias b is not regularized, matching those two implementations.

    Args:
      X (num_movies, num_features): item feature matrix (tf.Variable/tensor)
      W (num_users, num_features) : user parameter matrix (tf.Variable/tensor)
      b (1, num_users)            : user bias row vector (tf.Variable/tensor)
      Y (num_movies, num_users)   : ratings; converted to a float64 constant
      R (num_movies, num_users)   : rating indicator; converted to a float64
                                    constant and used as a mask
      lambda_ (float)             : regularization parameter
    Returns:
      J: scalar tensor holding the cost
    """
    ratings = tf.constant(Y, dtype=tf.float64)
    mask = tf.constant(R, dtype=tf.float64)

    pred = tf.matmul(X, W, transpose_b=True) + b  # (m,u); b broadcasts over rows
    err = (pred - ratings) * mask                 # zero where no rating exists
    J = 0.5 * tf.reduce_sum(tf.square(err))
    J += 0.5 * lambda_ * (tf.reduce_sum(tf.square(X)) +
                          tf.reduce_sum(tf.square(W)))
    return J


## Train

In [None]:
iterations = 200
lambda_ = 1.0

# NOTE: the counter is named `step` rather than `iter` so the builtin iter()
# is not shadowed for the rest of the notebook session.
for step in range(iterations):
    # Record the forward pass so the cost can be differentiated with respect
    # to X, W and b.
    with tf.GradientTape() as tape:
        cost_value = cofi_cost_func_v(X, W, b, Ynorm, R, lambda_)

    # One optimizer update of all three variables per iteration.
    grads = tape.gradient(cost_value, [X, W, b])
    optimizer.apply_gradients(zip(grads, [X, W, b]))

    # Log the loss every 20 iterations.
    if step % 20 == 0:
        print(f'Training loss at iteration {step}: {cost_value:0.1f}')


## Predict and print recommendations

In [None]:
# Predictions for every (movie, user) pair from the trained parameters, with
# the per-movie mean added back to undo the normalization.
p = X.numpy() @ W.numpy().T + b.numpy()
pm = p + Ymean

# Column 0 is the user appended in front of the ratings matrix (me).
my_predictions = pm[:, 0]
ix = tf.argsort(my_predictions, direction='DESCENDING')

print('\nTop recommendations (unrated by me):')
already_rated = set(my_rated)
shown = 0
for j in ix.numpy().tolist():
    if j in already_rated:
        continue
    print(f'Predicting rating {my_predictions[j]:0.2f} for movie {movieList[j]}')
    shown += 1
    # Stop after 17 unrated movies have been listed.
    if shown == 17:
        break

print('\n\nOriginal vs Predicted ratings:\n')
for i, rating in enumerate(my_ratings):
    if rating > 0:
        print(f'Original {rating}, Predicted {my_predictions[i]:0.2f} for {movieList[i]}')
