In [80]:
import matplotlib.pyplot as plt

In [81]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras

$r(i,j)$ scalar; $= 1$ if user $j$ rated movie $i$, $= 0$ otherwise  
$y(i,j)$ scalar; rating given by user $j$ on movie $i$ (defined only if $r(i,j) = 1$)  
$\mathbf{w}^{(j)}$ vector; parameters for user $j$  
$b^{(j)}$ scalar; parameter for user j    
$\mathbf{x}^{(i)}$ vector; feature ratings for movie i          
$n_u$ number of users  
$n_m$ number of movies    
$n$ number of features   
$\mathbf{X}$ matrix of vectors $\mathbf{x}^{(i)}$         
$\mathbf{W}$ matrix of vectors $\mathbf{w}^{(j)}$         
$\mathbf{b}$ vector of bias parameters $b^{(j)}$   
$\mathbf{R}$ matrix of elements $r(i,j)$                  

## Recommender Systems
The goal of a collaborative filtering recommender system is to generate two vectors: For each user, a 'parameter vector' that embodies the movie tastes of a user.
For each movie, a feature vector of the same size which embodies some description of the movie. 

The dot product of the two vectors plus the bias term should produce an estimate of the rating the user might give to that movie.

### Importing dataset

In [82]:
# Load the ratings table; the preview below shows userId, movieId, rating and timestamp columns.
df = pd.read_csv('./dataset/ml-latest-small/ratings.csv')

In [83]:
df.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [84]:
# Number of distinct movie ids in the ratings table.
# dropna=False keeps the count identical to len(unique()), which would count NaN as a value.
movie_rows = df['movieId'].nunique(dropna=False)

In [85]:
# list to add ratings by each user for a particular movie
# NOTE(review): not referenced by any later cell visible here — confirm it is still needed.
lst = []

In [86]:
# Re-reads the same ratings file that was loaded earlier, rebinding df, then previews it.
df = pd.read_csv("./dataset/ml-latest-small/ratings.csv")
df.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


The matrix $Y$ (an $n_m \times n_u$ matrix) stores the ratings $y^{(i,j)}$. The matrix $R$ is a binary-valued indicator matrix, where $R(i,j) = 1$ if user $j$ gave a rating to movie $i$, and $R(i,j)=0$ otherwise. 

In [87]:
# Distinct movie and user counts; used further down to size the X and W parameter matrices.
# dropna=False keeps the counts identical to len(unique()), which would count NaN as a value.
nm = df['movieId'].nunique(dropna=False)   # number of movies
nu = df['userId'].nunique(dropna=False)    # number of users
print(f"Total numbers of movies: {nm}")
print(f"Total numbers of users: {nu}")

Total numbers of movies: 9724
Total numbers of users: 610


In [88]:
# Ratings matrix: one row per movieId, one column per userId; entries with no rating are filled with 0.
Y = df.pivot(index='movieId', columns='userId', values='rating').fillna(0).to_numpy()

In [89]:
# Indicator matrix R: 1 wherever Y holds a non-zero rating, 0 elsewhere (same dtype as Y).
R = (Y != 0).astype(Y.dtype)

### 4.1 Collaborative filtering cost function

The collaborative filtering cost function is given by
$$J({\mathbf{x}^{(0)},...,\mathbf{x}^{(n_m-1)},\mathbf{w}^{(0)},b^{(0)},...,\mathbf{w}^{(n_u-1)},b^{(n_u-1)}})= \left[ \frac{1}{2}\sum_{(i,j):r(i,j)=1}(\mathbf{w}^{(j)} \cdot \mathbf{x}^{(i)} + b^{(j)} - y^{(i,j)})^2 \right]
+ \underbrace{\left[
\frac{\lambda}{2}
\sum_{j=0}^{n_u-1}\sum_{k=0}^{n-1}(\mathbf{w}^{(j)}_k)^2
+ \frac{\lambda}{2}\sum_{i=0}^{n_m-1}\sum_{k=0}^{n-1}(\mathbf{x}_k^{(i)})^2
\right]}_{regularization}
\tag{1}$$
The first summation in (1) is "for all $i$, $j$ where $r(i,j)$ equals $1$" and could be written:

$$
= \left[ \frac{1}{2}\sum_{j=0}^{n_u-1} \sum_{i=0}^{n_m-1}r(i,j)*(\mathbf{w}^{(j)} \cdot \mathbf{x}^{(i)} + b^{(j)} - y^{(i,j)})^2 \right]
+\text{regularization}
$$

You should now write `cofi_cost_func` (collaborative filtering cost function) to return this cost.

In [90]:
## non-vectorised implementation

def cofi_cost_func(X, W, b, Y, R, lambda_):
    """
    Compute the regularised collaborative filtering cost with explicit loops.

    Args:
      X (ndarray (num_movies,num_features)): matrix of item features
      W (ndarray (num_users,num_features)) : matrix of user parameters
      b (ndarray (1, num_users))           : row vector of per-user biases
      Y (ndarray (num_movies,num_users))   : matrix of user ratings of movies
      R (ndarray (num_movies,num_users))   : indicator matrix; R(i, j) = 1 if movie i was rated by user j
      lambda_ (float): regularization parameter
    Returns:
      J (float) : half the squared error over the rated entries, plus
                  lambda_/2 times the sums of squares of W and of X
    """
    num_movies, num_users = Y.shape

    squared_error = 0
    user_penalty = 0
    for j in range(num_users):
        w_j = W[j]
        bias_j = b[0][j]

        # Squared prediction error for user j; R zeroes out the movies they did not rate.
        error_j = 0
        for i in range(num_movies):
            residual = (np.dot(w_j, X[i]) + bias_j) - Y[i, j]
            error_j += R[i, j] * np.square(residual)
        squared_error += error_j

        # Sum of squares of this user's parameters (the bias is not regularised).
        penalty_j = 0
        for weight in w_j:
            penalty_j += np.square(weight)
        user_penalty += penalty_j

    # Sum of squares of every movie's feature vector.
    movie_penalty = 0
    for i in range(num_movies):
        movie_penalty += np.sum(np.square(X[i]))

    half_lambda = lambda_ / 2
    return squared_error / 2 + (half_lambda * user_penalty) + (half_lambda * movie_penalty)

In [100]:
def cofi_cost_func_v(X, W, b, Y, R, lambda_):
    """
    Compute the regularised collaborative filtering cost (vectorised).

    Built from TensorFlow operations so that it can be differentiated inside
    a custom training loop.
    Args:
      X (ndarray (num_movies,num_features)): matrix of item features
      W (ndarray (num_users,num_features)) : matrix of user parameters
      b (ndarray (1, num_users))           : row vector of per-user biases
      Y (ndarray (num_movies,num_users))   : matrix of user ratings of movies
      R (ndarray (num_movies,num_users))   : indicator matrix; R(i, j) = 1 if movie i was rated by user j
      lambda_ (float): regularization parameter
    Returns:
      J : scalar cost (result of TensorFlow reductions)
    """
    # Predicted rating for every (movie, user) pair; b is added to each row.
    predictions = tf.linalg.matmul(X, tf.transpose(W)) + b

    # Multiplying by R removes the error on pairs that have no rating.
    masked_error = (predictions - Y) * R

    data_term = 0.5 * tf.reduce_sum(masked_error**2)
    penalty_term = (lambda_/2) * (tf.reduce_sum(X**2) + tf.reduce_sum(W**2))
    return data_term + penalty_term
    

### Set Initial Parameters (W, X, b), use tf.Variable to track these variables

In [122]:
# Length of each user's parameter vector and of each movie's feature vector.
num_features = 60

In [123]:
# Fix TensorFlow's global seed so the random initial values below are
# reproducible when the notebook is restarted and re-run.
tf.random.set_seed(1234)

# One parameter vector per user (W), one feature vector per movie (X), one bias per user (b).
W = tf.Variable(tf.random.normal((nu, num_features), dtype=tf.float64), name='W')
X = tf.Variable(tf.random.normal((nm, num_features), dtype=tf.float64), name='X')
b = tf.Variable(tf.random.normal((1, nu), dtype=tf.float64), name='b')


### Initialize an optimizer

In [124]:
# Adam optimizer with a learning rate of 0.1; it applies the gradients in the training loop.
optimizer = keras.optimizers.Adam(learning_rate=1e-1)

Let's now train the collaborative filtering model. This will learn the parameters $\mathbf{X}$, $\mathbf{W}$, and $\mathbf{b}$.   
  
The operations involved in learning $w$, $b$, and $x$ simultaneously do not fall into the typical 'layers' offered in the TensorFlow neural network package.  Consequently, the flow used in Course 2 (Model, Compile(), Fit(), Predict()) is not directly applicable. Instead, we can use a custom training loop.

Recall from earlier labs the steps of gradient descent.
- repeat until convergence:
    - compute forward pass
    - compute the derivatives of the loss relative to parameters
    - update the parameters using the learning rate and the computed derivatives 
    
TensorFlow has the marvelous capability of calculating the derivatives for you. This is shown below. Within the `tf.GradientTape()` section, operations on Tensorflow Variables are tracked. When `tape.gradient()` is later called, it will return the gradient of the loss relative to the tracked variables. The gradients can then be applied to the parameters using an optimizer. 
This is a very brief introduction to a useful feature of TensorFlow and other machine learning frameworks. Further information can be found by investigating "custom training loops" within the framework of interest.
    


In [125]:
iterations = 1000
lambda_ = 1

# The loop variable is `step`, not `iter`, so the builtin iter() is not
# shadowed for the rest of the kernel session.
for step in range(iterations):
    # Record the forward pass so the tape can differentiate the cost.
    with tf.GradientTape() as tape:
        cost_value = cofi_cost_func_v(X, W, b, Y, R, lambda_)

    # Gradient of the cost with respect to each trainable variable,
    # applied by the optimizer in the same order.
    grads = tape.gradient(cost_value, [X, W, b])
    optimizer.apply_gradients(zip(grads, [X, W, b]))

    # Log periodically.
    if step % 20 == 0:
        print(f"Training loss at iteration {step}: {cost_value:0.1f}")

Training loss at iteration 0: 4038446.9
Training loss at iteration 20: 197402.6
Training loss at iteration 40: 78547.1
Training loss at iteration 60: 41028.4
Training loss at iteration 80: 25528.0
Training loss at iteration 100: 18080.1
Training loss at iteration 120: 14066.8
Training loss at iteration 140: 11720.1
Training loss at iteration 160: 10253.1
Training loss at iteration 180: 9276.8
Training loss at iteration 200: 8588.6
Training loss at iteration 220: 8078.3
Training loss at iteration 240: 7682.7
Training loss at iteration 260: 7364.0
Training loss at iteration 280: 7098.7
Training loss at iteration 300: 6872.5
Training loss at iteration 320: 6675.7
Training loss at iteration 340: 6502.0
Training loss at iteration 360: 6347.1
Training loss at iteration 380: 6207.6
Training loss at iteration 400: 6081.3
Training loss at iteration 420: 5966.5
Training loss at iteration 440: 5861.7
Training loss at iteration 460: 5765.9
Training loss at iteration 480: 5678.2
Training loss at it

In [127]:
# Rebind X from the trained tf.Variable to its NumPy value.
# Runs only once per training run: after this, X is an ndarray, which has no .numpy().
X = X.numpy()

In [128]:
# Rebind W from the trained tf.Variable to its NumPy value.
# Runs only once per training run: after this, W is an ndarray, which has no .numpy().
W = W.numpy()

In [129]:
# Rebind b from the trained tf.Variable to its NumPy value.
# Runs only once per training run: after this, b is an ndarray, which has no .numpy().
b = b.numpy()

In [133]:
df.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [150]:
# Mean of the recorded ratings for each movie, indexed by movieId.
movie_rating_df = df.groupby('movieId')[['rating']].mean()

In [151]:
movie_rating_df

Unnamed: 0_level_0,rating
movieId,Unnamed: 1_level_1
1,3.920930
2,3.431818
3,3.259615
4,2.357143
5,3.071429
...,...
193581,4.000000
193583,3.500000
193585,3.500000
193587,3.500000


In [152]:
# Predicted rating for every (movie, user) pair, averaged over all users for each movie —
# including users who never rated it, unlike the 'rating' column, which averages recorded ratings only.
movie_rating_df['predicted_mean_rating'] = (X @ W.T + b).mean(axis=1)

In [153]:
movie_rating_df

Unnamed: 0_level_0,rating,predicted_mean_rating
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1
1,3.920930,3.807059
2,3.431818,3.580240
3,3.259615,3.482260
4,2.357143,3.398358
5,3.071429,3.248326
...,...,...
193581,4.000000,3.528741
193583,3.500000,3.491724
193585,3.500000,3.491724
193587,3.500000,3.491724
