# SD-TSIA211 : TP1 - RECOMMENDATION

## Remi Jaylet & Romain Louvet

### imports

In [63]:
from movielens_utils import load_movielens, objective, total_objective_vectorized
import numpy as np 
from scipy.sparse.linalg import svds
from scipy.optimize import check_grad, line_search

### 1 - Presentation of the models

#### Question 1.1

In [51]:
# Load the complete dataset: R is the rating matrix and mask flags which entries hold a rating.
R, mask = load_movielens("ml-100k/u.data", minidata=False)

print("R = ", R)
print("mask = ", mask)

# Rows and columns of R read straight from its shape.
print("dimensions = (", R.shape[0], ",", R.shape[1], ")")

# Each True in the boolean mask is one available rating.
print("number of ratings :", mask.sum())

R =  [[5. 3. 4. ... 0. 0. 0.]
 [4. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [5. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 5. 0. ... 0. 0. 0.]]
mask =  [[ True  True  True ... False False False]
 [ True False False ... False False False]
 [False False False ... False False False]
 ...
 [ True False False ... False False False]
 [False False False ... False False False]
 [False  True False ... False False False]]
dimensions = ( 943 , 1682 )
number of ratings : 100000


#### Question 1.2

In [52]:
# Same loader with the reduced dataset switched on; note that this rebinds R and mask.
R, mask = load_movielens("ml-100k/u.data", minidata=True)

print("dimensions = (", R.shape[0], ",", R.shape[1], ")")

dimensions = ( 100 , 200 )


When the `minidata` option is activated, the function returns only a small sub-matrix of R, restricted to 100 users and 200 films.

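For the full dataset, the dimensions of the matrix show that it contains the ratings of 943 users on 1682 movies.
By counting the number of True values in the mask, we can see that there are 100000 ratings. The ratings range from 1 to 5; a 0 in R marks a missing rating (the corresponding mask entry is False).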

### 2 - Find P when $ Q^0 $ is fixed

#### Question 2.1

According to the formula given in the text : $\displaystyle g(P)=\frac{1}{2}\sum_{u\in U,i\in I}\mathbb{1}_{K}(u,i)\left(R_{u,i}-\sum_{f\in F}Q^0_{u,f}P_{f,i}\right)^2+\frac{\rho}{2}\lVert Q^0\rVert_{F}^2+\frac{\rho}{2}\sum_{i\in I,f\in F}P_{f,i}^2$

Hence, $\forall{k\in F},\forall{l\in I}, \displaystyle \frac{\partial g}{\partial P_{k,l}}(P)=\sum_{u\in U,i\in I}\mathbb{1}_{K}(u,i)\delta_{i,l}\left(R_{u,i}-\sum_{f\in F}Q^0_{u,f}P_{f,i}\right)(-Q^0_{u,k})+\rho P_{k,l}$

Thus, $\displaystyle \nabla g(P)={Q^0}^T\left(\mathbb{1}_{K}\circ(Q^0P-R)\right)+\rho P$

#### Question 2.2

In [64]:
# initialize parameters

# Reload the data with the loader's default options (earlier cells rebound R and mask).
R, mask = load_movielens("ml-100k/u.data")

# Truncated SVD of R keeping 4 singular triplets: Q0 is the left factor, S the
# singular values and P0 the right factor. Q0 is kept fixed in section 2 and
# P0 serves as the starting point of the descent.
Q0, S, P0 = svds(R, k=4)

# Regularisation weight handed to the objective as `rho`.
rho = 0.3

# define the objective g and its gradient grad_g as functions of the flattened P

def g(P):
    """Value of the objective at P, with Q0, R, mask and rho taken from the notebook globals.

    Parameters
    ----------
    P : array_like
        The matrix P, either flattened or already 2-D. It is reshaped to have
        one row per column of Q0 before being passed to `objective`.

    Returns
    -------
    The first element of the tuple returned by `objective` (the objective value).
    """
    # The number of rows of P must match the number of columns of Q0; -1 lets
    # NumPy infer the number of columns, so this also works for the mini
    # dataset or a different SVD rank instead of the fixed (4, 1682).
    P = np.reshape(P, (Q0.shape[1], -1))
    val = objective(P, Q0, R, mask, rho)[0]
    return val

def grad_g(P):
    """Gradient of the objective with respect to P, returned as a flat vector.

    Q0, R, mask and rho are taken from the notebook globals.

    Parameters
    ----------
    P : array_like
        The matrix P, either flattened or already 2-D. It is reshaped to have
        one row per column of Q0 before being passed to `objective`.

    Returns
    -------
    numpy.ndarray
        The second element of the tuple returned by `objective`, flattened with
        `np.ravel` so it can be compared with a flat P (e.g. by `check_grad`).
    """
    # Derive the shape from Q0 rather than hard-coding (4, 1682): the row count
    # of P equals the column count of Q0, and -1 infers the number of columns.
    P = np.reshape(P, (Q0.shape[1], -1))
    grad_P = objective(P, Q0, R, mask, rho)[1]
    return np.ravel(grad_P)

In [67]:
# Compare grad_g with a finite-difference approximation of the gradient of g
# at the flattened SVD starting point; check_grad returns the norm of the difference.
error = check_grad(g, grad_g, P0.ravel())
print("error on the gradient :", error)

error on the gradient : 1.1190771584292862


#### Question 2.3

In [68]:
# compute the minimum of a function using the gradient method

def gradient(fonction, P0, gamma, epsilon, max_iter=None):
    """Minimise a function by gradient descent with a constant step size.

    Q0, R, mask and rho are taken from the notebook globals and forwarded to
    `fonction` at every evaluation.

    Parameters
    ----------
    fonction : callable
        Called as ``fonction(P, Q0, R, mask, rho)``; must return ``(value, gradient)``.
    P0 : numpy.ndarray
        Starting point. It is not modified in place.
    gamma : float
        Constant step size.
    epsilon : float
        The iterations stop as soon as the Euclidean (Frobenius) norm of the
        gradient is no larger than this threshold.
    max_iter : int, optional
        Upper bound on the number of descent steps. ``None`` (default) means
        no bound, i.e. iterate until the gradient criterion is met.

    Returns
    -------
    (P, val)
        The last iterate and the value of `fonction` at that iterate.
    """
    # Keep the value returned together with the gradient so that no extra
    # evaluation of the objective is needed once the loop has finished.
    val, grad = fonction(P0, Q0, R, mask, rho)
    n_iter = 0
    while np.sqrt(np.sum(grad**2)) > epsilon:
        # Optional safeguard against a step size for which the descent does not converge.
        if max_iter is not None and n_iter >= max_iter:
            break
        P0 = P0 - gamma * grad
        val, grad = fonction(P0, Q0, R, mask, rho)
        n_iter += 1
    return P0, val

In [69]:
# Constant step 1/L, where L = rho + ||Q0^T Q0||_F is an upper bound on the
# Lipschitz constant of the gradient of g.
gamma = 1/(rho + np.sqrt(np.sum((Q0.T@Q0)**2)))
epsilon = 1

# Run the descent once and reuse its result for both prints, instead of
# solving the same problem twice.
P_gd, val_gd = gradient(objective, P0, gamma, epsilon)
print("The minimum of g is", val_gd)
print("The argmin of g is ", P_gd)

The minimum of g is 369551.5499148193
The argmin of g is  [[-2.23284225e+00  4.90664665e-01  9.98919015e+00 ... -4.74987005e-01
   3.92212474e-02  8.67808467e-01]
 [ 4.53777371e+00 -1.35365298e+01 -2.56785789e+00 ...  3.63533351e-01
  -3.15394841e-01 -1.77455812e-01]
 [-2.02978127e+01 -4.68180885e-01 -1.07452929e+01 ... -3.43502581e-01
   8.21005494e-02  1.54999171e-01]
 [ 5.76416726e+01  2.77454348e+01  1.98640643e+01 ...  6.09197523e-02
   6.75813808e-01  6.32544121e-01]]


#### Question 2.5

In [70]:
# add line search method

def gradient_2(fonction,P0,epsilon):
    """Gradient descent where the step size is chosen by a line search at every iteration.

    Q0, R, mask and rho are taken from the notebook globals and forwarded to
    `fonction`. The line search itself is run on the notebook-level `g` and
    `grad_g`, which work on the flattened P.

    Parameters
    ----------
    fonction : callable
        Called as ``fonction(P, Q0, R, mask, rho)``; must return ``(value, gradient)``.
    P0 : numpy.ndarray
        Starting point. It is not modified in place.
    epsilon : float
        The iterations stop as soon as the Euclidean (Frobenius) norm of the
        gradient is no larger than this threshold.

    Returns
    -------
    (P, val)
        The last iterate and the value of `fonction` at that iterate. If the
        line search fails, a RuntimeWarning is emitted and the current iterate
        is returned.
    """
    import warnings

    # Keep the value returned with the gradient so no extra evaluation is
    # needed after the loop.
    val, grad = fonction(P0, Q0, R, mask, rho)
    while np.sqrt(np.sum(grad**2)) > epsilon:
        # Search along the steepest-descent direction; line_search works on flat vectors.
        gamma = line_search(g, grad_g, np.ravel(P0), -np.ravel(grad))[0]
        if gamma is None:
            # line_search reports a failed search with a None step; multiplying
            # the gradient by None would raise a TypeError, so stop explicitly.
            warnings.warn(
                "line search did not converge; stopping at the current iterate",
                RuntimeWarning,
            )
            break
        P0 = P0 - gamma * grad
        val, grad = fonction(P0, Q0, R, mask, rho)
    return P0, val

In [71]:
# Run the line-search descent once and reuse its result for both prints,
# instead of solving the same problem twice.
P_ls, val_ls = gradient_2(objective, P0, 1)
print("The minimum of g is", val_ls)
print("The argmin of g is ", P_ls)

The minimum of g is 369551.4110548445
The argmin of g is  [[-2.23398105e+00  4.90322723e-01  9.98772528e+00 ... -4.74423899e-01
   3.91619850e-02  8.66849934e-01]
 [ 4.53883470e+00 -1.35332793e+01 -2.56588860e+00 ...  3.63102008e-01
  -3.14917972e-01 -1.77259625e-01]
 [-2.02984823e+01 -4.65096299e-01 -1.07474683e+01 ... -3.43094903e-01
   8.19763912e-02  1.54827764e-01]
 [ 5.76420231e+01  2.77486548e+01  1.98678781e+01 ...  6.08473595e-02
   6.74790793e-01  6.31843664e-01]]


### 3 - Resolution of the full problem

#### Question 3.1

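The data-fitting term of $ f $ involves $ \lVert QP \rVert_F^2 $, so $ f $ is a polynomial of degree 4 in the entries of $ (P, Q) $. Its gradient is therefore a polynomial of degree 3, whose derivative (the Hessian of $ f $) is unbounded in norm. Hence the gradient of $ f $ is not Lipschitz continuous.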

#### Question 3.2

In [72]:
# We use the same strategy as before and apply the gradient method with a line search

def g_3(PQ):
    """Value of the full objective at the stacked vector PQ (R, mask, rho are notebook globals)."""
    value, _ = total_objective_vectorized(PQ, R, mask, rho)
    return value

def grad_3(PQ):
    """Gradient of the full objective at the stacked vector PQ, flattened to 1-D.

    R, mask and rho are read from the notebook globals.
    """
    _, gradient_value = total_objective_vectorized(PQ, R, mask, rho)
    flat_gradient = np.ravel(gradient_value)
    return flat_gradient

def line_searh_grad_generalized(fonction,P0Q0,epsilon):
    """Gradient descent with line search on the stacked variable (P, Q).

    R, mask and rho are taken from the notebook globals and forwarded to
    `fonction`. The line search itself is run on the notebook-level `g_3` and
    `grad_3`.

    Parameters
    ----------
    fonction : callable
        Called as ``fonction(PQ, R, mask, rho)``; must return ``(value, gradient)``.
    P0Q0 : numpy.ndarray
        Flat starting vector. It is not modified in place.
    epsilon : float
        The iterations stop as soon as the Euclidean norm of the gradient is
        no larger than this threshold.

    Returns
    -------
    (PQ, val)
        The last iterate and the value of `fonction` at that iterate. If the
        line search fails, a RuntimeWarning is emitted and the current iterate
        is returned.
    """
    import warnings

    # Keep the value returned with the gradient so no extra evaluation is
    # needed after the loop.
    val, grad = fonction(P0Q0, R, mask, rho)
    while np.sqrt(np.sum(grad**2)) > epsilon:
        gamma = line_search(g_3, grad_3, P0Q0, -np.ravel(grad))[0]
        if gamma is None:
            # line_search reports a failed search with a None step; multiplying
            # the gradient by None would raise a TypeError, so stop explicitly.
            warnings.warn(
                "line search did not converge; stopping at the current iterate",
                RuntimeWarning,
            )
            break
        P0Q0 = P0Q0 - gamma * grad
        val, grad = fonction(P0Q0, R, mask, rho)
    return P0Q0, val

#### Question 3.3

In [73]:
# Stack the flattened P0 followed by the flattened Q0 into one starting vector.
P0Q0 = np.concatenate([np.ravel(P0),np.ravel(Q0)])

# Solve the full problem once and keep the result, instead of running the
# whole optimisation a second time for the second print.
argmin, min_value = line_searh_grad_generalized(total_objective_vectorized, P0Q0, 100)
print("The minimum of g is", min_value)
print("The argmin of g is ", argmin)

The minimum of g is 35976.86706042895
The argmin of g is  [-0.31075175  0.02359593  0.80690923 ... -0.31753188 -0.8337803
  1.98480307]


In [74]:
# Solve the full problem from the stacked SVD initialisation so that `argmin`
# is defined within this cell.
argmin = line_searh_grad_generalized(total_objective_vectorized, P0Q0, 100)[0]

# Split the flat solution back into P and Q: the P entries come first, then
# the Q entries, matching the order used to build P0Q0.
i,j = R.shape[1], R.shape[0]
F = argmin.shape[0] // (i + j)
Pv = argmin[0:i*F]
Qv = argmin[i*F:]
Ph = np.reshape(Pv, (F, i))
Qh = np.reshape(Qv, (j, F))

# Predicted ratings Qh @ Ph. Films the user has already rated are set to -inf
# so they can never be selected, even when every unseen prediction is negative
# (multiplying by 0 would leave them at 0 and let them win the argmax).
score = np.where(mask, -np.inf, Qh @ Ph)
print('The recommended movie for user 300 is the movie ', np.argmax(score[300,:]))

The recommended movie for user 300 is the movie 312
