# Question 1 

In [7]:
import numpy as np
from scipy import sparse
from scipy.optimize import check_grad
from scipy.sparse.linalg import svds

def load_movielens(filename, minidata=False):
    """
    Cette fonction lit le fichier filename de la base de donnees
    Movielens, par exemple 
    filename = '~/datasets/ml-100k/u.data'
    Elle retourne 
    R : une matrice utilisateur-item contenant les scores
    mask : une matrice valant 1 si il y a un score et 0 sinon
    """

    data = np.loadtxt(filename, dtype=int,encoding='utf-8')

    R = sparse.coo_matrix((data[:, 2], (data[:, 0]-1, data[:, 1]-1)),
                          dtype=float)
    R = R.toarray()  # not optimized for big data

    # code la fonction 1_K
    mask = sparse.coo_matrix((np.ones(data[:, 2].shape),
                              (data[:, 0]-1, data[:, 1]-1)), dtype=bool )
    mask = mask.toarray()  # not optimized for big data

    if minidata is True:
        R = R[0:100, 0:200].copy()
        mask = mask[0:100, 0:200].copy()

    return R, mask



In [8]:
# Change the path
(R,mask)=load_movielens('/Users/ahmedewva/Downloads/ml-100k/u.data',False)

In [9]:
print(len(R[0]),len(R))

1682 943


In [10]:
number_of_grades=np.count_nonzero(mask)
print(number_of_grades)

100000


The number of grades is 100000

# Question 2.1

\begin{equation}
g(P)=\frac{1}{2}\|1_K\circ(R-Q_0P)\|_F^2+\frac{\rho}{2}\|Q_0\|_F^2+\frac{\rho}{2}\|P\|_F^2
\end{equation}
\begin{equation}
\nabla g(P)=Q_0^T(1_K\circ(Q_0P-R))+\rho P
\end{equation}




# Question 2.2

In [11]:
def objective(P, Q0, R, mask, rho):
    """
    La fonction objectif du probleme simplifie.
    Prend en entree 
    P : la variable matricielle de taille C x I
    Q0 : une matrice de taille U x C
    R : une matrice de taille U x I
    mask : une matrice 0-1 de taille U x I
    rho : un reel positif ou nul

    Sorties :
    val : la valeur de la fonction
    grad_P : le gradient par rapport a P
    """

    tmp = (R - Q0.dot(P)) * mask

    val = np.sum(tmp ** 2)/2. + rho/2. * (np.sum(Q0 ** 2) + np.sum(P ** 2))

    grad_P = -(Q0.T).dot(tmp)+rho*P   # todo

    return val, grad_P


In [25]:
mP=np.array([[1,2,3],[1,7,2]])
mQ=np.array([[5,6],[9,3],[0,1]])
mR=np.array([[1,3,2],[0,9,4],[0,2,4]])
m_mask=np.array([[1,0,1],[0,1,1],[0,0,1]])
rho=0.3
def func(A):
    A=A.reshape(2,3)
    return objective(A, mQ, mR, m_mask, rho)[0]
def grad(A):
    A=A.reshape(2,3)
    return np.ravel(objective(A, Q_, R_, mask_, rho)[1])
error=check_grad(func, grad,np.ravel(P)) 
print(error)

525.7299709276076


# Question 2.3

In [27]:
def gradient(g,P0,gamma,epsilon):
    P=P0
    while (np.sqrt(np.sum(g(P)[1]** 2))>epsilon):
        P=P-gamma*g(P)[1]
    return(P,g(P)[0])


# Question 2.4

In [29]:
Q0, s, P0 = svds(R, k=4,which='LM')
gamma=1/(rho+np.sum((Q0.T.dot(Q0))** 2))
def g(P):
    return objective(P, Q0, R, mask, rho)
epsilon=1
P,min_g=gradient(g, P0, gamma, epsilon)
print("The min of g is: ",min_g)

The min of g is:  369551.7001505275


# Question 2.5

In [33]:
def gradient_with_linesearch(g,P0,epsilon):
    P=P0
    l=0
    a=0.5
    gamma=1
    while (np.sqrt(np.sum(g(P)[1]** 2))>epsilon):
        l=g(P-gamma*g(P)[1])[0]
        r=g(P)[0]-0.5*gamma*np.sum(g(P)[1]** 2)
        while(l>r):
            l+=1
            gamma=2*gamma*(a**(l))
            l=g(P-gamma*g(P)[1])[0]£
            r=g(P)[0]-0.5*gamma*np.sum(g(P)[1]** 2)
        P=P-gamma*g(P)[1]
    return(P,g(P)[0])    

P_linesearch,min_g_linesearch=gradient_with_linesearch(g,P0,1)
print("The minimum of g is: ",min_g_linesearch)

The minimum of g is:  369551.4110548447


# Question 3.1

 If f is libschitz continous then $\nabla^2f$ will be bounded. But since f is polynomial of degree 4 $\nabla^2f$ is polynomial of degree 2 and thus is not bounded.
So f is not libchitz continuous


# Question 3.2


In [None]:
def total_objective(P, Q, R, mask, rho):
    """
    La fonction objectif du probleme complet.
    Prend en entree 
    P : la variable matricielle de taille C x I
    Q : la variable matricielle de taille U x C
    R : une matrice de taille U x I
    mask : une matrice 0-1 de taille U x I
    rho : un reel positif ou nul

    Sorties :
    val : la valeur de la fonction
    grad_P : le gradient par rapport a P
    grad_Q : le gradient par rapport a Q
    """

    tmp = (R - Q.dot(P)) * mask

    val = np.sum(tmp ** 2)/2. + rho/2. * (np.sum(Q ** 2) + np.sum(P ** 2))

    grad_P = -(Q.T).dot(tmp)+rho*P  # todo

    grad_Q = -(tmp).dot(P.T)+rho*Q  # todo

    return val, grad_P, grad_Q

# Question 3.3

In [None]:
# P and Q are given by the alg of the previous question 
def scalar_product(X,Y):
    result=0
    n=len(X)
    for i in range(n):
        result+=X[i]*Y[i]
    return result
len_i=len(P[0])
vector_300=Q[300]
max_product=0
film_index=0
film_vector=[]
for i in range(len_i):
    for k in range(4):
        film_vector.append(P[i][K])
    test=abs(scalar_product(vector_300,film_vector))
    if(test>max_product):
        max_product=test
        film_index=i

    