## Libraries

In [148]:
import matplotlib.pyplot as plt
import pandas as pd 
import numpy as np
import copy
import scipy.io
from scipy.optimize import minimize
from scipy.optimize import differential_evolution
import pdb
import seaborn as sns
import random


## Parameter Initialization Function

In [149]:
def initilizeFeat(nu,ni,nf,seed=42):
    """Create small random starting values for the two factor matrices.

    Parameters
    ----------
    nu, ni : int
        Row counts of ``Theta`` and ``X`` respectively.
    nf : int
        Number of latent features (columns of both matrices).
    seed : int, optional
        Value passed to ``np.random.seed``; note this reseeds NumPy's
        global generator.

    Returns
    -------
    (X, Theta) : tuple of ndarray
        ``X`` has shape ``(ni, nf)``, ``Theta`` has shape ``(nu, nf)``;
        all entries are uniform draws scaled into ``[0, 0.05)``.
    """
    scale = 0.05
    np.random.seed(seed)
    # Draw order matters for reproducibility: Theta is sampled before X.
    theta_init = scale * np.random.rand(nu, nf)
    x_init = scale * np.random.rand(ni, nf)
    return x_init, theta_init

## Helper Functions

In [150]:
def flatterRev(x,nu,ni,nf):
    """Unpack a flat parameter vector into the ``X`` and ``Theta`` matrices.

    The first ``ni*nf`` entries of ``x`` become ``X`` with shape
    ``(ni, nf)``; the remaining entries become ``Theta`` with shape
    ``(nu, nf)``. Both are read in column-major (Fortran) order, which is
    the inverse of the packing done by ``flatter``.

    Returns
    -------
    (X, Theta) : tuple of ndarray
    """
    split_at = ni * nf
    x_part, theta_part = x[:split_at], x[split_at:]
    return (
        x_part.reshape((ni, nf), order='F'),
        theta_part.reshape((nu, nf), order='F'),
    )

def flatter(X, Theta):
    """Pack ``X`` and ``Theta`` into a single 1-D parameter vector.

    Each 2-D matrix is unrolled column by column (Fortran order); the
    entries of ``X`` come first, followed by those of ``Theta``.

    Returns
    -------
    ndarray
        Vector of length ``X.size + Theta.size``.
    """
    unrolled = []
    for block in (X, Theta):
        n_entries = block.shape[0] * block.shape[1]
        unrolled.append(block.reshape(n_entries, order='F'))
    return np.concatenate(unrolled)
    

# Cost Function

In [151]:
def costFunc(X,Theta,R,M,la=0):
    """Regularised squared-error cost of the factorisation ``Theta @ X.T``.

    Parameters
    ----------
    X, Theta : ndarray
        Factor matrices; ``Theta @ X.T`` must have the same shape as ``R``.
    R : array_like
        Target values.
    M : array_like of bool
        Mask for ``R``: ``True`` entries are hidden and excluded from the
        error, ``False`` entries are scored.
    la : float, optional
        Weight of the L2 penalty on ``Theta`` and ``X``.

    Returns
    -------
    float
        ``(0.5*sum(err**2) + 0.5*la*sum(Theta**2) + 0.5*la*sum(X**2))``
        divided by the number of unmasked entries of ``M``.
    """
    targets = np.ma.array(R, mask=M)
    residual = np.matmul(Theta, X.T) - targets

    # np.sum over a masked array skips the hidden entries.
    fit_term = 0.5 * np.sum(np.power(residual, 2))
    theta_penalty = la * 0.5 * np.sum(np.power(Theta, 2))
    x_penalty = la * 0.5 * np.sum(np.power(X, 2))
    total = fit_term + theta_penalty + x_penalty

    n_scored = np.sum(M == False)
    return total / n_scored


def CF(x,R,M,nu,ni,nf,la=0):
    """Cost as a function of the flat parameter vector ``x``.

    Adapter for optimisers that work on 1-D vectors: unpacks ``x`` with
    ``flatterRev`` and forwards the resulting matrices to ``costFunc``.

    Returns
    -------
    Whatever ``costFunc`` returns for the unpacked parameters.
    """
    x_mat, theta_mat = flatterRev(x, nu, ni, nf)
    return costFunc(x_mat, theta_mat, R, M, la=la)

## Gradient Function

In [152]:
def gradFunc(x,R,M,nu,ni,nf,la=0 ):
    """Gradient of the cost ``CF`` with respect to the flat vector ``x``.

    Parameters
    ----------
    x : ndarray
        Packed parameters, laid out as produced by ``flatter`` (``X``
        first, then ``Theta``, each in column-major order).
    R : array_like
        Target values, shape ``(nu, ni)``.
    M : array_like of bool
        Mask for ``R``; ``True`` entries are hidden and must not
        contribute to the gradient.
    nu, ni, nf : int
        Dimensions used to unpack ``x``.
    la : float, optional
        L2 regularisation weight.

    Returns
    -------
    ndarray
        Plain (unmasked) 1-D gradient with the same layout as ``x``,
        divided by the number of unmasked entries, matching the
        normalisation used by the cost.
    """
    X, Theta=flatterRev(x,nu,ni,nf)
    R=np.ma.array(R, mask=M)

    # np.matmul does not honour masks, so hidden residuals are set to
    # exactly zero first; otherwise they would leak into both products.
    e=np.ma.filled(np.matmul(Theta,X.T)-R, 0.0)

    TG=np.matmul(e,X)+la*Theta
    XG=np.matmul(e.T,Theta)+la*X
    grads=np.concatenate([XG.ravel(order='F'),TG.ravel(order='F')])
    return grads/np.sum(M==False)

## Training Function

In [153]:
def trainMF(R,M,nf,la=0,seed=42):
    """Fit the factor matrices to ``R`` by conjugate-gradient minimisation.

    Parameters
    ----------
    R : array_like
        Target matrix; its two dimensions give ``nu`` and ``ni``.
    M : array_like of bool
        Mask for ``R`` (``True`` = hidden entry).
    nf : int
        Number of latent features.
    la : float, optional
        L2 regularisation weight forwarded to ``CF`` and ``gradFunc``.
    seed : int, optional
        Seed forwarded to ``initilizeFeat``.

    Returns
    -------
    (cost, res, nu, ni, nf) : tuple
        ``cost`` is ``CF`` evaluated at the optimiser's solution, ``res``
        is the ``scipy.optimize.minimize`` result object, and the last
        three values are the problem dimensions.
    """
    nu, ni = R.shape[0], R.shape[1]
    masked_R = np.ma.array(R, mask=M)

    X0, Theta0 = initilizeFeat(nu, ni, nf, seed=seed)
    x0 = flatter(X0, Theta0)

    # The same extra arguments go to both the objective and its gradient.
    extra_args = (masked_R, M, nu, ni, nf, la)
    solver_options = {'disp': True, 'gtol': 1e-5}

    # CG is used deliberately: an earlier note in this notebook records
    # that BFGS crashes on this problem.
    res = minimize(CF, x0, args=extra_args, method='CG', jac=gradFunc,
                   options=solver_options)

    final_cost = CF(res.x, *extra_args)
    return (final_cost, res, nu, ni, nf)



## Predicting Function

In [154]:
def Predict(res,nu,ni,nf,la=0):
    """Reconstruct the full prediction matrix from an optimiser result.

    Parameters
    ----------
    res : object
        Anything with an ``x`` attribute holding the packed parameters
        (e.g. the result returned by ``scipy.optimize.minimize``).
    nu, ni, nf : int
        Dimensions used to unpack ``res.x``.
    la : float, optional
        Accepted for signature compatibility; not used here.

    Returns
    -------
    ndarray
        ``Theta @ X.T`` with shape ``(nu, ni)``.
    """
    x_mat, theta_mat = flatterRev(res.x, nu, ni, nf)
    return np.matmul(theta_mat, x_mat.T)
    

# Splitting Training and Test Datasets

In [155]:
def splitMatrix(R,M,testPer,seed=9273482):
    """Randomly hold out a fraction of the observed entries as a test set.

    Parameters
    ----------
    R : ndarray
        Rating matrix.
    M : ndarray
        Indicator matrix of the same shape as ``R``; entries equal to 1
        mark observed ratings.
    testPer : float
        Fraction of the observed ratings to move to the test set. The
        count is ``int(testPer * M.sum())`` (truncated).
    seed : int, optional
        Seed for Python's ``random`` module. The default reproduces the
        split this notebook has always produced. Note that this reseeds
        the global ``random`` generator.

    Returns
    -------
    trainR : numpy.ma.MaskedArray
        Copy of ``R`` with held-out entries zeroed and every
        non-training entry masked.
    trainMask : ndarray of bool
        ``True`` where an entry is NOT a training rating.
    testR : numpy.ma.MaskedArray
        ``R`` restricted to the held-out entries; everything else masked.
    tM : ndarray of bool
        ``True`` where an entry is NOT a test rating.
    mu : numpy.ma.MaskedArray
        Per-row mean of the training ratings (``axis=1``).

    The inputs ``R`` and ``M`` are not modified.
    """
    # Number of ratings to hold out, truncated towards zero.
    testsize=int(testPer*np.sum(M))

    # Pick which observed (row, col) pairs go to the test set.
    random.seed(seed)
    ind1, ind2=np.where(M==1)
    testSamples=random.sample(range(ind1.shape[0]), testsize)
    testInd1=ind1[testSamples]
    testInd2=ind2[testSamples]

    # Training side: work on copies so the caller's arrays stay intact.
    trainR=copy.copy(R)
    trainM=copy.copy(M)
    trainR[testInd1,testInd2]=0
    trainM[testInd1,testInd2]=0

    trainMask=(trainM==0)
    trainR=np.ma.array(trainR, mask=trainMask)
    mu=np.average(trainR,axis=1)

    # Test side: keep only the held-out entries.
    testM=np.zeros(shape = (R.shape[0],R.shape[1]))
    testM[testInd1,testInd2]=1
    tM=(testM==0)
    testR=np.ma.array(R*testM, mask=tM)
    return trainR, trainMask, testR, tM, mu

## Test Error Function

In [156]:
def testMF(tR,tM,predict):
    tR=np.ma.array(tR, mask=tM)
    e=np.abs(tR-predict)
    #e[e<1]=0
    #e[e>5]=5
    #pdb.set_trace()
    testMSE=np.sum(np.power(e,2))/np.sum(tM==False)
    
    #e[e<1]=1
    #e[e>1]=0
    #e=np.ma.array(e, mask=tM)
    #import pdb; pdb.set_trace()
    #ErrPer=np.sum(e)/np.sum(tM==False)
    return(testMSE)



def accuracyMF(tR,tM,predict):
    """Fraction of unmasked entries predicted to within one unit.

    A prediction counts as a hit when ``|tR - predict| < 1``. This
    includes exact matches (error of zero), which the earlier two-step
    in-place thresholding wrongly scored as misses.

    Parameters
    ----------
    tR : array_like
        Reference values.
    tM : array_like of bool
        Mask for ``tR``; ``True`` entries are excluded from the score.
    predict : array_like
        Predicted values, same shape as ``tR``.

    Returns
    -------
    float
        Number of hits divided by the number of unmasked entries of
        ``tM``.
    """
    tR=np.ma.array(tR, mask=tM)
    abs_err=np.ma.array(np.absolute(tR-predict), mask=tM)

    # Masked positions are filled with False so they can never be
    # counted as hits, whatever value sits underneath the mask.
    hits=np.ma.filled(abs_err<1, False)

    accu=np.count_nonzero(hits)/np.sum(tM==False)
    return(accu)


def plotDensity(tR,tM,FinalPredict):
    """Plot kernel density estimates of true vs. predicted values.

    Both inputs are restricted to the entries that ``tM`` leaves
    unmasked (``False``), then drawn on the current matplotlib axes:
    the reference values in blue and the predictions in olive.

    Parameters
    ----------
    tR : array_like
        Reference values.
    tM : array_like of bool
        Mask; ``True`` entries are left out of both densities.
    FinalPredict : array_like
        Predicted values, same shape as ``tR``.

    Notes
    -----
    Switches the global matplotlib style to ``dark_background`` and
    calls ``plt.show()``. Returns ``None``.
    """
    tR=np.ma.array(tR, mask=tM)
    FinalPredict=np.ma.array(FinalPredict, mask=tM)

    # compressed() yields a plain 1-D array of the unmasked values only.
    # flatten() would keep the hidden values in the underlying buffer, and
    # whether they are then ignored depends on the plotting library.
    true_values=tR.compressed()
    predicted_values=FinalPredict.compressed()

    plt.style.use(['dark_background'])
    sns.kdeplot(true_values, shade=True, bw=.5, color="blue")
    sns.kdeplot(predicted_values, shade=True, bw=.5, color="olive")
    plt.show()
    return

## Looking for the Best Parameter Setting

In [157]:
# Load the rating matrix (Y) and its observed-entry indicator (R).
mat = scipy.io.loadmat('C:/Users/bagheri/Desktop/recommenderSystem/ex8_movies.mat')
R=mat['Y']
M=mat['R']


# Hold out 10% of the observed ratings. From here on M is the training
# mask (True = not a training rating) and tM the test mask.
trainR, M, testR, tM, mu=splitMatrix(R,M,0.1)

# Mean-normalise ONCE, outside the sweep. Doing this inside the loop
# subtracted mu again on every iteration, so every nf after the first
# was trained and scored on shifted data.
trainR_centered=trainR-mu[:,None]
testR_centered=testR-mu[:,None]

# Candidate numbers of latent features.
#NF=[1,2,10,20,30,40,50,60,70,80,90,100, 110, 150, 200]
NF=[10,20,30,50,100,200]
# The sweep is currently disabled; delete the next line to run it.
NF=[]
myseed=5623
for nf in NF:
    trainingError, res,nu,ni,nf=trainMF(trainR_centered,M,nf=nf,la=1,seed=myseed)
    mypredict=Predict(res,nu,ni,nf,la=0)
    # Baseline: always predict 0, i.e. the row mean after normalisation.
    zeropredict=np.zeros(shape = (mypredict.shape[0],mypredict.shape[1]))
    testError=testMF(testR_centered,tM,mypredict)
    accuracy=accuracyMF(testR_centered,tM,mypredict)
    accuracyTrain=accuracyMF(trainR_centered,M,mypredict)
    accuracyz=accuracyMF(testR_centered,tM,zeropredict)
    accuracyTrainz=accuracyMF(trainR_centered,M,zeropredict)
    print('[nf='+str(nf)+']'+'Training Error:'+str(trainingError))
    print('[nf='+str(nf)+']'+'Test Error:'+str(testError))
    print('[nf='+str(nf)+']'+'Accuracy After Optimization:test:'+str(accuracy))
    print('[nf='+str(nf)+']'+'Accuracy If we always guess 0:test:'+str(accuracyz))
    print('[nf='+str(nf)+']'+'Accuracy After Optimization:train:'+str(accuracyTrain))
    print('[nf='+str(nf)+']'+'Accuracy If we always guess 0:train:'+str(accuracyTrainz))
    plotDensity(testR_centered,tM,mypredict)


# Building the Recommender System Model

In [158]:
def buildRSModel(R,M,mu=None, nf=10,la=0,seed=42, movie_names=None):
    """Train a factorisation model and bundle it with its metadata.

    Parameters
    ----------
    R : array_like
        Rating matrix.
    M : array_like of bool
        Mask for ``R`` (``True`` = entry not used for training).
    mu : array_like, optional
        Per-row offsets subtracted from ``R`` before training. When
        omitted, the masked per-row mean of ``R`` is used.
    nf, la, seed
        Forwarded to ``trainMF``.
    movie_names : optional
        Stored in the model unchanged.

    Returns
    -------
    dict
        Keys ``trainingError``, ``res``, ``nu``, ``ni``, ``nf``, ``la``,
        ``movie_names``, ``mu``, ``R`` and ``M``. ``R`` and ``M`` are the
        objects passed in, not the centred copy used for training.
    """
    # Work on copies so the centring below cannot touch the caller's data.
    ratings = np.ma.array(copy.copy(R), mask=copy.copy(M))

    if mu is None:
        mu = np.average(ratings, axis=1)
    centred = ratings - mu[:, None]

    trainingError, res, nu, ni, nf = trainMF(centred, M, nf=nf, la=la, seed=seed)

    return {
        'trainingError': trainingError,
        'res': res,
        'nu': nu,
        'ni': ni,
        'nf': nf,
        'la': la,
        'movie_names': movie_names,
        'mu': mu,
        'R': R,
        'M': M,
    }

#Example
#mat = scipy.io.loadmat('C:/Users/bagheri/Desktop/recommenderSystem/ex8_movies.mat')
#R=mat['Y']
#M=mat['R']
#trainR=copy.copy(R)
#trainM= (M==0)
#movie_names = pd.read_csv('C:/Users/bagheri/Desktop/recommenderSystem/movie_ids.txt', header = None, delimiter=';')[1]

#mymodel=buildRSModel(R=trainR,M=trainM,mu=None, nf=100,la=0,seed=42, movie_names=movie_names)


# Predicting for User X

In [159]:
def predictForUserX(user_Id,model,movie_Id=None):
    """Rank the not-yet-rated rows of ``R`` for one column of the model.

    Parameters
    ----------
    user_Id : int
        Column index into ``model['R']`` and into the prediction matrix.
    model : dict
        Model as returned by ``buildRSModel``; the keys ``res``, ``nu``,
        ``ni``, ``nf``, ``movie_names``, ``mu`` and ``R`` are read.
    movie_Id : optional
        Accepted for signature compatibility; not used.

    Returns
    -------
    pandas.DataFrame
        Columns ``names``, ``predictedRating`` and ``originalrating``,
        sorted by ``predictedRating`` in descending order and limited to
        rows whose ``originalrating`` is 0.
    """
    res=model['res']
    nu=model['nu']
    ni=model['ni']
    nf=model['nf']
    movie_names=model['movie_names']
    mu=model['mu']
    R=model['R']

    mypredict=Predict(res,nu,ni,nf,la=0)

    # Undo the mean-normalisation. buildRSModel subtracts mu[:, None],
    # i.e. one offset per ROW, so the whole vector is added back to the
    # selected column. Indexing mu with the column id (mu[user_Id]) added
    # a single, unrelated row mean to every prediction.
    # Rows whose mean is masked (no ratings at all) become NaN.
    Pred=np.ma.filled(mypredict[:,user_Id]+mu, np.nan)

    mydata=pd.DataFrame()
    mydata['names']=movie_names
    mydata['predictedRating']=Pred
    mydata['originalrating']=R[:,user_Id]
    mydata=mydata.sort_values(by=['predictedRating'], ascending=False)
    # A stored rating of 0 is treated as "not rated yet".
    output=mydata[mydata['originalrating'] == 0]
    return(output)

#Example
#predictForUserX(934,mymodel,movie_Id=None).head()


# Enter your own rating

In [160]:
def weRecommend(myratings,modelparam=None,
                movies_path='C:/Users/bagheri/Desktop/recommenderSystem/movie_ids.txt',
                ratings_path='C:/Users/bagheri/Desktop/recommenderSystem/ex8_movies.mat'):
    """Train a model that includes a new user's ratings and recommend titles.

    The new user's ratings are prepended as column 0 of the rating
    matrix, a model is trained on the augmented matrix, and that
    column's unrated titles are ranked by predicted rating.

    Parameters
    ----------
    myratings : pandas.DataFrame
        Must have a ``names`` column (titles as they appear in the movie
        list) and a ``rating`` column.
    modelparam : dict, optional
        If given, its ``nf`` and ``la`` entries configure the model;
        otherwise ``nf=100`` and ``la=0.1`` are used.
    movies_path, ratings_path : str, optional
        Locations of the ``;``-delimited movie list and of the ``.mat``
        file holding ``Y`` (ratings) and ``R`` (indicator). The defaults
        are the paths this notebook has always used.

    Returns
    -------
    dict
        ``{'bests': DataFrame, 'worsts': DataFrame}`` with the 15 highest
        and 15 lowest predicted titles (columns ``names`` and
        ``predictedRating``).

    Raises
    ------
    ValueError
        If ``myratings`` contains a title that is not in the movie list.
    """
    movie_names = pd.read_csv(movies_path, header = None, delimiter=';')[1]
    mat = scipy.io.loadmat(ratings_path)
    R=mat['Y']
    M=mat['R']

    print(R.shape)

    # Match ratings to catalogue rows BY TITLE. The previous approach
    # sorted the ratings and the catalogue independently and assigned
    # positionally, which only lines up when every title is unique and
    # present in both.
    rating_by_name=dict(zip(myratings['names'], myratings['rating']))
    catalogue=set(movie_names)
    unknown=[name for name in rating_by_name if name not in catalogue]
    if unknown:
        raise ValueError('Titles not found in the movie list: '+repr(unknown))

    rated=movie_names.isin(list(rating_by_name)).values
    newuserratingR=np.zeros(R.shape[0])
    newuserratingR[rated]=movie_names[rated].map(rating_by_name).values
    # Mask convention used for training: True = no rating available.
    newuserratingM=~rated

    # The new user becomes column 0 of the augmented data.
    trainR=np.concatenate((newuserratingR[:,None],R),axis=1)
    trainM=np.concatenate((newuserratingM[:,None],(M==0)),axis=1)

    if modelparam is None:
        nf, la=100, 0.1
    else:
        nf=modelparam['nf']
        la=modelparam['la']
    mymodel=buildRSModel(R=trainR,M=trainM,mu=None, nf=nf,la=la, movie_names=movie_names)

    # Rank once and slice both ends, instead of predicting twice.
    ranked=predictForUserX(0,mymodel,movie_Id=None)
    # Drop the last column (the original rating, always 0 here).
    bests=ranked.head(15).iloc[:, :-1]
    worsts=ranked.tail(15).iloc[:, :-1]
    output={'bests':bests,'worsts':worsts}
    return output


