In [1]:
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import numpy.matlib    
import pandas as pd
import scipy as sc
import csv
import math
import random
from sklearn.model_selection import train_test_split
from scipy.sparse import isspmatrix, csc_matrix, csr_matrix
from scipy.sparse.linalg import eigsh, svds
from numpy.linalg import svd
import os
from sklearn.cluster import KMeans
from sklearn.preprocessing import OneHotEncoder
import io

In [2]:
from skopt import gp_minimize
from skopt.space import Real, Integer

In [3]:
eps                  = np.finfo(float).eps

In [4]:
def createList(r1,r2):
    return [item for item in range(r1,r2 + 1)]

In [5]:
def userSim(Y): 
    print("In userSim")
    N,M              = Y.shape  
    
    Y01              = Y.copy()
    Y01[Y01>0]       = 1
    
    tmp              = (Y**2)@(Y01.T)
    tmp              = tmp + tmp.T
    tmp              = tmp - 2*(Y@(Y.T))
    
    simScore         = 1.0/(1 + np.sqrt(tmp))
    
    nominator        = Y01@Y01.T
    
    denominator      = np.sum(Y01,axis=1).reshape(-1,1)
    denominator      = np.matlib.repmat((denominator),1,N)
    denominator      = denominator + denominator.T
    denominator      = denominator - nominator
    
    B1               = nominator/denominator
    B1[np.isnan(B1)] = 0
    
    simScore         = simScore*B1
    return simScore

In [6]:
def evecs(A,nEvecs):
    npix             = A.shape[0]
    useSparse        = isspmatrix(A)
    
    dd               = 1/((np.sum(A,axis=0))+eps)
    dd               = np.sqrt(dd)

    if(useSparse):
        DD           = sc.sparse.diags(dd)
    else:
        DD           = np.diag(dd)
    
    L                = DD@A@DD
    
    if(useSparse==1):
        ss,v         = eigsh(L,nEvecs,sigma=1)
        ss           = np.flip(ss)
        V            = np.flip(v,axis=1)
    else:
        u,ss,v       = svd(L)
        V            = u[:, :nEvecs]
        
    ss               = ss.reshape(-1,1)
    ss               = ss[:nEvecs]
    
    return V,ss

In [7]:
def spectralClu(A=None,K=None): 
    # A: Affinity Matrix, Higher value -> more similar
    # K: Number of CLuster
    n                = A.shape[0]
    
    D                = np.sum(A,axis=1).reshape(-1,1)
    D[D==0]          = eps
    D                = sc.sparse.spdiags((1.0/np.sqrt(D)).T,0,n,n)
    
    L                = D@A@D
    V,evals          = evecs(L,K)
    
    Vnorm            = (np.linalg.norm(V,axis=1) + eps).reshape(-1,1)
    
    V                = V/Vnorm

    kmeans           = KMeans(n_clusters=K,max_iter=maxiter,random_state=0).fit(V)
    idx              = kmeans.predict(V)

    return idx

In [8]:
def aggregratePrediction(X,UGidx):
    n,m                  = X.shape
    K1                   = len(np.unique(UGidx))
    ctmp_d               = np.ones(n)
    ctmp                 = csc_matrix((ctmp_d,(UGidx.reshape(-1,),range(n))),shape=(K1,n))

    GFreq                = np.bincount(UGidx.reshape(-1,)).reshape(-1,1)
    XPerA                = (ctmp@X)/np.matlib.repmat((GFreq),1,m)
    
    
    return XPerA

In [9]:
def MAE(a,b):
    return np.sum(abs(a*(b!=0) - b))/np.sum(b!=0)

In [10]:
def RMSE(X,Y):
    tmp                        = ((Y - X)*(Y!=0))**2
    
    return math.sqrt(np.sum(tmp)/np.sum(Y!=0))

In [11]:
def AUC(Ypre,Yt,cutoff):
    userWithRating             = (np.sum(Yt!=0,axis=1)>0)
    Yt                         = Yt[userWithRating,:]
    Ypre                       = Ypre[userWithRating,:]
    N,M                        = Yt.shape
    auc                        = 0
    uWithPair                  = 0
    for user in range(N):
        userRating             = Yt[user,:]
        userPrediction         = Ypre[user, :]
        ratedIdx               = userRating>0
        binUserRating          = 2*(userRating[ratedIdx]>=cutoff)-1
        predForKnown           = userPrediction[ratedIdx]
        
        posIdx                 = np.where(binUserRating==1)
        posIdx                 = posIdx[0].T

        negIdx                 = np.where(binUserRating==-1)
        negIdx                 = negIdx[0]
        
        if(np.size(posIdx)!=0 and np.size(negIdx)!=0):
            negIdxGrid         = np.matlib.repmat(negIdx,len(posIdx),1)
            ttlNeg             = np.size(negIdxGrid)
            negIdxGrid         = negIdxGrid.reshape(ttlNeg,1,order='F')
            posIdxGrid         = np.matlib.repmat(posIdx.reshape(-1,1),len(negIdx),1)
            pairs              = np.concatenate((negIdxGrid,posIdxGrid),axis=1)
            pairsPred          = np.concatenate((pairs,predForKnown[negIdxGrid],predForKnown[posIdxGrid]),axis = 1)
            pairsPredWithScore = np.concatenate([pairsPred,(0.5*(pairsPred[:,2]==pairsPred[:,3]).reshape(-1,1)),(1*(pairsPred[:,2]<pairsPred[:,3]).reshape(-1,1))],axis=1)
            auc                = auc + (np.sum(pairsPredWithScore[:,4:6]))/pairsPredWithScore.shape[0]
            uWithPair          = uWithPair + 1    
    auc                        = auc/uWithPair
    
    return auc

In [12]:
def precAtK(Ypre,Yt,k,cutoff):
    # %Yt  : Test Set of size n *m
    # %Ypre: Prediction set of size n *m
    # %cutoff : threshold for relevant item
    # %k: precision@k

    userWithRating             = (np.sum(Yt!=0,axis=1)>0)
    Yt                         = Yt[userWithRating,:]
    Ypre                       = Ypre[userWithRating,:]
    N,M                        = Yt.shape
    prec                       = np.zeros((1,k))

    for user in range(N):
        userRating             = Yt[user, :]
        
        ratedRelevantIdx       = np.where(userRating>=cutoff)
        ratedRelevantIdx       = ratedRelevantIdx[0]

        ratedIdx               = np.where(userRating!=0)
        ratedIdx               = ratedIdx[0]

        userPrediction         = Ypre[user, :]
        prediction             = userPrediction[ratedIdx]
        sortesPIdxTmp          = np.argsort(prediction)[::-1]
        sortesPIdx             = ratedIdx[sortesPIdxTmp]
        nz                     = len(sortesPIdx)
        for kNo in range(k):
            intrsct            = np.intersect1d(sortesPIdx[0:(min(kNo,nz)+1)],ratedRelevantIdx)
            mx                 = max(min(kNo+1,nz),eps)
            prec[:,kNo]        = prec[:,kNo] + len(intrsct)/mx
            
    prec                       = prec/N
    
    return prec

In [13]:
def recallAtK(Ypre,Yt, k, cutoff):
    # %Yt  : Test Set of size n *m
    # %Ypre: Prediction set of size n *m
    # %cutoff : threshold for relevant item
    # %k: recall@k

    userWithRating             = (np.sum(Yt!=0,axis=1)>0)
    Yt                         = Yt[userWithRating,:]
    Ypre                       = Ypre[userWithRating,:]
    N,M                        = Yt.shape
    recall                     = np.zeros((1,k))
    
    for user in range(N):
        userRating             = Yt[user, :]
        userPrediction         = Ypre[user, :]
        
        ratedRelevantIdx       = np.where(userRating>=cutoff)
        ratedRelevantIdx       = ratedRelevantIdx[0]

        ratedIdx               = np.where(userRating!=0)
        ratedIdx               = ratedIdx[0]

        prediction             = userPrediction[ratedIdx]
        
        sortesPIdxTmp          = np.argsort(prediction)[::-1]
        sortesPIdx             = ratedIdx[sortesPIdxTmp]
        
        nz                     = len(sortesPIdx)
        for kNo in range(k):
            intrsct            = np.intersect1d(sortesPIdx[0:(min(kNo,nz)+1)],ratedRelevantIdx)
            mx                 = max(len(ratedRelevantIdx),eps)
            recall[:,kNo]      = recall[:,kNo] + len(intrsct)/mx
            
    recall                     = recall/N
    
    return recall 

In [14]:
def ndcgAtk(Ypre,Yt,k):
    # %Yt  : Test Set of size n *m
    # %Ypre: Prediction set of size n *m
    # %cutoff : threshold for relevant item
    # %k: precision@k

    res                        = np.zeros((1,k))
    cnt                        = 0
    N,M                        = Yt.shape

    for user in range(N):
        ratedIdx               = Yt[user,:]!=0

        userRating             = Yt[user,ratedIdx]
        userPrediction         = Ypre[user,ratedIdx]
        nz                     = len(userRating)

        ranks                  = np.zeros((1,nz))
        ideal_ranks            = np.zeros((1,nz))
        
        I                      = np.argsort(-userPrediction,axis=0)
        ideal_I                = np.argsort(-userRating,axis=0)

        ranks                  = I
        ideal_ranks            = ideal_I

        oriOrder               = np.argsort(ideal_ranks,axis=0)

        nominator              = userRating/(np.log(ranks+2))
        denominator            = userRating/(np.log(ideal_ranks + 2))
    
        nominator              = nominator[oriOrder]
        denominator            = denominator[oriOrder]
        
        if k > nz:
            nominator          = np.concatenate((nominator, np.zeros((k - nz))))
            denominator        = np.concatenate((denominator, np.zeros((k - nz))))
        elif k < nz:
            nominator          = nominator[0:k]
            denominator        = denominator[0:k]          

        if np.array(np.where(np.cumsum(denominator)== 0)).shape[1] != 0:
            tmp                = np.zeros((1,k))
        else:
            tmp                = np.cumsum(nominator) / np.cumsum(denominator)
            cnt                = cnt + 1
        
        res                    = res + tmp
        
    res                        = res/cnt
    
    return res

In [15]:
def EvaluationAllUpdated(Yprd,Y1,k,cutoff):
    mae                        = MAE(Yprd, Y1) 
    rmse                       = RMSE(Yprd, Y1)
    auc                        = AUC(Yprd, Y1, cutoff)
    precision                  = precAtK(Yprd,Y1, k, cutoff)
    recall                     = recallAtK(Yprd,Y1, k, cutoff)
    f1                         = (2*precision*recall)/(precision + recall + eps)
    ndcg                       = ndcgAtk(Yprd,Y1,k)

    return mae,rmse,auc,precision,recall,f1,ndcg

In [16]:
def EvaluationAllUpdated_N(Yprd,Y1,k,cutoff):
    mae                        = MAE(Yprd, Y1) 
    rmse                       = RMSE(Yprd, Y1)

    return mae,rmse

In [17]:
cols                     = ["Model","MAE","RMSE","AUC","Precision@1","Precision@5","Precision@10","Precision@20",
                            "Precision@30","Precision@40","Recall@1","Recall@5","Recall@10","Recall@20","Recall@30",
                            "Recall@40","F1@1","F1@5","F1@10","F1@20","F1@30","F1@40","NDCG@1","NDCG@5","NDCG@10",
                            "NDCG@20","NDCG@30","NDCG@40"]
Result_Trn               = pd.DataFrame(columns=cols)

In [18]:
cols                     = ["Model","MAE","RMSE"]
Result_Trn_CDR               = pd.DataFrame(columns=cols)
Result_Tst_CDR = pd.DataFrame(columns=cols)

In [19]:
def gradUnifiedLS(v,c2,tol,maxiter,l,d,Y,lambda1,lambda3,K1,K2,UGidx,VGidx): 
    
    n,m                  = Y.shape
    U                    = v[0:n*d].reshape(n,d,order='F')
    V                    = v[n*d:n*d+m*d].reshape(m,d,order='F')
    UG                   = v[n*d+m*d:n*d+m*d+K1*d].reshape(K1,d,order='F')
    VG                   = v[n*d+m*d+K1*d:].reshape(K2,d,order='F')

    UGmatMU              = (UG[UGidx.reshape(-1,),:]) - U
    VGmatMV              = (VG[VGidx.reshape(-1,),:]) - V
    
    X                    = U@(V.T)
    Ygt0                 = Y>0
    YMXgt0               = (Y - X)*Ygt0

    regobj               = (lambda3/2)*(np.sum(U**2) + np.sum(V**2))

    lossobj              = 0
    lossobj              = lossobj + (0.5*(np.sum(YMXgt0**2)))
    lossobj              = lossobj + (lambda1/2)*(np.sum(UGmatMU**2) + np.sum(VGmatMV**2))
    
    dU                   = lambda3*U
    dV                   = lambda3*V
    
    dU                   = dU - YMXgt0@V
    dV                   = dV - (YMXgt0.T)@U

    dU                   = dU - (lambda1*UGmatMU)
    dV                   = dV - (lambda1*VGmatMV)

    ctmp_d               = np.ones(n)                   # convert UGidx to a K1-by-n matrix containing the k1 indicator vectors as row
    ctmp                 = csc_matrix((ctmp_d,(UGidx.reshape(-1,),range(n))),shape=(K1,n))
    dUG                  = lambda1*((ctmp)@UGmatMU)
    
    ctmp_d               = np.ones(m)                   # convert UGidx to a K2-by-m matrix containing the k2 indicator vectors as row
    ctmp                 = csc_matrix((ctmp_d,(VGidx.reshape(-1,),range(m))),shape=(K2,m))
    dVG                  = lambda1*((ctmp)@VGmatMV)
    
    obj                  = regobj + lossobj
   
    grad                 = np.concatenate((dU.flatten('F'),dV.flatten('F'),dUG.flatten('F'),dVG.flatten('F'))).reshape(-1,1)
    
    return obj,grad,lossobj,regobj

In [20]:
def gradUnifiedMMMF(v,c2,tol,maxiter,l,d,Y,lambda1,lambda3,K1,K2,UGIdx,VGIdx):

    n,m                  = Y.shape
    U                    = v[0:n*d].reshape(n,d,order='F')
    V                    = v[n*d:n*d+m*d].reshape(m,d,order='F')
    theta                = v[n*d+m*d:n*d+m*d+n*(l-1)].reshape(n,l-1,order='F')
    UG                   = v[n*d+m*d+n*(l-1):n*d+m*d+n*(l-1)+K1*d].reshape((K1,d), order='F')
    VG                   = v[n*d+m*d+n*(l-1)+K1*d:n*d+m*d+n*(l-1)+K1*d+K2*d].reshape((K2,d), order='F')
    thetaG               = v[n*d+m*d+n*(l-1)+K1*d+K2*d:].reshape((K1,l-1), order='F')
        
    UGmatMU              = (UG[UGIdx.reshape(-1,),:]) - U
    VGmatMV              = (VG[VGIdx.reshape(-1,),:]) - V
    thetaGMtheta         = (thetaG[UGIdx.reshape(-1,),:]) - theta
    
    X                    = U@V.T
    Ygt0                 = Y>0
    BX                   = X*Ygt0
    
    dU                   = lambda3*U
    dV                   = lambda3*V
    dtheta               = np.zeros((n, l-1))
    
    regobj               = (lambda3/2)*(np.sum(U**2) + np.sum(V**2))
    
    lossobj              = 0
    lossobj              = lossobj + (lambda1/2)*(np.sum(UGmatMU**2) + np.sum(VGmatMV**2) + np.sum(thetaGMtheta**2))
    
    for k in range(l-1):
        S                = Ygt0 - 2*(Y>k+1)
        BZ               = (theta[:,k].reshape(-1,1)@(np.ones([1,m])))*S - BX*S
        lossobj          = lossobj + sum(sum(h(BZ)))
        tmp              = hprime(BZ)*S
        dU               = dU - tmp@V
        dV               = dV - tmp.T@U
        dtheta[:,k]      = tmp@np.ones((m,))
        
    dU                   = dU - lambda1*UGmatMU
    dV                   = dV - lambda1*VGmatMV
    dtheta               = dtheta - lambda1*thetaGMtheta
    
    ctmp_d               = np.ones(n)                   # convert UGidx to a K1-by-n matrix containing the k1 indicator vectors as row
    ctmp                 = csc_matrix((ctmp_d,(UGIdx.reshape(-1,),range(n))),shape=(K1,n))
    dUG                  = np.multiply(lambda1,(ctmp@UGmatMU))
    dthetaG              = np.multiply(lambda1,(ctmp@thetaGMtheta))
 
    ctmp_d               = np.ones(m)                   # convert UGidx to a K2-by-m matrix containing the k2 indicator vectors as row
    ctmp                 = csc_matrix((ctmp_d,(VGIdx.reshape(-1,),range(m))),shape=(K2,m))
    dVG                  = np.multiply(lambda1,(ctmp@VGmatMV))
    
    obj                  = regobj + lossobj;            # obj is the objective function that we need to minimize

    grad                 = np.concatenate((dU.flatten('F'),dV.flatten('F'),dtheta.flatten('F'),dUG.flatten('F'),dVG.flatten('F'),dthetaG.flatten('F'))).reshape(-1,1)
    
    return obj,grad,lossobj,regobj

In [21]:
def conjgrad(x0,objGrad,c2,tol,maxiter,l,d,Y,lambda1,lambda3,K1,K2,UGidx,VGidx):
    J                     = []
    temp                  = 0
    nu                    = 0.1
    abstol                = 0                                 # stop if gradient magnitude goes below this
    allowNonDecrease      = 0                                 # don't stop if line search fails to find decrease
    digits                = 12                                # digits of precision to use for objective comparisons
    ogfun                 = objGrad
    ogcalls               = 0
    x                     = x0
    numiter               = 0
    j                     = 0
    alpha                 = 10**(-10)
    ogcalls               = ogcalls + 1
    obj,dx,lossobj,regobj = ogfun(x,c2,tol,maxiter,l,d,Y,lambda1,lambda3,K1,K2,UGidx,VGidx)
    r                     = -dx
    s                     = r
    dirn                  = s
    deltanew              = (r.T)@dirn
    deltazero             = deltanew
    
    while ((numiter<maxiter) and (abs(deltanew)>tol*tol*abs(deltazero)) and (abs(deltanew)>abstol)):
        numiter           = numiter + 1
        j                 = j + 1
        print('\n %.4f' %obj)
        J.append(obj)
        prevobj           = obj

        if (alpha < 10**(-10)):
            alpha         = 10**(-10)

        alpha,obj,dx,ogc  = cgLineSearch(x,obj,dx,dirn,alpha,objGrad,c2,tol,maxiter,l,d,Y,lambda1,lambda3,K1,K2,UGidx,VGidx,lossobj,regobj)
        ogcalls           = ogcalls + ogc
        
        temp              = temp + alpha
        x                 = x + alpha*dirn
        r                 = -dx
        deltaold          = deltanew
        deltamid          = r.T@s
        deltanew          = r.T@r
        beta              = (deltanew - deltamid) / deltaold
        dirn              = r + max(0, beta)*dirn
        if ((deltamid/deltanew >= nu) or (dirn.T@dx >= 0)):
            dirn          = r
            j             = 0
        s                 = r
        print(numiter)
        
    return x,numiter,ogcalls,J

In [22]:
def cgLineSearch(x0,obj0,dx0,direction,alpha,objGrad,c2,tol,maxiter,l,d,Y,lambda1,lambda3,K1,K2,UGidx,VGidx,lobj,robj):
    seciter                   = 5                               # maximum number of quadratic interpolation iterations
    alpha0                    = alpha
    c1                        = 10**(-4)                        # required decrease in objective (relative to gradient)
    digits                    = 12                              # digits of precision to use for objective comparisons
    gamma                     = 10
    ogfun                     = objGrad
    
    if (alpha0 <= 0):                                           # check initial values
        print('alpha0 must be greater than zero')
    
    obj                       = obj0                            # begin line search
    dx                        = dx0
    etazero                   = (dx.T)@direction
    etaprev                   = etazero
    alpha                     = alpha0
    ogcalls                   = 0
    lossobj                   = lobj
    regobj                    = robj
    
    alpha,obj,dx,lossobj,regobj,ogcalls = findNonZeroAlpha(x0,alpha,direction,ogfun,c2,tol,maxiter,l,d,Y,lambda1,lambda3,K1,K2,UGidx,VGidx,ogcalls,obj,dx,lossobj,regobj)
    
    obj,dx,lossobj,regobj     = ogfun(x0+alpha*direction,c2,tol,maxiter,l,d,Y,lambda1,lambda3,K1,K2,UGidx,VGidx)
    ogcalls                   = ogcalls + 1
    
    oldalpha,oldobj,olddx                           = saveAlpha(alpha,obj,dx)
    alpha,obj,dx,lossobj,regobj,ogcalls,doBacktrack = backtrack(x0,alpha,direction,ogfun,c2,tol,maxiter,l,d,Y,lambda1,lambda3,K1,K2,UGidx,VGidx,ogcalls,gamma,obj,dx,lossobj,regobj,digits,obj0,c1,etazero,oldalpha,oldobj,olddx)

    beta                      = alpha
    eta                       = dx.T@direction
    i                         = 0
    
    while((abs(eta)>c2*abs(etazero)) and (i<seciter) and (pround(obj,digits) <= pround(obj0,digits)) and (etaprev!=eta) and (np.sum((x0 + alpha*direction) != x0)>0)):
        beta                  = eta*beta / (etaprev - eta)
        oldalpha,oldobj,olddx = saveAlpha(alpha,obj,dx)
        alpha                 = alpha + beta
        if(alpha<=0):
            alpha             = 1
        etaprev               = eta
        i                     = i + 1
        obj,dx,lossobj,regobj = ogfun(x0+alpha*direction,c2,tol,maxiter,l,d,Y,lambda1,lambda3,K1,K2,UGidx,VGidx)
        ogcalls               = ogcalls + 1
        eta                   = dx.T@direction

    alpha,obj,dx,lossobj,regobj,ogcalls             = findNonZeroAlpha(x0,alpha,direction,ogfun,c2,tol,maxiter,l,d,Y,lambda1,lambda3,K1,K2,UGidx,VGidx,ogcalls,obj,dx,lossobj,regobj)
    alpha,obj,dx,lossobj,regobj,ogcalls,doBacktrack = backtrack(x0,alpha,direction,ogfun,c2,tol,maxiter,l,d,Y,lambda1,lambda3,K1,K2,UGidx,VGidx,ogcalls,gamma,obj,dx,lossobj,regobj,digits,obj0,c1,etazero,oldalpha,oldobj,olddx)
    checkConditions(obj,digits,obj0,etazero,eta,x0,alpha,direction)
    
    return alpha,obj,dx,ogcalls 

In [23]:
def findNonZeroAlpha(x0,alpha,direction,objGrad,c2,tol,maxiter,l,d,Y,lambda1,lambda3,K1,K2,UGidx,VGidx,ogcalls,obj,dx,lossobj,regobj):
    if(np.array_equal((x0+alpha*direction),x0)):                # Make sure alpha isn't smaller than level of precision
        preAlpha              = alpha
        
        while(np.array_equal((x0+alpha*direction),x0)):
            alpha             = alpha*gamma
        
        obj,dx,lossobj,regobj = objGrad(x0+alpha*direction,c2,tol,maxiter,l,d,Y,lambda1,lambda3,K1,K2,UGidx,VGidx)
        ogcalls               = ogcalls + 1
        
    return alpha,obj,dx,lossobj,regobj,ogcalls

In [24]:
def backtrack(x0,alpha,direction,objGrad,c2,tol,maxiter,l,d,Y,lambda1,lambda3,K1,K2,UGidx,VGidx,ogcalls,gamma,obj,dx,lossobj,regobj,digits,obj0,c1,etazero,oldalpha,oldobj,olddx):
    #ensures either (1) Armijo lowex0,alpha,direction,objGrad,c2,tol,maxiter,l,d,Y,lambda1,lambda3,K1,K2,UGidx,VGidx,ogcallsr objective, or (2) infinitesimal alpha
    doBacktrack               = 0
    preAlpha                  = alpha
    while((pround(obj,digits)>pround(obj0 + c1*alpha*etazero, digits)) and (np.sum((x0+alpha*direction)!=x0)>0)):
        if(ogcalls > 1):
            if((oldalpha > alpha/gamma) and (oldobj < (obj0+c1*oldalpha*etazero))):
                alpha,obj,dx  = restoreAlpha(oldalpha,oldobj,olddx)
                doBacktrack   = 1
                break
        alpha                 = alpha/gamma
        obj,dx,lossobj,regobj = objGrad(x0+alpha*direction,c2,tol,maxiter,l,d,Y,lambda1,lambda3,K1,K2,UGidx,VGidx)
        ogcalls               = ogcalls + 1
        doBacktrack           = 1
    return alpha,obj,dx,lossobj,regobj,ogcalls,doBacktrack

In [25]:
def saveAlpha(alpha,obj,dx):
    #make sure current step yields decrease in objective
    oldalpha   = alpha
    oldobj     = obj
    olddx      = dx
    return oldalpha,oldobj,olddx

In [26]:
def restoreAlpha(oldalpha,oldobj,olddx):
    alpha      = oldalpha
    obj        = oldobj
    dx         = olddx
    return alpha,obj,dx

In [27]:
def pround(x,d):
    d          = round(d)
    if (d<1):
        print('Number of digits must be integer d = %.4e \n'%d)
    if(x==0 or math.isnan(x) or math.isinf(x)):
        y      = x   
    else:
        p      = math.floor(math.log10(abs(x)))+1
        factor = 10**(d-p)
        y      = np.round(x*factor)/factor
    return y

In [28]:
def checkConditions(obj,digits,obj0,etazero,eta,x0,alpha,direction):
    if round(obj, digits) >= round(obj0, digits):
        print('Warning: Finished line search without decreasing objective.\nMay have reached limit of precision, or obj/grad code may be broken.\n')
    if etazero == eta:
        print('Warning: Line search yielded no change in directional derivative.\nMay have reached limit of precision.\n')
    if sum(x0 + alpha * direction != x0) == 0:
        print('Warning: Line search yielded no change in position.\nMay have reached limit of precision.\n')

In [29]:
def h(z):
    zin01      = (z>0)^(z>=1)
    zle0       = z<0
    ret        = zin01/2 - zin01*z + zin01*(z**2)/2 + zle0/2 - zle0*z
    return ret  

In [30]:
def hprime(z):
    zin01      = (z>0)^(z>=1)
    zle0       = z<0
    ret        = zin01*z - zin01 - zle0
    return ret

In [31]:
def m3fSoftmax(xy,theta):
    n,m        = xy.shape
    n1,l1      = theta.shape
    if(n!=n1):
        print('sizes of xy and theta don''t match');
    y          = np.ones((n,m))
    for i in range(l1):
        tmp    = ((theta[:,i]).reshape(-1,1))@(np.ones((1,m)))
        tmp    = xy>=tmp
        y      = y + tmp
    return y

## Preprocessing Video Games domain

In [369]:
names = ['user_id', 'game_id', 'rating', 'timestamp']
df    = pd.read_csv('ratings_Video_Games.csv', sep=',', names=names)
df.head()

Unnamed: 0,user_id,game_id,rating,timestamp
0,AB9S9279OZ3QO,0078764343,5.0,1373155200
1,A24SSUT5CSW8BH,0078764343,5.0,1377302400
2,AK3V0HEBJMQ7J,0078764343,4.0,1372896000
3,A10BECPH7W8HM7,043933702X,5.0,1404950400
4,A2PRV9OULX1TWP,043933702X,5.0,1386115200


In [370]:
n_users_video = df.user_id.nunique(dropna = True)
m_items_video = df.game_id.nunique(dropna = True)
print(str(n_users_video) + " " + str(m_items_video))

826767 50210


In [371]:
user_ids_video = df.user_id.unique()
item_ids_video = df.game_id.unique()
print(user_ids_video.shape)
print(item_ids_video.shape)

(826767,)
(50210,)


## Preprocessing Movies and TV domain

In [372]:
names = ['user_id', 'movie_id', 'rating', 'timestamp']
df    = pd.read_csv('ratings_Movies_and_TV.csv', sep=',', names=names)
df.head()

Unnamed: 0,user_id,movie_id,rating,timestamp
0,A3R5OBKS7OM2IR,143502,5.0,1358380800
1,A3R5OBKS7OM2IR,143529,5.0,1380672000
2,AH3QC2PC1VTGP,143561,2.0,1216252800
3,A3LKP6WPMP9UKX,143588,5.0,1236902400
4,AVIY68KEPQ5ZD,143588,5.0,1232236800


In [373]:
n_users_mt = df.user_id.nunique(dropna = True)
m_items_mt = df.movie_id.nunique(dropna = True)
print(str(n_users_mt) + " " + str(m_items_mt)) 

2088620 200941


In [374]:
user_ids_mt = df.user_id.unique()
item_ids_mt = df.movie_id.unique()
print(user_ids_mt.shape)
print(item_ids_mt.shape)

(2088620,)
(200941,)


## Dataset trimming

In [375]:
overlap = 4000

In [376]:
print(user_ids_video.shape)
print(user_ids_mt.shape)

(826767,)
(2088620,)


In [377]:
overlapping_user_ids = np.intersect1d(user_ids_video, user_ids_mt)
print(overlapping_user_ids)
print(overlapping_user_ids.shape)

['A0002090WKEMAO8KOWKM' 'A00230923E4Y7VHWZK0IC' 'A002439424KGHR3LZ1OMZ'
 ... 'AZZW55XIQD8QC' 'AZZY72H9Y2F6O' 'AZZZ9DDUPKNKC']
(155797,)


In [378]:
non_overlapping_user_ids_video = np.array(list(set(user_ids_video.tolist()).difference(set(overlapping_user_ids.tolist()))))
non_overlapping_user_ids_mt = np.array(list(set(user_ids_mt.tolist()).difference(set(overlapping_user_ids.tolist()))))
print(non_overlapping_user_ids_video.shape)
print(non_overlapping_user_ids_mt.shape)

(670970,)
(1932823,)


In [379]:
req_overlap_user_ids = np.random.choice(overlapping_user_ids, size=overlap, replace=False)
print(req_overlap_user_ids.shape)
# test = np.array(list(set(overlapping_user_ids_t.tolist()).difference(set(train.tolist()))))

(4000,)


In [380]:
req_non_overlap_user_ids_video = np.random.choice(non_overlapping_user_ids_video, size= 9000 - overlap, replace=False)
req_non_overlap_user_ids_mt = np.random.choice(non_overlapping_user_ids_mt, size= 7000 - overlap, replace=False)
print(req_non_overlap_user_ids_video.shape)
print(req_non_overlap_user_ids_mt.shape)

(5000,)
(3000,)


In [381]:
new_user_ids_video = np.concatenate((req_overlap_user_ids, req_non_overlap_user_ids_video), axis=None)
new_user_ids_mt = np.concatenate((req_overlap_user_ids, req_non_overlap_user_ids_mt), axis=None)
np.random.shuffle(new_user_ids_video)
np.random.shuffle(new_user_ids_mt)
print(new_user_ids_video.shape)
print(new_user_ids_mt.shape)

(9000,)
(7000,)


In [382]:
# new_user_ids_video = np.concatenate((overlapping_user_ids[:4000], non_overlapping_user_ids_video[:5000]))
# new_user_ids_mt = np.concatenate((overlapping_user_ids[:4000], non_overlapping_user_ids_mt[:3000]))
# np.random.shuffle(new_user_ids_video)
# np.random.shuffle(new_user_ids_mt)
# print(new_user_ids_video.shape)
# print(new_user_ids_mt.shape)

In [383]:
print(new_user_ids_video)
print(new_user_ids_mt)

['A34RVLNPKMIRY8' 'A1N4Z88AOT6AAK' 'ATN552TF5V40Z' ... 'A13263K1E1IEK6'
 'A1PTUQ4UODFH0P' 'AOFTRGUWHOE1W']
['A3V4ZUL92CIRAD' 'A28QAKIOMENN3Z' 'A3D4IVUVH2K955' ... 'A3Q2LGX2WRVY5U'
 'A2EDVSRJZZYWIJ' 'A1X3P210XRJ4PJ']


In [384]:
overlapping_user_ids_t = np.intersect1d(new_user_ids_video, new_user_ids_mt)
print(overlapping_user_ids_t)
print(overlapping_user_ids_t.shape)

['A01007512W8LIXGVKI7HZ' 'A0108605182MOPK0I9YBV' 'A01632683TJ1GCADQ8B7X'
 ... 'AZX60AXHHVO5M' 'AZXHFBHEVY5PT' 'AZZDO0J1J2AZW']
(4000,)


## Creating rating matrix for Video Games domain

In [385]:
names = ['user_id', 'game_id', 'rating', 'timestamp']
df    = pd.read_csv('ratings_Video_Games.csv', sep=',', names=names)
df.head()

Unnamed: 0,user_id,game_id,rating,timestamp
0,AB9S9279OZ3QO,0078764343,5.0,1373155200
1,A24SSUT5CSW8BH,0078764343,5.0,1377302400
2,AK3V0HEBJMQ7J,0078764343,4.0,1372896000
3,A10BECPH7W8HM7,043933702X,5.0,1404950400
4,A2PRV9OULX1TWP,043933702X,5.0,1386115200


In [386]:
df = df.loc[df['user_id'].isin(new_user_ids_video.tolist())] 

In [387]:
df.shape

(17605, 4)

In [388]:
present_rating = df.rating.unique()
print(present_rating)

[1. 5. 2. 3. 4.]


In [389]:
new_item_ids_video = df.game_id.unique()
print(new_item_ids_video.shape)
print(new_item_ids_video)

(8527,)
['0545115507' '0700026649' '1613170785' ... 'B00KIY806Y' 'B00KTEQA88'
 'B00LA4WVC0']


In [390]:
print(item_ids_video.shape)

(50210,)


In [391]:
item_ids_video_rated_unrated = new_item_ids_video
np.random.shuffle(item_ids_video_rated_unrated)
print(item_ids_video_rated_unrated.shape)

(8527,)


In [392]:
rating_matrix_video = np.zeros([9000, item_ids_video_rated_unrated.shape[0]], dtype='float32')
print(rating_matrix_video.shape)

(9000, 8527)


In [393]:
for ind in df.index:
    if df['user_id'][ind] in new_user_ids_video:
        rating_matrix_video[np.where(new_user_ids_video == df['user_id'][ind])[0][0]][np.where(item_ids_video_rated_unrated == df['game_id'][ind])[0][0]] = df['rating'][ind]
        print(str(df['user_id'][ind]) + " " + str(df['game_id'][ind]) + " " + str(df['rating'][ind]))

A214Z566V6QEDF 0545115507 1.0
A3P2DVO3OG6MIN 0700026649 5.0
A3TGHJJ3ZE8KCH 1613170785 2.0
AU1G7GPS8N5LV 3866811659 2.0
A1OZ5EU2U9Y2KD 5293009893 3.0
A1AM8DSLVMIKQ6 6050036071 5.0
APIEMCSJGVTB5 7293000936 4.0
AOYEO5AIEZ7IF 7293000936 5.0
A12USB1U8UPSTH 7540727705 4.0
A3TQGJ63SWDRFS 7543450933 4.0
A1Q2S3I527FTWU 8565000168 1.0
AX8QJLYVA34QK 907843905X 4.0
A1PKIY1XCNVWD6 9620077660 4.0
AM54IIJJVLW0C 962012300X 3.0
A17ND914NOQ1F2 9625990674 4.0
A1UDF26YC6NA09 9625990674 2.0
A12B10U4DLCBL4 9861019731 5.0
A3VSDZVWONN4SQ 9862561165 5.0
A13ZDE5MG0ZMK6 9882106463 5.0
A34G8XTL4KAW7D 9882106463 5.0
A1Z48VVE1TPCSQ 9882155456 5.0
AW65BJ5DH1DBZ B000006OVF 4.0
A1I6K11M06BOI6 B000006OVF 4.0
A2KVUYPLEW4LXT B000006P0M 5.0
A10FQYI4KRC3SD B000006RGQ 4.0
A2JVW40PW6D21H B000006RGS 5.0
AF83ROU09VDFI B000007VDN 5.0
A1OLFWUX6WGWXR B00000DMA8 5.0
A38K6QXAW5BIW2 B00000DMAA 5.0
A37HO95YNUUR8P B00000DMAA 5.0
ANAYSRE3LX8GZ B00000DMAC 5.0
A38VACRQKJR3BJ B00000DMAD 5.0
A1878O39ZKG24F B00000DMAG 5.0
AA50ZQDY2TPK5 B000

ACAWLJ657YB0B B00004VUGN 2.0
A2PL65YZRL1VDH B00004VXAK 5.0
A2S2I6LI09BACO B00004VXAN 3.0
AHQ9GH662QRN B00004VXAN 5.0
A2S2I6LI09BACO B00004VXAR 4.0
A387GJKV8S6BHV B00004VXAV 5.0
AHHYBKMR5E8MP B00004VXAX 5.0
A31KL3P9AH2MTJ B00004VXAZ 5.0
A2M1DCBCWJIIHY B00004W419 3.0
A2SBGTE825H1N3 B00004W4QG 3.0
AFR5VAMEUGJD7 B00004W4QJ 4.0
A3Q701355YLUNQ B00004W4QJ 5.0
A2HCGF00ITJU7H B00004W4QO 4.0
AGY3W8X0O9XW8 B00004W4R7 2.0
A39F387VY8JBMF B00004W4S3 5.0
A294DWRBQUOIK4 B00004W4S4 3.0
A2EQNDB6SE95UH B00004W4S4 5.0
A294DWRBQUOIK4 B00004W4VH 4.0
A294DWRBQUOIK4 B00004W4WH 5.0
A31KL3P9AH2MTJ B00004W4WH 5.0
A3PON49XIBQ0RE B00004W4WI 1.0
A294DWRBQUOIK4 B00004W4WJ 4.0
A3I3Z7158E2EL3 B00004WEST 5.0
A27X7S09612AZO B00004WFRN 4.0
A38FCCQ8NL9ZHK B00004WFVZ 5.0
A2ZB9KCFI0QHYZ B00004WFVZ 2.0
A01007512W8LIXGVKI7HZ B00004WG0V 4.0
A2M1DCBCWJIIHY B00004WGRX 4.0
A3SOVG3GIJ357E B00004WGVY 4.0
AT2XXZAB1EZU9 B00004WGW1 5.0
A1RVGM0ZY3TBF4 B00004WGW1 5.0
ADJKMC7AO7YN3 B00004WHW7 4.0
A2M1DCBCWJIIHY B00004WHWF 3.0
A384U2UUPAK

AQI2CW6UKF1ES B00006599U 5.0
A2W82PIX8AYM6T B00006599Y 5.0
A2M1DCBCWJIIHY B00006599Y 3.0
A1ZBXC4PYHGFOQ B000065DGH 3.0
A2D8JA124UE4MV B000065SQJ 2.0
A35XPO303W54V0 B0000663TT 4.0
A2M1DCBCWJIIHY B0000663TT 5.0
A20BVHEDWL7DVY B0000663U5 4.0
A2NDXFS5X3I0PK B0000664JB 5.0
A3HUXF1DMI4NTU B0000664JE 4.0
A1AQZ43RV6LVUG B0000664JE 3.0
A1ZENKTPH5PHSY B0000664JE 5.0
ANMQ93SN0VCXW B0000664JF 5.0
AQI2CW6UKF1ES B0000664JF 4.0
A1QSYTWN5GWSFG B0000664JH 4.0
A2M1DCBCWJIIHY B00006663T 1.0
ACAWLJ657YB0B B00006663T 4.0
A62973MKZ0L2L B00006663U 5.0
A3LKFC1F18JNRN B00006663Z 5.0
A2F7RX5AP0MJCM B00006663Z 1.0
A1D2JLLWMSTDXC B000066641 4.0
AGJFUCQKJHCGR B000066641 5.0
A3MVT8U2F5BTAW B000066641 2.0
A2L49JFKLBBY5M B000066BZ1 5.0
ATVDWH2EQ5A3S B000066BZ2 5.0
A2KVUYPLEW4LXT B000066EXR 4.0
A3PFKE8EFXAXQE B000066JRG 5.0
AB844HS2ODIQD B000066JRH 5.0
ANAYSRE3LX8GZ B000066JRI 1.0
A1WN4WCKW6LUPH B000066JRN 5.0
A31RM5QU797HPJ B000066JRN 5.0
A3O1QMC0DZ9PZV B000066JRN 5.0
AZVCNOOQK36DH B000066JRN 4.0
AB844HS2ODIQD B00006

A3HVK41RUNXKY3 B001E2D44W 1.0
A3RC0BEI7S5KZT B001E2D44W 1.0
A2WFNZWC8O6O1J B001E2EAMW 4.0
A26LIRL6557C8C B001E2I4H4 4.0
A1V68CETC0BXV7 B001E3ARJ6 5.0
A2FAV61IJB1RWR B001E3C9KQ 5.0
ATT9EJ4WWARBE B001E4VL36 5.0
A7QOOJUE2THEC B001E4ZBHI 4.0
A2CR2DM01RXKS3 B001E65P72 5.0
AJV7C34D45AW8 B001E6DG6Y 5.0
A3D5KRZ2XL7YUH B001E6DG6Y 4.0
A3QX3TEN2FS31K B001E6DG6Y 4.0
A2CKS6KT0HQ9ZU B001E7MMNG 5.0
A22B3MFHI1E37F B001E7MMNQ 3.0
A2UX7APFRNL855 B001E8QA2E 1.0
A34527CGD8W6Z8 B001E8VB3C 5.0
A3DGEGMISMC54T B001E8VB3C 5.0
AEJZHD61JTJKK B001E8VB3C 5.0
A1ILEVGCZXRTC7 B001E8VB3C 5.0
A2CIMOITO2S0ZY B001E8VB3C 4.0
A1BB4H9G9APMT3 B001E8VB6O 4.0
A5RVAK4Q0QQ7T B001E8VB6O 5.0
A2PWNZ1NYBOK2L B001E8VB6O 5.0
A369KP0JV77JYT B001E8VB6O 5.0
A3KT0MYOJWU7T5 B001E8VB6O 5.0
AJIDIVBILJKO0 B001E8VB6O 5.0
A1TW9ZGRDQQZ2Y B001E8VB6O 5.0
A33U03NUTRU8YA B001E8VB6O 5.0
A2Q2A6JKY95RTP B001E8VB6O 5.0
A3KKM0T1KY42HA B001E8VB6O 5.0
A19WDL2SMEC7WE B001E93IWI 5.0
AC6MG436BWSED B001E93IWI 3.0
A3RKTKNEUFO0Z4 B001E93IWI 1.0
AQFS172RBGO58 B00

A1M9AF9G9WPZFH B0054IUWSI 4.0
AGH732KV3FJMS B0054IUWSI 3.0
A2MNNS4HEXONIN B0054IUWSI 5.0
A1DT0TZWBJ6IL2 B0054IUY22 5.0
A2UDL8L618YHEX B0054IUY22 5.0
A2MICBMC3M4C51 B0054IUY22 5.0
ATT9EJ4WWARBE B0054IV0A2 5.0
A1WVVLOLJJA6U6 B0054IV0A2 3.0
A64372CPOUBKP B0054IV0A2 3.0
A2SLA60T78NQY2 B0054IV0Z2 5.0
A1PQ72TSE6G8DJ B0054JGGFK 4.0
A82TDLVKJDGBW B0054JGGGY 5.0
A3ISDNYPWGJ4LP B0054JGGGY 1.0
A3B3FJ4C3WRPEL B0054JGGGY 1.0
A1KMIQAIYPAELY B0054JGGGY 4.0
A2J1M0SR5I7HVS B0054SFL34 4.0
A3C2GC1LN9GDVA B0054SFL34 1.0
A1WHIGJ7HJCJTT B0054SFL34 3.0
A34RXPG48LYQZW B00553BI12 5.0
A1EGMC4ZQ8PF85 B0055NBPYM 5.0
A29IOE7AOQ3LB6 B0055NDBEO 5.0
A2JLR4Z0GVTFHO B0055SWM08 4.0
A2ID1DYP5YWAB5 B0055SWM08 5.0
A2ZG8OIEB6W3CF B0055SWM08 5.0
A1SBXBXI462RW8 B0055SWM08 4.0
A5MKMASC2BUUX B0055SWM08 5.0
A2FHF76YHKOTTI B0055SWM08 5.0
AXCWWAYLINBSX B0055SWM08 5.0
AOE25YXY2DXP9 B0055SWM08 5.0
A25BOQ72S1FGU6 B0055SWM08 5.0
A3LQ9ETMFKW3DW B0055SWM08 4.0
AV5TEBBFTWFVI B0055SWM08 5.0
AGHTHICXIQTE3 B0055SWM08 4.0
A1D270I7U2Z3RL B005

In [394]:
rating_matrix_video

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [395]:
unique, frequency = np.unique(rating_matrix_video, 
                              return_counts = True)
print("Unique Values:", 
      unique)
  
# print frequency array
print("Frequency Values:",
      frequency)

Unique Values: [0. 1. 2. 3. 4. 5.]
Frequency Values: [76725395     1804     1050     1803     3637     9311]


In [396]:
K1                       = 10                              #Number of Clusters in User Space
K2                       = 22                              #Number of Clusters in Item Space

d                        = 100     
#Spectral Clustering
simU                     = userSim(rating_matrix_video)
simU                     = (simU + simU.T)/2
UGidx_video              = spectralClu(simU,K1).reshape(-1,1)

simI                     = userSim(rating_matrix_video.T)
simI                     = (simI + simI.T)/2
VGidx_video              = spectralClu(simI,K2).reshape(-1,1)

In userSim
In userSim


## Creating rating matrix for movies and TV domain

In [397]:
names = ['user_id', 'movie_id', 'rating', 'timestamp']
df    = pd.read_csv('ratings_Movies_and_TV.csv', sep=',', names=names)
df.head()

Unnamed: 0,user_id,movie_id,rating,timestamp
0,A3R5OBKS7OM2IR,143502,5.0,1358380800
1,A3R5OBKS7OM2IR,143529,5.0,1380672000
2,AH3QC2PC1VTGP,143561,2.0,1216252800
3,A3LKP6WPMP9UKX,143588,5.0,1236902400
4,AVIY68KEPQ5ZD,143588,5.0,1232236800


In [398]:
df = df.loc[df['user_id'].isin(new_user_ids_mt.tolist())] 
df.shape

(26259, 4)

In [399]:
new_item_ids_mt = df.movie_id.unique()
print(new_item_ids_mt.shape)
print(new_item_ids_mt)

(13925,)
['0005019281' '0005119367' '0307142469' ... 'B00L8QP082' 'B00LCWVU3Q'
 'B00LPJ3J6W']


In [400]:
print(item_ids_mt.shape)

(200941,)


In [401]:
item_ids_mt_rated_unrated = new_item_ids_mt
np.random.shuffle(item_ids_mt_rated_unrated)
print(item_ids_mt_rated_unrated.shape)

(13925,)


In [402]:
rating_matrix_mt = np.zeros([7000, item_ids_mt_rated_unrated.shape[0]], dtype='float32')
print(rating_matrix_mt.shape)

(7000, 13925)


In [403]:
for ind in df.index:
    if df['user_id'][ind] in new_user_ids_mt:
        rating_matrix_mt[np.where(new_user_ids_mt == df['user_id'][ind])[0][0]][np.where(item_ids_mt_rated_unrated == df['movie_id'][ind])[0][0]] = df['rating'][ind]
        print(str(df['user_id'][ind]) + " " + str(df['movie_id'][ind]) + " " + str(df['rating'][ind]))

A3M1I3T6RCU13B 0005019281 5.0
A2BU6DHJ26RCZO 0005019281 2.0
A14U42QUWC9HBR 0005119367 5.0
A2GPA8G4EJX2N5 0307142469 5.0
AKUOLV49ZUIIJ 0307142485 5.0
AFEN54UGJQOK9 0307142493 5.0
AOKE2TB7YKUZ9 0307514161 3.0
A3UKCD9T4SNC2W 0307732207 4.0
A1AHII2R0R6FAE 0310263662 1.0
A2BXVL1JW115LS 0310263662 2.0
A1B8BEPX1UJ8I4 0310263662 5.0
A3T2JX1MWYOCSE 0310263662 5.0
A352H0WBVBY96P 0310263662 5.0
A1JOTUT2XOUZFG 0310263662 5.0
A3QBKYANO0T7O0 0310263662 2.0
A5I8OWSLWM1MF 0310263662 5.0
A38VACRQKJR3BJ 0310263662 1.0
A1JS0MHHN5KAAE 0310263662 4.0
A2RKI7RPP5CJMG 0310263662 5.0
A1TW9ZGRDQQZ2Y 0310263662 5.0
A39W3263A9HCMN 0310263662 5.0
A3QSZZ60QA25RN 0310263662 5.0
A3KKM0T1KY42HA 0310263662 5.0
A3KJ6JAZPH382D 0310263662 5.0
A38N7SFDOB3MR3 0310263662 5.0
A2ZTI8SSLGH4YC 0310274281 5.0
A39UJVRBHH1GGP 0310274281 5.0
A4HM90ECX054J 0310329582 5.0
A17IIHVVWJYZCQ 0310329922 5.0
A1E319EHBAD5VK 0328048216 5.0
A2V9LOX6TGDVKJ 0510539610 2.0
A1ZH8D7ZPHMOQY 0711995958 3.0
A3719QQO6L6K7D 0718000315 2.0
AV6C57XAL62LM 0

A2PDB6WQ5G1RBJ 078060718X 3.0
AZSN1TO0JI87B 0780607287 5.0
A31RM5QU797HPJ 0780607287 5.0
A3RSP3FDG09K5E 0780607287 5.0
A12G4VIVXEQCIU 0780607287 4.0
A25OJH02IZ86EP 0780607287 4.0
ANAYSRE3LX8GZ 0780607287 4.0
A2YVWIU1B8HJ8 0780608372 5.0
A3196EGU1SFZSD 0780608372 5.0
AR5H7GBUVLRP 0780608372 4.0
A29IOE7AOQ3LB6 0780613856 5.0
AZSN1TO0JI87B 0780616561 4.0
A4GSQJBIIWK5I 0780616561 1.0
A350V986CU399G 0780616561 4.0
APZXV0NV2ARG4 0780618068 5.0
A3F8O237T0I19F 0780618068 5.0
A3AKPY4LXIC24O 0780618548 5.0
A1Q15TP2FXYYMF 0780618556 5.0
A2494DDCD38EI8 0780618831 5.0
AENS008HCBVC5 0780618831 5.0
A2GANR9I6XHTU9 0780619250 3.0
A2VMB0HFQLVJBP 0780619250 4.0
A2XVEYNWYI2PFM 0780619331 5.0
AZSN1TO0JI87B 0780619412 5.0
A1TW9ZGRDQQZ2Y 0780619412 5.0
A2XOM67KTUXLVZ 0780619412 5.0
A1Q15TP2FXYYMF 0780619412 5.0
A8SCX6VUTE05H 0780619439 5.0
A39W3263A9HCMN 0780619552 5.0
A25OJH02IZ86EP 0780619609 3.0
A2GNWRH89M5AJJ 0780619609 2.0
A30E1FEQ51TEAC 0780619609 5.0
A2UEXA5EDWF0F4 0780619765 5.0
A31RM5QU797HPJ 078062

A36L3NRW858ROT 6301971418 4.0
A1MMCJUZQDZKD4 6301971531 5.0
A9I16R6CYV9WL 6301971752 5.0
ANAYSRE3LX8GZ 6301971752 3.0
A276M3NSIY9B82 6301972023 5.0
AEOHUVNAVWK96 6301972066 5.0
A3JV3WNI21NBCT 6301972066 5.0
A39M4I385RT11U 6301972171 5.0
A2TNIGM5OHGYUV 6301972171 5.0
A15WKAFW528U6 6301972236 5.0
A1ZFGAQPLJJN3W 6301972279 5.0
A2M2O1DI1SCQDI 6301972279 5.0
A18TV0OSYWHHLU 6301972279 5.0
AT3X92XOKAH1A 6301972309 3.0
A7Y6AVS576M03 6301972341 4.0
A7Y6AVS576M03 6301972376 5.0
A17KWX1M2SIA89 6301972856 3.0
ANAYSRE3LX8GZ 6301973038 2.0
A3EMGD8RAEOK64 6301973089 5.0
A2GANR9I6XHTU9 6301973232 3.0
A32WLK92G1HV9E 6301973232 5.0
A4T5G02Z1XNBF 6301973232 5.0
A17KWX1M2SIA89 6301973232 5.0
AHAI85T5C2DH3 6301973259 5.0
A7Y6AVS576M03 6301973321 5.0
A2TU3RO6244TZ2 6301973321 5.0
A20Y1TF5OUJY4H 6301973461 1.0
A7QBVQ0GHAW9R 6301973461 4.0
A7Y6AVS576M03 6301976029 5.0
A3Q04FALF4MWHL 6301976061 5.0
A25CE0OC9CLPAN 6301976061 5.0
A2H6A61PV9M88M 6301976061 5.0
AVFKGZ9BB683S 6301976061 1.0
A3PICTPGINN06J 630197606

A31MNUXFSE1WR0 6303637574 5.0
A3GUNXVYK34085 6303637582 3.0
A50W7ZVTG606 6303646417 5.0
A2SBQX9ULY1IJJ 6303646689 5.0
A30H2335OM7RD6 6303651453 4.0
A2DGJE31O6CZOT 6303651453 4.0
A7Y6AVS576M03 6303651453 5.0
A39W3263A9HCMN 6303651453 5.0
A2DGJE31O6CZOT 630365147X 5.0
A7Y6AVS576M03 630365147X 5.0
AGEEDV3ECZUGX 630365147X 5.0
A2CIMOITO2S0ZY 630365147X 5.0
A30J7S95AKI14B 630365147X 5.0
A2DGJE31O6CZOT 6303651488 3.0
A7Y6AVS576M03 6303651488 5.0
A39W3263A9HCMN 6303651488 4.0
AGJFUCQKJHCGR 6303652964 4.0
A7Y6AVS576M03 6303654274 5.0
A3GLCHI2I1RGA9 630365455X 5.0
ANAYSRE3LX8GZ 6303675026 4.0
A1XFEKADFEJSNW 6303675212 5.0
A1R7W6V7FA3EE1 6303675212 5.0
A315C44RFV23BC 6303675212 5.0
ANSYEXFUIVE7C 6303686745 5.0
AAMI6DR8HLVSN 6303686788 5.0
AFEN54UGJQOK9 630368680X 5.0
A7Y6AVS576M03 6303695833 1.0
A22NGL5ZDF30ZO 6303696813 4.0
A2RKI7RPP5CJMG 6303696813 4.0
A34YMZCIWIIVT1 6303696813 4.0
A2C9NVMFR68XY 6303696813 5.0
A2RKI7RPP5CJMG 6303697224 4.0
A1GMSOCYFFDBTA 6303697224 5.0
A2NN53YKIU5T4H 630382090

A3O65HTZRA9HX5 B0002S64PA 4.0
A2UC0W75FBB92X B0002S64SC 5.0
AP6W290VGDTUB B0002S64Z0 3.0
ANAYSRE3LX8GZ B0002S64Z0 5.0
A21Q8CI289L6OD B0002S65X6 5.0
ANHPSQDMW3U3V B0002S94C0 5.0
ANAYSRE3LX8GZ B0002S94GQ 3.0
A1Q15TP2FXYYMF B0002SDY1M 5.0
A230OQI33XFCVB B0002SPQ1S 4.0
A12KQPCCWAPGGB B0002SQ07M 5.0
A3210S68ALISQ5 B0002T2QGA 5.0
A2HUKYAP9W5PAJ B0002T2QRY 5.0
A1CD3QE6MASNHH B0002T8YKM 5.0
A2MWY8ZQ921TDF B0002TSZME 4.0
A24PY5W5LCR6MX B0002TSZME 5.0
A36L3NRW858ROT B0002TSZNS 5.0
A3R999W3U49EHU B0002TT0A0 5.0
A25XPHG59Y00V B0002TT0JG 5.0
A387GJKV8S6BHV B0002TV2WO 4.0
A10HHM2684NZD2 B0002UB2YQ 2.0
A1QU79SIZ2KSDK B0002UE1WQ 3.0
A22WHCWAJD0LB0 B0002UGVHO 1.0
A25ROD8FHUUZ56 B0002V32NO 2.0
A270LRFBXFDD0T B0002V7KV4 5.0
ATT9EJ4WWARBE B0002V7NZM 5.0
A36W5RAALXS1CS B0002V7OEM 4.0
A10HHM2684NZD2 B0002V7OJC 1.0
A23Q35UIKW3ZK9 B0002V7SO8 1.0
A39W3263A9HCMN B0002V7TI8 2.0
A4GSQJBIIWK5I B0002V7TZQ 5.0
A36JHVOTXDDZKZ B0002V7U0A 5.0
ANAYSRE3LX8GZ B0002V7U1Y 3.0
ANAYSRE3LX8GZ B0002VAZ4S 2.0
A35XFVLGEUNQAH B000

A350V986CU399G B0013D8LA4 4.0
A3KKM0T1KY42HA B0013D8LA4 3.0
ATAIWJS7UAV4R B0013D8LA4 2.0
A4GSQJBIIWK5I B0013D8LLS 1.0
A2RKI7RPP5CJMG B0013D8LMW 3.0
ANAYSRE3LX8GZ B0013D8LMW 3.0
A1YMJJE6751QPR B0013D8M3U 5.0
AI1RB7QWYPMG1 B0013D8M44 5.0
A2BHUR27H5QSY0 B0013D8M44 5.0
A2TNWXEKBUEECO B0013DZNAA 5.0
A2LB4Z0GCMDNXX B0013DZNAA 5.0
A276M3NSIY9B82 B0013ERFFU 3.0
A1484U2Y4YFX2R B0013ERFFU 3.0
A3KJ6JAZPH382D B0013ERFFU 2.0
A2RKI7RPP5CJMG B0013ERFGE 4.0
A276M3NSIY9B82 B0013F2ESM 3.0
A2WVVMOIWTU778 B0013FGDGQ 3.0
A1ABP9Q8FUCGLR B0013FSL0C 5.0
A2RKI7RPP5CJMG B0013FZUQA 3.0
A1TW9ZGRDQQZ2Y B0013FZUQA 4.0
A3KKM0T1KY42HA B0013FZUQA 3.0
AFNG8O2DXRCUV B0013FZUQK 4.0
AL5TQX7VV6BJ5 B0013FZUQK 4.0
A1TW9ZGRDQQZ2Y B0013FZUQK 3.0
A1IW2WFTB6JR7M B0013FZUQK 4.0
A7Y6AVS576M03 B0013GRX5A 4.0
A2AYEHC9KUBWWG B0013GS3WW 3.0
A33775AIB1A664 B0013GS3WW 4.0
A16E3PV9AGS8JJ B0013GS3WW 4.0
A3FFG1UJQ6F11Y B0013H49NS 5.0
A3KJ6JAZPH382D B0013HL6ES 4.0
ANAYSRE3LX8GZ B0013J30YU 4.0
A39W3263A9HCMN B0013J30YU 1.0
A19MTY5ST6Z2Y9 B00

A2FCQBUFD7BMTK B009934S5M 4.0
A7Y6AVS576M03 B009934S5M 5.0
A1ULOEMZ3FA0C6 B009934S5M 4.0
A38RLBIN8LASPE B009934S5M 5.0
A1LKQT9TJLU1OA B009934S5M 5.0
A1I326CYVEZDNH B009934S5M 5.0
A2RKI7RPP5CJMG B009934S5M 4.0
A1PHI3A5MHNEWL B009934S5M 5.0
A3KEDSPZ1SQ1LG B009934S5M 5.0
A16PLJVBJMPRFX B009934S5M 4.0
A2HVY3YDCM29ME B009934S5M 5.0
A1JPESBZ0FEXJ9 B009934S5M 2.0
AR5UEO0OIYPO1 B009934S5M 5.0
A36L3NRW858ROT B009934S5M 5.0
A136KGJPEYV42D B009934S5M 2.0
APMW2JWO9D4A7 B009934S5M 5.0
A1TD4H3SD2FCAI B009934S5M 5.0
A3O2UXL1MGN6F8 B009934S5M 5.0
A1FGGWG10EQT1A B009934S5M 5.0
AQ147VBI0U4TH B009934S5M 1.0
A1MWOAEA9MJJSD B009934S5M 5.0
A362GXHYJEO0LZ B009934S5M 5.0
A3PIHIF2E6XP7N B009934S5M 4.0
AIVLDR8SIVNSC B009A87WU4 5.0
A7Y6AVS576M03 B009A8825S 5.0
AG1MDK0TA0GUM B009AF5OY8 5.0
A25X3M600YEBBJ B009AF5OY8 5.0
A298LLGHN2WXB9 B009AF5OY8 5.0
A1GMSOCYFFDBTA B009AJB0YM 4.0
A3BR26ITFVPYTC B009ALC4G8 5.0
A1LA42DKBMZV4Y B009AMAGQM 2.0
A2L5DEBYBZIVKU B009AMAGQM 3.0
A387DT0NOH41V4 B009AMAGQM 5.0
A1MTSZTHVZIMY1 B0

In [404]:
unique, frequency = np.unique(rating_matrix_mt, 
                              return_counts = True)
print("Unique Values:", 
      unique)
  
# print frequency array
print("Frequency Values:",
      frequency)

Unique Values: [0. 1. 2. 3. 4. 5.]
Frequency Values: [97448741     1829     1477     2814     5442    14697]


In [405]:
numpy.savetxt("rating_matrix_video_4k.csv", rating_matrix_video, delimiter = ",")

In [406]:
temp = pd.DataFrame(new_user_ids_video)
temp.to_csv("new_user_ids_video_4k.csv")

In [407]:
temp = pd.DataFrame(item_ids_video_rated_unrated)
temp.to_csv("item_ids_video_4k_rated_unrated.csv")

In [408]:
numpy.savetxt("rating_matrix_mt_4k.csv", rating_matrix_mt, delimiter = ",")

In [409]:
temp = pd.DataFrame(new_user_ids_mt)
temp.to_csv("new_user_ids_mt_4k.csv")

In [410]:
temp = pd.DataFrame(item_ids_mt_rated_unrated)
temp.to_csv("item_ids_mt_4k_rated_unrated.csv")

## Removing overlapping test user_ids from target domain

In [321]:
# rating_matrix_mt = np.loadtxt("rating_matrix_mt_2k.csv", delimiter=",")

In [323]:
# from pandas import read_csv

In [324]:
# temp_d = read_csv('new_user_ids_mt_2k.csv')
# temp_np = temp_d.to_numpy()
# temp_np = np.delete(temp_np, 0, 1)
# new_user_ids_mt = temp_np.reshape(temp_np.shape[0],)

In [325]:
# temp_d = read_csv('item_ids_mt_2k_rated_unrated.csv')
# temp_np = temp_d.to_numpy()
# temp_np = np.delete(temp_np, 0, 1)
# item_ids_mt_rated_unrated = temp_np.reshape(temp_np.shape[0],)

In [411]:
d                        = 100 

In [412]:
overlapping_user_ids_t = np.intersect1d(new_user_ids_video, new_user_ids_mt)
print(overlapping_user_ids_t)
print(overlapping_user_ids_t.shape)

['A01007512W8LIXGVKI7HZ' 'A0108605182MOPK0I9YBV' 'A01632683TJ1GCADQ8B7X'
 ... 'AZX60AXHHVO5M' 'AZXHFBHEVY5PT' 'AZZDO0J1J2AZW']
(4000,)


In [413]:
#Change as per Overlap count
#train = 2500 when overlap = 3000
#train = 800 when overlap = 1000
#train = 5000 when overlap = 6000
train = np.random.choice(overlapping_user_ids_t, size=math.ceil(0.8*overlap), replace=False)
test = np.array(list(set(overlapping_user_ids_t.tolist()).difference(set(train.tolist()))))

In [414]:
X_train = np.empty((0,100))
Y_train = np.empty((0,100))
X_test = np.empty((0,100))
Y_test = []

In [415]:
for user_id in test:
    ind1 = np.where(new_user_ids_mt == user_id)
    Y_test.append(rating_matrix_mt[ind1][0])
    new_user_ids_mt =np.delete(new_user_ids_mt, ind1, 0)
    rating_matrix_mt = np.delete(rating_matrix_mt, ind1, 0)

In [416]:
Y_test = np.array(Y_test)
print(Y_test.shape)

(800, 13925)


In [417]:
print(new_user_ids_mt.shape)
print(rating_matrix_mt.shape)

(6200,)
(6200, 13925)


In [418]:
K1                       = 10                               #Number of Clusters in User Space
K2                       = 100                             #Number of Clusters in Item Space

d                        = 100    
#Spectral Clustering
simU                     = userSim(rating_matrix_mt)
simU                     = (simU + simU.T)/2

UGidx_mt                    = spectralClu(simU,K1).reshape(-1,1)

simI                     = userSim(rating_matrix_mt.T)
simI                     = (simI + simI.T)/2

VGidx_mt                    = spectralClu(simI,K2).reshape(-1,1)

In userSim
In userSim


  B1               = nominator/denominator


## GRS for Video Games domain

In [262]:
epochs                   = 3
tstPer                   = 30                               # Testing Percentage
NoTstIFromG              = 50
l                        = 5                                # Rating Level

K1                       = 10                              #Number of Clusters in User Space
K2                       = 22                              #Number of Clusters in Item Space

d                        = 100                              # latent space size
minUserPerForSel         = 20                               #Item will be select for test only if minUserPerForSel percentage of users in a group has rated
k                        = 40                               # eval@k: precision@k, recall@k
cutoff                   = 3                                # Relevant > cutoff

lambda1UnifiedLS         = 1
lambda3UnifiedLS         = 1

c2                       = 10**-2
tol                      = 10**-3;
maxiter                  = 500;

In [263]:
n, m = rating_matrix_video.shape
print(n, m)

9000 7265


In [264]:
#Spectral Clustering
simU                     = userSim(rating_matrix_video)
simU                     = (simU + simU.T)/2
UGidx_video              = spectralClu(simU,K1).reshape(-1,1)

simI                     = userSim(rating_matrix_video.T)
simI                     = (simI + simI.T)/2
VGidx_video              = spectralClu(simI,K2).reshape(-1,1)

In userSim
In userSim


In [334]:
print(UGidx_video)
print(UGidx_video.shape)

[[1]
 [1]
 [0]
 ...
 [0]
 [0]
 [0]]
(9000, 1)


In [335]:
print(VGidx_video)
print(VGidx_video.shape)

[[3]
 [1]
 [9]
 ...
 [1]
 [3]
 [1]]
(7500, 1)


In [336]:
unique, frequency = np.unique(UGidx_video, 
                              return_counts = True)
print("Unique Values:", 
      unique)
  
# print frequency array
print("Frequency Values:",
      frequency)

print()

unique, frequency = np.unique(VGidx_video, 
                              return_counts = True)
print("Unique Values:", 
      unique)
  
# print frequency array
print("Frequency Values:",
      frequency)

Unique Values: [0 1 2 3 4 5 6 7 8 9]
Frequency Values: [5270 1123  531  486  269  247  270  258  266  280]

Unique Values: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21]
Frequency Values: [ 125 4058   95  611  452   88   98  109   80   76  103  153  434  167
   88   44  136  121  130  129  110   93]


### Recommendation

In [None]:
# np.random.seed(0)
vini = np.random.randn((n*d + m*d + K1*d + K2*d), 1)

In [None]:
# Group Recommendation: Unified LS
objGrad             = gradUnifiedLS
v,numiter,ogcalls,J = conjgrad(vini,objGrad,c2,tol,maxiter,l,d,rating_matrix_video,lambda1UnifiedLS,lambda3UnifiedLS,K1,K2,UGidx_video,VGidx_video)
U_video                   = v[0:n*d].reshape(n,d,order='F')
V_video                   = v[n*d:n*d+m*d].reshape(m,d,order='F')
UG_video                  = v[n*d+m*d:n*d+m*d+K1*d].reshape(K1,d,order='F')
X                   = U_video@V_video.T
X[X<=1]             = 1
X[X>=5]             = 5
XPerA               = aggregratePrediction(X,UGidx_video)
XPerAMat            = XPerA[UGidx_video.reshape(-1,),:]

XPer_Grp                    = UG_video@V_video.T
XPer_GrpMat                 = XPer_Grp[UGidx_video.reshape(-1,),:]
XPer_GrpMat[XPer_GrpMat<=1] = 1
XPer_GrpMat[XPer_GrpMat>=5] = 5

In [None]:
mae,rmse,auc,precision,recall,f1,ndcg = EvaluationAllUpdated(XPerAMat, rating_matrix_video, k, cutoff)
ResultTrnULS_Grp_PerA                 = ["Unified_LS_Grp_PerA_Run_2_3k",mae,rmse,auc,precision,recall,f1,ndcg]
Result_Trn.loc[len(Result_Trn)]       = ["Unified_LS_Grp_PerA_Run_2_3k",mae,rmse,auc,precision[0][0],precision[0][4],precision[0][9],
                                         precision[0][19],precision[0][29],precision[0][39],recall[0][0],recall[0][4],
                                         recall[0][9],recall[0][19],recall[0][29],recall[0][39],f1[0][0],f1[0][4],f1[0][9],
                                         f1[0][19],f1[0][29],f1[0][39],ndcg[0][0],ndcg[0][4],ndcg[0][9],ndcg[0][19],
                                         ndcg[0][29],ndcg[0][39]]

In [None]:
mae,rmse,auc,precision,recall,f1,ndcg = EvaluationAllUpdated(XPer_GrpMat, rating_matrix_video, k, cutoff)
ResultTrnULS_Grp                      = ["Unified_LS_Grp_Run_2_3k",mae,rmse,auc,precision,recall,f1,ndcg]
Result_Trn.loc[len(Result_Trn)]       = ["Unified_LS_Grp_Run_2_3k",mae,rmse,auc,precision[0][0],precision[0][4],precision[0][9],
                                         precision[0][19],precision[0][29],precision[0][39],recall[0][0],recall[0][4],
                                         recall[0][9],recall[0][19],recall[0][29],recall[0][39],f1[0][0],f1[0][4],f1[0][9],
                                         f1[0][19],f1[0][29],f1[0][39],ndcg[0][0],ndcg[0][4],ndcg[0][9],ndcg[0][19],
                                         ndcg[0][29],ndcg[0][39]]

## GRS for Movies and TV domain

In [268]:
epochs                   = 3
tstPer                   = 30                               # Testing Percentage
NoTstIFromG              = 50
l                        = 5                                # Rating Level

K1                       = 10                               #Number of Clusters in User Space
K2                       = 100                             #Number of Clusters in Item Space

d                        = 100                              # latent space size
minUserPerForSel         = 20                               #Item will be select for test only if minUserPerForSel percentage of users in a group has rated
k                        = 40                               # eval@k: precision@k, recall@k
cutoff                   = 3                                # Relevant > cutoff

lambda1UnifiedLS         = 30
lambda3UnifiedLS         = 1

c2                       = 10**-2
tol                      = 10**-3;
maxiter                  = 500;

In [269]:
n, m = rating_matrix_mt.shape
print(n, m)

6600 12256


In [270]:
#Spectral Clustering
simU                     = userSim(rating_matrix_mt)
simU                     = (simU + simU.T)/2

UGidx_mt                    = spectralClu(simU,K1).reshape(-1,1)

simI                     = userSim(rating_matrix_mt.T)
simI                     = (simI + simI.T)/2

VGidx_mt                    = spectralClu(simI,K2).reshape(-1,1)

In userSim


LinAlgError: SVD did not converge

In [337]:
print(UGidx_mt)
print(UGidx_mt.shape)

[[1]
 [1]
 [7]
 ...
 [3]
 [9]
 [1]]
(6600, 1)


In [338]:
print(VGidx_mt)
print(VGidx_mt.shape)

[[ 1]
 [ 1]
 [ 1]
 ...
 [ 1]
 [ 1]
 [13]]
(11480, 1)


In [339]:
unique, frequency = np.unique(UGidx_mt, 
                              return_counts = True)
print("Unique Values:", 
      unique)
  
# print frequency array
print("Frequency Values:",
      frequency)

print()

unique, frequency = np.unique(VGidx_mt, 
                              return_counts = True)
print("Unique Values:", 
      unique)
  
# print frequency array
print("Frequency Values:",
      frequency)

Unique Values: [0 1 2 3 4 5 6 7 8 9]
Frequency Values: [ 279 3735  359  323  332  286  386  321  273  306]

Unique Values: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
 96 97 98 99]
Frequency Values: [  43 7089   47   51    1   27  184  382    4    5   31   87   16  420
   10   34   20    5    5   35   32    6    5    3   15   50    3    3
   26    9   55   31   27    3    4    3  285    3    9  168   13    3
    7   25   12    8   11    5   11    3    4    6    6    6    6    3
    7    3    5   13  314   17    7    6  260    2    3   39    9   12
    2    5   28   26    3   51   35    7   45    4    3  389    5   54
   34    5    3  273   36  332    4    3    3    3   46    4    3    3
    5    4]


### Recommendation

In [None]:
# np.random.seed(0)
vini = np.random.randn((n*d + m*d + K1*d + K2*d), 1)

In [None]:
# Group Recommendation: Unified LS
objGrad             = gradUnifiedLS
v,numiter,ogcalls,J = conjgrad(vini,objGrad,c2,tol,maxiter,l,d,rating_matrix_mt,lambda1UnifiedLS,lambda3UnifiedLS,K1,K2,UGidx_mt,VGidx_mt)
U_mt                  = v[0:n*d].reshape(n,d,order='F')
V_mt                   = v[n*d:n*d+m*d].reshape(m,d,order='F')
UG_mt                  = v[n*d+m*d:n*d+m*d+K1*d].reshape(K1,d,order='F')
X                   = U_mt@V_mt.T
X[X<=1]             = 1
X[X>=5]             = 5
XPerA               = aggregratePrediction(X,UGidx_mt)
XPerAMat_mt            = XPerA[UGidx_mt.reshape(-1,),:]

XPer_Grp                    = UG_mt@V_mt.T
XPer_GrpMat_mt                 = XPer_Grp[UGidx_mt.reshape(-1,),:]
XPer_GrpMat_mt[XPer_GrpMat_mt<=1] = 1
XPer_GrpMat_mt[XPer_GrpMat_mt>=5] = 5

In [None]:
mae,rmse,auc,precision,recall,f1,ndcg = EvaluationAllUpdated(XPerAMat_mt, rating_matrix_mt, k, cutoff)
ResultTrnULS_Grp_PerA                 = ["Unified_LS_Grp_PerA_mt_Run_2_3k",mae,rmse,auc,precision,recall,f1,ndcg]
Result_Trn.loc[len(Result_Trn)]       = ["Unified_LS_Grp_PerA_mt",mae,rmse,auc,precision[0][0],precision[0][4],precision[0][9],
                                         precision[0][19],precision[0][29],precision[0][39],recall[0][0],recall[0][4],
                                         recall[0][9],recall[0][19],recall[0][29],recall[0][39],f1[0][0],f1[0][4],f1[0][9],
                                         f1[0][19],f1[0][29],f1[0][39],ndcg[0][0],ndcg[0][4],ndcg[0][9],ndcg[0][19],
                                         ndcg[0][29],ndcg[0][39]]

In [None]:
mae,rmse,auc,precision,recall,f1,ndcg = EvaluationAllUpdated(XPer_GrpMat_mt, rating_matrix_mt, k, cutoff)
ResultTrnULS_Grp                      = ["Unified_LS_Grp_mt_Run_2_3k",mae,rmse,auc,precision,recall,f1,ndcg]
Result_Trn.loc[len(Result_Trn)]       = ["Unified_LS_Grp_mt_Run_2_3k",mae,rmse,auc,precision[0][0],precision[0][4],precision[0][9],
                                         precision[0][19],precision[0][29],precision[0][39],recall[0][0],recall[0][4],
                                         recall[0][9],recall[0][19],recall[0][29],recall[0][39],f1[0][0],f1[0][4],f1[0][9],
                                         f1[0][19],f1[0][29],f1[0][39],ndcg[0][0],ndcg[0][4],ndcg[0][9],ndcg[0][19],
                                         ndcg[0][29],ndcg[0][39]]

In [None]:
Result_Trn

## Creating X, Y matrices for mapping

In [None]:
#Adding individual latent vectors
for user_id in train:
    source_ind = np.where(new_user_ids_video == user_id)
    target_ind = np.where(new_user_ids_mt == user_id)
    X_train = np.concatenate((X_train, U_video[source_ind]), axis = 0)
    Y_train = np.concatenate((Y_train, U_mt[target_ind]), axis = 0)
    
for user_id in test:
    source_ind = np.where(new_user_ids_video == user_id)
    target_ind = np.where(new_user_ids_mt == user_id)
    X_test = np.concatenate((X_test, U_video[source_ind]), axis = 0)  

    
print(X_train.shape)
print(Y_train.shape)
print(X_test.shape)
print(Y_test.shape)

In [None]:
groupsToMembers_video = []
groupsToMembers_mt = []
for i in range(K1):
    groupsToMembers_video.append([])
    groupsToMembers_mt.append([])

for i in range(UGidx_video.shape[0]):
    groupsToMembers_video[int(UGidx_video[i][0])].append(new_user_ids_video[i])
    
for i in range(UGidx_mt.shape[0]):
    groupsToMembers_mt[int(UGidx_mt[i][0])].append(new_user_ids_mt[i])
    
print(len(groupsToMembers_video))
print(len(groupsToMembers_mt))

In [None]:
for i in range(len(groupsToMembers_video)):
    print(len(groupsToMembers_video[i]))

In [None]:
for i in range(len(groupsToMembers_mt)):
    print(len(groupsToMembers_mt[i]))

In [None]:
print(groupsToMembers_mt[5])

In [None]:
counter = 0
for i in range(len(groupsToMembers_video)):
    currGroup_video = groupsToMembers_video[i]
    for j in range(len(groupsToMembers_mt)):
        currGroup_mt = groupsToMembers_mt[j]
        if len(set(currGroup_video).intersection(set(currGroup_mt))) > 0:
            counter = counter + 1
            X_train = np.row_stack((X_train, UG_video[i]))
            Y_train = np.row_stack((Y_train, UG_mt[j]))
            
print(X_train.shape)
print(Y_train.shape)
print(counter)

## MLP Mapping

In [None]:
import torch
from torch import nn
from torch.utils.data import DataLoader
import torch.optim as optim

In [None]:
class GetDataset(torch.utils.data.Dataset):

  def __init__(self, X, y, scale_data=True):
    if not torch.is_tensor(X) and not torch.is_tensor(y):
      self.X = torch.from_numpy(X)
      self.y = torch.from_numpy(y)

  def __len__(self):
      return len(self.X)

  def __getitem__(self, i):
      return self.X[i], self.y[i]

In [None]:
class MLP(nn.Module):
  '''
    Multilayer Perceptron for regression.
  '''
  def __init__(self):
    super().__init__()
    self.layers = nn.Sequential(
      nn.Linear(100, 64),
      nn.ReLU(),
      nn.Linear(64, 32),
      nn.ReLU(),
      nn.Linear(32, 100)
    )


  def forward(self, x):
    '''
      Forward pass
    '''
    return self.layers(x)

In [None]:
# Set fixed random number seed
# torch.manual_seed(42)

In [None]:
dataset = GetDataset(X_train, Y_train)
trainloader = torch.utils.data.DataLoader(dataset, batch_size=X_train.shape[0], shuffle=False, num_workers=0)

In [None]:
# space = [
#     Real(1e-3, 5e-2, name='lr'),
#     Real(1e-5, 1e-2, name='decay')
# ]

In [None]:
# def trainMLPAndGetMAE(hyperparameters):
#     # Extract hyperparameters
#     learning_rate = hyperparameters[0]
#     wt_decay = hyperparameters[1]
    
#     mlp = MLP()
  
#     # Define the loss function and optimizer
#     loss_function = nn.MSELoss()
#     optimizer = torch.optim.Adam(mlp.parameters(), lr=learning_rate, weight_decay=wt_decay)
#     scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.1, patience=1, verbose=True)
    
#     counter = 0
#     # Run the training loop
#     while optimizer.param_groups[-1]['lr'] > 1e-4: # 5 epochs at maximum

#         # Print epoch
#     #     print(f'Starting epoch {epoch+1}')

#         # Set current loss value
#         current_loss = 0.0
#         losses = []
#         # Iterate over the DataLoader for training data
#         for i, data in enumerate(trainloader, 0):

#           # Get and prepare inputs
#     #         if counter > 2:
#     #             break
#     #         print(counter)
#             inputs, targets = data
#             inputs, targets = inputs.float(), targets.float()
#             targets = targets.reshape((targets.shape[0], 100))

#     #         print("Inputs shape: ", inputs.shape)
#     #         print("Target shape: ", targets.shape)
#     #         counter += 1

#           # Zero the gradients
#             optimizer.zero_grad()

#           # Perform forward pass
#             outputs = mlp(inputs)
#     #         print("Output shape: ", outputs.shape)

# #             outputs_np = outputs.detach().numpy()
# #             targets_np = targets.detach().numpy()
#             t_V_mt = torch.from_numpy(V_mt.T).float()

# #             t_vec = targets_np@V_mt.T
# #             o_vec = outputs_np@V_mt.T
#             t_vec = torch.matmul(targets, t_V_mt)
#             o_vec = torch.matmul(outputs, t_V_mt)
#     #         print("t_vec.shape: ", t_vec.shape)
#     #         print("o_vec.shape: ", o_vec.shape)
#             indicator_matrix = (t_vec >= 0.7)
#     #         print("indicator matrix: ", indicator_matrix)
#     #         print(np.all(indicator_matrix == True))
# #             t_vec_interacted = torch.from_numpy(t_vec*indicator_matrix).requires_grad_() 
# #             o_vec_interacted = torch.from_numpy(o_vec*indicator_matrix).requires_grad_() 
#             t_vec_interacted = torch.mul(t_vec, indicator_matrix)
#             o_vec_interacted = torch.mul(o_vec, indicator_matrix)
# #             t = torch.from_numpy(a)

#           # Compute loss
#             loss = loss_function(o_vec_interacted, t_vec_interacted)
#     #         print("Loss: ", loss.item())
#             print("Epoch : " + str(counter + 1) + " | Error : " + str(loss.item()) + " | LR : " + str(optimizer.param_groups[-1]['lr']))
#             losses.append(loss.item())
#           # Perform backward pass
#             loss.backward()

#           # Perform optimization
#             optimizer.step()


#         mean_loss = sum(losses) / len(losses)
#         scheduler.step(mean_loss)
# #         print(f"Loss at epoch {counter + 1} = {mean_loss}")
#         if optimizer.param_groups[-1]['lr'] <= 1e-4:
#             break;
#         counter += 1

#     # Process is complete.
#     print('Training process has finished.')
    
#     pred_X_train = mlp(torch.from_numpy(np.float32(X_train))).detach().numpy()
#     print("pred_X_train: ", pred_X_train.shape)
#     par1 = pred_X_train@V_mt.T
#     par2 = Y_train@V_mt.T
#     mae = MAE(par1, par2)
#     print("MAE: ", mae)
#     print()
#     print()
#     return mae

In [None]:
# def objective(hyperparameters):
#     print(hyperparameters)
#     return trainMLPAndGetMAE(hyperparameters)

In [None]:
# # Perform Bayesian optimization
# result = gp_minimize(objective, space, n_calls=20)

In [None]:
# Get the best hyperparameters and performance
# best_hyperparameters = {
#     'K1' = result.x[0]
#     'K2' = result.x[1]
#     'lambda1UnifiedLS' = result.x[2]
#     'lambda3UnifiedLS' = result.x[3]
# }
# best_performance = result.fun
# print("Best MAE: {:.4f}".format(result.fun))
# print("Best Hyperparameters: {}".format(dict(zip(['lr', 'decay'], result.x))))

In [None]:
# Print the MAE values and hyperparameters for each call
# for i, val in enumerate(result.func_vals):
#     print("Call {}: MAE={:.4f}, Hyperparameters={}".format(i, val, result.x_iters[i]))

In [None]:
# Print the values of the optimized hyperparameters
# print('Optimized Hyperparameters:')
# for hyperparameter, value in zip(space, result.x):
#     print('{}: {}'.format(hyperparameter.name, value))

## MLP with tuned hyperparameters

In [None]:
# torch.manual_seed(42)

In [None]:
mlp = MLP()
  
# Define the loss function and optimizer
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(mlp.parameters(), lr=0.018113971388685, weight_decay=0.0052151663029590236)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.1, patience=1, verbose=True)

In [None]:
counter = 0
# Run the training loop
while optimizer.param_groups[-1]['lr'] > 1e-4: # 5 epochs at maximum
    
    # Print epoch
#     print(f'Starting epoch {epoch+1}')
    
    # Set current loss value
    current_loss = 0.0
    losses = []
    # Iterate over the DataLoader for training data
    for i, data in enumerate(trainloader, 0):
      
      # Get and prepare inputs
#         if counter > 2:
#             break
#         print(counter)
        inputs, targets = data
        inputs, targets = inputs.float(), targets.float()
        targets = targets.reshape((targets.shape[0], 100))
        
#         print("Inputs shape: ", inputs.shape)
#         print("Target shape: ", targets.shape)
#         counter += 1
      
      # Zero the gradients
        optimizer.zero_grad()
      
      # Perform forward pass
        outputs = mlp(inputs)
#         print("Output shape: ", outputs.shape)
        
#         outputs_np = outputs.detach().numpy()
#         targets_np = targets.detach().numpy()
        t_V_mt = torch.from_numpy(V_mt.T).float()
    
#         t_vec = targets_np@V_mt.T
#         o_vec = outputs_np@V_mt.T
#         print("t_vec.shape: ", t_vec.shape)
#         print("o_vec.shape: ", o_vec.shape)
        t_vec = torch.matmul(targets, t_V_mt)
        o_vec = torch.matmul(outputs, t_V_mt)
        indicator_matrix = (t_vec >= 0.7)
#         print("indicator matrix: ", indicator_matrix)
#         print(np.all(indicator_matrix == True))
#         t_vec_interacted = t_vec*indicator_matrix
#         o_vec_interacted = o_vec*indicator_matrix
        t_vec_interacted = torch.mul(t_vec, indicator_matrix)
        o_vec_interacted = torch.mul(o_vec, indicator_matrix)
#             t = torch.from_numpy(a)

        # Compute loss
        loss = loss_function(o_vec_interacted, t_vec_interacted)
        
#         print("Loss: ", loss.item())
        print("Epoch : " + str(counter + 1) + " | Error : " + str(loss.item()) + " | LR : " + str(optimizer.param_groups[-1]['lr']))
        losses.append(loss.item())
      # Perform backward pass
        loss.backward()
      
      # Perform optimization
        optimizer.step()
        
        
    mean_loss = sum(losses) / len(losses)
    scheduler.step(mean_loss)
    print(f"Loss at epoch {counter + 1} = {mean_loss}")
    if optimizer.param_groups[-1]['lr'] <= 1e-4:
        break;
    counter += 1

# Process is complete.
print('Training process has finished.')

In [None]:
pred_X_train = mlp(torch.from_numpy(np.float32(X_train))).detach().numpy()
print(pred_X_train.shape)
cutoff = 3
k = 40
par1 = pred_X_train@V_mt.T
par2 = Y_train@V_mt.T
# mae,rmse = EvaluationAllUpdated_N(par1, par2, k, cutoff)
# print("Training Metrics")
# print(mae, rmse)

In [None]:
mae,rmse = EvaluationAllUpdated_N(par1, par2, k, cutoff)
Training_Metrics                = ["CDGRS_Train",mae,rmse]
Result_Trn_CDR.loc[len(Result_Trn_CDR)]       = ["Train-2_3k",mae,rmse]

In [None]:
pred_X_test = mlp(torch.from_numpy(np.float32(X_test))).detach().numpy()
print(pred_X_test.shape)
cutoff = 3
k = 40
par1 = pred_X_test@V_mt.T
par1[par1 > 5] = 5
par2 = Y_test
# mae,rmse = EvaluationAllUpdated_N(par1, par2, k, cutoff)
# print("Test Metrics")
# print(mae, rmse)

In [None]:
mae,rmse = EvaluationAllUpdated_N(par1, par2, k, cutoff)
Testing_Metrics                = ["CDGRS_Test",mae,rmse]
Result_Tst_CDR.loc[len(Result_Tst_CDR)]       = ["Test-2_3k",mae,rmse]

In [None]:
Result_Trn_CDR

In [None]:
Result_Tst_CDR