In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from decimal import Decimal, getcontext


def CalculatePrecision(Original,Predicted,K,th):
    """Return the row-averaged precision and recall of top-K recommendations.

    For every row, the "relevant" items are the (at most ``K``) highest
    entries of ``Original`` and the "recommended" items are the (at most
    ``K``) highest entries of ``Predicted``; each list is limited to the
    number of entries in its row that are ``>= th``.

    Args:
        Original: 2-D array of reference ratings, one row per user.
        Predicted: 2-D array of predicted ratings with the same layout.
        K: maximum length of the relevant / recommended lists.
        th: rating threshold an entry must reach to count.

    Returns:
        Tuple ``(precision, recall)``, each averaged over the rows of
        ``Original``. A row with no recommended items contributes a precision
        of 1, and a row with no relevant items contributes a recall of 1.
    """
    n_rows = Original.shape[0]
    precision_total = 0
    recall_total = 0

    for row in range(n_rows):
        actual = Original[row, :]
        predicted = Predicted[row, :]

        # How many entries reach the threshold, capped at K.
        n_relevant = min(np.sum(actual >= th), K)
        n_recommended = min(np.sum(predicted >= th), K)

        # Indices ordered from highest to lowest rating, truncated to the
        # list lengths computed above.
        relevant = np.flip(np.argsort(actual))[:n_relevant]
        recommended = np.flip(np.argsort(predicted))[:n_recommended]

        # Items that are both relevant and recommended.
        n_hits = np.intersect1d(recommended, relevant).shape[0]

        # An empty list counts as a perfect score for that row.
        precision_total += (n_hits / n_recommended) if n_recommended > 0 else 1
        recall_total += (n_hits / n_relevant) if n_relevant > 0 else 1

    return precision_total / n_rows, recall_total / n_rows


def GenerateGroup(M,K):
    """Randomly partition the indices ``0..M-1`` into groups of size ``K``.

    Every group receives exactly ``K`` members except possibly the last one,
    which receives the remainder. Membership is drawn uniformly at random
    from NumPy's global random state.

    Args:
        M: number of elements (users or items) to assign.
        K: positive integer group size.

    Returns:
        Tuple ``(UI, K1)`` where ``UI`` is an int array of length ``M`` giving
        the group id of each element and ``K1`` is the number of groups
        (``ceil(M / K)``, as a Python int).

    Raises:
        ValueError: if ``K`` is not positive (the previous implementation
            never terminated in that case).
    """
    if K <= 0:
        raise ValueError("K must be a positive group size")

    # A single shuffle replaces the repeated random pick + np.delete, which
    # cost O(M) per element. Consecutive runs of K shuffled indices form one
    # group, which yields the same distribution over partitions.
    shuffled = np.random.permutation(M)
    UI = np.zeros(M, dtype=int)
    UI[shuffled] = np.arange(M) // K

    # Ceiling division; int() keeps the return a plain Python int even when
    # K arrives as a NumPy integer.
    K1 = int(-(-M // K))
    return UI, K1
        

def ReadingDataset():
    """Load the ratings file and build a per-user 70/30 train/test split.

    The tab-separated file is pivoted into a dense user x movie matrix
    (missing ratings become 0) using its ``userId``, ``movieId`` and
    ``rating`` columns. For every user, the observed (rating > 0) entries are
    split with ``train_test_split(test_size=0.30, random_state=42)``.

    Returns:
        Tuple ``(X_train, X_test, Y_train, Y_test, R, M, N)`` where the ``X``
        arrays hold ``[row, column]`` positions into ``R``, the ``Y`` arrays
        hold the matching ratings, ``R`` is the full integer rating matrix and
        ``M, N`` is its shape.
    """
    print("Reading Dataset....")
    frame = pd.read_csv('/content/drive/My Drive/u.data',delimiter='\t',engine='python')
    frame = frame.reindex(np.random.permutation(frame.index))
    pivoted = frame.pivot(index = 'userId' , columns = 'movieId' , values = 'rating').fillna(0)
    R = np.array(pivoted, dtype=int)

    # Size of the rating matrix
    M, N = R.shape
    print(M,N)

    train_pos, test_pos = [], []
    train_val, test_val = [], []

    for user in range(M):
        # Columns this user has rated, in increasing column order.
        rated_cols = np.nonzero(R[user] > 0)[0]
        positions = [[user, col] for col in rated_cols]
        ratings = [R[user][col] for col in rated_cols]

        pos_tr, pos_ts, val_tr, val_ts = train_test_split(
            positions, ratings, test_size=0.30, random_state=42)

        test_pos.extend(pos_ts)
        test_val.extend(val_ts)
        train_pos.extend(pos_tr)
        train_val.extend(val_tr)

    X_test = np.array(test_pos, dtype=int)
    X_train = np.array(train_pos, dtype=int)
    Y_test = np.array(test_val, dtype=int)
    Y_train = np.array(train_val, dtype=int)
    return X_train, X_test, Y_train, Y_test, R, M, N

def matrix_factorization(R, I, U1, V1, UGG, VGG,UI,VJ, D, lm1, lm2,lm3,lm4, steps,aR,aI,a,alp,rate):
    """Fit user/item and user-group/item-group latent factors to ``R``.

    Each attempt restarts from the supplied initial factors and runs up to
    ``steps`` update iterations with step size ``alp``. If the loss rises
    (or becomes NaN) between two consecutive iterations, the attempt is
    abandoned, ``alp`` is reduced and training restarts; the function returns
    once an attempt completes all ``steps`` iterations.

    Args:
        R: training rating matrix (users x items).
        I: indicator matrix, same shape as ``R``; entries multiply the error
            so only observed ratings contribute.
        U1, V1: initial user (users x D) and item (items x D) factors.
            They are copied, not modified.
        UGG, VGG: initial user-group and item-group factors (groups x D).
            They are copied, not modified. The number of groups is taken
            from their first dimension.
        UI, VJ: integer arrays giving the group id of each user / item.
        D: number of latent features (accepted for interface compatibility;
            not used here).
        lm1, lm2: weights pulling user / item factors towards their group
            factor.
        lm3, lm4: shrinkage weights applied to user / item factors.
        steps: number of iterations per attempt.
        aR, aI, a: test-set matrix, its indicator and its size (accepted for
            interface compatibility; not used here).
        alp: initial step size.
        rate: amount subtracted from ``alp`` after a failed attempt; it is
            divided by 10 whenever it is not smaller than ``alp``.

    Returns:
        Tuple ``(U, V, UG, VG, alp, rate)`` with the fitted factors (``V`` as
        items x D) and the step-size state that produced them.

    Note:
        Sets the process-wide ``decimal`` context precision to 10.
    """
    # Precision of the Decimal arithmetic used to decrement the step size.
    getcontext().prec = 10
    print("Training",end=" ")

    UI = np.asarray(UI)
    VJ = np.asarray(VJ)
    # Group counts come from the factor matrices themselves rather than from
    # module-level globals, so the function works wherever it is called.
    n_user_groups = np.shape(UGG)[0]
    n_item_groups = np.shape(VGG)[0]

    while True:
        # Every attempt starts again from the caller's initial factors.
        U = np.array(U1)
        V = np.array(V1).T
        UG = np.array(UGG)
        VG = np.array(VGG)
        completed = True
        prev = 0

        for step in range(steps):
            # Prediction error on observed entries only.
            E = np.array((R - np.dot(U,V))* I)
            # Update user factors, then item factors (using the new U).
            U = U + alp*(2 * ((np.dot(E,V.T) + lm1*(UG[UI] - U)))-(lm3*U))
            V = V + alp*(2 * ((np.dot(U.T,E) + lm2*(VG[VJ].T - V)))-(lm4*V))

            # Update each user-group factor from the summed offsets of its
            # members. T is a snapshot, so the in-loop writes to UG do not
            # affect later groups.
            T = UG[UI] - U
            for g in range(n_user_groups):
                UG[g,:] = UG[g,:] - (2*alp*lm1) * np.sum(T[UI == g], axis=0)

            # Same update for the item-group factors.
            T = VG[VJ] - V.T
            for g in range(n_item_groups):
                VG[g,:] = VG[g,:] - (2*alp*lm2) * np.sum(T[VJ == g], axis=0)

            # Loss: squared error on observed entries plus the group terms.
            ER = (np.sum(pow((R - np.dot(U,V)), 2) * I)
                  + lm1*np.sum(pow((UG[UI]-U),2))
                  + lm2*np.sum(pow((VG[VJ].T-V),2)))

            # Progress marker every 100 iterations and on the last one.
            if (step%100==0 or step==steps-1) and step>0:
                print(">>",end=" ")

            # A rising or NaN loss means the step size is too large.
            if step>0 and (prev<ER or np.isnan(ER)):
                completed = False
                break
            prev = ER

        if completed:
            break

        # Reduce the step size; shrink the decrement first when subtracting
        # it would not leave a positive step size.
        if rate < alp:
            alp=float(Decimal(alp)-Decimal(rate))
        else:
            rate =float(Decimal(rate)/Decimal(10))
            alp=float(Decimal(alp)-Decimal(rate))

    return U,V.T,UG,VG,alp,rate







"""*********************************"""
"""*********************************"""
"""*********************************"""
"""*********************************"""

# Experiment driver: load the data, train the group-regularised factorisation
# for several group sizes, and print error and precision/recall figures for
# four prediction variants (user-item, usergroup-item, user-itemgroup,
# usergroup-itemgroup).

# Read the dataset and its per-user train/test split.
X_train, X_test, Y_train, Y_test, Rating,M,N = ReadingDataset()
# Sizes of the test and train position arrays; `a` (number of test ratings)
# is the denominator of MAE / MSE below.
a,b=X_test.shape
c,d=X_train.shape
a1=Y_test.shape
c1=Y_train.shape   

# Training rating matrix: train ratings at their positions, 0 elsewhere.
R=np.zeros((M,N),dtype=int)
R[X_train[:,0],X_train[:,1]] = Y_train[:]
# Test rating matrix: test ratings at their positions, 0 elsewhere.
aR=np.zeros((M,N),dtype=int)
aR[X_test[:,0],X_test[:,1]] = Y_test[:]

# Indicator matrix of the training data (1 where a rating is present).
I=np.zeros((M,N),dtype=int)
I[R > 0] = 1
# Indicator matrix of the test data (1 where a rating is present).
aI=np.zeros((M,N),dtype=int)
aI[aR > 0] = 1 

# Initial learning rate. NOTE: `alp` and `rate` are overwritten with the
# values returned by matrix_factorization, so later group sizes start from
# the already-reduced learning rate of the previous run.
alp=1
# Decrement applied to `alp` when training diverges.
rate=0.1
# Number of training iterations per attempt.
step=500
# Number of latent features: set by the `for D in ...` loop below.
#D=20
# Rating threshold used when computing precision and recall.
th=3
# Regularisation weights passed to matrix_factorization.
lm1=10
lm2=10
lm3=10
lm4=10
for GroupSize in np.array([2,4,6,8,10,12]):
    # User group size
    UserGroupSize = GroupSize
    # Item group size (same as the user group size in this experiment)
    ItemGroupSize = GroupSize

    for D in np.array([20]):
        # Random initial user and item latent factor matrices.
        U1 = np.random.rand(M,D)
        V1 = np.random.rand(N,D)

        # Randomly assign users and items to groups; K1 / K2 are the
        # resulting numbers of user / item groups. matrix_factorization reads
        # K1 and K2 as module globals, so they must be set before the call.
        UI,K1 = GenerateGroup(M,UserGroupSize)
        VJ,K2 = GenerateGroup(N,ItemGroupSize)

        # Random initial user-group and item-group latent factors.
        UG1 = np.random.rand(K1,D)
        VG1 = np.random.rand(K2,D)
    
        print()
        # Train the model; also returns the (possibly reduced) alp and rate.
        U,V,UG,VG,alp,rate=matrix_factorization(R, I, U1, V1,UG1,VG1,UI,VJ,D,lm1, lm2,lm3,lm4,step,aR,aI,a,alp,rate)
        print()
        print("User Group Size : ",UserGroupSize,"Item Group Size : ",ItemGroupSize," alpha :",alp)

        # Predicted rating matrix from the user and item factors.
        nR = np.dot(U, V.T)
        # From here on, Decimal arithmetic rounds to 2 significant digits;
        # the `Decimal(x)/Decimal(1)` divisions below exist only to apply
        # that rounding when printing.
        getcontext().prec = 2
        # Mean absolute error over the test ratings.
        MAE = Decimal(np.sum(np.abs(aR-nR)*aI))/Decimal(a)
        print("MAE : ",MAE,end="\t")
        # Root mean square error over the test ratings.
        MSE = Decimal(np.sum(pow((aR-nR),2)*aI))/Decimal(a)
        # NOTE(review): MSE is a Decimal here; np.sqrt presumably falls back
        # to the object's own sqrt() method -- confirm.
        RMSE = np.sqrt(MSE)
        print("RMSE : ",RMSE)
        print();
        for K in np.array([20]):
            # Precision and recall for personalised (user-item) predictions.
            # Predictions are masked to positions that have a test rating.
            nR = nR*aI
            pr,rc=CalculatePrecision(aR,nR,K,th)

            print("Precision and Recall for User-Item : ");
            print(Decimal(pr)/Decimal(1),end="\t")
            print(Decimal(rc)/Decimal(1))
            

            # Item ratings predicted per user group (group factors x items).
            GroupRating = np.dot(UG, V.T)
            
            Precision=0
            Recall=0
            # Precision and recall for user-group recommendations.
            for i in range(K1):
                # Test ratings of all users in the i-th group; the tuple
                # index yields a 3-D array with a leading axis of length 1.
                OrgGroup=aR[np.nonzero(i == UI),:]
                x,y,z=OrgGroup.shape
                # Drop the leading axis: (members, items).
                OrgGroup=OrgGroup.reshape((y,z))
                # Every member of the group gets the group's predicted row.
                PredGroup=np.zeros((y,z),dtype=float)
                for j in range(y):
                    PredGroup[j,:]=GroupRating[i,:]
                # Indicator matrix of this group's test data, used to zero
                # predictions at positions without a test rating.
                pI=np.zeros((y,z),dtype=int)
                pI[OrgGroup > 0] = 1
                PredGroup = PredGroup*pI
                # Note: this rebinds `rc` from the user-item section above.
                prec,rc = CalculatePrecision(OrgGroup,PredGroup,K,th)
                Precision=Precision+prec
                Recall=Recall+rc

            # Average precision and recall over all user groups.

            print("Precision and Recall for UserGroup-Item: ");
            print(Decimal(Precision)/Decimal(K1),end="\t")
            print(Decimal(Recall)/Decimal(K1))

            # Item-group ratings predicted per user (users x item groups).
            UserItemGroupRating = np.dot(U, VG.T)
            
            # (These two accumulators are not used in this section.)
            Precision=0
            Recall=0
            # Precision and recall for item-group recommendations per user:
            # each item takes the predicted rating of its item group.
            PredGroup = UserItemGroupRating[:,VJ]
            PredGroup = PredGroup*aI
            prec,rc = CalculatePrecision(aR,PredGroup,K,th)

            print("Precision and Recall for User-ItemGroup: ");
            print(Decimal(prec)/Decimal(1),end="\t")
            print(Decimal(rc)/Decimal(1))

            # Item-group ratings predicted per user group
            # (user groups x item groups).
            UG_ItemGroupRating = np.dot(UG, VG.T)
            
            Precision=0
            Recall=0
            # Precision and recall for item-group recommendations per user group.
            for i in range(K1):
                # Test ratings of all users in the i-th group (3-D, leading
                # axis of length 1).
                OrgGroup=aR[np.nonzero(i == UI),:]
                x,y,z=OrgGroup.shape
                # Drop the leading axis: (members, items).
                OrgGroup=OrgGroup.reshape((y,z))
                # Every member gets the group's row, expanded from item
                # groups to items through VJ.
                PredGroup=np.zeros((y,z),dtype=float)
                for j in range(y):
                    PredGroup[j,:]=UG_ItemGroupRating[i,VJ]
                # Indicator matrix of this group's test data, used to zero
                # predictions at positions without a test rating.
                pI=np.zeros((y,z),dtype=int)
                pI[OrgGroup > 0] = 1
                PredGroup = PredGroup*pI
                prec,rc = CalculatePrecision(OrgGroup,PredGroup,K,th)
                Precision=Precision+prec
                Recall=Recall+rc

            # Average precision and recall over all user groups.
            print("Precision and Recall for UserGroup-ItemGroup: ");
            print(Decimal(Precision)/Decimal(K1),end="\t")
            print(Decimal(Recall)/Decimal(K1))

            print()
    





Reading Dataset....
943 1682

Training >> >> >> >> >> 
User Group Size :  2 Item Group Size :  2  alpha : 0.0004
MAE :  0.75	RMSE :  0.95

Precision and Recall for User-Item : 
0.77	0.68
Precision and Recall for UserGroup-Item: 
0.76	0.69
Precision and Recall for User-ItemGroup: 
0.74	0.53
Precision and Recall for UserGroup-ItemGroup: 
0.72	0.53


Training >> >> >> >> >> 
User Group Size :  4 Item Group Size :  4  alpha : 0.0004
MAE :  0.74	RMSE :  0.94

Precision and Recall for User-Item : 
0.77	0.68
Precision and Recall for UserGroup-Item: 
0.76	0.70
Precision and Recall for User-ItemGroup: 
0.74	0.42
Precision and Recall for UserGroup-ItemGroup: 
0.71	0.44


Training >> >> >> >> >> 
User Group Size :  6 Item Group Size :  6  alpha : 0.0004
MAE :  0.74	RMSE :  0.94

Precision and Recall for User-Item : 
0.78	0.69
Precision and Recall for UserGroup-Item: 
0.75	0.72
Precision and Recall for User-ItemGroup: 
0.75	0.38
Precision and Recall for UserGroup-ItemGroup: 
0.71	0.39


Training >