In [9]:
# Integrated Model
import numpy as np
from scipy import io, sparse
from math import sqrt

from utils import pre_processing, compute_sparse_correlation_matrix, path

import random
import sys
import time

mat_file = path + "/T.mat"
mat = io.loadmat(mat_file)['X']


mat = mat[0:mat.shape[0] // 128, 0:mat.shape[1] // 128]
mat = mat[mat.getnnz(1) > 0][:, mat.getnnz(0) > 0]

mat.shape


(1542, 111)

In [12]:
train = np.zeros((1542,111))
test = np.zeros((1542,111))

cx = mat.tocoo()
idx = 0

for u, i, v in zip(cx.row, cx.col, cx.data):
    
    if idx%4 == 0:
        test[u][i]=v
    else :
        train[u][i]=v
    idx = idx + 1

train = np.asmatrix(train)
test = np.asmatrix(test)

In [156]:
class Integrated():
    
    def __init__(self, train, test, f, mat):
            
        self.gamma1=0.007 
        self.gamma2=0.007 
        self.gamma3=0.001 
        self.l_reg2=100 
        self.l_reg6=0.005 
        self.l_reg7=0.015
        self.l_reg8=0.015
        self.k=30
                 
        self.f = f
        self.no_user = train.shape[0]
        self.no_item = train.shape[1]
            
        self.R = train
        self.N = sparse.csr_matrix(train).copy()
        self.N.data[:] = 1
        self.S = sparse.csr_matrix.dot(self.N.T, self.N)
        self.S.data[:] = self.S.data[:] / (self.S.data[:] + self.l_reg2)
        self.S = self.S * compute_sparse_correlation_matrix(train)
        
        self.R_test = test
        
        self.Bu = np.random.standard_normal(self.no_user)
        self.Bi = np.random.standard_normal(self.no_item)
        self.W = np.random.standard_normal((self.no_item, self.no_item))
        self.C = np.random.standard_normal((self.no_item, self.no_item))
        self.Q = np.random.standard_normal((self.no_item, self.f))
        self.P = np.random.standard_normal((self.no_user, self.f))
        self.Y = np.random.standard_normal((self.no_item, self.f))

        self.mu = np.mean(train[train != 0])
        
        self.bu_index, self.bi_index = pre_processing(mat, "" )

        self.rmse = 0
        self.rmse_test = 0
        
        print(self.bi_index)

        print(len(self.bi_index))
        
        
              
    def optimize(self):
        
        n_iter = 30
        # to penalize overflow
        
        cx = sparse.coo_matrix(self.R)
        
        
        time_sum = 0


        for it in range(n_iter):

            start = time.time()  # 시간측정 시작
            
            cnt = 0
            self.rmse = 0
            
            for u, i, v in zip(cx.row, cx.col, cx.data):
                
                N_u = self.bi_index[u]
                Nk_iu = np.flip(np.argsort(self.S[i,]))[:self.k].ravel()
                Rk_iu = Nk_iu

                e_ui, bias = self.compute_e_ui(train, u, i, Rk_iu, Nk_iu, N_u)

                self.Bu[u] = self.Bu[u] + self.gamma1 * (e_ui - self.l_reg6 * self.Bu[u])
                self.Bi[i] = self.Bi[i] + self.gamma1 * (e_ui - self.l_reg6 * self.Bi[i])

                self.Q[i] = self.Q[i] + self.gamma2 * (e_ui * ( self.P[u] + ( np.sum(self.Y[N_u], axis=0 ) )/np.sqrt(len(N_u)) ) - self.l_reg7 * self.Q[i])
                self.P[u] = self.P[i] + self.gamma2 * (e_ui * self.Q[i] - self.l_reg7 * self.P[u])

                self.Y[N_u] = self.Y[N_u] + self.gamma2 * ( ((e_ui * self.Q[i])/np.sqrt(len(N_u))).reshape(1,-1) - self.l_reg7 * self.Y[N_u])

                if len(Rk_iu)>0 :
                    self.W[i][Rk_iu] = self.W[i][Rk_iu] + self.gamma3 * ( e_ui * bias / np.sqrt(len(Rk_iu)) - self.l_reg8 * self.W[i][Rk_iu] )
                if len(Nk_iu)>0 :
                    self.C[i][Nk_iu] = self.C[i][Nk_iu] + self.gamma3 * ( e_ui / np.sqrt(len(Nk_iu)) - self.l_reg8 * self.C[i][Nk_iu] )
                
                cnt += 1
                self.rmse += e_ui**2
                
                
            self.gamma1 *= 0.9
            self.gamma2 *= 0.9
            self.gamma3 *= 0.9

            end = time.time()  # 시간측정 종료
            time_sum += end - start
            
            self.rmse = np.sqrt(self.rmse/cnt)
            
            self.rmse_test = self.get_RMSE()
            
            if it % 10 == 0 or it == n_iter - 1:
                print_line = "EPOCH " + str(it+1) + " : Training RMSE " + str(self.rmse[0]) + " / Test RMSE " + str(self.rmse_test)
                print(print_line)
        
        print(f'Time per Iteration {time_sum/n_iter :.2f}')
            
            
    
    def compute_e_ui(self, mat, u, i, Rk_iu, Nk_iu, N_u):
        
        predict, bias = self.predict_r_ui(mat, u, i, Rk_iu, Nk_iu, N_u)

        return mat[u, i] - predict, bias
    
    
    def predict_r_ui(self, mat, u, i, Rk_iu, Nk_iu, N_u):
        
        p = self.P[u] + self.Y[N_u].sum(0)/np.sqrt(len(N_u))
        Factor_part = np.dot(p,self.Q[i].T)

        if len(Rk_iu)>0 :
            bias_uj = mat[u,Rk_iu] - (self.mu + self.Bu[u] + self.Bi[Rk_iu])
            Neigh_part = np.dot(bias_uj, self.W[i,Rk_iu]) / np.sqrt(len(Rk_iu))
        else :
            bias_uj = 0
            Neigh_part = 0

        if len(Nk_iu)>0 :
            Neigh_part = Neigh_part + np.sum(self.C[i,Nk_iu]) / np.sqrt(len(Nk_iu))

        return self.mu + self.Bu[u] + self.Bi[i] + Factor_part + Neigh_part, bias_uj
    
    
    
    def predict(self):
        pred = np.zeros((self.no_user, self.no_item))
        
        cs = sparse.coo_matrix(self.R)
        
        for u, i, v in zip(cs.row, cs.col, cs.data):
            if self.R_test[u, i] != 0:
                N_u = self.bi_index[u]
                Nk_iu = np.flip(np.argsort(self.S[i,].toarray()))[:self.k].ravel()
                Rk_iu = Nk_iu
                pred[u, i], _ = self.predict_r_ui(self.R, u, i, N_u, Rk_iu, Nk_iu)
        
        return pred
    
    
    def get_RMSE(self):
        
        pred_mat = self.predict()
        rating_idx = self.R_test != 0
        rmse_pred = np.sqrt(np.mean(np.power((self.R_test - pred_mat)[rating_idx], 2)))

        return rmse_pred
        
        
        
        
    
    

In [157]:
model = Integrated(train, test, 100, mat)

Pre-processing done.
[[9, 24, 46, 84], [24], [6, 22, 24, 65], [24], [65], [6, 22, 24, 65, 75, 87], [24], [61, 94], [6], [61], [24], [24], [24], [22, 24, 36, 37, 44, 47, 61, 62, 63, 65], [61, 87], [22, 24], [24], [24, 66], [24], [84, 87], [73, 87], [6], [42], [65], [24], [13, 24, 47, 65], [94], [24, 87], [24], [19], [34, 47, 61, 84, 87, 94, 99, 101, 110], [24], [44, 87, 94], [22, 54, 56], [22, 44, 47, 61, 94], [24], [38, 63, 110], [24], [9, 56, 75, 76], [24], [24], [24], [24, 84, 87, 94], [24], [24, 47], [87], [24, 87], [22, 24, 62], [24, 84], [22, 38], [22, 24], [75], [24], [36], [84], [84], [24], [24, 87], [22, 24, 87], [24], [34, 47], [22], [24], [13, 24, 83], [22, 24, 35, 36, 44, 47, 61, 62, 64, 94], [24, 34, 65], [24], [6, 24, 35], [24], [24, 37, 42, 65], [20, 24, 48, 84, 87, 102], [24], [6, 34], [24], [24, 56, 87], [63, 84], [42], [22, 24, 27], [75], [22, 24, 36], [24, 60, 93], [22, 24], [20], [24], [110], [13, 20, 24, 84, 87], [22], [6, 11, 20, 22, 24, 47, 61, 87, 89, 97], [71], 



In [158]:
print(train.shape)

(1542, 111)


In [159]:
model.optimize()

  return fmatmul(a, a)
  self.Y[N_u] = self.Y[N_u] + self.gamma2 * ( ((e_ui * self.Q[i])/np.sqrt(len(N_u))).reshape(1,-1) - self.l_reg7 * self.Y[N_u])


EPOCH 1 : Training RMSE [[nan]] / Test RMSE 3.675360422010131


KeyboardInterrupt: 