In [3]:
import numpy as np
from timeit import default_timer as timer 
from numba import cuda,jit,vectorize,int32,float32,guvectorize
from math import sqrt
import pickle
import Call_Function as cf

In [None]:
start=timer()
n_user,n_movie,n_rating=cf.read_user_inform()
#n_user,n_factor,n_movie,mean,stdev,epoch,learning_rate=0.005,regulation=0.02,random_state=1
svd_pp=SVD_pp(n_user,100,n_movie)
svd_pp.read_u_data(cf.data_location)
svd_pp.get_overall_mean()
RMSE_list=[]
for i in range(cf.k_fold):
    svd_pp.gradient_descent(cf.train_set_list[i])
    print(str(i)+" .batch gradient finish")
    RMSE=svd_pp.predict(cf.test_set_list[i])
    print(str(i)+" . RMSE :"+RMSE)
    RMSE_list.append(RMSE)
total_time=timer()-start
print("Time : "+str(total_time))
print("AVG RMSE : "+str(sum(RMSE_list)/len(RMSE_list)))
if cf.save_or_not:
    cf.save_object("svd_pp_object.p",[svd_pp,RMSE_list,total_time])

Read Start
Finish
Time :  2.588124406989664
Loading : 69488

In [5]:
class SVD_pp:
    def __init__(self,n_user,n_factor,n_movie,epoch=20,mean=0,stdev=0.2,learning_rate=0.007,regulation=0.02,random_state=1):
        self.n_user=n_user
        self.n_factor=n_factor
        self.n_movie=n_movie
        self.rating_matrix=np.zeros((n_user,n_movie),np.int8)
        self.overall_mean=0
        self.epochs=epoch
        #default value - 1
        self.random_state=random_state
        self.init_mean=mean
        self.init_stdev=stdev
        self.lr=learning_rate
        self.reg=regulation
        self.bu=None
        self.bi=None
        self.pu=None
        self.yj=None
        self.qi=None
        
     # Reading data for input rating to matrix        
    def read_u_data(self,total_location):
        with open(total_location,'r') as f:
            print("Read Start")
            line=f.readline().split('\t')
            while len(line)>1:
                self.rating_matrix[int(line[0])-1,int(line[1])-1]=int(line[2])
                line=f.readline().split('\t')
            print("Finish")
            
    # get overall mean             
    def get_overall_mean(self):
        start=timer()
        total_count=0
        total_sum=0
        for j in range(self.n_movie):
            for i in range(self.n_user):
                if self.rating_matrix[i,j] != 0:
                    total_count+=1
                    total_sum+=self.rating_matrix[i,j]
        self.overall_mean=total_sum/total_count
        print("Time : ",timer()-start)
        
    #Testing       
    def predict(self,test_set):
        test_count=0
        r_sum=0
        with open(test_set,'r') as file:
            line=file.readline().split('\t')
            
            while len(line) >1:
                movie_index=int(line[1])-1
                user_index=int(line[0])-1
                rating=int(line[2])
                user_implicit_feedback=np.zeros(self.n_factor,np.double)
                rated_item=[item_id for item_id,r in enumerate(self.rating_matrix[user_index]) if r!=0]
                rated_size=sqrt(len(rated_item))
                for item_id in rated_item:
                    for f in range(self.n_factor):
                        user_implicit_feedback[f]+=self.yj[item_id,f]/rated_size
                dot=0
                for f in range(self.n_factor):
                    dot+=qi[movie_index,f]*(pu[user_index,factor]+user_implicit_feedback[factor])
                r_hat=self.overall_mean+self.bu[user_index]+self.bi[movie_index]+dot
                error=rating-r_hat
                r_sum+=(error)**2
                test_count+=1
                line=file.readline().split('\t')
        return sqrt(r_sum/test_count)
    
      #Training
    def gradient_descent(self,train_set):
        SVD_pp.init_vector(self)
        
        loading2=0
        for i in range(self.epochs):
            with open(train_set,'r') as file:
                line=file.readline().split('\t')
                
                loading=0
                while len(line) >1:
                    loading+=1
                    print("Loading : {}\r".format(loading),end='')
                    movie_index=int(line[1])-1
                    user_index=int(line[0])-1
                    rating=int(line[2])
                    user_implicit_feedback=np.zeros(self.n_factor,np.double)
                    rated_item=[item_id for item_id,r in enumerate(self.rating_matrix[user_index]) if r!=0]
                    rated_size=sqrt(len(rated_item))
                    for item_id in rated_item:
                        for f in range(self.n_factor):
                            user_implicit_feedback[f]+=self.yj[item_id,f]/rated_size
                    dot=0
                    for f in range(self.n_factor):
                        dot+=self.qi[movie_index,f]*(self.pu[user_index,f]+user_implicit_feedback[f])
                    r_hat=self.overall_mean+self.bu[user_index]+self.bi[movie_index]+dot
                    error=rating-r_hat
                    self.bu[user_index]+=self.lr*(error-self.reg*self.bu[user_index])
                    self.bi[movie_index]+=self.lr*(error-self.reg*self.bi[movie_index])
                    for i in range(self.n_factor):
                        pu_=self.pu[user_index,i]
                        qi_=self.qi[movie_index,i]
                        self.pu[user_index,i]+=self.lr*(error*qi_-self.reg*pu_)
                        self.qi[movie_index,i]+=self.lr*(error*(pu_+user_implicit_feedback[i])-self.reg*qi_)
                        for j in rated_item:
                            self.yj[j,i]+=self.lr*((error/rated_size)*qi_-self.reg*self.yj[j,i])
                    line=file.readline().split('\t')
                loading+=1
                print("Epoch !",loading)
        
                    
    #initialize all the vectors by normalizing and making zero                   
    def init_vector(self):
        generator=np.random.RandomState(self.random_state)
        self.bu=np.zeros(self.n_user,np.double)
        self.bi=np.zeros(self.n_movie,np.double)
        self.pu=generator.normal(self.init_mean,self.init_stdev,(self.n_user,self.n_factor))
        self.qi=generator.normal(self.init_mean,self.init_stdev,(self.n_movie,self.n_factor))
        self.yj=generator.normal(self.init_mean,self.init_stdev,(self.n_movie,self.n_factor))