In [1]:
import numpy as np
import pandas as pd 
import time
# Seed NumPy's global RNG so that the np.random calls used later in this notebook
# give repeatable results when the notebook is re-run from a fresh kernel.
np.random.seed(0)

In [3]:
# Load the training split: a tab-separated file without a header row.
file_path_train = '/Users/yoonminseok/Desktop/DSAIL/2주차/u1.base.csv'
movie_data_train = pd.read_csv(
    file_path_train,
    sep='\t',
    header=None,
    names=['user_id', 'movie_id', 'rating', 'timestamp'],
)

# The timestamp is not used; keep only the (user, movie, rating) columns.
movie_data_train = movie_data_train.loc[:, ['user_id', 'movie_id', 'rating']]

# Build the user x movie rating matrix R (unrated pairs become NaN) and
# shrink it to the first 450 rows and first 1000 columns.
R_matrix = (
    movie_data_train
    .pivot(index='user_id', columns='movie_id', values='rating')
    .iloc[:450, :1000]
)

In [4]:
# Preview the truncated user x movie rating matrix built in the previous cell.
R_matrix

movie_id,1,2,3,4,5,6,7,8,9,10,...,997,998,999,1000,1001,1002,1003,1004,1005,1006
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,5.0,3.0,4.0,3.0,3.0,,4.0,1.0,5.0,,...,,,,,,,,,,
2,4.0,,,,,,,,,2.0,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
446,,,,,,,,,,,...,,,,,,,,,,
447,3.0,,,,3.0,,5.0,,2.0,,...,,,,,,,,,,
448,,,,,,,,,,,...,,,,,,,,,,
449,,,,,,,,,4.0,,...,,,,,,,,,5.0,4.0


In [5]:
# Build P_matrix, the implicit-feedback view of R_matrix:
# every entry greater than 0 becomes 1 and every missing entry (NaN) becomes 0.
P_matrix = np.array(R_matrix, copy=True)
np.putmask(P_matrix, P_matrix > 0, 1)
P_matrix = np.nan_to_num(P_matrix, nan=0)

In [11]:
class BPR_MF:
  """Matrix factorisation trained with the BPR (Bayesian Personalized Ranking) criterion.

  For a sampled triple (u, i, j), where user u has interacted with item i but
  not with item j, the model score is x_uij = w_u . h_i - w_u . h_j and the
  parameters are moved by stochastic gradient ascent on
  ln sigma(x_uij) - lambda * ||theta||^2.

  Parameters
  ----------
  alpha : float
      Learning rate.
  dimension : int
      Number of latent factors per user / item.
  iteration : int
      Number of sampled (u, i, j) triples, i.e. SGD steps.
  lambda_parameter : float
      L2 regularisation strength.
  data : 2-D array-like
      User x item implicit matrix; 1 marks an interaction, 0 marks none.

  Attributes
  ----------
  X, Y : np.ndarray or None
      User and item factor matrices learned by the last call to ``fit``.
  """

  def __init__(self, alpha = 0.01, dimension = 20, iteration = 100, lambda_parameter = 0.01, data = None):
    self.alpha = alpha
    self.dimension = dimension
    self.iteration = iteration
    self.lambda_parameter = lambda_parameter
    self.data = data
    self.X = None
    self.Y = None

  @property
  def lambada_parameter(self):
    # Backward-compatible alias for the attribute's historical misspelling.
    return self.lambda_parameter

  @lambada_parameter.setter
  def lambada_parameter(self, value):
    self.lambda_parameter = value

  def fit(self):
    """Run ``iteration`` BPR SGD steps and store the factors in ``self.X`` / ``self.Y``.

    Returns
    -------
    float
        Fraction of sampled triples whose score x_uij was already positive
        before the corresponding update (a running, training-time estimate of
        ranking accuracy). 0.0 when no triple could be sampled.

    Raises
    ------
    ValueError
        If ``data`` is missing or is not two-dimensional.
    """
    if self.data is None:
      raise ValueError("data must be a 2-D user x item interaction matrix")
    data = np.asarray(self.data)
    if data.ndim != 2:
      raise ValueError("data must be a 2-D user x item interaction matrix")
    n_users, n_items = data.shape

    # Small random initial factors in [0, 0.01).
    X = np.random.rand(n_users, self.dimension) * 0.01
    Y = np.random.rand(n_items, self.dimension) * 0.01
    self.X, self.Y = X, Y

    # Only users with at least one interacted AND one non-interacted item can
    # yield a (u, i, j) triple; sampling any other user would make
    # np.random.choice fail on an empty candidate list.
    positives = data == 1
    negatives = data == 0
    eligible = np.where(positives.any(axis=1) & negatives.any(axis=1))[0]
    if eligible.size == 0 or self.iteration <= 0:
      return 0.0

    predict = 0
    sampled = 0

    for _ in range(self.iteration):
      u = np.random.choice(eligible)
      i = np.random.choice(np.where(positives[u])[0])      # item the user interacted with
      j = np.random.choice(np.where(negatives[u])[0])      # item the user did not interact with

      # Copy the rows: plain slices are views, so updating X[u] in place would
      # otherwise leak the new w_u into the h_i / h_j gradients.
      w_u = X[u].copy()
      h_i = Y[i].copy()
      h_j = Y[j].copy()

      x_uij = np.dot(w_u, h_i) - np.dot(w_u, h_j)
      # sigma(-x) = e^-x / (1 + e^-x), evaluated as exp(-log(1 + e^x)) so that
      # large |x| cannot produce inf / inf = nan.
      weight = np.exp(-np.logaddexp(0.0, x_uij))

      # Gradient ascent on ln sigma(x_uij) - lambda * ||theta||^2: the
      # regulariser enters with a minus sign so that it shrinks the parameters.
      X[u] += self.alpha * (weight * (h_i - h_j) - self.lambda_parameter * w_u)
      Y[i] += self.alpha * (weight * w_u - self.lambda_parameter * h_i)
      Y[j] += self.alpha * (-weight * w_u - self.lambda_parameter * h_j)

      if x_uij > 0:
        predict += 1
      sampled += 1

    auc = predict / sampled
    return auc


In [14]:
# Train BPR-MF on the implicit matrix with 10 sampled triples and keep the returned score.
bpr_mf = BPR_MF(alpha=0.01, dimension=20, iteration=10, lambda_parameter=0.01, data=P_matrix)
auc = bpr_mf.fit()

In [17]:
class BPR_KNN:
  """Item-based k-nearest-neighbour model trained with the BPR criterion.

  The learned parameter is an item x item matrix ``C_matrix``. The score of
  item i for user u is the sum of C_matrix[i, l] over the items l the user
  interacted with (excluding i itself). For a sampled triple (u, i, j) with i
  interacted and j not, x_uij = x_ui - x_uj and the rows i and j of
  ``C_matrix`` are moved by stochastic gradient ascent on
  ln sigma(x_uij) - lambda * ||theta||^2.

  Parameters
  ----------
  alpha : float
      Learning rate.
  dimension : int
      Not used by this model; accepted so the signature matches BPR_MF.
  iteration : int
      Number of sampled (u, i, j) triples, i.e. SGD steps.
  lambda_parameter : float
      L2 regularisation strength.
  data : 2-D array-like
      User x item implicit matrix; 1 marks an interaction, 0 marks none.

  Raises
  ------
  ValueError
      If ``data`` is missing or is not two-dimensional.
  """

  def __init__(self, alpha = 0.01, dimension = 20, iteration = 100, lambda_parameter = 0.01, data = None):
    if data is None or np.ndim(data) != 2:
      raise ValueError("data must be a 2-D user x item interaction matrix")
    self.alpha = alpha
    self.dimension = dimension
    self.iteration = iteration
    self.lambda_parameter = lambda_parameter
    self.data = data
    n_items = np.shape(data)[1]
    # Small random initial item-item weights in [0, 0.01).
    self.C_matrix = np.random.rand(n_items, n_items) * 0.01

  def fit(self):
    """Run ``iteration`` BPR SGD steps, updating ``self.C_matrix`` in place.

    Returns
    -------
    float
        Fraction of sampled triples whose score x_uij was already positive
        before the corresponding update. 0.0 when no triple could be sampled.
    """
    data = np.asarray(self.data)

    # Only users with at least one interacted AND one non-interacted item can
    # yield a (u, i, j) triple; sampling any other user would make
    # np.random.choice fail on an empty candidate list.
    positives = data == 1
    negatives = data == 0
    eligible = np.where(positives.any(axis=1) & negatives.any(axis=1))[0]
    if eligible.size == 0 or self.iteration <= 0:
      return 0.0

    predict = 0
    sampled = 0

    for _ in range(self.iteration):
      u = np.random.choice(eligible)
      i = np.random.choice(np.where(positives[u])[0])      # item the user interacted with
      j = np.random.choice(np.where(negatives[u])[0])      # item the user did not interact with

      # x_ui / x_uj sum the weights between the target item and the user's
      # interacted items, leaving out the target item itself.
      interacted = np.where(positives[u])[0]
      list_i = interacted[interacted != i]
      list_j = interacted[interacted != j]   # j is never interacted, so nothing is dropped here
      x_ui = np.sum(self.C_matrix[i, list_i])
      x_uj = np.sum(self.C_matrix[j, list_j])

      x_uij = x_ui - x_uj
      # sigma(-x) = e^-x / (1 + e^-x), evaluated as exp(-log(1 + e^x)) so that
      # large |x| cannot produce inf / inf = nan.
      weight = np.exp(-np.logaddexp(0.0, x_uij))

      # d x_uij / d c_il = +1 and d x_uij / d c_jl = -1; the regulariser enters
      # with a minus sign because this is gradient ascent on a penalised objective.
      gradient_c_i = weight - self.lambda_parameter * self.C_matrix[i, list_i]
      gradient_c_j = -weight - self.lambda_parameter * self.C_matrix[j, list_j]
      self.C_matrix[i, list_i] = self.C_matrix[i, list_i] + self.alpha * gradient_c_i
      self.C_matrix[j, list_j] = self.C_matrix[j, list_j] + self.alpha * gradient_c_j

      if x_uij > 0:
        predict += 1
      sampled += 1

    auc = predict / sampled
    return auc

In [18]:
# Train BPR-kNN on the implicit matrix with 10 sampled triples and keep the returned score.
bpr_knn = BPR_KNN(alpha=0.01, dimension=20, iteration=10, lambda_parameter=0.01, data=P_matrix)
auc = bpr_knn.fit()