Reference : https://github.com/seohl16/GNN_RECSYS_paper/blob/master/RecommenderSystem/PMF/pmf1-simpledataframe.ipynb

In [3]:
import numpy as np 
import pandas as pd 
from matplotlib import pyplot as plt 
import random
import argparse
import pickle
import torch

In [4]:
# N = 7, M = 5인 Preference matrix 
R = np.array([
    [1, 0, 0, 1, 3],
    [2, 0, 3, 1, 1],
    [1, 2, 0, 5, 0],
    [1, 0, 0, 4, 4],
    [2, 1, 5, 4, 0],
    [5, 1, 5, 4, 0],
    [0, 0, 0, 1, 0],
])

# The latent matrix의 사이즈는 (7, d), (d, 5)

In [7]:
class ProbabilisticMatrixFactorization(torch.nn.Module): 
    def __init__(self, R, d=10, lr=0.01, epochs=10):
      super(ProbabilisticMatrixFactorization, self).__init__()
      self.R = R                  # Preference matrix
      self.d = d                  # Hyperparameter for dimension
      self.lr = lr                # Learning late
      self.epochs = epochs        
      self.nusers = R.shape[0]    # N
      self.mitems = R.shape[1]    # M 

      # latent vectors
      self.user_vecs = np.random.normal(scale = 1. / self.d, size=(self.nusers, self.d))  # 평균이 0, 표준편차가 1. / self.d에서 나오는 난수 값을 가지는 [self.nusers, self.d] 행렬 
      self.item_vecs = np.random.normal(scale = 1. / self.d, size=(self.mitems, self.d))

      self.all_mean = np.sum(self.R) / len(np.where(self.R > 0)[0])
      print("Average of all ratings:", self.all_mean)

    def fit(self) :
      xi, yi = self.R.nonzero() 
      print("Model epochs:", self.epochs)
      for epoch in range(self.epochs):
        for i, j in zip(xi, yi):
          residual = self.R[i, j] - np.dot(self.user_vecs[i], self.item_vecs[j].transpose())
          if i == 0 and j == 3 and epoch % 10 == 0:
            print("[Epoch %2d] Target Score %d, Predicted Score %.4f, Residual %.4f" %(epoch, self.R[i, j], np.dot(self.user_vecs[i], self.item_vecs[j].transpose()), residual))
          temp = self.user_vecs[i, :] 
          self.user_vecs[i, :] += self.lr * residual * self.item_vecs[j, :] # Gradient Descent 효과 
          self.item_vecs[j, :] += self.lr * residual * temp                 # Gradient Descent 효과


    def test(self, user_ind, item_ind):   # U(T) * V 행렬 곱을 의미함 
      if user_ind >= self.nusers or item_ind >= self.mitems or user_ind < 0 or item_ind < 0:  # 평가하지 않은 데이터는 전체 평균으로 처리 
        return self.all_mean
      nanCheck = np.dot(self.user_vecs[user_ind, :], self.item_vecs[item_ind, :])             # 평가하지 않은 데이터는 전체 평균으로 처리 
      if np.isnan(nanCheck):
        return self.all_mean 
      else: 
        return np.dot(self.user_vecs[user_ind, :], self.item_vecs[item_ind, :]) 
      

In [9]:
# The loss of a single element is decreasing
factorizer = ProbabilisticMatrixFactorization(R, d=10, lr=0.01, epochs=100)
factorizer.fit()

Average of all ratings: 2.590909090909091
Model epochs: 100
[Epoch  0] Target Score 1, Predicted Score -0.0161, Residual 1.0161
[Epoch 10] Target Score 1, Predicted Score 0.0752, Residual 0.9248
[Epoch 20] Target Score 1, Predicted Score 0.5848, Residual 0.4152
[Epoch 30] Target Score 1, Predicted Score 1.4007, Residual -0.4007
[Epoch 40] Target Score 1, Predicted Score 1.6973, Residual -0.6973
[Epoch 50] Target Score 1, Predicted Score 1.7594, Residual -0.7594
[Epoch 60] Target Score 1, Predicted Score 1.7178, Residual -0.7178
[Epoch 70] Target Score 1, Predicted Score 1.6280, Residual -0.6280
[Epoch 80] Target Score 1, Predicted Score 1.5240, Residual -0.5240
[Epoch 90] Target Score 1, Predicted Score 1.4238, Residual -0.4238


In [10]:
# Truth
R[0, 4]

3

In [11]:
# Predicted. (2.9 and 3 is similar)
factorizer.test(0,4)

2.695063265815001

In [12]:
pip freeze

anyio==3.7.1
appnope==0.1.3
argon2-cffi==23.1.0
argon2-cffi-bindings==21.2.0
attrs==23.2.0
backcall==0.2.0
beautifulsoup4==4.12.2
bleach==6.0.0
certifi @ file:///private/var/folders/sy/f16zz6x50xz3113nwtb9bvq00000gp/T/abs_477u68wvzm/croot/certifi_1671487773341/work/certifi
cffi==1.15.1
comm==0.1.4
cycler==0.11.0
debugpy==1.7.0
decorator==5.1.1
defusedxml==0.7.1
entrypoints==0.4
exceptiongroup==1.2.0
fastjsonschema==2.19.1
fonttools==4.38.0
gensim==4.2.0
idna==3.6
importlib-metadata==6.7.0
importlib-resources==5.12.0
ipykernel==6.16.2
ipython==7.34.0
ipython-genutils==0.2.0
ipywidgets==8.1.1
jedi==0.19.1
Jinja2==3.1.2
jsonschema==4.17.3
jupyter==1.0.0
jupyter-console==6.6.3
jupyter-server==1.24.0
jupyter_client==7.4.9
jupyter_core==4.12.0
jupyterlab-pygments==0.2.2
jupyterlab-widgets==3.0.9
kiwisolver==1.4.5
MarkupSafe==2.1.3
matplotlib==3.5.3
matplotlib-inline==0.1.6
mistune==3.0.2
nbclassic==1.0.0
nbclient==0.7.4
nbconvert==7.6.0
nbformat==5.8.0
nest-asyncio==1.5.8
networkx==2.6.3
not