# 第8章 評価値行列の次元削減

## 準備

In [1]:
import numpy as np
import numpy.linalg as LA
# Print arrays with three digits after the decimal point.
np.set_printoptions(precision=3)

# Number of dimensions kept after the reduction.
DIM = 2

# Rating matrix: one row per user, one column per item; np.nan marks a
# rating that was not given.
R = np.array([
    [np.nan, 4, 3, 1, 2, np.nan],
    [5, 5, 4, np.nan, 3, 3],
    [4, np.nan, 5, 3, 2, np.nan],
    [np.nan, 3, np.nan, 2, 1, 1],
    [2, 1, 2, 4, np.nan, 3],
])

# Index vectors for the users (rows) and the items (columns).
U = np.arange(len(R))
I = np.arange(R.shape[1])

# Ui[i]: users who rated item i (walk the columns of the "is rated" mask).
Ui = [U[rated] for rated in (~np.isnan(R)).T]
# Iu[u]: items rated by user u (walk the rows of the same mask).
Iu = [I[rated] for rated in ~np.isnan(R)]

# Mean-center each user's ratings around that user's own mean, ignoring
# missing entries; the new axis lets the row means broadcast over columns.
ru_mean = np.nanmean(R, axis=1)
R2 = R - ru_mean[:, np.newaxis]

## 分散共分散行列

### 01 各アイテムに対して与えられた平均中心化評価値の平均値

In [2]:
# Per-item (column-wise) mean of the mean-centered ratings R2; np.nanmean
# skips the NaN entries, so only the users who rated an item contribute.
ri2_mean = np.nanmean(R2, axis=0)
print('ri2_mean = {}'.format(ri2_mean))

ri2_mean = [ 0.367  0.588  0.4   -0.037 -0.938 -0.383]


In [3]:
# Same per-item mean written out by hand: for every item, add up the
# mean-centered ratings of the users who rated it and scale by one over
# the number of those users.
ri2_mean = np.array([
    (1 / raters.size) * np.sum(R2[raters, item])
    for item, raters in enumerate(Ui)
])
print('ri2_mean = {}'.format(ri2_mean))

ri2_mean = [ 0.367  0.588  0.4   -0.037 -0.938 -0.383]


### 02 各アイテムの平均中心化評価値の分散

In [4]:
# Per-item (column-wise) variance of the mean-centered ratings R2, ignoring
# NaN entries. np.nanvar defaults to ddof=0, i.e. it divides by the number
# of non-NaN values in each column.
s2 = np.nanvar(R2, axis=0)
print('s2 = {}'.format(s2))

s2 = [0.336 1.348 0.505 1.279 0.137 0.494]


In [5]:
# Same per-item variance written out by hand: average the squared
# deviation from the item mean over the users who rated the item.
s2 = np.array([
    (1 / raters.size)
    * np.sum([(R2[u, item] - ri2_mean[item]) ** 2 for u in raters])
    for item, raters in enumerate(Ui)
])
print('s2 = {}'.format(s2))

s2 = [0.336 1.348 0.505 1.279 0.137 0.494]


In [6]:
# Per-item variance again, this time working on whole columns: subtract
# the item mean from the column, square, and let np.nansum drop the NaN
# (unrated) entries before dividing by the number of raters.
s2 = np.array([
    (1 / raters.size) * np.nansum((column - center) ** 2)
    for column, center, raters in zip(R2.T, ri2_mean, Ui)
])
print('s2 = {}'.format(s2))

s2 = [0.336 1.348 0.505 1.279 0.137 0.494]


### 03 アイテムiとアイテムjの平均中心化評価値の共分散

In [7]:
# Pair of items whose covariance is computed.
i, j = 0, 1
# Users who rated both item i and item j.
Uij = np.intersect1d(Ui[i], Ui[j])
if Uij.size > 0:
    # Mean product of the two items' deviations over the common raters.
    sij = (1 / Uij.size) * np.sum(
        (R2[Uij, i] - ri2_mean[i]) * (R2[Uij, j] - ri2_mean[j])
    )
else:
    # No user rated both items: the covariance is taken to be 0.
    sij = 0
print('s{}{} = {:.3f}'.format(i, j, sij))

s01 = 0.892


### 04 分散共分散行列

In [8]:
# Variance-covariance matrix of the items. Every entry starts at 0 and is
# overwritten only when at least one user rated both items of the pair.
S = np.zeros((len(I), len(I)))
for i in I:
    for j in I:
        # Users who rated both item i and item j.
        Uij = np.intersect1d(Ui[i], Ui[j])
        if Uij.size == 0:
            # No common raters: leave S[i, j] at its initial 0.
            continue
        S[i, j] = (1 / Uij.size) * np.sum(
            (R2[Uij, i] - ri2_mean[i]) * (R2[Uij, j] - ri2_mean[j])
        )
print('S = \n{}'.format(S))

S = 
[[ 0.336  0.892  0.169 -0.659 -0.057 -0.572]
 [ 0.892  1.348  0.505 -1.466  0.166 -0.817]
 [ 0.169  0.505  0.505 -0.655 -0.183 -0.27 ]
 [-0.659 -1.466 -0.655  1.279 -0.109  0.752]
 [-0.057  0.166 -0.183 -0.109  0.137 -0.015]
 [-0.572 -0.817 -0.27   0.752 -0.015  0.494]]


## 固有値・固有ベクトル

### 05 固有値・固有ベクトル

In [9]:
# Eigen-decomposition of the variance-covariance matrix S: lmd holds the
# eigenvalues and column v[:, k] is the eigenvector paired with lmd[k].
# numpy.linalg.eig does not guarantee any ordering of the eigenvalues.
# NOTE(review): S[i, j] is built from a formula that is symmetric in i and
# j, so LA.eigh may be the better fit — confirm before switching, since it
# returns eigenvalues in ascending order and eigenvector signs may differ.
lmd, v = LA.eig(S)
print('λ = {}'.format(lmd))
print('v = \n{}'.format(v))

λ = [ 3.909  0.48   0.233 -0.315 -0.049 -0.16 ]
v = 
[[ 0.327  0.228  0.484 -0.685  0.279 -0.245]
 [ 0.609  0.211 -0.099  0.565  0.371 -0.344]
 [ 0.245 -0.806 -0.097 -0.134 -0.202 -0.472]
 [-0.583  0.126  0.374  0.258 -0.019 -0.661]
 [ 0.028  0.462 -0.624 -0.294 -0.394 -0.393]
 [-0.348 -0.157 -0.465 -0.204  0.767 -0.087]]


### 06 第d主成分までの固有ベクトル

In [10]:
# Column order that lists the eigenvalues from largest to smallest:
# argsort is ascending, so flip it.
indices = np.flip(np.argsort(lmd))
# Put the eigenvector columns into that order.
# NOTE: v is overwritten here while lmd keeps its original order, so
# running this cell a second time applies the permutation to v again.
v = v[:, indices]
# Keep the eigenvectors of the first DIM principal components.
V = v[:, 0:DIM]
print('V = \n{}'.format(V))

V = 
[[ 0.327  0.228]
 [ 0.609  0.211]
 [ 0.245 -0.806]
 [-0.583  0.126]
 [ 0.028  0.462]
 [-0.348 -0.157]]


## 主成分得点

### 07 ユーザuの第k主成分得点

In [11]:
# User and principal component whose score is computed.
u, k = 0, 0
# Score of user u on component k: multiply the user's mean-centered ratings
# by the matching eigenvector entries, sum over the items the user rated,
# and divide by the number of those items.
puk = np.sum(R2[u, Iu[u]] * V[Iu[u], k]) / Iu[u].size
print('p{}{} = {:.3f}'.format(u, k, puk))

p00 = 0.474


### 08 潜在因子行列

In [12]:
# Latent factor matrix: P[u, k] is the score of user u on principal
# component k, one row per user and one column per kept component.
P = np.zeros((len(U), DIM))
for u in U:
    for k in range(DIM):
        # Sum over the items rated by u, divided by how many there are.
        P[u, k] = np.sum(R2[u, Iu[u]] * V[Iu[u], k]) / Iu[u].size
print('P = \n{}'.format(P))

P = 
[[ 0.474 -0.127]
 [ 0.251  0.027]
 [ 0.195 -0.463]
 [ 0.214  0.017]
 [-0.445  0.009]]


In [13]:
# Latent factor matrix built in a single expression: one row per user,
# one column per kept principal component.
P = np.array([
    [np.sum(R2[u, rated] * V[rated, k]) / rated.size for k in range(DIM)]
    for u, rated in zip(U, Iu)
])
print('P = \n{}'.format(P))

P = 
[[ 0.474 -0.127]
 [ 0.251  0.027]
 [ 0.195 -0.463]
 [ 0.214  0.017]
 [-0.445  0.009]]
