# 評価値行列の次元削減

In [35]:
import numpy as np
import numpy.linalg as LA
np.set_printoptions(precision=3)

# Number of dimensions kept after the reduction.
DIM = 2

# Rating matrix: one row per user, one column per item; np.nan marks a
# missing rating.
R = np.array([
    [np.nan, 4, 3, 1, 2, np.nan],
    [5, 5, 4, np.nan, 3, 3],
    [4, np.nan, 5, 3, 2, np.nan],
    [np.nan, 3, np.nan, 2, 1, 1],
    [2, 1, 2, 4, np.nan, 3],
])

# Index arrays for the users (rows) and the items (columns).
n_users, n_items = R.shape
U = np.arange(n_users)
I = np.arange(n_items)

# Boolean mask of the observed (non-missing) ratings, computed once.
rated = ~np.isnan(R)

# Ui[i]: users who rated item i.  Iu[u]: items rated by user u.
Ui = [U[rated[:, item]] for item in I]
Iu = [I[rated[user, :]] for user in U]

# Centre every user's ratings on that user's own mean (missing entries are
# skipped when averaging and stay nan afterwards).
ru_mean = np.nanmean(R, axis=1)
R2 = R - ru_mean[:, np.newaxis]

In [36]:
# Display the user-mean-centered rating matrix (missing ratings remain nan).
R2

array([[  nan,  1.5 ,  0.5 , -1.5 , -0.5 ,   nan],
       [ 1.  ,  1.  ,  0.  ,   nan, -1.  , -1.  ],
       [ 0.5 ,   nan,  1.5 , -0.5 , -1.5 ,   nan],
       [  nan,  1.25,   nan,  0.25, -0.75, -0.75],
       [-0.4 , -1.4 , -0.4 ,  1.6 ,   nan,  0.6 ]])

In [37]:
# Column-wise (per-item) mean of the user-centered ratings, skipping nan
# entries.
ri2_mean = np.nanmean(R2, axis=0)
ri2_mean

array([ 0.367,  0.588,  0.4  , -0.037, -0.938, -0.383])

In [38]:
# Column-wise (per-item) variance of the user-centered ratings, skipping nan
# entries. np.nanvar uses ddof=0 by default, i.e. it divides by the number of
# observed ratings.
s2 = np.nanvar(R2, axis=0)
s2

array([0.336, 1.348, 0.505, 1.279, 0.137, 0.494])

In [39]:
# Worked example: covariance between items 0 and 1, taken over the users who
# rated both of them.
i, j = 0, 1
Uij = np.intersect1d(Ui[i], Ui[j])
dev_i = R2[Uij, i] - ri2_mean[i]
dev_j = R2[Uij, j] - ri2_mean[j]
sij = np.sum(dev_i * dev_j) / Uij.size
sij

0.8924999999999998

In [40]:
# Item-by-item covariance matrix. Each entry only uses the users who rated
# both items, and is rounded to three decimals before being stored.
S = np.zeros((I.size, I.size))
for i in I:
    for j in I:
        Uij = np.intersect1d(Ui[i], Ui[j])
        dev_i = R2[Uij, i] - ri2_mean[i]
        dev_j = R2[Uij, j] - ri2_mean[j]
        S[i, j] = round(np.sum(dev_i * dev_j) / Uij.size, 3)

S

array([[ 0.336,  0.892,  0.169, -0.659, -0.057, -0.572],
       [ 0.892,  1.348,  0.505, -1.466,  0.166, -0.817],
       [ 0.169,  0.505,  0.505, -0.655, -0.183, -0.27 ],
       [-0.659, -1.466, -0.655,  1.279, -0.109,  0.752],
       [-0.057,  0.166, -0.183, -0.109,  0.137, -0.015],
       [-0.572, -0.817, -0.27 ,  0.752, -0.015,  0.494]])

# 固有値・固有ベクトル


In [41]:
# Eigen-decomposition of the item covariance matrix: lmd receives the
# eigenvalues and v the eigenvectors, stored as columns.
# NOTE(review): S is filled symmetrically, so LA.eigh looks applicable too;
# confirm before switching, since the ordering and signs of the results may
# differ from what LA.eig returns.
lmd, v = LA.eig(S)

In [42]:
# Display the eigenvalues in the order LA.eig returned them (not sorted).
lmd

array([ 3.908,  0.48 ,  0.233, -0.313, -0.049, -0.16 ])

In [53]:
# Display the eigenvectors; column k belongs to eigenvalue lmd[k].
v

array([[-0.327, -0.228, -0.484, -0.685,  0.279, -0.246],
       [-0.609, -0.212,  0.1  ,  0.565,  0.37 , -0.344],
       [-0.245,  0.806,  0.098, -0.134, -0.203, -0.471],
       [ 0.583, -0.126, -0.374,  0.258, -0.02 , -0.661],
       [-0.028, -0.462,  0.624, -0.294, -0.395, -0.392],
       [ 0.348,  0.157,  0.466, -0.204,  0.767, -0.088]])

In [54]:
# Keep the eigenvectors of the DIM largest eigenvalues, i.e. the first DIM
# principal components, as the columns of V.
top_idx = np.argsort(-lmd)[:DIM]
V = v[:, top_idx]
V

array([[-0.327, -0.228],
       [-0.609, -0.212],
       [-0.245,  0.806],
       [ 0.583, -0.126],
       [-0.028, -0.462],
       [ 0.348,  0.157]])

In [60]:
# Worked example: component 0 of user 0, i.e. the average over the items the
# user rated of (centered rating) * (item weight on that component).
u, k = 0, 0
np.sum(R2[u, Iu[u]] * V[Iu[u], k]) / Iu[u].size

-0.47421196817216366

In [59]:
# First row of V: the weights of item 0 on each of the DIM kept components.
V[0,:]

array([-0.327, -0.228])

In [63]:
# 潜在因子行列
P = np.zeros((U.size, DIM))
for u in U:
    for k in np.arange(DIM):
        P[u,k] = np.sum([R2[u,i]*V[i,k] for i in Iu[u]]) / Iu[u].size
P

array([[-0.474,  0.126],
       [-0.251, -0.027],
       [-0.195,  0.463],
       [-0.214, -0.017],
       [ 0.445, -0.009]])