In [76]:
import numpy as np
import numpy.linalg as LA

# Number of dimensions kept after the reduction
DIM = 2

# Rating history of a single user.
# Each row is one item: the first three columns are feature values and the last
# column is the rating (+1 / -1). NaN presumably marks items without a rating yet.
Du = np.array(
    [
        [5, 3, 3, +1],
        [6, 2, 5, +1],
        [4, 1, 5, +1],
        [8, 5, 9, -1],
        [2, 4, 2, -1],
        [3, 6, 5, -1],
        [7, 6, 8, -1],
        [4, 2, 3, np.nan],
        [5, 1, 8, np.nan],
        [8, 6, 6, np.nan],
        [3, 4, 2, np.nan],
        [4, 7, 5, np.nan],
        [4, 4, 4, np.nan],
    ]
)
I = np.arange(len(Du))  # item (row) indices
x, ru = Du[:, :-1], Du[:, -1]  # feature columns / rating column (both are views of Du)

# 分散共分散行列

In [77]:
# Mean of each feature column, taken over all items
xk_mean = x.mean(axis=0)
xk_mean.round(3)

array([4.846, 3.923, 5.   ])

In [78]:
# Population variance (NumPy default, ddof=0) of each feature column
s2 = x.var(axis=0)
s2.round(3)

array([3.361, 3.763, 4.769])

In [79]:
# Feature (column) indices
K = np.arange(np.size(x, 1))

In [80]:
# Zero-filled buffer with the same (rows, columns) shape as the 2-D array x
x2 = np.zeros(x.shape)

In [81]:
# Standardize every feature column: subtract its mean and divide by its
# standard deviation (square root of the variance s2).
# Broadcasting applies the per-column statistics to all rows at once, which
# replaces the element-by-element Python loops and yields the same values.
# The result is rounded to 3 decimals, so later cells work on rounded values.
x2 = np.round((x - xk_mean) / np.sqrt(s2), decimals=3)

In [82]:
# Variance-covariance matrix of the standardized features, rounded to 3 decimals.
# rowvar=False: each column of x2 is one variable; bias=True: normalize by N.
S = np.round(np.cov(x2, rowvar=False, bias=True), decimals=3)
S

array([[1.   , 0.191, 0.749],
       [0.191, 1.   , 0.163],
       [0.749, 0.163, 1.   ]])

# 固有値・固有ベクトル

In [83]:
# Eigen-decomposition of S; eigenvalues and eigenvectors are both rounded to
# 3 decimals. Column v[:, j] is the eigenvector paired with eigenvalue lmd[j].
# NOTE(review): S is symmetric, so LA.eigh would also apply; LA.eig is kept so
# the ordering and signs of the results stay as they are.
lmd, v = (np.round(part, decimals=3) for part in LA.eig(S))

In [84]:
# Eigenvalues (shown in the order currently stored in lmd)
lmd

array([1.825, 0.25 , 0.925])

In [85]:
# Eigenvectors (one eigenvector per column of v)
v

array([[-0.679, -0.71 ,  0.185],
       [-0.29 ,  0.028, -0.956],
       [-0.674,  0.704,  0.225]])

In [86]:
# Positions of the eigenvalues ordered from largest to smallest
# (ascending argsort of the negated values)
indices = (-lmd).argsort()
indices

array([0, 2, 1])

In [87]:
# Sort the eigenvalues (and, in the next cell, the eigenvectors) in descending order.
# NOTE: lmd is rebound in place, so re-running this cell without recomputing
# `indices` applies the permutation a second time.
lmd = np.take(lmd, indices)
lmd

array([1.825, 0.925, 0.25 ])

In [88]:
# Reorder the eigenvector columns with the same permutation used for lmd,
# so that column j of v still belongs to lmd[j].
v = np.take(v, indices, axis=1)
v

array([[-0.679,  0.185, -0.71 ],
       [-0.29 , -0.956,  0.028],
       [-0.674,  0.225,  0.704]])

In [89]:
# Eigenvectors up to the DIM-th principal component (first DIM columns of v).
# Integer-index selection returns a copy rather than a view of v.
V = v[:, list(range(DIM))]
V

array([[-0.679,  0.185],
       [-0.29 , -0.956],
       [-0.674,  0.225]])

In [96]:
# Feature vectors after dimensionality reduction: project the standardized
# features x2 onto the columns of V, then round to 3 decimals.
x3 = np.round(np.matmul(x2, V), decimals=3)
x3

array([[ 0.698,  0.264],
       [-0.14 ,  1.064],
       [ 0.751,  1.355],
       [-2.564,  0.2  ],
       [ 1.968, -0.635],
       [ 0.373, -1.21 ],
       [-2.034, -0.497],
       [ 1.218,  0.656],
       [-0.546,  1.765],
       [-1.787, -0.603],
       [ 1.598, -0.534],
       [-0.146, -1.602],
       [ 0.611, -0.227]])

# 寄与率

In [97]:
# Contribution ratio of the first principal component:
# its eigenvalue divided by the sum of all eigenvalues.
k = 0
pk = lmd[k] / lmd.sum()
pk

0.6083333333333333

In [98]:
# Cumulative contribution ratio up to the second principal component:
# sum of the first k eigenvalues divided by the sum of all eigenvalues.
k = 2
ck = lmd[:k].sum() / lmd.sum()
ck

0.9166666666666666

# 次元削減後の評価履歴

In [111]:
# Rating history after dimensionality reduction:
# the reduced feature vectors with the rating appended as the last column.
Du2 = np.column_stack((x3, ru))
Du2

array([[ 0.698,  0.264,  1.   ],
       [-0.14 ,  1.064,  1.   ],
       [ 0.751,  1.355,  1.   ],
       [-2.564,  0.2  , -1.   ],
       [ 1.968, -0.635, -1.   ],
       [ 0.373, -1.21 , -1.   ],
       [-2.034, -0.497, -1.   ],
       [ 1.218,  0.656,    nan],
       [-0.546,  1.765,    nan],
       [-1.787, -0.603,    nan],
       [ 1.598, -0.534,    nan],
       [-0.146, -1.602,    nan],
       [ 0.611, -0.227,    nan]])