# 第7章 評価履歴の次元削減

## 準備

In [1]:
import numpy as np
import numpy.linalg as LA
np.set_printoptions(precision=3)

# Number of dimensions to keep after the reduction.
DIM = 2

# One row per item. The last column is split off below as `ru`
# (+1 / -1, or NaN -- presumably "not rated yet"; confirm against the
# chapter text). The remaining columns become the feature matrix `x`.
Du = np.array([
    [5, 3, 3, 1],
    [6, 2, 5, 1],
    [4, 1, 5, 1],
    [8, 5, 9, -1],
    [2, 4, 2, -1],
    [3, 6, 5, -1],
    [7, 6, 8, -1],
    [4, 2, 3, np.nan],
    [5, 1, 8, np.nan],
    [8, 6, 6, np.nan],
    [3, 4, 2, np.nan],
    [4, 7, 5, np.nan],
    [4, 4, 4, np.nan],
])
I = np.arange(len(Du))  # item indices 0 .. n-1
x, ru = Du[:, :-1], Du[:, -1]  # all but the last column / the last column

## 分散共分散行列

### 01 各特徴量の平均値

In [2]:
# Mean of every feature, taken down the rows of x (one value per column).
xk_mean = x.mean(axis=0)
print('xk_mean = {}'.format(xk_mean))

xk_mean = [4.846 3.923 5.   ]


### 02 各特徴量の分散

In [3]:
# Variance of every feature via NumPy. ddof is left at its default of 0,
# so the squared deviations are divided by the number of rows.
s2 = x.var(axis=0)
print('s^2 = {}'.format(s2))

s^2 = [3.361 3.763 4.769]


In [4]:
# The same variance written out element by element: for each feature k,
# average the squared deviation from that feature's mean over all items i.
d = x.shape[-1]  # number of feature columns in x
s2 = np.array([
    (1 / I.size) * np.sum([(x[i, k] - xk_mean[k]) ** 2 for i in I])
    for k in range(d)
])
print('s^2 = {}'.format(s2))

s^2 = [3.361 3.763 4.769]


In [5]:
# Variance again, now vectorised over the items: a whole column of x is
# handled per feature k, leaving only the loop over the features.
s2 = np.array([
    (1 / I.size) * ((x[:, k] - xk_mean[k]) ** 2).sum()
    for k in range(d)
])
print('s^2 = {}'.format(s2))

s^2 = [3.361 3.763 4.769]


In [6]:
# Fully vectorised variance: xk_mean is broadcast across the rows of x and
# the squared deviations are summed down each column.
s2 = (1 / I.size) * ((x - xk_mean) ** 2).sum(axis=0)
print('s^2 = {}'.format(s2))

s^2 = [3.361 3.763 4.769]


### 03 各特徴量の標準化

In [7]:
# Standardise element by element: subtract the feature mean, then divide by
# the square root of that feature's variance s2[k].
x2 = np.array([
    [(x[i, k] - xk_mean[k]) / np.sqrt(s2[k]) for k in range(d)]
    for i in I
])
print('x\' = \n{}'.format(x2))

x' = 
[[ 0.084 -0.476 -0.916]
 [ 0.629 -0.991  0.   ]
 [-0.462 -1.507  0.   ]
 [ 1.72   0.555  1.832]
 [-1.552  0.04  -1.374]
 [-1.007  1.071  0.   ]
 [ 1.175  1.071  1.374]
 [-0.462 -0.991 -0.916]
 [ 0.084 -1.507  1.374]
 [ 1.72   1.071  0.458]
 [-1.007  0.04  -1.374]
 [-0.462  1.586  0.   ]
 [-0.462  0.04  -0.458]]


In [8]:
# Standardise one item (row) at a time; the operations inside a row are
# vectorised across the features.
x2 = np.array([(row - xk_mean) / np.sqrt(s2) for row in x])
print('x\' = \n{}'.format(x2))

x' = 
[[ 0.084 -0.476 -0.916]
 [ 0.629 -0.991  0.   ]
 [-0.462 -1.507  0.   ]
 [ 1.72   0.555  1.832]
 [-1.552  0.04  -1.374]
 [-1.007  1.071  0.   ]
 [ 1.175  1.071  1.374]
 [-0.462 -0.991 -0.916]
 [ 0.084 -1.507  1.374]
 [ 1.72   1.071  0.458]
 [-1.007  0.04  -1.374]
 [-0.462  1.586  0.   ]
 [-0.462  0.04  -0.458]]


In [9]:
# Fully vectorised standardisation: xk_mean and sqrt(s2) are broadcast
# across all rows of x. The arithmetic already yields a new ndarray.
x2 = (x - xk_mean) / np.sqrt(s2)
print('x\' = \n{}'.format(x2))

x' = 
[[ 0.084 -0.476 -0.916]
 [ 0.629 -0.991  0.   ]
 [-0.462 -1.507  0.   ]
 [ 1.72   0.555  1.832]
 [-1.552  0.04  -1.374]
 [-1.007  1.071  0.   ]
 [ 1.175  1.071  1.374]
 [-0.462 -0.991 -0.916]
 [ 0.084 -1.507  1.374]
 [ 1.72   1.071  0.458]
 [-1.007  0.04  -1.374]
 [-0.462  1.586  0.   ]
 [-0.462  0.04  -0.458]]


### 04 標準化された特徴量kと特徴量lの共分散

In [10]:
# Covariance of standardised features k and l, read from NumPy's covariance
# matrix. x2 is transposed so that each row is one feature (np.cov's default
# layout); bias=True normalises by the number of observations N, not N - 1.
k, l = 0, 1
skl = np.cov(x2.T, bias=True)[k, l]
print('s{}{} = {:.3f}'.format(k, l, skl))

s01 = 0.191


In [11]:
# The same covariance written as an explicit sum over the items i. No mean is
# subtracted here: the formula relies on x2 being the standardised array.
k, l = 0, 1
skl = (1 / I.size) * np.sum(
    [x2[i, k] * x2[i, l] for i in I]
)
print('s{}{} = {:.3f}'.format(k, l, skl))

s01 = 0.191


In [12]:
# Vectorised form: multiply columns k and l of x2 element-wise, then average.
k, l = 0, 1
skl = (1 / I.size) * (x2[:, k] * x2[:, l]).sum()
print('s{}{} = {:.3f}'.format(k, l, skl))

s01 = 0.191


### 05 分散共分散行列

In [13]:
# Covariance matrix of the standardised features via NumPy. x2 is transposed
# so each row is one feature; bias=True normalises by N rather than N - 1.
S = np.cov(x2.T, bias=True)
print('S = \n{}'.format(S))

S = 
[[1.    0.191 0.749]
 [0.191 1.    0.163]
 [0.749 0.163 1.   ]]


In [14]:
# Fill the d x d covariance matrix entry by entry. np.ndindex walks the
# (k, l) index pairs in row-major order, like a loop over k with an inner
# loop over l.
S = np.zeros((d, d))
for k, l in np.ndindex(d, d):
    S[k, l] = (1 / I.size) * np.sum([x2[i, k] * x2[i, l] for i in I])
print('S = \n{}'.format(S))

S = 
[[1.    0.191 0.749]
 [0.191 1.    0.163]
 [0.749 0.163 1.   ]]


In [15]:
# Covariance matrix as a nested comprehension. The outer loop runs over the
# row index k and the inner loop over the column index l, so entry [k][l]
# holds s_kl, the same layout as S[k, l] in the explicit double loop.
# (Nesting the loops the other way round builds the transpose; that only
# gives the same numbers because the matrix is symmetric.)
S = np.array([
    [(1 / I.size) * np.sum([x2[i, k] * x2[i, l] for i in I]) for l in range(d)]
    for k in range(d)
])
print('S = \n{}'.format(S))

S = 
[[1.    0.191 0.749]
 [0.191 1.    0.163]
 [0.749 0.163 1.   ]]


## 固有値・固有ベクトル

### 06 固有値・固有ベクトル

In [16]:
# Eigen-decomposition of the covariance matrix S. lmd receives the eigenvalues
# and column v[:, j] is the eigenvector paired with lmd[j]; eig does not sort
# them, which is why the following cells reorder both arrays.
# NOTE(review): S is symmetric, so LA.eigh would also apply, but it returns the
# eigenvalues in ascending order and may choose different eigenvector signs,
# so the outputs recorded in this notebook would change. Left as LA.eig.
lmd, v = LA.eig(S)
print('λ = {}'.format(lmd))
print('v = \n{}'.format(v))

λ = [1.826 0.25  0.924]
v = 
[[-0.679 -0.71   0.186]
 [-0.291  0.028 -0.956]
 [-0.674  0.704  0.225]]


### 07 固有値の降順にソートしたインデックス配列

In [17]:
# argsort orders ascending; reversing the result lists the positions of the
# eigenvalues from largest to smallest.
indices = lmd.argsort()[::-1]
print('indices = {}'.format(indices))

indices = [0 2 1]


### 08 固有値の降順に固有値配列をソート

In [18]:
# Put the eigenvalues into descending order.
# NOTE: lmd is overwritten, so re-running this cell on its own applies the
# permutation in `indices` a second time.
lmd = np.take(lmd, indices)
print('λ = {}'.format(lmd))

λ = [1.826 0.924 0.25 ]


### 09 固有値の降順に固有ベクトル配列をソート

In [19]:
# Reorder the eigenvector columns with the same permutation, so that column j
# of v still belongs to lmd[j].
# NOTE: v is overwritten, so re-running this cell on its own permutes the
# columns again.
v = np.take(v, indices, axis=1)
print('v = \n{}'.format(v))

v = 
[[-0.679  0.186 -0.71 ]
 [-0.291 -0.956  0.028]
 [-0.674  0.225  0.704]]


### 10 第DIM主成分までの固有ベクトル

In [20]:
# Keep only the first DIM columns of v, i.e. the eigenvectors of the DIM
# largest eigenvalues once v has been sorted by the preceding cell.
V = v[:, 0:DIM]
print('V = \n{}'.format(V))

V = 
[[-0.679  0.186]
 [-0.291 -0.956]
 [-0.674  0.225]]


## 主成分得点

### 11 アイテムiの第k主成分得点

In [21]:
# Score of item i on principal component k: the sum over the features l of
# the standardised value x2[i, l] times the eigenvector entry V[l, k].
i, k = 0, 0
xik3 = np.sum(
    [x2[i, l] * V[l, k] for l in range(d)]
)
print('x{}{}\'\' = {:.3f}'.format(i, k, xik3))

x00'' = 0.699


### 12 各アイテムの次元削減後の特徴ベクトル

In [22]:
# Project every standardised item onto the retained eigenvectors in one
# matrix product; row i of x3 holds the scores of item i.
x3 = x2 @ V
print('x\'\' = \n{}'.format(x3))

x'' = 
[[ 0.699  0.264]
 [-0.139  1.065]
 [ 0.752  1.355]
 [-2.564  0.202]
 [ 1.969 -0.636]
 [ 0.373 -1.211]
 [-2.035 -0.496]
 [ 1.219  0.656]
 [-0.545  1.766]
 [-1.788 -0.601]
 [ 1.598 -0.535]
 [-0.148 -1.603]
 [ 0.611 -0.227]]


## 寄与率

### 13 第k主成分の寄与率

In [23]:
# Contribution ratio of component k: its eigenvalue divided by the sum of
# the first d eigenvalues (lmd[0] .. lmd[d-1]).
k = 0
pk = lmd[k] / np.sum(lmd[:d])
print('第{}主成分の寄与率 = {:.3f}'.format(k+1, pk))

第1主成分の寄与率 = 0.609


### 14 第k主成分までの累積寄与率

In [24]:
# Cumulative contribution ratio: the first k eigenvalues (lmd[0] .. lmd[k-1])
# over the first d eigenvalues. Here k counts components, so it is printed
# as-is rather than as k + 1.
k = 2
ck = np.sum(lmd[:k]) / np.sum(lmd[:d])
print('第{}主成分までの累積寄与率 = {:.3f}'.format(k, ck))

第2主成分までの累積寄与率 = 0.917


## 推薦

### 15 次元削減後の評価履歴

In [25]:
# Reattach the rating column to the reduced features: ru is turned into an
# (n, 1) column and stacked to the right of x3 with hstack.
Du2 = np.hstack([x3, ru.reshape(-1, 1)])
print('Du\' = \n{}'.format(Du2))

Du' = 
[[ 0.699  0.264  1.   ]
 [-0.139  1.065  1.   ]
 [ 0.752  1.355  1.   ]
 [-2.564  0.202 -1.   ]
 [ 1.969 -0.636 -1.   ]
 [ 0.373 -1.211 -1.   ]
 [-2.035 -0.496 -1.   ]
 [ 1.219  0.656    nan]
 [-0.545  1.766    nan]
 [-1.788 -0.601    nan]
 [ 1.598 -0.535    nan]
 [-0.148 -1.603    nan]
 [ 0.611 -0.227    nan]]


In [26]:
# Same result with np.append: ru gets a trailing axis so it is an (n, 1)
# column, and axis=1 appends it as a new last column of x3.
Du2 = np.append(x3, ru[:, np.newaxis], axis=1)
print('Du\' = \n{}'.format(Du2))

Du' = 
[[ 0.699  0.264  1.   ]
 [-0.139  1.065  1.   ]
 [ 0.752  1.355  1.   ]
 [-2.564  0.202 -1.   ]
 [ 1.969 -0.636 -1.   ]
 [ 0.373 -1.211 -1.   ]
 [-2.035 -0.496 -1.   ]
 [ 1.219  0.656    nan]
 [-0.545  1.766    nan]
 [-1.788 -0.601    nan]
 [ 1.598 -0.535    nan]
 [-0.148 -1.603    nan]
 [ 0.611 -0.227    nan]]


In [27]:
# Same result with np.concatenate: join x3 and the (n, 1) rating column
# along the column axis.
Du2 = np.concatenate((x3, ru.reshape(-1, 1)), axis=1)
print('Du\' = \n{}'.format(Du2))

Du' = 
[[ 0.699  0.264  1.   ]
 [-0.139  1.065  1.   ]
 [ 0.752  1.355  1.   ]
 [-2.564  0.202 -1.   ]
 [ 1.969 -0.636 -1.   ]
 [ 0.373 -1.211 -1.   ]
 [-2.035 -0.496 -1.   ]
 [ 1.219  0.656    nan]
 [-0.545  1.766    nan]
 [-1.788 -0.601    nan]
 [ 1.598 -0.535    nan]
 [-0.148 -1.603    nan]
 [ 0.611 -0.227    nan]]
