# 第2章 評価値行列

## 準備

In [1]:
import pprint
import numpy as np
# Print floating-point array entries with 3 digits of precision so the
# matrices shown in this notebook stay compact.
np.set_printoptions(precision=3)

## 評価値行列

### 01 評価値行列の生成

In [2]:
# Rating matrix R: each row is a user, each column is an item.
# A rating that has not been observed is stored as NaN.
nan = np.nan
R = np.array([
    [nan, 4,   3,   1,   2,   nan],
    [5,   5,   4,   nan, 3,   3  ],
    [4,   nan, 5,   3,   2,   nan],
    [nan, 3,   nan, 2,   1,   1  ],
    [2,   1,   2,   4,   nan, 3  ],
])
print('R = \n{}'.format(R))

R = 
[[nan  4.  3.  1.  2. nan]
 [ 5.  5.  4. nan  3.  3.]
 [ 4. nan  5.  3.  2. nan]
 [nan  3. nan  2.  1.  1.]
 [ 2.  1.  2.  4. nan  3.]]


### 02 ユーザ集合

In [3]:
# User set U: the row indices of R, i.e. 0 .. (number of rows - 1).
n_users = len(R)
U = np.arange(n_users)
print('U = {}'.format(U))

U = [0 1 2 3 4]


### 03 アイテム集合

In [4]:
# Item set I: the column indices of R, i.e. 0 .. (number of columns - 1).
n_items = R.shape[1]
I = np.arange(n_items)
print('I = {}'.format(I))

I = [0 1 2 3 4 5]


### 04 ユーザ数

In [5]:
# Number of users = number of entries in the 1-D user index array.
print('|U| = {}'.format(len(U)))

|U| = 5


### 05 アイテム数

In [6]:
# Number of items = number of entries in the 1-D item index array.
print('|I| = {}'.format(len(I)))

|I| = 6


### 06 評価値

In [7]:
# Look up the rating stored at row u (user) and column i (item) of R.
u, i = 0, 1
r_ui = R[u, i]
print('r{}{} = {}'.format(u, i, r_ui))

r01 = 4.0


## 評価値行列の疎性

### 07 評価値行列の全要素数

In [8]:
# Total number of cells in R (observed or not): rows times columns.
n_rows, n_cols = R.shape
print('Rの全要素数 = {}'.format(n_rows * n_cols))

Rの全要素数 = 30


### 08 観測されているか否かの判定

In [9]:
# Boolean mask with the same shape as R: True where a rating is present,
# False where the cell holds NaN.
observed = ~np.isnan(R)
print('観測値 = \n{}'.format(observed))

観測値 = 
[[False  True  True  True  True False]
 [ True  True  True False  True  True]
 [ True False  True  True  True False]
 [False  True False  True  True  True]
 [ True  True  True  True False  True]]


### 09 評価値行列の観測値数

In [10]:
# Count the observed ratings by counting the True cells of the non-NaN mask.
observed = ~np.isnan(R)
n_observed = np.count_nonzero(observed)
print('|R| = {}'.format(n_observed))

|R| = 22


In [11]:
# Alternative count: select the non-NaN entries of R (boolean indexing gives
# a 1-D array of them) and take the length of that selection.
observed_values = R[~np.isnan(R)]
print('|R| = {}'.format(observed_values.size))

|R| = 22


### 10 評価値行列の疎性

In [12]:
# Sparsity = 1 - (observed ratings) / (number of items * number of users),
# i.e. the fraction of cells of R that are missing.
n_observed = np.count_nonzero(~np.isnan(R))
n_cells = I.size * U.size
sparsity = 1 - n_observed / n_cells
print('sparsity = {:.3f}'.format(sparsity))

sparsity = 0.267


## 評価済みアイテム集合

### 11 ユーザuが評価済みのアイテム集合

In [13]:
# Items rated by user u: keep the item indices whose entry in row u is not NaN.
u = 0
rated_by_u = ~np.isnan(R[u])
print('I{} = {}'.format(u, I[rated_by_u]))

I0 = [1 2 3 4]


### 12 各ユーザの評価済みアイテム集合

In [14]:
# Build, for every user, the array of item indices that user has rated.
# The non-NaN mask is computed once and reused for each row.
observed = ~np.isnan(R)
Iu = []
for u in U:
    row_mask = observed[u]
    Iu.append(I[row_mask])
print('Iu = ')
pprint.pprint(Iu)

Iu = 
[array([1, 2, 3, 4]),
 array([0, 1, 2, 4, 5]),
 array([0, 2, 3, 4]),
 array([1, 3, 4, 5]),
 array([0, 1, 2, 3, 5])]


In [15]:
# Same per-user rated-item arrays as a list comprehension, one entry per user.
observed = ~np.isnan(R)
Iu = [I[observed[u]] for u in U]
print('Iu = ')
pprint.pprint(Iu)

Iu = 
[array([1, 2, 3, 4]),
 array([0, 1, 2, 4, 5]),
 array([0, 2, 3, 4]),
 array([1, 3, 4, 5]),
 array([0, 1, 2, 3, 5])]


### 13 ユーザuとユーザvの共通の評価済みアイテム集合

In [16]:
# Items rated by both user u and user v: intersection of their rated-item arrays.
u, v = 0, 1
items_u = Iu[u]
items_v = Iu[v]
Iuv = np.intersect1d(items_u, items_v)
print('I{}{} = {}'.format(u, v, Iuv))

I01 = [1 2 4]


### 14 アイテムiを評価済みのユーザ集合

In [17]:
# Users who rated item i: keep the user indices whose entry in column i is not NaN.
i = 0
rated_i = ~np.isnan(R[:, i])
print('U{} = {}'.format(i, U[rated_i]))

U0 = [1 2 4]


### 15 各アイテムの評価済みユーザ集合

In [18]:
# Build, for every item, the array of user indices that have rated it.
# The non-NaN mask is computed once and reused for each column.
observed = ~np.isnan(R)
Ui = []
for i in I:
    col_mask = observed[:, i]
    Ui.append(U[col_mask])
print('Ui = ')
pprint.pprint(Ui)

Ui = 
[array([1, 2, 4]),
 array([0, 1, 3, 4]),
 array([0, 1, 2, 4]),
 array([0, 2, 3, 4]),
 array([0, 1, 2, 3]),
 array([1, 3, 4])]


In [19]:
# Same per-item rating-user arrays as a list comprehension, one entry per item.
observed = ~np.isnan(R)
Ui = [U[observed[:, i]] for i in I]
print('Ui = ')
pprint.pprint(Ui)

Ui = 
[array([1, 2, 4]),
 array([0, 1, 3, 4]),
 array([0, 1, 2, 4]),
 array([0, 2, 3, 4]),
 array([0, 1, 2, 3]),
 array([1, 3, 4])]


### 16 アイテムiとアイテムjの両方を評価済みのユーザ集合

In [20]:
# Users who rated both item i and item j: intersection of the two user arrays.
i, j = 0, 4
users_i = Ui[i]
users_j = Ui[j]
Uij = np.intersect1d(users_i, users_j)
print('U{}{} = {}'.format(i, j, Uij))

U04 = [1 2]


## 平均中心化評価値行列

### 17 評価値行列全体の平均評価値

In [21]:
# Mean over all entries of R, ignoring NaN (unobserved) cells.
overall_mean = np.nanmean(R)
print('R全体の平均評価値 = {:.3f}'.format(overall_mean))

R全体の平均評価値 = 2.864


### 18 各アイテムの平均評価値

In [22]:
# Mean rating of each item: NaN-ignoring mean taken down the rows (axis=0),
# which yields one value per column of R.
ri_mean = np.nanmean(R, axis=0)
print('ri_mean = {}'.format(ri_mean))

ri_mean = [3.667 3.25  3.5   2.5   2.    2.333]


### 19 各ユーザの平均評価値

In [23]:
# Mean rating of each user: NaN-ignoring mean taken across the columns
# (axis=1), which yields one value per row of R.
ru_mean = np.nanmean(R, axis=1)
print('ru_mean = {}'.format(ru_mean))

ru_mean = [2.5  4.   3.5  1.75 2.4 ]


In [24]:
# Per-user mean computed explicitly: for each user, sum the ratings at the
# item indices listed in Iu[u] and divide by how many such items there are.
ru_mean = np.array([
    np.sum(R[u, Iu[u]]) / Iu[u].size
    for u in U
])
print('ru_mean = {}'.format(ru_mean))

ru_mean = [2.5  4.   3.5  1.75 2.4 ]


### 20 評価値ベクトルの形状変換

In [25]:
# Reshape the 1-D vector of user means into a column with one row per entry.
ru_mean_col = ru_mean.reshape(-1, 1)
print('ru_mean = \n{}'.format(ru_mean_col))

ru_mean = 
[[2.5 ]
 [4.  ]
 [3.5 ]
 [1.75]
 [2.4 ]]


### 21 平均中心化評価値行列

In [26]:
# Mean-centered rating matrix: subtract each user's mean from that user's row.
# The means are shaped as a column so the subtraction broadcasts across the
# columns of R; NaN cells remain NaN.
ru_mean_col = ru_mean.reshape(-1, 1)
R2 = R - ru_mean_col
print('R\' = \n{}'.format(R2))

R' = 
[[  nan  1.5   0.5  -1.5  -0.5    nan]
 [ 1.    1.    0.     nan -1.   -1.  ]
 [ 0.5    nan  1.5  -0.5  -1.5    nan]
 [  nan  1.25   nan  0.25 -0.75 -0.75]
 [-0.4  -1.4  -0.4   1.6    nan  0.6 ]]
