<a href="https://colab.research.google.com/github/tnaka78/recsys-python/blob/main/recsys_python_chap02.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# recsys-python 第2章 評価値行列
https://recsyslab.github.io/recsys-python/ja/chap02.html

In [1]:
import pprint
import numpy as np
# Print NumPy floats with at most three digits after the decimal point.
np.set_printoptions(precision=3)

## 評価値行列

In [3]:
# Rating matrix: rows are users, columns are items.
# np.nan marks a (user, item) pair that has no rating.
R = np.array([
    [np.nan, 4, 3, 1, 2, np.nan],
    [5, 5, 4, np.nan, 3, 3],
    [4, np.nan, 5, 3, 2, np.nan],
    [np.nan, 3, np.nan, 2, 1, 1],
    [2, 1, 2, 4, np.nan, 3],
], dtype=float)
print('R = \n{}'.format(R))

R = 
[[nan  4.  3.  1.  2. nan]
 [ 5.  5.  4. nan  3.  3.]
 [ 4. nan  5.  3.  2. nan]
 [nan  3. nan  2.  1.  1.]
 [ 2.  1.  2.  4. nan  3.]]


In [4]:
# User index set U: one index per row of R.
U = np.arange(len(R))
print('U = {}'.format(U))

U = [0 1 2 3 4]


In [5]:
# Item index set I: one index per column of R.
I = np.arange(np.size(R, axis=1))
print('I = {}'.format(I))

I = [0 1 2 3 4 5]


In [6]:
# Number of users |U|.
print('|U| = {}'.format(len(U)))

|U| = 5


In [7]:
# Number of items |I|.
print('|I| = {}'.format(len(I)))

|I| = 6


In [8]:
# Look up a single rating: the entry of R at row u (user), column i (item).
u, i = 0, 1
print('r{}{} = {}'.format(u, i, R[u][i]))

r01 = 4.0


## 評価値行列の疎性

In [9]:
# Total number of cells in R, rated or not.
print('Rの全要素数 = {}'.format(np.size(R)))

Rの全要素数 = 30


In [11]:
# Boolean mask of observed ratings: True where R holds a value, False where it is NaN.
print('観測値 = \n{}'.format(np.logical_not(np.isnan(R))))

観測値 = 
[[False  True  True  True  True False]
 [ True  True  True False  True  True]
 [ True False  True  True  True False]
 [False  True False  True  True  True]
 [ True  True  True  True False  True]]


In [17]:
# |R|: number of observed (non-NaN) ratings, computed three equivalent ways.
print('|R| = {}'.format(np.count_nonzero(~np.isnan(R))))
print('|R| = {}'.format(R[~np.isnan(R)].size))  # Alternative 1: size of the array selected by the mask
print('|R| = {}'.format(np.sum(~np.isnan(R))))  # Alternative 2: each True counts as 1 when summed

|R| = 22
|R| = 22
|R| = 22


In [19]:
# Sparsity = 1 - (observed ratings / total cells): the share of R that is unrated.
sparsity = 1.0 - np.count_nonzero(np.logical_not(np.isnan(R))) / np.size(R)
print('sparsity = {:.3f}'.format(sparsity))

sparsity = 0.267


## 評価済みアイテム集合

In [21]:
# Items rated by user u: the item indices whose entry in row u of R is not NaN.
u = 0
print('I{} = {}'.format(u, I[np.logical_not(np.isnan(R[u, :]))]))

I0 = [1 2 3 4]


In [22]:
# Rated-item set for every user: walk the rows of R (one row per user index in U)
# and keep the item indices whose entry is not NaN.
Iu = [I[np.logical_not(np.isnan(user_row))] for user_row in R]
print('Iu = ')
pprint.pprint(Iu)

Iu = 
[array([1, 2, 3, 4]),
 array([0, 1, 2, 4, 5]),
 array([0, 2, 3, 4]),
 array([1, 3, 4, 5]),
 array([0, 1, 2, 3, 5])]


In [23]:
# Items rated by both user u and user v: intersection of their rated-item sets.
u, v = 0, 1
Iuv = np.intersect1d(Iu[u], Iu[v])
print('I{}{} = {}'.format(u, v, Iuv))

I01 = [1 2 4]


In [24]:
# Users who rated item i: the user indices whose entry in column i of R is not NaN.
i = 0
print('U{} = {}'.format(i, U[np.logical_not(np.isnan(R[:, i]))]))

U0 = [1 2 4]


In [25]:
# Rating-user set for every item: walk the columns of R (rows of R.T, one per item
# index in I) and keep the user indices whose entry is not NaN.
Ui = [U[np.logical_not(np.isnan(item_col))] for item_col in R.T]
print('Ui = ')
pprint.pprint(Ui)

Ui = 
[array([1, 2, 4]),
 array([0, 1, 3, 4]),
 array([0, 1, 2, 4]),
 array([0, 2, 3, 4]),
 array([0, 1, 2, 3]),
 array([1, 3, 4])]


In [26]:
# Users who rated both item i and item j: intersection of their rating-user sets.
i, j = 0, 4
Uij = np.intersect1d(Ui[i], Ui[j])
print('U{}{} = {}'.format(i, j, Uij))

U04 = [1 2]


## 平均中心化評価値行列

In [27]:
# Mean of all observed ratings in R; np.nanmean ignores the NaN (unrated) entries.
print('R全体の平均評価値 = {:.3f}'.format(np.nanmean(R)))

R全体の平均評価値 = 2.864


In [28]:
# Per-item mean rating: average down each column of R (axis=0), ignoring NaN.
ri_mean = np.nanmean(R, axis=0)
print('ri_mean = {}'.format(ri_mean))

ri_mean = [3.667 3.25  3.5   2.5   2.    2.333]


In [40]:
# Per-user mean rating: average along each row of R (axis=1), ignoring NaN.
ru_mean = np.nanmean(R, axis=1)
print('ru_mean = {}'.format(ru_mean))
# Alternative: for each user, sum the ratings at that user's rated-item
# indices Iu[u] and divide by the number of items the user rated.
ru_mean = np.array([np.sum(R[u, Iu[u]]) / Iu[u].size for u in U])
print('ru_mean = {}'.format(ru_mean))

ru_mean = [2.5  4.   3.5  1.75 2.4 ]
ru_mean = [2.5  4.   3.5  1.75 2.4 ]


In [41]:
# Show the per-user means as a column vector (one row per user) so that it
# lines up with the rows of R.
print('ru_mean = \n{}'.format(ru_mean[:, np.newaxis]))

ru_mean = 
[[2.5 ]
 [4.  ]
 [3.5 ]
 [1.75]
 [2.4 ]]


In [43]:
# Mean-centered rating matrix: subtract each user's mean rating from that user's row.
# The column-shaped means broadcast across the item axis; NaN entries stay NaN.
R2 = R - ru_mean[:, np.newaxis]
print('R\' = \n{}'.format(R2))

R' = 
[[  nan  1.5   0.5  -1.5  -0.5    nan]
 [ 1.    1.    0.     nan -1.   -1.  ]
 [ 0.5    nan  1.5  -0.5  -1.5    nan]
 [  nan  1.25   nan  0.25 -0.75 -0.75]
 [-0.4  -1.4  -0.4   1.6    nan  0.6 ]]
