# 評価値行列

In [26]:
import pprint
import numpy as np
# Limit NumPy's printed floating-point output to 3 digits of precision.
np.set_printoptions(precision=3)

In [27]:
# Rating matrix R: one row per user, one column per item.
# np.nan marks a (user, item) pair for which no rating was observed.
R = np.array(
    [
        [np.nan, 4, 3, 1, 2, np.nan],
        [5, 5, 4, np.nan, 3, 3],
        [4, np.nan, 5, 3, 2, np.nan],
        [np.nan, 3, np.nan, 2, 1, 1],
        [2, 1, 2, 4, np.nan, 3],
    ]
)

In [28]:
# Index sets taken from the two dimensions of R:
# U is the set of users (row indices), I is the set of items (column indices).
U, I = (np.arange(extent) for extent in R.shape)

print(f"| U | = {U.size}")
print(f"| I | = {I.size}")

| U | = 5
| I | = 6


In [29]:
# Look up a single entry of R: the rating stored for user u and item i.
u, i = 0, 1
# Index with one (row, column) pair rather than chaining R[u][i], which
# first builds the intermediate row R[u] and then indexes into it.
print(f"R_ui = {R[u, i]}")
# R.size counts every cell of the matrix, missing (NaN) entries included.
print(f"Rの全要素数 = {R.size}")

R_ui = 4.0
Rの全要素数 = 30


In [30]:
# Boolean mask of observed entries: True where R holds a rating, False where it is NaN.
print(f"観測値 = \n {np.logical_not(np.isnan(R))}")
# |R| is the number of observed ratings, i.e. the number of True cells in that mask.
print(f"| R | = {np.count_nonzero(np.logical_not(np.isnan(R)))}")

観測値 = 
 [[False  True  True  True  True False]
 [ True  True  True False  True  True]
 [ True False  True  True  True False]
 [False  True False  True  True  True]
 [ True  True  True  True False  True]]
| R | = 22


In [31]:
# Indexing with a boolean array of the same shape flattens the result and
# returns only the elements at the True positions.
sample = np.array([[1, 2],
                   [3, 4]])
print(sample[np.array([[True, False],
                       [False, True]])])

[1 4]


In [32]:
# Sparsity of R: one minus the ratio of observed ratings to the total
# number of cells, i.e. the share of cells that are missing.
sparsity = 1 - np.count_nonzero(np.logical_not(np.isnan(R))) / R.size
print(f"sparsity = {sparsity}")

sparsity = 0.2666666666666667


In [33]:
# I_u: the indices of the items for which user u has an observed rating.
u = 0
print(f"I_{u} = {I[np.logical_not(np.isnan(R[u, :]))]}")

I_0 = [1 2 3 4]


In [34]:
# Iu holds one array per user (per row of R): the indices of the items
# that user has rated.
Iu = [I[~np.isnan(user_ratings)] for user_ratings in R]
print("Iu = ")
pprint.pprint(Iu)

Iu = 
[array([1, 2, 3, 4]),
 array([0, 1, 2, 4, 5]),
 array([0, 2, 3, 4]),
 array([1, 3, 4, 5]),
 array([0, 1, 2, 3, 5])]


In [35]:
# Items rated by both user u and user v: the intersection of their item sets.
u = 0
v = 1
print(f"Iu & Iv = {np.intersect1d(Iu[u], Iu[v])}")

Iu & Iv = [1 2 4]


In [39]:
# Ui holds one array per item (per column of R): the indices of the users
# who have rated that item.
Ui = [U[~np.isnan(item_ratings)] for item_ratings in R.T]
print("Ui = ")
pprint.pprint(Ui)

Ui = 
[array([1, 2, 4]),
 array([0, 1, 3, 4]),
 array([0, 1, 2, 4]),
 array([0, 2, 3, 4]),
 array([0, 1, 2, 3]),
 array([1, 3, 4])]


In [40]:
# Users who rated both item i and item j: the intersection of their user sets.
i = 0
j = 1
print(f"Ui & Uj = {np.intersect1d(Ui[i], Ui[j])}")

Ui & Uj = [1 4]


In [43]:
# Mean-centered rating matrix
# First, the mean over all entries of R; np.nanmean ignores the NaN (missing) cells.
print(f"Rの平均評価値 = {np.nanmean(R)}")

Rの平均評価値 = 2.8636363636363638


In [49]:
# Per-item mean rating, ignoring NaN (missing) entries. Reducing along
# axis 0 (down each column) is the vectorised equivalent of evaluating
# np.nanmean(R[:, i]) for every item i in I.
ri_mean = np.nanmean(R, axis=0)
print(ri_mean)

[3.667 3.25  3.5   2.5   2.    2.333]


In [52]:
# Per-user mean rating, ignoring NaN (missing) entries. Reducing along
# axis 1 (across each row) is the vectorised equivalent of evaluating
# np.nanmean(R[u]) for every user u in U.
ru_mean = np.nanmean(R, axis=1)
print(ru_mean)

[2.5  4.   3.5  1.75 2.4 ]


In [54]:
# Reshape the per-user means into a single-column 2-D array (one row per
# user). Passing -1 lets NumPy infer the row count from the data instead of
# hard-coding the number of users.
ru_mean = ru_mean.reshape(-1, 1)
print(ru_mean)

[[2.5 ]
 [4.  ]
 [3.5 ]
 [1.75]
 [2.4 ]]


In [55]:
# Mean-centered rating matrix R': subtract ru_mean from R element-wise
# (NumPy broadcasting aligns the two shapes). NaN entries of R stay NaN.
R2 = np.subtract(R, ru_mean)
print(f"R' = \n{R2}")

R' = 
[[  nan  1.5   0.5  -1.5  -0.5    nan]
 [ 1.    1.    0.     nan -1.   -1.  ]
 [ 0.5    nan  1.5  -0.5  -1.5    nan]
 [  nan  1.25   nan  0.25 -0.75 -0.75]
 [-0.4  -1.4  -0.4   1.6    nan  0.6 ]]
