In [1]:
import numpy as np
np.set_printoptions(precision=3)

# 近傍アイテム数
K_ITEMS = 3
# 閾値
THETA = 0

R = np.array([
              [np.nan, 4,      3,      1,      2,      np.nan],
              [5,      5,      4,      np.nan, 3,      3     ],
              [4,      np.nan, 5,      3,      2,      np.nan],
              [np.nan, 3,      np.nan, 2,      1,      1     ],
              [2,      1,      2,      4,      np.nan, 3     ],
])
U = np.arange(R.shape[0])
I = np.arange(R.shape[1])
Ui = [U[~np.isnan(R)[:,i]] for i in I]
Iu = [I[~np.isnan(R)[u,:]] for u in U]
ru_mean = np.nanmean(R, axis=1)
R2 = R - ru_mean.reshape((ru_mean.size, 1))


In [2]:
Ui[0]

array([1, 2, 4])

In [3]:
Ui[4]

array([0, 1, 2, 3])

## コサイン類似度

In [8]:
i = 0
j = 4
Uij = np.intersect1d(Ui[i], Ui[j])
Uij

array([1, 2])

In [22]:
num = np.sum([R[u,i]*R[u,j] for u in Uij])
den_i = np.sqrt(np.sum([R[u,i]**2 for u in Uij]))
den_j = np.sqrt(np.sum([R[u,j]**2 for u in Uij]))

In [24]:
cosine = round(num / (den_i * den_j),3)
cosine

0.996

## 調整コサイン類似度 & アイテムxアイテム類似度行列

In [26]:
num = np.sum([R2[u,i]*R2[u,j] for u in Uij])
den_i = np.sqrt(np.sum([R2[u,i]**2 for u in Uij]))
den_j = np.sqrt(np.sum([R2[u,j]**2 for u in Uij]))

In [27]:
cosine = round(num / (den_i * den_j),3)
cosine

-0.868

In [34]:
S = np.zeros((I.size, I.size))
for i in I:
    for j in I:
        Uij = np.intersect1d(Ui[i], Ui[j])
        num = np.sum([R2[u,i]*R2[u,j] for u in Uij])
        den_i = np.sqrt(np.sum([R2[u,i]**2 for u in Uij]))
        den_j = np.sqrt(np.sum([R2[u,j]**2 for u in Uij]))
        # 調整コサイン類似度
        cosine = round(num / (den_i * den_j),3)
        S[i,j] = cosine
S

array([[ 1.   ,  0.842,  0.494, -0.829, -0.868, -0.987],
       [ 0.842,  1.   ,  0.896, -0.788, -0.91 , -0.942],
       [ 0.494,  0.896,  1.   , -0.583, -0.845, -0.514],
       [-0.829, -0.788, -0.583,  1.   ,  0.469,  0.497],
       [-0.868, -0.91 , -0.845,  0.469,  1.   ,  1.   ],
       [-0.987, -0.942, -0.514,  0.497,  1.   ,  1.   ]])

## 類似アイテムの選定

In [35]:
# アイテム-アイテム類似度行列から対象アイテムを除外した辞書
Ii = {i: {j: S[i,j] for j in I if i != j} for i in I}
Ii

{0: {1: 0.842, 2: 0.494, 3: -0.829, 4: -0.868, 5: -0.987},
 1: {0: 0.842, 2: 0.896, 3: -0.788, 4: -0.91, 5: -0.942},
 2: {0: 0.494, 1: 0.896, 3: -0.583, 4: -0.845, 5: -0.514},
 3: {0: -0.829, 1: -0.788, 2: -0.583, 4: 0.469, 5: 0.497},
 4: {0: -0.868, 1: -0.91, 2: -0.845, 3: 0.469, 5: 1.0},
 5: {0: -0.987, 1: -0.942, 2: -0.514, 3: 0.497, 4: 1.0}}

In [42]:
Ii = {i: dict(sorted(Ii[i].items(), key=lambda x:x[1], reverse=True)[:K_ITEMS]) for i in I}
Ii

{0: {1: 0.842, 2: 0.494, 3: -0.829},
 1: {2: 0.896, 0: 0.842, 3: -0.788},
 2: {1: 0.896, 0: 0.494, 5: -0.514},
 3: {5: 0.497, 4: 0.469, 2: -0.583},
 4: {5: 1.0, 3: 0.469, 2: -0.845},
 5: {4: 1.0, 3: 0.497, 2: -0.514}}

In [49]:
Ii = {i: np.array([k for k, j in Ii[i].items() if j > THETA]) for i in I}
Ii

{0: array([1, 2]),
 1: array([2, 0]),
 2: array([1, 0]),
 3: array([5, 4]),
 4: array([5, 3]),
 5: array([4, 3])}

## 嗜好予測

In [50]:
Iu

[array([1, 2, 3, 4]),
 array([0, 1, 2, 4, 5]),
 array([0, 2, 3, 4]),
 array([1, 3, 4, 5]),
 array([0, 1, 2, 3, 5])]

In [51]:
Ui

[array([1, 2, 4]),
 array([0, 1, 3, 4]),
 array([0, 1, 2, 4]),
 array([0, 2, 3, 4]),
 array([0, 1, 2, 3]),
 array([1, 3, 4])]

In [75]:
u = 1
i = 4
Iiu = np.intersect1d(Ii[i], Iu[u])
Iiu

array([5])

In [78]:
np.sum([S[i,j]*R2[0,j] for j in Iiu])

nan

In [77]:
rui_pred = round((ru_mean[u] + np.sum([S[i,j]*R2[0,j] for j in Iiu]) / np.sum(np.abs([S[i,j] for j in Iiu]))), 3)
rui_pred

nan

In [79]:
R3 = R.copy()
for u in U:
    for i in I:
        if ~np.isnan(R3[u,i]):
            continue
        Iiu = np.intersect1d(Ii[i], Iu[u])
        pri
        rui_pred = round((ru_mean[u] + np.sum([S[i,j]*R2[u,j] for j in Iiu]) / np.sum(np.abs([S[i,j] for j in Iiu]))), 3)
        R3[u,i] = rui_pred
R3
        

0 [1 2]
5 [3 4]
3 [4 5]
1 [0 2]
5 [3 4]
0 [1]
2 [1]
4 [3 5]


array([[3.63 , 4.   , 3.   , 1.   , 2.   , 1.668],
       [5.   , 5.   , 4.   , 3.   , 3.   , 3.   ],
       [4.   , 4.516, 5.   , 3.   , 2.   , 2.332],
       [3.   , 3.   , 3.   , 2.   , 1.   , 1.   ],
       [2.   , 1.   , 2.   , 4.   , 3.319, 3.   ]])