# ユーザベース協調フィルタリング

In [2]:
import numpy as np
np.set_printoptions(precision=3)

# 近傍ユーザ数
K_USERS = 3
# 閾値
THETA = 0

R = np.array([
              [np.nan, 4,      3,      1,      2,      np.nan],
              [5,      5,      4,      np.nan, 3,      3     ],
              [4,      np.nan, 5,      3,      2,      np.nan],
              [np.nan, 3,      np.nan, 2,      1,      1     ],
              [2,      1,      2,      4,      np.nan, 3     ],
])
U = np.arange(R.shape[0])
I = np.arange(R.shape[1])
Ui = [U[~np.isnan(R)[:,i]] for i in I]
Iu = [I[~np.isnan(R)[u,:]] for u in U]
ru_mean = np.nanmean(R, axis=1)
R2 = R - ru_mean.reshape((ru_mean.size, 1))

In [10]:
U

array([0, 1, 2, 3, 4])

In [15]:
I

array([0, 1, 2, 3, 4, 5])

In [16]:
Ui

[array([1, 2, 4]),
 array([0, 1, 3, 4]),
 array([0, 1, 2, 4]),
 array([0, 2, 3, 4]),
 array([0, 1, 2, 3]),
 array([1, 3, 4])]

In [17]:
Iu

[array([1, 2, 3, 4]),
 array([0, 1, 2, 4, 5]),
 array([0, 2, 3, 4]),
 array([1, 3, 4, 5]),
 array([0, 1, 2, 3, 5])]

In [18]:
ru_mean

array([2.5 , 4.  , 3.5 , 1.75, 2.4 ])

In [21]:
R2

array([[  nan,  1.5 ,  0.5 , -1.5 , -0.5 ,   nan],
       [ 1.  ,  1.  ,  0.  ,   nan, -1.  , -1.  ],
       [ 0.5 ,   nan,  1.5 , -0.5 , -1.5 ,   nan],
       [  nan,  1.25,   nan,  0.25, -0.75, -0.75],
       [-0.4 , -1.4 , -0.4 ,  1.6 ,   nan,  0.6 ]])

## ピアソンの相関係数

In [47]:
u = 0
v = 1
Iuv = np.intersect1d(Iu[u], Iu[v])
num = np.sum([R2[u, i]*R2[v, i] for i in Iuv])
num

2.0

In [52]:
den_u = round(np.sqrt(np.sum([R2[u, i]**2 for i in Iuv])), 3)
den_v = round(np.sqrt(np.sum([R2[v, i]**2 for i in Iuv])), 3)
print(den_u, den_v)

1.658 1.414


In [54]:
prsn = round(num / (den_u * den_v), 3)
prsn

0.853

## User-User類似度行列

In [56]:
S = np.zeros((U.size, U.size))
for u in U:
    for v in U:
        Iuv = np.intersect1d(Iu[u], Iu[v])
        num = np.sum([R2[u,i]*R2[v,i] for i in Iuv])
        den_u = round(np.sqrt(np.sum([R2[u, i]**2 for i in Iuv])), 3)
        den_v = round(np.sqrt(np.sum([R2[v, i]**2 for i in Iuv])), 3)
        prsn = round(num / (den_u * den_v), 3)
        S[u,v] = prsn
S
        
        

array([[ 1.   ,  0.853,  0.623,  0.582, -0.997],
       [ 0.853,  1.   ,  0.649,  0.969, -0.853],
       [ 0.623,  0.649,  1.   ,  0.8  , -0.569],
       [ 0.582,  0.969,  0.8  ,  1.   , -0.551],
       [-0.997, -0.853, -0.569, -0.551,  1.   ]])

## 類似ユーザの選定

In [57]:
Uu = {u: {v: S[u,v] for v in U if u!=v} for u in U}
Uu

{0: {1: 0.853, 2: 0.623, 3: 0.582, 4: -0.997},
 1: {0: 0.853, 2: 0.649, 3: 0.969, 4: -0.853},
 2: {0: 0.623, 1: 0.649, 3: 0.8, 4: -0.569},
 3: {0: 0.582, 1: 0.969, 2: 0.8, 4: -0.551},
 4: {0: -0.997, 1: -0.853, 2: -0.569, 3: -0.551}}

In [67]:
Uu ={u: dict(sorted(Uu[u].items(), key=lambda i:i[1], reverse=True)[:K_USERS]) for u in U}
Uu 

{0: {1: 0.853, 2: 0.623, 3: 0.582},
 1: {3: 0.969, 0: 0.853, 2: 0.649},
 2: {3: 0.8, 1: 0.649, 0: 0.623},
 3: {1: 0.969, 2: 0.8, 0: 0.582},
 4: {3: -0.551, 2: -0.569, 1: -0.853}}