# k近傍法を用いた内容ベース推薦

In [1]:
import numpy as np
# Toy data set: one row per item, columns are (feature 1, feature 2, rating).
# The rating is +1 or -1 for items the user has rated and NaN otherwise.
Du = np.array([
    [5, 3, +1],
    [6, 2, +1],
    [4, 1, +1],
    [8, 5, -1],
    [2, 4, -1],
    [3, 6, -1],
    [7, 6, -1],
    [4, 2, np.nan],
    [5, 1, np.nan],
    [8, 6, np.nan],
    [3, 4, np.nan],
    [4, 7, np.nan],
    [4, 4, np.nan],
])

# Feature matrix: every column except the trailing rating column.
x = Du[:, :-1]

# Item ids are simply the row numbers of Du.
I = np.arange(len(Du))

# Split the ids by rating status.
rated_mask = ~np.isnan(Du[:, -1])
Iu = I[rated_mask]            # rated items
Iu_not = I[~rated_mask]       # unrated items (prediction targets)
Iup = I[Du[:, -1] == 1]       # items rated +1
Iun = I[Du[:, -1] == -1]      # items rated -1

In [2]:
# Display the ids of the items whose rating is +1.
Iup

array([0, 1, 2])

In [3]:
# Euclidean distance between every unrated item (rows, ordered like Iu_not)
# and every rated item (columns, ordered like Iu), rounded to 3 decimals.
# Broadcasting a (n_unrated, 1, n_features) array against a
# (1, n_rated, n_features) array yields all pairwise differences at once, so
# this works for any number of feature columns rather than exactly two.
diff = x[Iu_not][:, np.newaxis, :] - x[Iu][np.newaxis, :, :]
D = np.round(np.sqrt(np.sum(diff ** 2, axis=2)), 3)
D

array([[1.414, 2.   , 1.   , 5.   , 2.828, 4.123, 5.   ],
       [2.   , 1.414, 1.   , 5.   , 4.243, 5.385, 5.385],
       [4.243, 4.472, 6.403, 1.   , 6.325, 5.   , 1.   ],
       [2.236, 3.606, 3.162, 5.099, 1.   , 2.   , 4.472],
       [4.123, 5.385, 6.   , 4.472, 3.606, 1.414, 3.162],
       [1.414, 2.828, 3.   , 4.123, 2.   , 2.236, 3.606]])

In [4]:
# Rank the rated items by distance for every unrated item (one row each).
# D's columns are positions within Iu, not item ids, so D must be sorted as
# is and the resulting column positions translated back to item ids through
# Iu. (Indexing D's columns with Iu only works while Iu == 0..n-1.)
# A stable sort keeps equally distant items in their original order.
near_dist = Iu[np.argsort(D, axis=1, kind='stable')]
near_dist

array([[2, 0, 1, 4, 5, 3, 6],
       [2, 1, 0, 4, 3, 5, 6],
       [3, 6, 0, 1, 5, 4, 2],
       [4, 5, 0, 2, 1, 6, 3],
       [5, 6, 4, 0, 3, 1, 2],
       [0, 4, 5, 1, 2, 6, 3]])

In [5]:
# Number of nearest rated neighbours kept for each unrated item.
K_ITEMS = 3
# Column positions 0 .. K_ITEMS-1 of the distance-sorted neighbour table.
k = np.arange(K_ITEMS)
# Keep only those leading columns; near_dist is rebound to the truncated array.
near_dist = near_dist[:,k]
near_dist

array([[2, 0, 1],
       [2, 1, 0],
       [3, 6, 0],
       [4, 5, 0],
       [5, 6, 4],
       [0, 4, 5]])

In [6]:
# Map each unrated item id to its row of nearest-neighbour entries.
# Iu_not and the rows of near_dist are in the same order, so pair them up.
nearItem = dict(zip(Iu_not, near_dist))
nearItem

{7: array([2, 0, 1]),
 8: array([2, 1, 0]),
 9: array([3, 6, 0]),
 10: array([4, 5, 0]),
 11: array([5, 6, 4]),
 12: array([0, 4, 5])}

In [7]:
# Majority vote: an unrated item is predicted +1 only when strictly more of
# its neighbours are rated +1 than -1; otherwise (ties included) it gets -1.
for i in Iu_not:
    neighbours = nearItem[i]
    print(neighbours, Iun)
    liked = neighbours[np.isin(neighbours, Iup)]
    disliked = neighbours[np.isin(neighbours, Iun)]
    score = +1 if liked.size > disliked.size else -1
    print('I{}+ = {}'.format(i, liked))
    print('I{}- = {}'.format(i, disliked))
    print('predict1({}) = {:.3f}'.format(i, score))

        

[2 0 1] [3 4 5 6]
I7+ = [2 0 1]
I7- = []
predict1(7) = 1.000
[2 1 0] [3 4 5 6]
I8+ = [2 1 0]
I8- = []
predict1(8) = 1.000
[3 6 0] [3 4 5 6]
I9+ = [0]
I9- = [3 6]
predict1(9) = -1.000
[4 5 0] [3 4 5 6]
I10+ = [0]
I10- = [4 5]
predict1(10) = -1.000
[5 6 4] [3 4 5 6]
I11+ = []
I11- = [5 6 4]
predict1(11) = -1.000
[0 4 5] [3 4 5 6]
I12+ = [0]
I12- = [4 5]
predict1(12) = -1.000


In [8]:
# Score every unrated item as
#   (#neighbours rated +1 - #neighbours rated -1) / K_ITEMS,
# rounded to 3 decimals and stored under the item's id.
pred_score = {}
for i in Iu_not:
    neighbours = nearItem[i]
    n_pos = int(np.isin(neighbours, Iup).sum())
    n_neg = int(np.isin(neighbours, Iun).sum())
    pred_score[i] = round((n_pos - n_neg) / K_ITEMS, 3)
pred_score


{7: 1.0, 8: 1.0, 9: -0.333, 10: -0.333, 11: -1.0, 12: -0.333}

In [9]:
# Number of top-scoring items to keep in the recommendation list.
TOP_K = 3
# Score threshold used by the filtering step that follows.
THETA = 0
# Sort by predicted score, highest first (sorted() is stable, so ties keep
# their original order), and keep the first TOP_K entries. The slice uses
# TOP_K itself instead of a hard-coded 4 that disagreed with the constant.
rec_list = dict(sorted(pred_score.items(), key=lambda kv: kv[1], reverse=True)[:TOP_K])
rec_list

{7: 1.0, 8: 1.0, 9: -0.333}

In [10]:
# Keep only the candidates whose predicted score is strictly above THETA.
keys = [item for item, value in rec_list.items() if value > THETA]
rec_list = {item: rec_list[item] for item in keys}
rec_list

{7: 1.0, 8: 1.0}

In [11]:
# Display the ids of the items that remain in the recommendation list.
keys

[7, 8]

In [12]:
# Put the parent directory first on the import path so the project packages
# imported below can be found.
import sys
sys.path.insert(0, '..')

from util.data_loader import DataLoader

# Load the data set through the project's loader.
Data = DataLoader.load()

In [13]:
# Display the array returned by DataLoader.load().
Data

array([[ 5.,  3.,  1.],
       [ 6.,  2.,  1.],
       [ 4.,  1.,  1.],
       [ 8.,  5., -1.],
       [ 2.,  4., -1.],
       [ 3.,  6., -1.],
       [ 7.,  6., -1.],
       [ 4.,  2., nan],
       [ 5.,  1., nan],
       [ 8.,  6., nan],
       [ 3.,  4., nan],
       [ 4.,  7., nan],
       [ 4.,  4., nan]])

In [14]:
from src.kNN import KNNRecommender
recommender = KNNRecommender()
# NOTE(review): the recorded run of this call raised
# "TypeError: recommend() missing 1 required positional argument: 'THETA'",
# so recommend() evidently expects one more positional argument than the four
# passed here. The arguments presumably correspond to the data set, K_ITEMS,
# TOP_K and THETA -- confirm against the signature in src/kNN.py and add the
# missing argument.
rec_list = recommender.recommend(Data, 3, 3, 0)

TypeError: recommend() missing 1 required positional argument: 'THETA'