# 推薦順位に基づく正確性

In [1]:
import math
import numpy as np
np.set_printoptions(precision=3)

# Evaluation cut-off: only the top-K ranked items are considered.
TOP_K = 5
# Base of the logarithm used for the rank discount.
ALPHA = 2

# Test data: one row per user, one column per item.
# np.nan marks entries for which the user has no value.
R = np.array([
    [5, 4, 3, np.nan, 5, 4, 2, 2, np.nan, np.nan],
    [3, 3, 3, 3, 2, np.nan, 4, np.nan, 5, np.nan],
    [4, np.nan, 3, 5, 4, 3, np.nan, 3, np.nan, np.nan],
])
# Index vectors over the users (rows) and the items (columns) of R.
U = np.arange(len(R))
I = np.arange(R.shape[-1])
# For every user, the item indices whose entry in R is not NaN.
Iu = [I[np.logical_not(np.isnan(row))] for row in R]

# Recommendation list produced by recommender system A.
# Each entry is the rank position given to that item (1 = top of the list);
# np.nan means the item is not in the user's list.
RA = np.array([
    [1, np.nan, 3, np.nan, 4, 2, 5, np.nan, np.nan, np.nan],
    [4, 1, np.nan, 3, np.nan, np.nan, 5, np.nan, 2, np.nan],
    [np.nan, np.nan, 5, 3, 4, 2, np.nan, 1, np.nan, np.nan],
])

def confusion_matrix(u, RS, K):
    """
    Return the confusion-matrix counts for the top-K part of user u's
    recommendation list RS.

    Only the items in ``Iu[u]`` (module-level: the items whose entry in ``R``
    is not NaN for user ``u``) are counted. An item counts as "liked" when
    ``R[u, i] >= 4`` and as "recommended" when ``RS[u, i] <= K``; a NaN entry
    in ``RS`` compares False and is therefore treated as not recommended.

    Parameters
    ----------
    u : int
        ID of the user.
    RS : ndarray
        Recommendation list (rank of each item per user).
    K : int
        Cut-off; only the top K positions count as recommended.

    Returns
    -------
    int
        TP (liked and recommended)
    int
        FN (liked but not recommended)
    int
        FP (not liked but recommended)
    int
        TN (neither liked nor recommended)
    """
    rated = Iu[u]
    liked = R[u, rated] >= 4
    in_top_k = RS[u, rated] <= K
    disliked = ~liked
    left_out = ~in_top_k

    TP = np.count_nonzero(liked & in_top_k)
    FN = np.count_nonzero(liked & left_out)
    FP = np.count_nonzero(disliked & in_top_k)
    TN = np.count_nonzero(disliked & left_out)
    return TP, FN, FP, TN

# 平均逆順位

In [2]:
u = 0
# Exercise 1: boolean mask of the entries of R that are at least 4
# ("liked" items). NaN entries compare False.
like = np.greater_equal(R, 4)
print('like = \n{}'.format(like))

like = 
[[ True  True False False  True  True False False False False]
 [False False False False False False  True False  True False]
 [ True False False  True  True False False False False False]]


In [3]:
# Exercise 2: for each user, the best (smallest) rank that RA assigns to any
# liked item. nanmin skips liked items that have no rank in RA.
ku = np.array([
    np.nanmin(RA[u, like[u]])
    for u in U
])
print('ku = {}'.format(ku))

ku = [1. 2. 3.]


In [4]:
# Exercise 3: mean reciprocal rank = average over users of 1 / ku.
MRR = np.divide(1, ku).sum() / U.size
print('MRR = {:.3f}'.format(MRR))

MRR = 0.611


# 平均適合率

In [5]:
# Precision at every cut-off k = 1 .. len(Iu[u]) for each user, computed from
# the confusion matrix of recommender A's list RA.
precisions = []
for u in U:
    precisions_u = []
    for k in range(1, Iu[u].size+1):
        TP, FN, FP, TN = confusion_matrix(u, RA, k)
        # confusion_matrix only counts items in Iu[u]; if none of them is
        # ranked within the top k, TP + FP is 0 and precision is undefined.
        # Report 0.0 in that case instead of raising ZeroDivisionError.
        n_recommended = TP + FP
        precision_uk = TP / n_recommended if n_recommended else 0.0
        precisions_u.append(precision_uk)
    precisions.append(precisions_u)
print('precisions = \n{}'.format(precisions))

precisions = 
[[1.0, 1.0, 0.6666666666666666, 0.75, 0.6, 0.6, 0.6], [0.0, 0.5, 0.3333333333333333, 0.25, 0.4, 0.4, 0.4], [0.0, 0.0, 0.3333333333333333, 0.5, 0.4, 0.4]]


In [6]:
# Exercise 4: reorder each user's row of R by recommender A's rank, best rank
# first. argsort places NaN ranks (items not in the list) at the end; a stable
# sort keeps those NaN-ranked items in item-index order so the result does not
# depend on the sort implementation.
ranked_R = np.array([R[u, np.argsort(RA[u], kind='stable')] for u in U])
print('ranked_R = \n{}'.format(ranked_R))

ranked_R = 
[[ 5.  4.  3.  5.  2.  4. nan  2. nan nan]
 [ 3.  5.  3.  3.  4.  3.  2. nan nan nan]
 [ 3.  3.  5.  4.  3.  4. nan nan nan nan]]


In [7]:
# Exercise 5: mark which rank positions hold a liked item (value >= 4).
# NaN entries compare False.
ranked_like = np.greater_equal(ranked_R, 4)
print('ranked_like = \n{}'.format(ranked_like))

ranked_like = 
[[ True  True False  True False  True False False False False]
 [False  True False False  True False False False False False]
 [False False  True  True False  True False False False False]]


In [8]:
# Exercise 6: relevance indicator per rank position (1 = liked, 0 = not),
# obtained by converting the boolean flags to integers.
rel = np.array([[int(flag) for flag in ranked_like[u]] for u in U])
print('rel = \n{}'.format(rel))

rel = 
[[1 1 0 1 0 1 0 0 0 0]
 [0 1 0 0 1 0 0 0 0 0]
 [0 0 1 1 0 1 0 0 0 0]]


In [9]:
# Exercise 7: average precision per user over the top TOP_K positions:
# sum of precision@k at the positions that hold a relevant item, divided by
# the number of relevant items within those positions.
# precisions[u][k] holds precision at cut-off k + 1.
APu = np.array([
    np.sum([rel[u, k] * precisions[u][k] for k in range(TOP_K)])
    / np.sum(rel[u, :TOP_K])
    for u in U
])
print('APu = {}'.format(APu))

APu = [0.917 0.45  0.417]


In [10]:
# Exercise 8: mean average precision = average of APu over all users.
MAP = APu.sum() / len(APu)
print('MAP = {:.3f}'.format(MAP))

MAP = 0.594


# DCG

In [38]:
# Items that appear in recommender A's list for each user (non-NaN rank).
Iu_rec = [I[~np.isnan(RA[u])] for u in U]
# Exercise 9: DCG per user over the items ranked within the top TOP_K.
# Each item contributes R[u, i] / max(1, log_ALPHA(rank)). Items are selected
# by their rank (RA[u, i] <= TOP_K) rather than by taking the first TOP_K
# entries of Iu_rec[u], so lists with more or fewer than TOP_K entries are
# handled correctly.
DCGu = np.array([
    np.sum([
        R[u, i] / max(1, math.log(RA[u, i], ALPHA))
        for i in Iu_rec[u]
        if RA[u, i] <= TOP_K
    ])
    for u in U
])
print('DCGu = {}'.format(DCGu))

DCGu = [14.254 13.115 12.447]


In [53]:
# Exercise 10: ideal rank of every item per user, i.e. its position when the
# user's row of R is sorted in descending order (1 = highest value, NaN last).
# The inner sort is stable so that ties (equal values, and the NaN entries)
# are always broken by item index, independent of the sort implementation.
RI = np.argsort(np.argsort(-R, kind='stable'))+1
print('RI = \n{}'.format(RI))

RI = 
[[ 1  3  5  8  2  4  6  7  9 10]
 [ 3  4  5  6  7  8  2  9  1 10]
 [ 2  7  4  1  3  5  8  6  9 10]]


In [61]:
# Exercise 11: for each user, the items whose ideal rank RI is within TOP_K.
Iu_recI = np.array([
    I[np.less_equal(RI[u], TOP_K)]
    for u in U
])
print('Iu_recI = \n{}'.format(Iu_recI))

Iu_recI = 
[[0 1 2 4 5]
 [0 1 2 6 8]
 [0 2 3 4 5]]


In [88]:
# Exercise 12: ideal DCG per user. The values of the ideal top-TOP_K items
# are sorted in descending order and the value at position k + 1 is divided
# by max(1, log_ALPHA(k + 1)).
# The trailing single-element "for ideal in [...]" binds the sorted values
# once per user.
IDCGu = np.array([
    np.sum([
        ideal[k] / np.max([1, math.log(k + 1, ALPHA)])
        for k in range(TOP_K)
    ])
    for u in U
    for ideal in [R[u, Iu_recI[u]][np.argsort(-R[u, Iu_recI[u]])]]
])
print('IDCGu = {}'.format(IDCGu))

IDCGu = [15.816 13.685 14.316]


In [90]:
# Exercise 13: normalized DCG per user = DCG divided by the ideal DCG.
nDCGu = np.divide(DCGu, IDCGu)
print('nDCGu = {}'.format(nDCGu))

nDCGu = [0.901 0.958 0.869]


In [92]:
# Exercise 14: overall nDCG = average of the per-user nDCG values.
nDCG = nDCGu.sum() / len(nDCGu)
print('nDCG = {:.3f}'.format(nDCG))

nDCG = 0.910
