In [1]:
import numpy as np

from typing import List

# 예측 오차 지표

In [2]:
# Toy example for the error metrics below: `r` is passed as the true values
# and `r_hat` as the predictions; every prediction is off by exactly 0.1.
r = list(range(5))
r_hat = [
    0.1,
    1.1,
    2.1,
    3.1,
    4.1,
]

In [3]:
from sklearn.metrics import mean_absolute_error

# Mean absolute error between the true values `r` and the predictions `r_hat`.
mae = mean_absolute_error(r, r_hat)
print(mae)

0.09999999999999998


In [4]:
from sklearn.metrics import mean_squared_error

# Mean squared error between the true values `r` and the predictions `r_hat`.
mse = mean_squared_error(r, r_hat)
print(mse)

0.009999999999999995


In [5]:
from sklearn.metrics import mean_squared_error

# Root mean squared error: the square root of the MSE.
rmse = np.sqrt(mean_squared_error(r, r_hat))
print(rmse)

0.09999999999999998


# 집합 평가 지표

In [6]:
# Toy example for the set-based metrics below: `true_item` holds the
# ground-truth items, `pred_item` the predicted items (sliced with [:k] later,
# so their order matters).
true_item = list(range(2, 9, 2))  # [2, 4, 6, 8]
pred_item = list(range(1, 6))  # [1, 2, 3, 4, 5]

def precision_at_k(true_item: List[int], pred_item: List[int], k: int) -> float:
    """Compute Precision@k for a single list of predictions.

    Args:
        true_item: Ground-truth items.
        pred_item: Predicted items; only the first ``k`` entries are used.
        k: Cutoff. The hit count is divided by ``k`` itself, even when
            ``pred_item`` holds fewer than ``k`` entries.

    Returns:
        The number of distinct items shared by ``true_item`` and
        ``pred_item[:k]`` divided by ``k``, or 0.0 when ``k`` is not positive.
    """
    # A non-positive cutoff selects no predictions. Without this guard a
    # negative k would slice from the end of the list and divide by a negative
    # number, yielding a negative "precision".
    if k <= 0:
        return 0.0

    hits = set(true_item) & set(pred_item[:k])
    return len(hits) / k

# Precision@3 on the toy lists defined above.
p_at_3 = precision_at_k(true_item, pred_item, k=3)
print(p_at_3)

0.3333333333333333


In [7]:
def recall_at_k(true_item: List[int], pred_item: List[int], k: int) -> float:
    """Compute Recall@k for a single list of predictions.

    Args:
        true_item: Ground-truth items.
        pred_item: Predicted items; only the first ``k`` entries are used.
        k: Cutoff applied to ``pred_item``.

    Returns:
        The number of distinct items shared by ``true_item`` and
        ``pred_item[:k]`` divided by ``len(true_item)``, or 0.0 when
        ``true_item`` is empty or ``k`` is not positive.
    """
    # A non-positive cutoff selects no predictions. Without the `k <= 0` check
    # a negative k would be interpreted by the slice as "all but the last |k|
    # predictions" and silently return a misleading recall.
    if len(true_item) == 0 or k <= 0:
        return 0.0

    hits = set(true_item) & set(pred_item[:k])
    return len(hits) / len(true_item)

# Recall@3 on the toy lists defined above.
r_at_3 = recall_at_k(true_item, pred_item, k=3)
print(r_at_3)

0.25


In [8]:
def f1_at_k(true_item: List[int], pred_item: List[int], k: int) -> float:
    """Compute F1@k, the harmonic mean of Precision@k and Recall@k.

    Args:
        true_item: Ground-truth items.
        pred_item: Predicted items.
        k: Cutoff forwarded to ``precision_at_k`` and ``recall_at_k``.

    Returns:
        ``2 * P * R / (P + R)``, or 0.0 when ``P + R`` is zero.
    """
    prec = precision_at_k(true_item, pred_item, k)
    rec = recall_at_k(true_item, pred_item, k)
    denominator = prec + rec

    # Avoid 0/0 when neither precision nor recall registered a hit.
    if denominator == 0.0:
        return 0.0

    return 2 * prec * rec / denominator

# F1@3 on the toy lists defined above.
f1_at_3 = f1_at_k(true_item, pred_item, k=3)
print(f1_at_3)

0.28571428571428575


# 순위 평가 지표

In [9]:
def rr_at_k(user_relevance: List[int], k: int) -> float:
    """Compute the reciprocal rank of the first non-zero entry within the top k.

    Args:
        user_relevance: Relevance values in ranked order; any non-zero value
            counts as a hit.
        k: Cutoff on the 1-based rank of the first hit.

    Returns:
        ``1 / rank`` of the first non-zero entry when that rank is at most
        ``k``; 0.0 when there is no non-zero entry or it lies beyond ``k``.
    """
    hit_positions = np.nonzero(np.asarray(user_relevance))[0]
    if hit_positions.size == 0:
        return 0.0

    # Positions are 0-based, ranks are 1-based.
    first_rank = hit_positions[0] + 1
    if first_rank <= k:
        return 1.0 / (hit_positions[0] + 1.0)
    return 0.0

# First hit sits at rank 2, which is within the cutoff k=2.
rr_example = rr_at_k([0, 1, 0], k=2)
print(rr_example)

def mrr_at_k(users_relevance: List[List[int]], k: int) -> float:
    """Compute MRR@k: the mean of ``rr_at_k`` over all users.

    Args:
        users_relevance: One ranked relevance list per user.
        k: Cutoff forwarded to ``rr_at_k`` for every user.

    Returns:
        The arithmetic mean of the per-user reciprocal ranks as a Python float.
    """
    reciprocal_ranks = [rr_at_k(relevance, k) for relevance in users_relevance]
    return float(np.mean(reciprocal_ranks))

# Three users whose first hit is at rank 1, 2 and 3; the last one falls outside k=2.
mrr_example = mrr_at_k(
    [
        [1, 0, 0],
        [0, 1, 0],
        [0, 0, 1],
    ],
    k=2,
)
print(mrr_example)

0.5
0.5


In [10]:
def ap_at_k(user_relevance: List[int], k: int) -> float:
    """Compute Average Precision@k for one ranked relevance list.

    Args:
        user_relevance: Relevance values in ranked order.
        k: Cutoff; only ``user_relevance[:k]`` is considered.

    Returns:
        The sum, over every non-zero position in the top k, of the relevance
        accumulated up to that position divided by its 1-based rank, all
        divided by the total relevance in the top k. Returns 0.0 when that
        total is zero.
    """
    top_k = user_relevance[:k]
    total_relevance = sum(top_k)
    if total_relevance == 0:
        return 0.0

    hit_positions = np.nonzero(np.asarray(top_k))[0]
    # Precision evaluated at each hit position (positions are 0-based).
    precisions = [sum(top_k[: pos + 1]) / (pos + 1) for pos in hit_positions]
    return sum(precisions) / total_relevance

# Hits at ranks 2 and 4 give precisions 1/2 and 2/4, so AP@5 is 0.5.
ap_example = ap_at_k([0, 1, 0, 1, 0], k=5)
print(ap_example)

def map_at_k(users_relevance: List[List[int]], k: int) -> float:
    """Compute MAP@k: the mean of ``ap_at_k`` over all users.

    Args:
        users_relevance: One ranked relevance list per user.
        k: Cutoff forwarded to ``ap_at_k`` for every user.

    Returns:
        The arithmetic mean of the per-user average precisions as a Python float.
    """
    average_precisions = [ap_at_k(relevance, k) for relevance in users_relevance]
    return float(np.mean(average_precisions))

# Three users whose single hit is at rank 1, 2 and 3 respectively.
ls = [
    [1, 0, 0],
    [0, 1, 0],
    [0, 0, 1],
]
map_example = map_at_k(ls, k=3)
print(map_example)

0.5
0.611111111111111


In [11]:
def dcg_at_k(user_relevance: List[int], k: int) -> float:
    """Compute Discounted Cumulative Gain over the top k of a ranked list.

    The first position is taken undiscounted and position ``i`` (1-based,
    ``i >= 2``) is divided by ``log2(i)``.

    Args:
        user_relevance: Relevance values in ranked order.
        k: Cutoff; only ``user_relevance[:k]`` contributes.

    Returns:
        The DCG as a Python float, or 0.0 when the truncated list is empty.
    """
    # np.asfarray was removed in NumPy 2.0; np.asarray with dtype=float performs
    # the same float64 conversion on every NumPy version.
    gains = np.asarray(user_relevance, dtype=float)[:k]
    if gains.size == 0:
        return 0.0

    # Discounts for ranks 2..n; rank 1 is added without a discount.
    discounts = np.log2(np.arange(2, gains.size + 1))
    return float(gains[0] + np.sum(gains[1:] / discounts))

def ndcg_at_k(user_relevance: List[int], k: int) -> float:
    """Compute normalized DCG@k: DCG of the given order over the ideal DCG.

    Args:
        user_relevance: Relevance values in ranked order.
        k: Cutoff forwarded to ``dcg_at_k``.

    Returns:
        ``dcg_at_k(user_relevance, k)`` divided by the DCG of the same values
        sorted in descending order, or 0.0 when that ideal DCG is zero.
    """
    ideal_order = sorted(user_relevance, reverse=True)
    ideal_dcg = dcg_at_k(ideal_order, k)
    if ideal_dcg:
        return dcg_at_k(user_relevance, k) / ideal_dcg
    # An ideal DCG of zero would make the ratio undefined.
    return 0.0

# Graded relevance example: DCG is 2.5 against an ideal DCG of 3.0.
ndcg_example = ndcg_at_k([0, 2, 0, 1, 0], k=5)
print(ndcg_example)

0.8333333333333334
