In [2]:
import numpy as np

# 回帰における評価指標

## RMSE (Root Mean Squared Error: 平均平方二乗誤差)

In [8]:
from sklearn.metrics import mean_squared_error

# Five ground-truth targets and the corresponding model predictions.
y_true, y_pred = (
    [1.0, 1.5, 2.0, 1.2, 1.8],
    [0.8, 1.5, 1.8, 1.3, 3.0],
)

# RMSE is the square root of the mean squared error.
mse = mean_squared_error(y_true, y_pred)
value = np.sqrt(mse)
print(value)

0.5531726674375732


## RMSLE (Root Mean Squared Logarithmic Error)

In [7]:
from sklearn.metrics import mean_squared_log_error

# Same toy targets/predictions as the RMSE example.
y_true, y_pred = (
    [1.0, 1.5, 2.0, 1.2, 1.8],
    [0.8, 1.5, 1.8, 1.3, 3.0],
)

# RMSLE is the square root of the mean squared logarithmic error.
msle = mean_squared_log_error(y_true, y_pred)
value = np.sqrt(msle)
print(value)

0.17032547044118185


## MAE (Mean Absolute Error)

In [10]:
from sklearn.metrics import mean_absolute_error

# Same toy targets/predictions as the RMSE example.
y_true, y_pred = (
    [1.0, 1.5, 2.0, 1.2, 1.8],
    [0.8, 1.5, 1.8, 1.3, 3.0],
)

# Mean of the absolute differences between target and prediction.
value = mean_absolute_error(y_true=y_true, y_pred=y_pred)
print(value)

0.33999999999999997


## 決定係数 (R^2)

In [11]:
from sklearn.metrics import r2_score

# Same toy targets/predictions as the RMSE example.
y_true, y_pred = (
    [1.0, 1.5, 2.0, 1.2, 1.8],
    [0.8, 1.5, 1.8, 1.3, 3.0],
)

# Coefficient of determination; it is negative for this data because the
# predictions fit worse than always predicting the mean of y_true.
value = r2_score(y_true=y_true, y_pred=y_pred)
print(value)

-1.2499999999999996


# 二値分類における評価指標 ~正例か負例かを予測値とする場合

## 混同行列 (confusion matrix)

In [19]:
from sklearn.metrics import confusion_matrix

# Binary labels: 1 = positive, 0 = negative.
y_true = [1, 0, 1, 1, 0, 1, 1, 0]
y_pred = [0, 0, 1, 1, 0, 0, 1, 1]

# Build the boolean masks once instead of converting the lists four times.
actual = np.array(y_true)
predicted = np.array(y_pred)
actual_pos, actual_neg = actual == 1, actual == 0
predicted_pos, predicted_neg = predicted == 1, predicted == 0

tp = np.sum(actual_pos & predicted_pos)  # true positives
tn = np.sum(actual_neg & predicted_neg)  # true negatives
fp = np.sum(actual_neg & predicted_pos)  # false positives
fn = np.sum(actual_pos & predicted_neg)  # false negatives

# Hand-built layout: [[TP, FP], [FN, TN]].
confusion_matrix1 = np.array([[tp, fp], [fn, tn]])
print(confusion_matrix1)

# scikit-learn's version of the same table; its printed layout differs from
# the hand-built [[TP, FP], [FN, TN]] arrangement.
confusion_matrix2 = confusion_matrix(y_true=y_true, y_pred=y_pred)
print(confusion_matrix2)

[[3 1]
 [2 2]]
[[2 1]
 [2 3]]


## accuracy (正答率)とerror rate (誤答率)

In [21]:
from sklearn.metrics import accuracy_score

# Fraction of records whose predicted label equals the true label
# (y_true / y_pred come from the confusion-matrix cell).
accuracy = accuracy_score(y_true=y_true, y_pred=y_pred)
print(accuracy)

0.625


## precision (適合率)とrecall (再現率)

In [22]:
from sklearn.metrics import precision_score, recall_score

# Compute both scores first, then report them in the same order as before
# (y_true / y_pred come from the confusion-matrix cell).
precision = precision_score(y_true=y_true, y_pred=y_pred)
recall = recall_score(y_true=y_true, y_pred=y_pred)

print(precision)
print(recall)

0.75
0.6


## F1-scoreとFβ-score

In [28]:
from sklearn.metrics import f1_score, fbeta_score

# F1 and F-beta (beta=2) on the labels from the confusion-matrix cell.
f1 = f1_score(y_true=y_true, y_pred=y_pred)
fbeta = fbeta_score(y_true=y_true, y_pred=y_pred, beta=2)

print(f1)
print(fbeta)

0.6666666666666665
0.625


## MCC (Matthews Correlation Coefficient)

In [29]:
from sklearn.metrics import matthews_corrcoef

# Matthews correlation coefficient on the labels from the confusion-matrix cell.
mcc = matthews_corrcoef(y_true=y_true, y_pred=y_pred)
print(mcc)

0.2581988897471611


# 二値分類における評価指標 ~正例である確率を予測値とする場合

## logloss (cross entropy)

In [30]:
from sklearn.metrics import log_loss

# Binary labels and, for each record, the predicted probability of the
# positive class.
y_true, y_pred = (
    [1, 0, 1, 1, 0, 1],
    [0.1, 0.2, 0.8, 0.8, 0.1, 0.3],
)

logloss = log_loss(y_true, y_pred)
print(logloss)

0.7135581778200728


## AUC (Area Under the ROC Curve)

In [31]:
from sklearn.metrics import roc_auc_score

# Area under the ROC curve, scored on the labels and probabilities defined
# in the logloss cell.
labels, scores = y_true, y_pred
auc = roc_auc_score(labels, scores)
print(auc)

0.8125


# 多クラス分類における評価指標

## multi-class accuracy

In [32]:
# Three-class labels (0, 1, 2) for five records.
y_true, y_pred = (
    [0, 2, 1, 1, 0],
    [0, 1, 1, 2, 0],
)

accuracy = accuracy_score(y_true=y_true, y_pred=y_pred)
print(accuracy)

0.6


## multi-class logloss

In [34]:
y_true = np.array([0, 2, 1, 1, 0])

# One row per record with three probabilities each; every row sums to 1.
class_probabilities = [
    [0.8, 0.2, 0.0],
    [0.1, 0.3, 0.6],
    [0.1, 0.8, 0.1],
    [0.2, 0.5, 0.3],
    [1.0, 0.0, 0.0],
]
y_pred = np.array(class_probabilities)

logloss = log_loss(y_true, y_pred)
print(logloss)

0.3300519813908718


## mean-F1とmacro-F1とmicro-F1

In [47]:
from sklearn.preprocessing import MultiLabelBinarizer

# Multi-label example: every record carries a set of labels.
y_true_labels = [[1,2], [1], [1,2,3], [2,3], [3]]
y_pred_labels = [[1,3], [2], [1,3], [3], [3]]

# Fit ONE binarizer on the labels of both sides and reuse it for both
# transforms. Calling fit_transform separately on the truth and on the
# predictions re-learns the label-to-column mapping each time, so the two
# indicator matrices only line up when both happen to contain exactly the
# same set of labels.
binarizer = MultiLabelBinarizer()
binarizer.fit(y_true_labels + y_pred_labels)
y_true = binarizer.transform(y_true_labels)
y_pred = binarizer.transform(y_pred_labels)

# --- Manual computation -------------------------------------------------
# mean-F1: F1 of each record (row), averaged over records.
mean_f1 = np.mean([f1_score(y_true[i, :], y_pred[i, :]) for i in range(len(y_true))])
print(mean_f1)

# macro-F1: F1 of each class (column), averaged over classes.
# The class count is taken from the indicator matrix rather than hard-coded.
n_class = y_true.shape[1]
macro_f1 = np.mean([f1_score(y_true[:, c], y_pred[:, c]) for c in range(n_class)])
print(macro_f1)

# micro-F1: a single F1 over every (record, class) pair.
micro_f1 = f1_score(y_true.reshape(-1), y_pred.reshape(-1))
print(micro_f1)

# --- Same three scores via f1_score's `average` argument -----------------
mean_f1 = f1_score(y_true, y_pred, average='samples')
print(mean_f1)
macro_f1 = f1_score(y_true, y_pred, average='macro')
print(macro_f1)
micro_f1 = f1_score(y_true, y_pred, average='micro')
print(micro_f1)

0.5933333333333334
0.5523809523809523
0.6250000000000001
0.5933333333333334
0.5523809523809523
0.6250000000000001


## quadratic weighted kappa

In [50]:
from sklearn.metrics import cohen_kappa_score

# Labels on a 1..5 scale for five records.
y_true, y_pred = (
    [1, 2, 3, 4, 3],
    [2, 2, 4, 4, 5],
)

# weights='quadratic' selects the quadratic weighting of Cohen's kappa.
# The bare name on the last line keeps the value as the cell's displayed result.
quadratic_weighted_kappa = cohen_kappa_score(y_true, y_pred, weights='quadratic')
quadratic_weighted_kappa

0.6153846153846154

# レコメンデーションにおける評価指標

## MAP@K

In [52]:
# There are four classes (1..4) in this example.
K = 3

# True items of each record.
y_true = [
    [1, 2],
    [4],
    [1, 2, 3, 4],
]

# Ranked predictions for each record.
# With K = 3, each record normally gets up to three ranked predictions.
y_pred = [
    [1, 2, 4],
    [1, 4, 3],
    [1, 2, 3],
]


# Average precision at K for a single record:
#   AP@K = 1 / min(m, K) * (sum of precision@i over the ranks i that are hits)
# where m is the number of true items of the record.
def apk(y_i_true, y_i_pred, k=None):
    """Return the average precision at K for one record.

    Args:
        y_i_true: Collection of the record's true items.
        y_i_pred: Ranked predictions, best first; at most ``k`` items, all
            distinct.
        k: Cut-off rank. When omitted, the module-level ``K`` is used, so
            existing two-argument calls behave exactly as before.

    Returns:
        The sum of precision@i over the ranks i whose prediction is a true
        item, divided by ``min(len(y_i_true), k)``. Returns ``0.0`` when
        ``y_i_true`` is empty.

    Raises:
        AssertionError: If more than ``k`` predictions are given or the
            predictions contain duplicates.
    """
    if k is None:
        k = K  # fall back to the notebook-wide cut-off

    # y_i_pred must be no longer than k and must not contain duplicates.
    assert (len(y_i_pred) <= k)
    assert (len(np.unique(y_i_pred)) == len(y_i_pred))

    # Without any true item nothing can be hit, and min(0, k) would make the
    # final division fail with ZeroDivisionError.
    if len(y_i_true) == 0:
        return 0.0

    sum_precision = 0.0
    num_hits = 0.0

    for rank, p in enumerate(y_i_pred, start=1):
        if p in y_i_true:  # only hits contribute to the sum
            num_hits += 1
            sum_precision += num_hits / rank  # precision at this rank

    return sum_precision / min(len(y_i_true), k)


# Function that computes MAP@K
def mapk(y_true, y_pred):
    """Return the mean of the per-record average precision.

    ``y_true`` and ``y_pred`` are iterated in parallel; each pair is scored
    with ``apk`` and the scores are averaged with ``np.mean``.
    """
    per_record_ap = []
    for record_truth, record_ranking in zip(y_true, y_pred):
        per_record_ap.append(apk(record_truth, record_ranking))
    return np.mean(per_record_ap)

# Average precision of each record
print(apk(y_true[0], y_pred[0])) # 1.0
print(apk(y_true[1], y_pred[1])) # 0.5
print(apk(y_true[2], y_pred[2])) # 1.0  (third record; index 1 was repeated here before)

# MAP@K
print(mapk(y_true, y_pred))
# 0.833...

1.0
0.5
1.0
0.8333333333333334
