## 評価指標

In [1]:
import numpy as np

### RMSE

In [2]:
from sklearn.metrics import mean_squared_error

In [3]:
# Five ground-truth targets and the corresponding predictions.
y_true = [1.0, 1.5, 2.0, 1.2, 1.8]
y_pred = [0.8, 1.5, 1.8, 1.3, 3.0]

# RMSE is the square root of the mean squared error.
mse = mean_squared_error(y_true, y_pred)
rmse = np.sqrt(mse)
rmse

0.5531726674375732

### RMSLE

In [4]:
y_true = [100, 0, 400]
y_pred = [200, 10, 200]

# RMSLE is the RMSE of the log(1 + y) transformed values, so it penalises
# relative (ratio) errors rather than absolute ones.
log_true = np.log1p(y_true)
log_pred = np.log1p(y_pred)
rmsle = np.sqrt(mean_squared_error(log_true, log_pred))
rmsle

1.4944905400842203

In [5]:
from sklearn.metrics import mean_squared_log_error

In [6]:
# Same RMSLE, this time from scikit-learn's mean squared log error.
msle = mean_squared_log_error(y_true, y_pred)
rmsle = np.sqrt(msle)
rmsle

1.4944905400842203

### 決定係数（$R^2$）

In [7]:
from sklearn.metrics import r2_score

In [8]:
# Same targets and predictions as in the RMSE example.
y_true = [1.0, 1.5, 2.0, 1.2, 1.8]
y_pred = [0.8, 1.5, 1.8, 1.3, 3.0]

# Coefficient of determination. It is negative when the predictions fit worse
# than a constant model that always predicts the mean of y_true.
r2 = r2_score(y_true, y_pred)
r2

-1.2499999999999996

In [9]:
# R^2 by hand: 1 - MSE(model) / MSE(baseline), where the baseline always
# predicts the mean of y_true. The baseline length is taken from y_true
# instead of being hard-coded, so the cell keeps working for any sample size.
baseline_pred = [np.mean(y_true)] * len(y_true)
1 - mean_squared_error(y_true, y_pred) / mean_squared_error(y_true, baseline_pred)

-1.2499999999999996

### 混同行列

In [10]:
from sklearn.metrics import confusion_matrix

In [11]:
y_true = [1, 0, 1, 1, 0, 1, 1, 0]
y_pred = [0, 0, 1, 1, 0, 0, 1, 1]

# Boolean masks for actual / predicted positives and negatives.
actual_pos = np.array(y_true) == 1
actual_neg = np.array(y_true) == 0
pred_pos = np.array(y_pred) == 1
pred_neg = np.array(y_pred) == 0

# Count each cell of the confusion matrix.
tp = (actual_pos & pred_pos).sum()
tn = (actual_neg & pred_neg).sum()
fp = (actual_neg & pred_pos).sum()
fn = (actual_pos & pred_neg).sum()

# Layout: rows = predicted (positive, negative), columns = actual (positive, negative).
confusion_matrix1 = np.array([[tp, fp],
                              [fn, tn]])
confusion_matrix1

array([[3, 1],
       [2, 2]])

In [12]:
# scikit-learn's metrics.confusion_matrix can build the matrix too, but note that its
# element layout differs from the hand-built one: rows are true labels and columns are
# predicted labels in sorted label order, i.e. [[tn, fp], [fn, tp]] for labels 0/1.
confusion_matrix2 = confusion_matrix(y_true, y_pred)
confusion_matrix2

array([[2, 1],
       [2, 3]])

### precision, recall, f1score, mcc (matthews corr coef)

In [13]:
from sklearn.metrics import precision_score, recall_score, f1_score, fbeta_score, matthews_corrcoef

In [14]:
y_true = [1, 0, 1, 1, 0, 1, 1, 0]
y_pred = [0, 0, 1, 1, 0, 0, 1, 1]

# Evaluate each metric once, then print them in order:
# precision, recall, F1, F-beta (beta=2) and Matthews correlation coefficient.
metric_values = (
    precision_score(y_true, y_pred),
    recall_score(y_true, y_pred),
    f1_score(y_true, y_pred),
    fbeta_score(y_true, y_pred, beta=2),
    matthews_corrcoef(y_true, y_pred),
)
for metric_value in metric_values:
    print(metric_value)

0.75
0.6
0.6666666666666665
0.625
0.2581988897471611


* fbeta_score: betaでrecallをどれだけ重視するかを指定する（beta > 1でrecall重視、beta < 1でprecision重視、beta = 1でf1_scoreと一致）
* mcc: 1で完璧な予測、0でランダムな予測、-1で完全に反対の予測

### AUC

In [15]:
from sklearn.metrics import roc_auc_score, roc_curve

In [16]:
y_true = [1, 0, 1, 1, 0, 1, 1, 0]
y_pred = [0, 0, 1, 1, 0, 0, 1, 1]

# NOTE(review): y_pred holds hard 0/1 predictions here; roc_auc_score is normally
# given scores or probabilities, so confirm that passing labels is intended.
roc_auc_score(y_true, y_pred)

0.6333333333333334

In [17]:
# Gini coefficient is a linear rescaling of AUC: gini = 2 * AUC - 1.
auc = roc_auc_score(y_true, y_pred)
gini = 2 * auc - 1
gini

0.26666666666666683

### multi-class logloss

In [18]:
from sklearn.metrics import log_loss

In [19]:
# True class index for each of the five samples.
y_true = np.array([0, 2, 1, 2, 2])
# One row per sample, one column per class; each row sums to 1.
y_pred = np.array([[0.68, 0.32, 0.00],
                   [0.00, 0.00, 1.00],
                   [0.60, 0.40, 0.00],
                   [0.00, 0.00, 1.00],
                   [0.28, 0.12, 0.60]])
# Multi-class log loss: mean negative log of the probability given to the true class.
logloss = log_loss(y_true, y_pred)
print(logloss)

0.3625557672904274


### mean-f1, macro-f1, micro-f1

In [20]:
from sklearn.metrics import f1_score

In [21]:
# For metric computation, multi-label targets and predictions are easiest to handle
# as a binary (record x class) indicator matrix.
classes = (1, 2, 3)

# Label sets per record: ground truth and prediction.
true_labels = [[1, 2], [1], [1, 2, 3], [2, 3], [3]]
pred_labels = [[1, 3], [2], [1, 3], [3], [3]]

# Entry [r, c] is 1 when record r carries class classes[c], otherwise 0.
y_true = np.array([[int(cls in labels) for cls in classes] for labels in true_labels])
y_pred = np.array([[int(cls in labels) for cls in classes] for labels in pred_labels])

In [22]:
# mean-F1: F1 computed per record (row), then averaged over records.
per_record_f1 = [f1_score(true_row, pred_row) for true_row, pred_row in zip(y_true, y_pred)]
mean_f1 = np.mean(per_record_f1)
mean_f1

0.5933333333333334

In [23]:
# macro-F1: F1 computed per class (column), then averaged over classes.
per_class_f1 = [f1_score(true_col, pred_col) for true_col, pred_col in zip(y_true.T, y_pred.T)]
macro_f1 = np.mean(per_class_f1)
macro_f1

0.5523809523809523

In [24]:
# micro-F1: pool every (record, class) pair into one binary problem and take its F1.
micro_f1 = f1_score(y_true.ravel(), y_pred.ravel())
micro_f1

0.6250000000000001

In [25]:
# The same three scores straight from f1_score via its `average` argument.
for label, average_mode in (('mean_f1: ', 'samples'),
                            ('macro_f1: ', 'macro'),
                            ('micro_f1: ', 'micro')):
    print(label, f1_score(y_true, y_pred, average=average_mode))

mean_f1:  0.5933333333333334
macro_f1:  0.5523809523809523
micro_f1:  0.6250000000000001


### quadratic weighted kappa
- マルチクラス分類で、クラス間に順序がある場合に使用される
$$
\begin{align*}
  \kappa =
  1 - \frac{\sum_{i,j}w_{i,j}O_{i,j}}{\sum_{i,j}w_{i,j}E_{i,j}}
\end{align*}
$$

* $O_{i,j}$: 真値が$i$、予測値が$j$である件数
* $E_{i,j}$: 真値が$i$である割合 × 予測値が$j$である割合 × 全件数（真値と予測値が独立と仮定した場合の期待件数）
* $w_{i,j}$: $(i - j)^2$

In [26]:
from sklearn.metrics import confusion_matrix, cohen_kappa_score

In [27]:
def quadratic_weighted_kappa(c_matrix):
    """Compute the quadratic weighted kappa from a confusion matrix.

    kappa = 1 - sum(w * O) / sum(w * E), where O is the observed count matrix,
    E[i, j] = row_total[i] * col_total[j] / grand_total is the count expected
    if true and predicted classes were independent, and w[i, j] = (i - j) ** 2.

    Args:
        c_matrix: Square 2-D array-like of counts (ndarray or nested lists).
            Entry [i, j] is the number of samples with true class index i and
            predicted class index j; rows and columns must follow the ordinal
            order of the classes.

    Returns:
        The kappa value as a NumPy float. The result is nan when the matrix
        holds no counts or when the weighted expected disagreement is zero
        (for example, every sample falls in a single class).

    Raises:
        ValueError: If c_matrix is not a square 2-D matrix.
    """
    observed = np.asarray(c_matrix, dtype=float)
    if observed.ndim != 2 or observed.shape[0] != observed.shape[1]:
        raise ValueError(
            "c_matrix must be a square 2-D confusion matrix, got shape {}".format(observed.shape)
        )

    # Quadratic disagreement weights: w[i, j] = (i - j)^2.
    class_idx = np.arange(observed.shape[0])
    weights = (class_idx[:, None] - class_idx[None, :]) ** 2.0

    # Expected counts under independence, computed once from the marginals.
    row_totals = observed.sum(axis=1)
    col_totals = observed.sum(axis=0)
    expected = np.outer(row_totals, col_totals) / observed.sum()

    numer = (weights * observed).sum()
    denom = (weights * expected).sum()
    return 1.0 - numer / denom

In [28]:
# Ordinal ratings on a 1-5 scale: ground truth and predictions.
y_true = [1, 2, 3, 4, 3]
y_pred = [2, 2, 4, 4, 5]

# Pass the full ordered label set explicitly so the matrix is 5x5 and the
# row/column index difference equals the rating difference.
c_matrix = confusion_matrix(y_true, y_pred, labels=[1, 2, 3, 4, 5])

kappa = quadratic_weighted_kappa(c_matrix)
kappa

0.6153846153846154

In [29]:
# Cross-check with scikit-learn's quadratic-weighted Cohen's kappa on the same labels.
cohen_kappa_score(y_true, y_pred, weights='quadratic')

0.6153846153846154