## sklearn.metrics.roc_curve 산출 원리

In [99]:
import numpy as np
from sklearn import metrics
# Actual (ground-truth) class label of each sample.
y = np.asarray([0, 0, 1, 1])
# Value the model predicted for each sample.
scores = np.asarray([0.1, 0.4, 0.35, 0.8])

In [100]:
# Class 1 is treated as the positive class. Assign it explicitly so this cell
# and the hand-written ROC loop further down do not rely on a leftover
# kernel variable.
pos_label = 1
pos_label

1

#### 사이킷런의 roc_curve 함수 이용

In [101]:
## reference : [sklearn.metrics.roc_curve] https://scikit-learn.org/stable/modules/generated/sklearn.metrics.roc_curve.html

In [102]:
# Reference result from scikit-learn's roc_curve, with label 1 as the positive class.
skl_fpr, skl_tpr, skl_thresholds = metrics.roc_curve(y, scores, pos_label=1)

In [103]:
skl_fpr # fpr returned by scikit-learn's roc_curve

array([0. , 0. , 0.5, 0.5, 1. ])

In [104]:
skl_tpr # tpr returned by scikit-learn's roc_curve

array([0. , 0.5, 0.5, 1. , 1. ])

In [105]:
skl_thresholds # thresholds returned by scikit-learn's roc_curve

array([1.8 , 0.8 , 0.4 , 0.35, 0.1 ])

In [106]:
# Candidate thresholds: every score, plus one extra value above the largest score.
my_thresholds = np.append(scores, scores.max() + 1)

In [107]:
# Order the thresholds from largest to smallest.
my_thresholds = np.sort(my_thresholds)[::-1]

#### 사이킷런의 roc_curve 함수 구현

In [108]:
# Hand-written re-implementation of the ROC curve points: for each threshold,
# classify every score, count the confusion-matrix cells, and record (fpr, tpr).
my_fpr = []
my_tpr = []

# Thresholds: every score plus one value above the maximum (so the first
# threshold predicts nothing as positive), sorted in descending order.
# NOTE(review): the displayed scikit-learn output uses max(scores) + 1 as the
# first threshold; newer scikit-learn releases are documented to use np.inf
# there instead -- confirm against the installed version before relying on
# the exact thresholds comparison.
my_thresholds = np.append(scores, np.array(max(scores)+1))
my_thresholds = -np.sort(-my_thresholds)

# Compute one (fpr, tpr) pair per threshold.
for threshold in my_thresholds:
    # 1 where the score reaches the threshold (predicted positive), else 0.
    temp = [1 if score >= threshold else 0 for score in scores]
    print(threshold, "/ score: ", scores, "/ temp:", temp)

    tp = fp = tn = fn = 0
    for predicted_positive, actual in zip(temp, y):
        # Compare the true label against pos_label directly, so the counts do
        # not depend on the positive class being encoded as 1.
        actual_positive = actual == pos_label
        if predicted_positive and actual_positive:  # predicted 1, actual positive
            tp += 1
        elif predicted_positive:  # predicted 1, actual negative
            fp += 1
        elif actual_positive:  # predicted 0, actual positive
            fn += 1
        else:  # predicted 0, actual negative
            tn += 1
    print("tp: ", tp, ", fp: ", fp, ", tn: ", tn, ", fn:", fn)

    # Use fp / (fp + tn) rather than 1 - tn / (tn + fp): the subtraction adds
    # a rounding step (e.g. 1 - 2/3 != 1/3 in floating point) that could make
    # the exact `==` comparison with scikit-learn's result fail.
    # With no negatives (or no positives) the rate is undefined; the fallback
    # values 1 and 0 are kept as they were.
    fpr = fp / (fp + tn) if (fp + tn) != 0 else 1
    tpr = tp / (tp + fn) if (tp + fn) != 0 else 0
    print("tpr: ", tpr, "fpr: ", fpr)
    print("="*20)
    my_fpr.append(fpr)
    my_tpr.append(tpr)


my_fpr = np.array(my_fpr)
my_tpr = np.array(my_tpr)

1.8 / score:  [0.1  0.4  0.35 0.8 ] / temp: [0, 0, 0, 0]
tp:  0 , fp:  0 , tn:  2 , fn: 2
tpr:  0.0 fpr:  0.0
0.8 / score:  [0.1  0.4  0.35 0.8 ] / temp: [0, 0, 0, 1]
tp:  1 , fp:  0 , tn:  2 , fn: 1
tpr:  0.5 fpr:  0.0
0.4 / score:  [0.1  0.4  0.35 0.8 ] / temp: [0, 1, 0, 1]
tp:  1 , fp:  1 , tn:  1 , fn: 1
tpr:  0.5 fpr:  0.5
0.35 / score:  [0.1  0.4  0.35 0.8 ] / temp: [0, 1, 1, 1]
tp:  2 , fp:  1 , tn:  1 , fn: 0
tpr:  1.0 fpr:  0.5
0.1 / score:  [0.1  0.4  0.35 0.8 ] / temp: [1, 1, 1, 1]
tp:  2 , fp:  2 , tn:  0 , fn: 0
tpr:  1.0 fpr:  1.0


In [109]:
my_fpr  # display the hand-computed fpr values

array([0. , 0. , 0.5, 0.5, 1. ])

In [110]:
my_tpr  # display the hand-computed tpr values

array([0. , 0.5, 0.5, 1. , 1. ])

In [111]:
my_thresholds  # display the hand-built thresholds

array([1.8 , 0.8 , 0.4 , 0.35, 0.1 ])

In [112]:
my_fpr == skl_fpr  # element-wise exact equality against scikit-learn's fpr

array([ True,  True,  True,  True,  True])

In [113]:
my_tpr == skl_tpr  # element-wise exact equality against scikit-learn's tpr

array([ True,  True,  True,  True,  True])

In [114]:
my_thresholds == skl_thresholds  # element-wise exact equality against scikit-learn's thresholds

array([ True,  True,  True,  True,  True])