In [1]:
# chatgpt
import pandas as pd

# 假设你已经有一个DataFrame，其中包含label和prob列
# 例如：data = pd.DataFrame({'label': [0, 1, 0, 1], 'prob': [0.1, 0.8, 0.4, 0.6]})

def calculate_tpr_fpr(data, thresholds):
    """Compute the true and false positive rate at each decision threshold.

    Args:
        data: DataFrame with a binary ``label`` column (1 = positive,
            0 = negative) and a ``prob`` column holding the predicted score.
            The frame is only read, never modified.
        thresholds: Iterable of cut-offs. A row counts as predicted positive
            when ``prob >= threshold``.

    Returns:
        A tuple ``(tpr_list, fpr_list)`` of two lists of Python floats, one
        entry per threshold and in the same order as ``thresholds``. A rate
        is ``nan`` when ``data`` contains no row of the corresponding class
        (no positives for TPR, no negatives for FPR).
    """
    # The class membership and the class totals do not depend on the
    # threshold, so they are computed once. Row order is irrelevant for the
    # counts, hence no sorting is needed.
    is_positive = (data['label'] == 1).to_numpy()
    is_negative = (data['label'] == 0).to_numpy()
    scores = data['prob'].to_numpy()

    n_positive = int(is_positive.sum())  # TP + FN for every threshold
    n_negative = int(is_negative.sum())  # FP + TN for every threshold

    tpr_list = []
    fpr_list = []

    for t in thresholds:
        predicted_positive = scores >= t

        tp = int((is_positive & predicted_positive).sum())
        fp = int((is_negative & predicted_positive).sum())

        # Guard the division so an absent class yields nan instead of a
        # numpy 0/0 RuntimeWarning.
        tpr_list.append(tp / n_positive if n_positive else float('nan'))
        fpr_list.append(fp / n_negative if n_negative else float('nan'))

    return tpr_list, fpr_list

# 示例
# Toy data set: two negatives and two positives with their predicted scores.
data = pd.DataFrame(
    {
        'label': [0, 1, 0, 1],
        'prob': [0.1, 0.8, 0.4, 0.6],
    }
)
# Cut-offs, listed from the strictest to the most lenient.
thresholds = [0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1]

tpr, fpr = calculate_tpr_fpr(data, thresholds)
# Report both rate lists, each prefixed with its caption.
for caption, rates in (("TPR:", tpr), ("FPR:", fpr)):
    print(caption, rates)

TPR: [0.0, 0.5, 0.5, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
FPR: [0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 1.0]


In [11]:
# new bing
import numpy as np
def roc_curve(y_true, y_prob, thresholds):
    """Compute false and true positive rates for a list of thresholds.

    Note: this shares its name with ``sklearn.metrics.roc_curve`` but has a
    different signature and return value.

    Args:
        y_true: Array-like of binary labels (1 = positive, 0 = negative).
            Lists, numpy arrays and pandas Series are all accepted.
        y_prob: Array-like of predicted scores, aligned by position with
            ``y_true``.
        thresholds: Iterable of cut-offs. A sample counts as predicted
            positive when its score is ``>= threshold``.

    Returns:
        A two-element list ``[fpr, tpr]``. Each element is a list of Python
        floats with one entry per threshold, in the order given. A rate is
        ``nan`` when its class does not occur in ``y_true``.
    """
    # Coerce to arrays so that plain Python lists compare element-wise
    # instead of raising or collapsing to a single boolean.
    labels = np.asarray(y_true)
    scores = np.asarray(y_prob)

    is_positive = labels == 1
    is_negative = labels == 0
    n_positive = int(is_positive.sum())  # tp + fn, threshold independent
    n_negative = int(is_negative.sum())  # fp + tn, threshold independent

    fpr = []
    tpr = []
    for threshold in thresholds:
        predicted_positive = scores >= threshold
        fp = int((is_negative & predicted_positive).sum())  # false positives
        tp = int((is_positive & predicted_positive).sum())  # true positives

        # Guard the division so an absent class yields nan without a
        # numpy 0/0 RuntimeWarning.
        fpr.append(fp / n_negative if n_negative else float('nan'))
        tpr.append(tp / n_positive if n_positive else float('nan'))
    return [fpr, tpr]

# Same toy data as before: two negatives, two positives, and their scores.
data = pd.DataFrame(
    {
        'label': [0, 1, 0, 1],
        'prob': [0.1, 0.8, 0.4, 0.6],
    }
)
# Cut-offs, listed from the strictest to the most lenient.
t = [0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1]

# roc_curve returns [fpr, tpr], so unpack in that order.
fpr, tpr = roc_curve(y_true=data['label'], y_prob=data['prob'], thresholds=t)

# TPR first, then FPR, one list per line.
print(tpr, fpr, sep="\n")

[0.0, 0.5, 0.5, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
[0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 1.0]


In [13]:
# new bing2

import pandas as pd
from sklearn.metrics import confusion_matrix


def roc2(y_true,y_prob,thresholds):
    """Compute true and false positive rates via scikit-learn's confusion matrix.

    Args:
        y_true: Array-like of binary labels (1 = positive, 0 = negative).
        y_prob: Array-like of predicted scores, aligned by position with
            ``y_true``.
        thresholds: Iterable of cut-offs. A sample counts as predicted
            positive when its score is ``>= threshold``.

    Returns:
        A tuple ``(tpr, fpr)`` of two lists of Python floats, one entry per
        threshold and in the order given. A rate is ``nan`` when its class
        does not occur in ``y_true``.
    """
    # Use the thresholds that were passed in. Re-binding the parameter to a
    # global would make the result depend on hidden notebook state.
    scores = np.asarray(y_prob)
    fpr = []
    tpr = []
    for threshold in thresholds:
        y_pred = np.where(scores >= threshold, 1, 0) # assign 1 if prob >= threshold, else 0
        # labels=[0, 1] pins the matrix to 2x2 even when only one class shows
        # up in y_true/y_pred; without it the indexing below can fail.
        cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
        # For binary labels the flattened order is tn, fp, fn, tp.
        tn, fp, fn, tp = (int(count) for count in cm.ravel())

        negatives = fp + tn
        positives = tp + fn
        # Guard the division so an absent class yields nan without a
        # numpy 0/0 RuntimeWarning.
        fpr.append(fp / negatives if negatives else float('nan')) # false positive rate
        tpr.append(tp / positives if positives else float('nan')) # true positive rate
    return tpr,fpr

# Toy data set: two negatives and two positives with their predicted scores.
df = pd.DataFrame({'label': [0, 1, 0, 1], 'prob': [0.1, 0.8, 0.4, 0.6]})
# Cut-offs, listed from the strictest to the most lenient.
t = [0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1]
y_true = df['label'] # true binary labels
y_prob = df['prob'] # probability estimates of the positive class
# Pass the thresholds defined in this cell. The name `thresholds` is only
# bound if an earlier cell has been executed in the same kernel.
tpr,fpr=roc2(y_true,y_prob,t)

print(tpr)
print(fpr)

[0.0, 0.5, 0.5, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
[0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 1.0]
