In [1]:
# chatgpt
import pandas as pd

# Assume you already have a DataFrame that contains `label` and `prob` columns.
# For example: data = pd.DataFrame({'label': [0, 1, 0, 1], 'prob': [0.1, 0.8, 0.4, 0.6]})

def calculate_tpr_fpr(data, thresholds):
    """Compute the true and false positive rate at each decision threshold.

    A row is predicted positive when its ``prob`` is greater than or equal
    to the threshold. Rows whose ``label`` is neither 0 nor 1 are ignored.

    Args:
        data: DataFrame with a ``label`` column (1 = positive, 0 = negative)
            and a ``prob`` column of scores. It is not modified.
        thresholds: Iterable of cut-off values; results keep this order.

    Returns:
        A ``(tpr_list, fpr_list)`` tuple of lists of Python floats, one entry
        per threshold. A rate is ``nan`` when ``data`` contains no rows of
        the corresponding class (no positives for TPR, no negatives for FPR).
    """
    # The class masks and totals do not depend on the threshold, so they are
    # computed once. TP + FN is the number of positives and FP + TN is the
    # number of negatives, whatever the threshold is.
    is_positive = data['label'] == 1
    is_negative = data['label'] == 0
    n_positive = int(is_positive.sum())
    n_negative = int(is_negative.sum())

    tpr_list = []
    fpr_list = []

    for t in thresholds:
        # Row order is irrelevant here, so no sorting is needed, and the
        # prediction mask is kept local instead of being written to a column.
        predicted_positive = data['prob'] >= t

        tp = int((is_positive & predicted_positive).sum())
        fp = int((is_negative & predicted_positive).sum())

        # An absent class makes the rate undefined; report nan explicitly
        # rather than relying on a warning-emitting 0/0 division.
        tpr_list.append(tp / n_positive if n_positive else float('nan'))
        fpr_list.append(fp / n_negative if n_negative else float('nan'))

    return tpr_list, fpr_list

# Example: score four labelled samples against nine descending thresholds.
data = pd.DataFrame({'label': [0, 1, 0, 1], 'prob': [0.1, 0.8, 0.4, 0.6]})
thresholds = [0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1]

tpr, fpr = calculate_tpr_fpr(data, thresholds)
print("TPR:", tpr)
print("FPR:", fpr)

TPR: [0.0, 0.5, 0.5, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
FPR: [0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 1.0]


In [None]:
# claude
import pandas as pd

df = pd.DataFrame({
    'label': [0, 1, 0, 1, 0, 1],
    'prob': [0.1, 0.4, 0.35, 0.8, 0.2, 0.7]
})

t = [0.8, 0.5, 0.3]  # decision thresholds

# Order rows from highest to lowest score. The rates below do not depend on
# row order; this only makes df easier to inspect.
df = df.sort_values('prob', ascending=False)

# Class masks and totals are the same for every threshold, so compute them
# once. Compare against 0/1 explicitly: `~` on an integer column is a bitwise
# NOT (~0 == -1, ~1 == -2), not a logical negation.
is_positive = df['label'] == 1
is_negative = df['label'] == 0
n_positive = int(is_positive.sum())
n_negative = int(is_negative.sum())

# One TPR / FPR entry per threshold, in the order given by t.
tpr_list = []
fpr_list = []

for threshold in t:
    # A row is predicted positive when its score reaches the threshold.
    predicted_positive = df['prob'] >= threshold

    # TPR = TP / P  and  FPR = FP / N.
    tpr = int((is_positive & predicted_positive).sum()) / n_positive
    fpr = int((is_negative & predicted_positive).sum()) / n_negative

    tpr_list.append(tpr)
    fpr_list.append(fpr)

print(tpr_list)
# [0.3333333333333333, 0.6666666666666666, 1.0]
print(fpr_list)
# [0.0, 0.0, 0.3333333333333333]

In [1]:
# bard
import pandas as pd

def calculate_tpr_fpr(df, t, label_col='label', prob_col='prob'):
  """
  Calculates the true positive rate (TPR) and false positive rate (FPR) for a list of thresholds.

  A row counts as predicted positive when its probability is greater than or
  equal to the threshold. Rows whose label is neither 0 nor 1 are ignored.

  Args:
    df: A Pandas DataFrame. It is not modified.
    t: A list of thresholds; results are returned in the same order.
    label_col: The name of the column in df that contains the labels
      (1 = positive, 0 = negative).
    prob_col: The name of the column in df that contains the probabilities.

  Returns:
    A tuple (tpr, fpr) of lists of floats, one entry per threshold. A rate is
    nan when df contains no rows of the corresponding class (no positives for
    TPR, no negatives for FPR).
  """

  # Class masks and totals are independent of the threshold; compute them once.
  is_positive = df[label_col] == 1
  is_negative = df[label_col] == 0
  n_positive = int(is_positive.sum())
  n_negative = int(is_negative.sum())

  # Initialize the TPR and FPR.
  tpr = []
  fpr = []

  # Iterate over the thresholds.
  for threshold in t:
    at_or_above = df[prob_col] >= threshold

    # Combine the masks with `&` instead of chaining df[mask_a][mask_b]:
    # chaining applies a full-length mask to an already filtered frame, which
    # makes pandas reindex the mask and emit a UserWarning.
    true_positives = int((is_positive & at_or_above).sum())
    false_positives = int((is_negative & at_or_above).sum())

    # An absent class makes the rate undefined; report nan instead of
    # dividing by zero.
    tpr.append(true_positives / n_positive if n_positive else float('nan'))
    fpr.append(false_positives / n_negative if n_negative else float('nan'))

  return tpr, fpr

# Example: two negatives (0.1, 0.2) and two positives (0.3, 0.4).
df = pd.DataFrame({'label': [0, 0, 1, 1], 'prob': [0.1, 0.2, 0.3, 0.4]})
t = [0.2, 0.3]

tpr, fpr = calculate_tpr_fpr(df, t)

print(tpr)
# [1.0, 1.0]  (both positives score at or above both thresholds)

print(fpr)
# [0.5, 0.0]  (only the 0.2 negative reaches the 0.2 threshold; none reach 0.3)

[1.0, 1.0]
[0.5, 0.0]


