In [4]:
from sklearn.metrics import roc_auc_score

# Example data
y_true = [0, 1, 1, 0, 1,0]
y_pred_prob = [0.1, 0.9, 0.8, 0.4, 0.7,0.7]


In [5]:
import numpy as np
from sklearn.metrics import roc_auc_score
from scipy.stats import norm

# Function to calculate AUC variance using DeLong's method
def delong_roc_variance(y_true, y_scores):
    """
    Calculate the variance of the AUC using DeLong's method.
    """
    from sklearn.utils import check_consistent_length
    from sklearn.utils.validation import column_or_1d
    from scipy.stats import mannwhitneyu
    
    y_true = column_or_1d(y_true)
    y_scores = column_or_1d(y_scores)
    check_consistent_length(y_true, y_scores)
    
    positive_scores = y_scores[y_true == 1]
    negative_scores = y_scores[y_true == 0]
    m, n = len(positive_scores), len(negative_scores)
    
    U, _ = mannwhitneyu(positive_scores, negative_scores)
    auc = U / (m * n)  # Calculate AUC
    
    V10 = np.var([1 if x > y else 0 for x in positive_scores for y in negative_scores])
    V01 = np.var([1 if x < y else 0 for x in positive_scores for y in negative_scores])
    
    variance = (V10 / m) + (V01 / n)
    return auc, variance

In [11]:
def delong_auc_variance(y_true, y_pred_prob):
    """
    Compute AUC and its variance using DeLong's method.

    Parameters:
        y_true (list or array): True labels (0 or 1).
        y_pred_prob (list or array): Predicted probabilities.

    Returns:
        auc (float): Area under the ROC curve.
        var_auc (float): Variance of the AUC.
    """
    # Separate positive and negative scores
    positive_scores = y_pred_prob[y_true == 1]
    negative_scores = y_pred_prob[y_true == 0]
    
    # Number of positives and negatives
    n_positives = len(positive_scores)
    n_negatives = len(negative_scores)
    
    # Compute AUC
    auc = roc_auc_score(y_true, y_pred_prob)

    # Compute rank sum for positive scores
    all_scores = np.concatenate([positive_scores, negative_scores])
    ranks = np.argsort(np.argsort(all_scores)) + 1
    positive_ranks = ranks[:n_positives]

    # Compute U-statistics
    U_positive = np.sum(positive_ranks) / n_positives
    U_negative = (n_positives + n_negatives + 1) / 2

    # Compute variance components
    sigma_positive_squared = np.sum((positive_ranks - U_positive) ** 2) / (n_positives - 1)
    sigma_negative_squared = np.sum((ranks[n_positives:] - U_negative) ** 2) / (n_negatives - 1)

    # Compute variance of AUC
    var_auc = sigma_positive_squared / n_positives + sigma_negative_squared / n_negatives

    return auc, var_auc

In [6]:

# Calculate AUC
auc = roc_auc_score(y_true, y_pred_prob)
print(f"AUC: {auc}")

AUC: 0.9444444444444444


In [12]:
auc, var_auc = delong_roc_variance(y_true, y_pred_prob)
std_auc = np.sqrt(var_auc)

# Print AUC and its standard error
print(f"AUC: {auc:.3f}")
print(f"Variance: {var_auc:.6f}")
print(f"Standard Error (SE): {std_auc:.6f}")

AUC: 0.944
Variance: 0.032922
Standard Error (SE): 0.181444


In [15]:
# Compute 95% confidence interval
z = norm.ppf(0.975)  # 1.96 for 95% CI
lower_ci = auc - z * std_auc
upper_ci = auc + z * std_auc

In [16]:
print(f"95% Confidence Interval: [{lower_ci}, {upper_ci}]")

95% Confidence Interval: [0.588821357307014, 1.3000675315818748]


In [17]:
params = {"objective": "multi:softprob", "tree_method": "gpu_hist", "num_class": 5}
n = 1000

results = xgb.cv(
   params, dtrain_clf,
   num_boost_round=n,
   nfold=5,
   metrics=["mlogloss", "auc", "merror"],
)

NameError: name 'xgb' is not defined