In [1]:
from itertools import combinations

import numpy as np
import pandas as pd
from sklearn.base import clone
from sklearn.datasets import load_breast_cancer, make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import auc, roc_curve, roc_auc_score

In [2]:
# Load scikit-learn's breast-cancer dataset into a DataFrame (features plus a
# 'target' column), then pull out the feature matrix X and label vector y as
# NumPy arrays.
dataset = load_breast_cancer()
feature_cols = dataset.feature_names
df = pd.DataFrame(data=dataset.data, columns=feature_cols).assign(target=dataset.target)
X, y = df[feature_cols].to_numpy(), df['target'].to_numpy()
X.shape, y.shape

((569, 30), (569,))

In [3]:
# Fit on the full dataset and score the very same rows, so every AUC computed
# from y_score below is an in-sample figure. Column 1 of y_score is used
# further down as the positive-class score.
clf = LogisticRegression(max_iter=3000).fit(X, y)
y_score = clf.predict_proba(X)

### Probabilistic Interpretation of AUC

AUC can be interpreted as the probability that a randomly chosen positive example receives a higher score than a randomly chosen negative example.
Let there be a total of $M$ examples, of which $P$ are positive and $N$ are negative, so that $M = P + N$.

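Fit a classification model and get its predicted probability (confidence) of the positive label for every example. Among all $P \cdot N$ (positive, negative) pairs $(p, n)$, where $p$ is one of the $P$ positive examples and $n$ is one of the $N$ negative examples, count the pairs in which the confidence for $p$ is greater than the confidence for $n$. Let that count be $C$. (A pair with exactly equal confidences, if any, is conventionally counted as $1/2$.)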

AUC is then given by $C / (P \cdot N)$.

In [4]:
def probabilistic_AUC(y, y_score):
    """Binary AUC as P(score of a random positive > score of a random negative).

    Every (positive, negative) pair contributes 1 when the positive example has
    the strictly higher score and 0.5 when the two scores are exactly tied.
    Counting ties as half a pair is what keeps this quantity equal to the
    trapezoidal area under the ROC curve when scores repeat.

    Parameters
    ----------
    y : array-like of shape (n_samples,)
        True labels. Entries equal to 1 are positives, entries equal to 0 are
        negatives; any other value is ignored.
    y_score : array-like of shape (n_samples, 2) or (n_samples,)
        Scores, assumed free of NaN. For a 2-D input, column 1 is taken as the
        positive-class score; a 1-D input is used as-is.

    Returns
    -------
    float
        Fraction of positive/negative pairs that are ranked correctly, with
        tied pairs counted as one half.

    Raises
    ------
    ZeroDivisionError
        If `y` has no positive or no negative example, in which case AUC is
        undefined. The exception type is the one the plain division would
        raise; only the message is made explicit.
    """
    y = np.asarray(y)
    scores = np.asarray(y_score)
    if scores.ndim == 2:
        scores = scores[:, 1]

    pos_scores = scores[y == 1]
    neg_scores = np.sort(scores[y == 0])

    # total combinations - n_pos * n_neg
    n_pairs = len(pos_scores) * len(neg_scores)
    if n_pairs == 0:
        raise ZeroDivisionError(
            "AUC is undefined: y must contain at least one positive (1) "
            "and one negative (0) example"
        )

    # For each positive score, binary-search the sorted negatives:
    #   side='left'  -> number of negatives strictly below it,
    #   side='right' -> number of negatives below or equal to it.
    # The difference is the number of exact ties. This replaces the
    # O(P * N) Python double loop with O((P + N) log N) array operations.
    n_below = np.searchsorted(neg_scores, pos_scores, side='left')
    n_below_or_tied = np.searchsorted(neg_scores, pos_scores, side='right')
    n_tied = n_below_or_tied - n_below

    count = n_below.sum() + 0.5 * n_tied.sum()
    return float(count / n_pairs)

# In-sample AUC obtained by counting correctly ordered (positive, negative) pairs.
probabilistic_AUC(y, y_score)

0.9947412927435125

In [5]:
# Cross-check: area under scikit-learn's ROC curve, built from the same labels
# and the same column-1 scores.
fpr, tpr, thresholds = roc_curve(y, y_score[:, 1], pos_label = 1)
auc(fpr, tpr)

0.9947412927435124

#### Multi-Class AUC (MAUC)

There are two common approaches to computing AUC for multi-class problems (number of classes $\geq 3$):
1. **One vs. Rest (OVR):**
    1. Treat one class as one label and all the remaining classes together as the other label. Modify the true labels and probabilities accordingly and compute the binary AUC.
    2. Take the average of the AUCs over all classes.
    3. The number of models is equal to the number of classes.
2. **One vs. One (OVO):**
    1. Consider each pair of classes in turn, keeping only the examples that belong to those two classes. Fit a binary classifier and compute its AUC.
    2. Take the average of the AUCs over all pairs.
    3. The number of models is equal to $K (K - 1) / 2$, where $K$ is the number of classes.

In [6]:
# Synthetic 3-class problem; the fixed random_state keeps the data reproducible.
# NOTE: X and y are rebound here and no longer hold the breast-cancer arrays.
n_classes = 3
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=4,
    n_classes=n_classes,
    n_clusters_per_class=3,
    random_state=0,
)
X.shape, y.shape

((1000, 10), (1000,))

In [7]:
# Fresh model on the 3-class data, scored in-sample. The columns of y_score
# are indexed by class label further down.
clf = LogisticRegression().fit(X, y)
y_score = clf.predict_proba(X)

In [8]:
def find_outcome(act, pred):
    """Label one (actual, predicted) pair as 'TP', 'TN', 'FP' or 'FN'.

    The actual label 1 is the positive class; every other actual value is
    treated as negative. A prediction is correct when it equals the actual
    label.
    """
    correct = act == pred
    positive = act == 1
    if positive:
        return 'TP' if correct else 'FN'
    return 'TN' if correct else 'FP'
        
def calculate_roc_metrics(y_test, y_score, threshold, pos_label = 1):
    """Return one ROC point, ``(fpr, tpr)``, for a single decision threshold.

    A sample is predicted positive when its score in column `pos_label` of
    `y_score` is strictly greater than `threshold`. A sample is truly positive
    when its label equals `pos_label`; every other label is negative. Using
    `pos_label` for both the score column and the label keeps the two
    consistent when `pos_label` is not 1.

    Parameters
    ----------
    y_test : array-like of shape (n_samples,)
        True labels.
    y_score : array-like of shape (n_samples, n_columns)
        Per-class scores; column `pos_label` is the positive-class score.
    threshold : float
        Decision threshold applied with a strict ``>`` comparison.
    pos_label : int, default 1
        Label of the positive class and index of its score column.

    Returns
    -------
    (fpr, tpr) : tuple of float
        False-positive rate and true-positive rate. A rate is reported as 0
        when its numerator is 0, which also covers the case where there are
        no negatives (for fpr) or no positives (for tpr).
    """
    y_test = np.asarray(y_test)
    y_score = np.asarray(y_score)

    is_positive = y_test == pos_label
    predicted_positive = y_score[:, pos_label] > threshold

    # Confusion-matrix cells from boolean masks, with no per-sample Python work.
    tp = int(np.count_nonzero(predicted_positive & is_positive))
    fp = int(np.count_nonzero(predicted_positive & ~is_positive))
    fn = int(np.count_nonzero(~predicted_positive & is_positive))
    tn = int(np.count_nonzero(~predicted_positive & ~is_positive))

    # Guarding on the numerator avoids 0/0 when a class is absent.
    tpr = 0.0 if tp == 0 else tp / (tp + fn)
    fpr = 0.0 if fp == 0 else fp / (fp + tn)
    return fpr, tpr

def get_auc_score(y, y_score, n_datapoints = 100):
    """Approximate binary ROC-AUC by sampling the ROC curve on a threshold grid.

    The curve is evaluated at `n_datapoints` evenly spaced thresholds in
    [0, 1] via `calculate_roc_metrics` (positive class 1, scores taken from
    column 1 of `y_score`), and the sampled points are integrated with
    `sklearn.metrics.auc`.

    Because the thresholds are a fixed grid rather than the observed score
    values, the result is an approximation of the exact ROC-AUC. A finer grid
    (larger `n_datapoints`) tightens it, at the cost of one pass over the data
    per threshold.

    Parameters
    ----------
    y : array-like of shape (n_samples,)
        Binary true labels, with 1 as the positive class.
    y_score : array-like of shape (n_samples, 2)
        Scores; column 1 is the positive-class score, expected to lie in
        [0, 1] so that the grid spans the whole curve.
    n_datapoints : int, default 100
        Number of thresholds in the grid. The default reproduces the original
        hard-coded value.

    Returns
    -------
    float
        Area under the sampled ROC curve.
    """
    # Thresholds are swept upward, so the points run from the
    # "everything predicted positive" end of the curve towards the
    # "nothing predicted positive" end.
    thresholds = np.linspace(0, 1, n_datapoints)
    roc_points = np.array([calculate_roc_metrics(y, y_score, t) for t in thresholds])
    fpr, tpr = roc_points[:, 0], roc_points[:, 1]
    return auc(fpr, tpr)

In [9]:
def one_vs_rest(y, y_score, n_classes):
    """Unweighted mean of the one-vs-rest AUCs over labels 0 .. n_classes - 1.

    For each class, the multi-class problem is collapsed to a binary one and
    scored with `get_auc_score`; the per-class AUCs are then averaged.
    """
    def class_auc(class_label):
        class_prob = y_score[:, class_label]
        # The class itself is coded 0 and "rest" is coded 1, and the score
        # columns follow the same order: column 0 is the class probability,
        # column 1 is its complement.
        binary_y = np.where(y == class_label, 0, 1)
        binary_score = np.column_stack((class_prob, 1 - class_prob))
        return get_auc_score(binary_y, binary_score)

    return np.mean([class_auc(class_label) for class_label in range(n_classes)])

In [10]:
def one_vs_one(y, n_classes, features = None, estimator = None):
    """Unweighted mean of pairwise (one-vs-one) AUCs, refitting a model per pair.

    For every unordered pair of class labels in ``range(n_classes)``, the
    samples of those two classes are selected, the lower label is coded 0 and
    the higher label 1, a fresh copy of the estimator is fitted on that subset,
    and its in-sample AUC is computed with `get_auc_score`.

    Parameters
    ----------
    y : ndarray of shape (n_samples,)
        Multi-class labels.
    n_classes : int
        Number of classes; labels are assumed to be 0 .. n_classes - 1.
    features : ndarray of shape (n_samples, n_features), optional
        Feature matrix aligned with `y`. Defaults to the notebook-level `X`,
        so existing ``one_vs_one(y, n_classes)`` calls keep working.
    estimator : scikit-learn classifier with ``predict_proba``, optional
        Template estimator. Defaults to the notebook-level `clf`.

    Returns
    -------
    float
        Mean of the pairwise AUCs.

    Notes
    -----
    The template estimator is cloned for each pair and never refitted itself.
    Fitting it directly would leave the notebook's multi-class `clf` replaced
    by whichever binary model was trained last.
    """
    features = X if features is None else features
    base_estimator = clf if estimator is None else estimator

    pair_aucs = []
    for neg_class_label, pos_class_label in combinations(range(n_classes), 2):
        # Select the samples of the two classes once; reuse the mask for X and y.
        pair_mask = (y == neg_class_label) | (y == pos_class_label)
        iter_X = features[pair_mask]
        iter_y = np.where(y[pair_mask] == neg_class_label, 0, 1)

        # clone() gives an unfitted estimator with the same hyper-parameters.
        pair_clf = clone(base_estimator).fit(iter_X, iter_y)
        iter_y_score = pair_clf.predict_proba(iter_X)

        pair_aucs.append(get_auc_score(iter_y, iter_y_score))
    return np.mean(pair_aucs)

In [11]:
# Hand-rolled OVR average next to scikit-learn's. Small differences are
# expected because get_auc_score samples the ROC curve on a fixed threshold
# grid.
one_vs_rest(y, y_score, n_classes), roc_auc_score(y, y_score, multi_class='ovr')

(0.8421645172434821, 0.8421976865259726)

In [12]:
# NOTE: these two numbers are not expected to match. one_vs_one refits a
# binary model on every class pair, whereas roc_auc_score is handed the scores
# of the single 3-class model (y_score).
one_vs_one(y, n_classes), roc_auc_score(y, y_score, multi_class='ovo')

(0.8679561547970424, 0.8420581530079008)