In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.distributions.empirical_distribution import ECDF
from sklearn.metrics import auc

## Calculate ROC-AUCs

This notebook helps calculate ROC-AUCs for a set of defined true and false positives.

Input arguments to function: 
<ul>
<li><b>df</b> -- DataFrame containing at least the column to rank by (e.g. LFCs), a column named "tp" that is 1 for true positives and 0 in all other rows, and a column named "fp" that is 1 for false positives and 0 in all other rows.</li>
<li><b>col</b> -- Name of the column with the values to sort by (e.g. the column with LFCs for the condition). Rows are sorted in ascending order, so the lowest values are ranked first.</li>
</ul>

Output:
<ul>
    <li>AUC value</li>
    <li>DataFrame with true positive and false positive rates</li>
</ul>

In [3]:
def get_roc_auc(df, col):
    """Compute the ROC curve and its AUC for rows ranked by ``col``.

    Rows are sorted by ``col`` in ascending order, so the lowest values are
    ranked first. Cumulative counts of the ``tp`` and ``fp`` indicator columns
    along that ranking are divided by their totals to give the true and false
    positive rates.

    Rows that share the same value of ``col`` cannot be ranked against each
    other, so every row in such a tie is assigned the rates reached at the end
    of the tie. This makes the result independent of the input row order.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``col`` plus the columns ``'tp'`` and ``'fp'``
        (1 for true / false positives respectively, 0 otherwise).
        The frame passed in is not modified.
    col : str
        Name of the column to sort by.

    Returns
    -------
    roc_auc : float
        Area under the ROC curve (trapezoidal rule). The curve is anchored at
        the origin (0, 0) before integrating.
    roc_df : pandas.DataFrame
        One row per input row, in ranked order, with the columns
        ``'False_Positive_Rate'`` and ``'True_Positive_Rate'``.

    Raises
    ------
    ValueError
        If ``df`` has no true positives or no false positives (this includes
        an empty ``df``), because the rates would be undefined.
    """
    # sort_values returns a new frame, so the caller's DataFrame is untouched.
    ranked = df.sort_values(by=col)
    tp_cumsum = ranked['tp'].cumsum().to_numpy()
    fp_cumsum = ranked['fp'].cumsum().to_numpy()

    n_rows = len(ranked)
    total_tp = tp_cumsum[-1] if n_rows else 0
    total_fp = fp_cumsum[-1] if n_rows else 0
    # Written as "not (> 0)" so that NaN totals are rejected as well.
    if not (total_tp > 0 and total_fp > 0):
        raise ValueError(
            "ROC-AUC is undefined: 'tp' and 'fp' must each mark at least one row"
        )

    # After sorting, equal values of `col` are contiguous. A row closes its tie
    # when no later row carries the same value.
    is_tie_end = ~ranked.duplicated(subset=col, keep='last').to_numpy()
    tie_end_positions = np.flatnonzero(is_tie_end)
    # For every row, the position of the row that closes its tie.
    tie_end_of_row = tie_end_positions[
        np.searchsorted(tie_end_positions, np.arange(n_rows))
    ]

    fpr = fp_cumsum[tie_end_of_row] / total_fp
    tpr = tp_cumsum[tie_end_of_row] / total_tp

    # Anchor the curve at (0, 0) so the area below the first point is counted
    # when the first tie contains both true and false positives.
    roc_auc = auc(np.concatenate(([0.0], fpr)), np.concatenate(([0.0], tpr)))
    roc_df = pd.DataFrame({'False_Positive_Rate': fpr, 'True_Positive_Rate': tpr})
    return roc_auc, roc_df