In [None]:

# my helper functions for this project
import importlib
import helper

import matplotlib.pyplot as plt

# use a 16pt base font for every figure drawn in this notebook
plt.rcParams['font.size'] = 16



In [None]:
# Build the toy scored data set that the ROC plots below are drawn from.

import pandas as pd
import numpy as np

# Label convention: 1 = positive class, 0 = negative class.
# Score convention: the higher the score, the more likely the positive class.

# Alternative 8-instance data set (disabled):
#d = {'score': [0.05, 0.1 ,0.20, 0.70, 0.70, 0.90, 0.90, 0.95], 'label': [0,0, 1, 0, 1, 1, 0, 1]}
d = dict(
    score=[0.03, 0.05, 0.1, 0.20, 0.70, 0.70, 0.90, 0.90, 0.95],
    label=[0, 0, 0, 1, 0, 0, 1, 0, 1],
)
df = pd.DataFrame(d)
df




In [None]:
# Cost proportion `c` read by the isometric plots further down.
#
# The illustrations in the paper use three costs / skews: 1/6, 1/3 and 2/3.
# Leave exactly one of the assignments below active.

#c = 1/6
c = 1 / 3
#c = 2/3


In [None]:
# calculate TPR and FPR for each point on ROC space

# reload the helper module first so that edits made to it since it was
# imported are picked up without restarting the kernel
importlib.reload(helper)
# NOTE(review): df is overwritten with the helper's return value, so re-running
# this cell passes the already-processed frame back in — confirm that
# calculateROCPoints tolerates that. The plotting cells below read the 'fpr'
# and 'tpr' columns from the result.
df = helper.calculateROCPoints(df)
df


In [None]:
###
### roc curve + isometrics for loss (cost proportion)
###

# plot roc curve with specific colours for each point to show correspondence with each cost line

# one colour per ROC point
# NOTE(review): scatter() needs len(cols) to equal the number of rows in df — confirm
cols = ['blue', 'green', 'orange', 'red', 'pink', 'lightgreen', 'red', 'lightgrey', "#fbadd8be", '#d37af6']

# plot ROC: connecting line plus one coloured marker per point
plt.plot(df['fpr'], df['tpr'])
plt.scatter(df['fpr'], df['tpr'], c=cols, s=100, clip_on=False)

# plot convex hull (segment end points are hard-coded, not derived from df)
plt.plot([1/6,0.5], [2/3,1], linestyle='--', color='green')

plt.xlabel('FPR')
plt.ylabel('TPR')

# class proportions taken from the labels in df
nN = np.sum(df.label==0)
nP = np.sum(df.label==1)
piP = nP/(nP+nN)
piN = 1-piP


# isometrics for loss
#
# Every isometric is the straight line  TPR = gradient*FPR + intercept.
# The gradient depends only on c and the class proportions, so it is the same
# for every loss value and is computed once; only the intercept moves.
gradient = (c/(1-c))*piN/piP

# FPR at which each line's loss value is written
textXPos = 0.9

for loss in np.arange(0,1.01, 0.1):

    intercept = 1 - (loss/(2*piP*(1-c)))
    #print(loss, gradient, intercept)

    plt.axline((0, intercept), slope=gradient, color='pink', linestyle='--')

    # place the label on the line itself, using the same gradient/intercept
    # that drew it, so label and line cannot get out of step
    textYPos = gradient*textXPos + intercept
    # skip labels that would fall outside (or at the very top of) the plot
    if (textYPos >=-0.05 and textYPos <=0.95):
        plt.text(textXPos,textYPos , "{:.2f}". format(loss) , fontsize=12, backgroundcolor='white', alpha=0.5, bbox=dict(boxstyle='square', fc='white', ec='none'))


plt.xlim([0,1])
plt.ylim([0,1])

plt.show()






In [None]:
###
### roc curve + isometrics for net benefit
###

# ROC curve drawn with a distinct colour per point, so that each point can be
# matched up with its cost line

import matplotlib.pyplot as plt

cols = [
    'blue', 'green', 'orange', 'red', 'pink',
    'lightgreen', 'red', 'lightgrey', "#fbadd8be", '#d37af6',
]

# ROC curve: connecting line, then the colour-coded markers on top
plt.plot(df['fpr'], df['tpr'])
plt.scatter(df['fpr'], df['tpr'], c=cols, s=100, clip_on=False)

# convex hull segment (hard-coded end points)
plt.plot([1/6, 0.5], [2/3, 1], linestyle='--', color='green')

plt.xlabel('FPR')
plt.ylabel('TPR')

# class proportions from the labels in df
nN = (df['label'] == 0).sum()
nP = (df['label'] == 1).sum()
piP = nP / (nP + nN)
piN = 1 - piP

# isometrics for net benefit:  TPR = gradient*FPR + intercept
# (the gradient is shared by every line; the intercept is nb/piP)
gradient = (c/(1-c))*piN/piP
textXPos = 0.9  # FPR at which each line is labelled

for nb in np.arange(-2, 0.5, 0.05):
    intercept = nb/piP
    plt.axline((0, intercept), slope=gradient, color='pink', linestyle='--')

    # label sits on the line at FPR = textXPos; drop it when out of view
    textYPos = gradient*textXPos + intercept
    if -0.05 <= textYPos <= 0.95:
        plt.text(
            textXPos, textYPos, f"{nb:.2f}",
            fontsize=12, backgroundcolor='white', alpha=0.5,
            bbox=dict(boxstyle='square', fc='white', ec='none'),
        )

plt.xlim([0,1])
plt.ylim([0,1])

plt.show()




In [None]:

###
### roc curve + isometrics for skew
###

# plot roc curve with specific colours for each point to show correspondence with each cost line

import matplotlib.pyplot as plt

# one colour per ROC point
cols = ['blue', 'green', 'orange', 'red', 'pink', 'lightgreen', 'red', 'lightgrey', "#fbadd8be", '#d37af6']

# plot ROC: connecting line plus one coloured marker per point
plt.plot(df['fpr'], df['tpr'])
plt.scatter(df['fpr'], df['tpr'], c=cols, s=100, clip_on=False)

# plot convex hull (segment end points are hard-coded, not derived from df)
plt.plot([1/6,0.5], [2/3,1], linestyle='--', color='green')

plt.xlabel('FPR')
plt.ylabel('TPR')

# class proportions from the labels in df
# (computed here as in the other cells; the skew lines below depend only on c)
nN = np.sum(df.label==0)
nP = np.sum(df.label==1)
piP = nP/(nP+nN)
piN = 1-piP


# isometrics for skew
#
# Every isometric is the straight line  TPR = gradient*FPR + intercept,
# with a gradient fixed by c and an intercept that moves with the loss value.
gradient = c/(1-c)

# FPR at which each line's loss value is written
textXPos = 0.9

for loss in np.arange(0,1, 0.1):

    intercept = (loss - 1+c)/(c-1)
    plt.axline((0, intercept), slope=gradient, color='pink', linestyle='--')

    # label sits on the line at FPR = textXPos; drop it when out of view
    textYPos = gradient*textXPos + intercept

    if (textYPos >=-0.05 and textYPos <=0.95):
        # label with this line's own loss value (previously formatted `nb`,
        # a variable that only exists if the net-benefit cell was run first)
        plt.text(textXPos,textYPos , "{:.2f}". format(loss) , fontsize=12, backgroundcolor='white', alpha=0.5)


plt.xlim([0,1])
plt.ylim([0,1])


plt.show()




In [None]:
## accuracy isometrics showing how they change as class distribution changes

def plotAccuracyIsometrics(piP):
    """Draw accuracy isometrics in ROC space for one class distribution.

    For acc = 0, 0.1, ..., 1.0 the line
    ``TPR = (piN/piP)*FPR + (acc - piN)/piP`` is drawn, where
    ``piN = 1 - piP``. Each line is labelled with its accuracy value, then
    the figure is finished (unit axis limits, axis labels, square aspect
    ratio) and shown.

    Args:
        piP: proportion of the positive class; must be non-zero because it
            is used as a divisor.
    """
    piN = 1 - piP
    gradient = piN / piP  # identical for every accuracy value

    # For piP > 0.4 the labels are lined up at a fixed FPR near the right
    # edge; otherwise they are lined up at a fixed TPR near the bottom edge.
    labelAtFixedFpr = piP > 0.4

    for acc in np.arange(0, 1.01, 0.1):
        intercept = (acc - piN) / piP
        plt.axline((0, intercept), slope=gradient, color='pink', linestyle='--')

        if labelAtFixedFpr:
            textXPos = 0.9
            textYPos = (acc + textXPos*piN - piN)/piP
            solvedCoord = textYPos
        else:
            textYPos = 0.1
            textXPos = (textYPos*piP - acc + piN)/piN
            solvedCoord = textXPos

        # only label lines whose computed coordinate lands inside the plot
        if -0.05 <= solvedCoord <= 0.95:
            plt.text(
                textXPos, textYPos, f"{acc:.2f}",
                fontsize=10, backgroundcolor='white', alpha=0.5,
                bbox=dict(boxstyle='square', fc='white', ec='none'),
            )

    plt.xlim([0, 1])
    plt.ylim([0, 1])
    plt.xlabel('FPR')
    plt.ylabel('TPR')
    plt.gca().set_aspect('equal', adjustable='box')  # square aspect ratio

    plt.show()
        

# balanced classes: piP = 0.5 (labels are placed at FPR = 0.9)
plotAccuracyIsometrics(0.5)
# positives are 10% of the data: piP = 0.1 (labels are placed at TPR = 0.1)
plotAccuracyIsometrics(0.1)
