## Understanding ROC

In [4]:
import numpy as np
import pandas as pd
import scipy.stats as stats
from scipy.stats import norm  
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.metrics import roc_curve, auc, confusion_matrix, accuracy_score

import random
# The simulated scores below are drawn with np.random.randn, which the stdlib
# `random` seed does not affect, so seed NumPy's global generator as well.
random.seed(100)
np.random.seed(100)

In [5]:
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import display
from scipy.optimize import curve_fit as cf

### Let's draw the ROC curve by changing the cut-off value

The ROC curve is traced out by varying the cut-off value. Each point on the curve corresponds to one cut-off value.

In [6]:
def dfcreate(qlty, n_per_class=5000, std=0.05):
    """Simulate the scores of a binary classifier of a given quality.

    Scores of each class are drawn from a normal distribution with standard
    deviation ``std``. Positives are centred at ``0.5 + adj`` and negatives at
    ``0.5 - adj`` with ``adj = (qlty / 10) * 0.20``, so a larger ``qlty``
    pulls the two score distributions further apart.

    Parameters
    ----------
    qlty : int or float
        Model quality; 0 makes both classes share the same centre (0.5).
    n_per_class : int, default 5000
        Number of samples drawn for each class.
    std : float, default 0.05
        Standard deviation of the scores within each class.

    Returns
    -------
    pandas.DataFrame
        Columns ``prob`` (simulated score) and ``y`` (1.0 for positives,
        0.0 for negatives), positives first. Rows whose score falls outside
        [0, 1] are dropped and the index is renumbered from 0.
    """
    adj = (qlty / 10) * 0.20

    # Draw positives before negatives so a seeded run yields the same numbers
    # as the earlier list-based construction.
    prob_pos = np.random.randn(n_per_class) * std + (0.5 + adj)
    prob_neg = np.random.randn(n_per_class) * std + (0.5 - adj)

    df = pd.DataFrame({
        'prob': np.concatenate([prob_pos, prob_neg]),
        'y': np.concatenate([np.ones(n_per_class), np.zeros(n_per_class)]),
    })

    # Keep only valid probabilities. Resetting the index avoids the duplicated
    # row labels that stacking the two class frames used to produce.
    in_range = (df['prob'] <= 1) & (df['prob'] >= 0)
    return df[in_range].reset_index(drop=True)

In [7]:
# Choose the quality of the simulated model here:
# any integer from 0 (worst) to 10 (best).
qlty = 2
df = dfcreate(qlty=qlty)

In [8]:
def tpfp(co):
    """Plot the single ROC point (FPR, TPR) obtained at cut-off ``co``.

    Reads the notebook-level DataFrame ``df`` (columns ``prob`` and ``y``)
    and, as a side effect, stores the thresholded predictions in
    ``df['y_pred']``.

    Parameters
    ----------
    co : float
        Cut-off value; rows with ``prob > co`` are predicted positive (1),
        all others negative (0).

    Returns
    -------
    int
        Number of true positives at this cut-off.
    """
    # Vectorised comparison instead of a row-by-row Python lambda.
    df['y_pred'] = (df['prob'] > co).astype(int)
    tn, fp, fn, tp = confusion_matrix(df['y'], df['y_pred']).ravel()

    # Exact rates: the previous "+0.001" in the denominators biased both
    # values downwards, so neither could ever reach 1.0. Only guard the
    # genuinely empty-class case.
    tpp = tp / (tp + fn) if (tp + fn) else 0.0
    fpp = fp / (fp + tn) if (fp + tn) else 0.0

    # Draw everything through the Axes object rather than mixing it with the
    # implicit pyplot state machine.
    fig, ax = plt.subplots()
    ax.scatter(fpp, tpp, edgecolors='k')
    ax.set_xlim(0, 1.1)
    ax.set_ylim(0, 1.1)
    ax.set_xlabel('FPR')
    ax.set_ylabel('TPR')
    ax.grid(True)
    plt.show()

    return tp

In [13]:
# Drag the slider to change the cut-off value; the plot is redrawn with the
# ROC point for the selected cut-off.
y = interactive(tpfp, co=(0.35, 0.65, 0.01))
display(y)

interactive(children=(FloatSlider(value=0.5, description='co', max=0.65, min=0.35, step=0.01), Output()), _dom…

### Let's connect the ROC curve with model quality

The shape of the ROC curve depends on the quality of the model. Here we look at ROC curves ranging from the worst model to the best.

In [10]:
def makeroc(qlty):
    """Plot the ROC curve of a model simulated at quality ``qlty``.

    Generates a fresh set of scores with ``dfcreate(qlty)``, computes the ROC
    curve and its area with scikit-learn, and draws the curve together with
    the diagonal from (0, 0) to (1, 1) as a reference line.

    Parameters
    ----------
    qlty : int or float
        Model quality passed straight through to ``dfcreate``.

    Returns
    -------
    None
        The figure is shown as a side effect.
    """
    # Local name chosen so the notebook-level ``df`` is not shadowed.
    scores = dfcreate(qlty)

    # roc_curve sweeps every threshold itself, so no fixed-cut-off prediction
    # column is needed here.
    fpr, tpr, _ = roc_curve(scores['y'], scores['prob'])
    roc_auc = auc(fpr, tpr)

    fig, ax = plt.subplots()
    ax.plot([0, 1], [0, 1], color='navy', linestyle='--')
    # Label previously ended with an unmatched ")".
    ax.plot(fpr, tpr, color='blue', lw=2,
            label='Area under ROC curve = %0.2f' % roc_auc)
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title('ROC of Model')
    ax.legend(loc="lower right")
    plt.show()

In [12]:
# Drag the slider to change the quality of the simulated model; the ROC curve
# is regenerated for the selected quality.
y = interactive(makeroc, qlty=(0, 5, 1))
display(y)

interactive(children=(IntSlider(value=2, description='qlty', max=5), Output()), _dom_classes=('widget-interact…