# ROC & AUC

In [1]:
# ROC ccurve is plotted for TPR (True Positive Rate) & FPR (False Positive Rate)
# TPR = TP/(TP+FN)
# FPR = FP/(FP+TN)
# Used for threshold selection.

In [2]:
import pandas as pd
df = pd.read_csv("diabetes.csv")
df

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63,0
764,2,122,70,27,0,36.8,0.340,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1


In [3]:
x=df.drop("Outcome",axis=1)
y=df.Outcome

In [5]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=1)

In [6]:
(x_train.shape,x_test.shape,y_train.shape,y_test.shape)

((614, 8), (154, 8), (614,), (154,))

In [8]:
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression(max_iter=1000)
lr.fit(x_train,y_train)

In [21]:
y_proba = lr.predict_proba(x_test)[:,1]
y_proba

array([0.43145723, 0.32394675, 0.15356634, 0.0427564 , 0.20030502,
       0.26560255, 0.36859622, 0.10223741, 0.14508693, 0.18734958,
       0.45734484, 0.06715738, 0.98446968, 0.7516409 , 0.02830573,
       0.75664624, 0.22059967, 0.30910433, 0.1208812 , 0.15334679,
       0.38489993, 0.13588956, 0.96627082, 0.27874324, 0.06602078,
       0.40515216, 0.18887625, 0.78491998, 0.07633905, 0.50903956,
       0.41852057, 0.3178688 , 0.05268635, 0.70209161, 0.12110639,
       0.63328997, 0.08589406, 0.17663684, 0.10063107, 0.62897578,
       0.19415703, 0.07979395, 0.03554552, 0.28843146, 0.06777127,
       0.02151819, 0.7472092 , 0.84227423, 0.0733336 , 0.13592514,
       0.00783746, 0.17726575, 0.86727897, 0.04736448, 0.76475566,
       0.09750861, 0.62878017, 0.30355789, 0.54174567, 0.15104918,
       0.6272026 , 0.01263162, 0.72849435, 0.20723897, 0.3406523 ,
       0.10552361, 0.01080666, 0.31376243, 0.74325886, 0.6299824 ,
       0.89591679, 0.74956996, 0.59988724, 0.01517904, 0.71265

In [40]:
from sklearn.metrics import roc_curve,roc_auc_score
fpr,tpr,thresh=roc_curve(y_test,y_proba)

In [41]:
np.arange(len(thresh)) % 10 == 0

array([ True, False, False, False, False, False, False, False, False,
       False,  True, False, False, False, False, False, False, False,
       False, False,  True, False, False, False, False, False, False,
       False, False, False,  True, False, False, False, False, False,
       False, False, False, False,  True, False, False, False, False,
       False, False, False, False, False,  True, False])

In [50]:
# Plotting ROC
from plotly import graph_objects as go
import numpy as np
trace0=go.Scatter(
            x=fpr,
            y=tpr,
            mode="lines",
            name="ROC Curve",
)
# Every 10th point highlighting
n=10
indices=np.arange(len(thresh))%10 == 0

# Trace 1
trace1 = go.Scatter(
            x=fpr[indices],
            y=tpr[indices],
            mode="markers+text",
            name="Threshold Points",
            text=[f"Threshold: {t: .2f}" for t in thresh[indices]],
            textposition="top center"
)

# Trace 2
trace2 = go.Scatter(
            x=[0,1],
            y=[0,1],
            name="Area : 0.5",
            line=dict(dash="dash")
)

data=[trace0,trace1,trace2]

# Layout
layout=go.Layout(
title="ROC Curve",
    xaxis=dict(title="False Positive Rate"),
    yaxis=dict(title="True Positive Rate"),
    width=800,
    height=800    
)
fig=go.Figure(data=data,layout=layout)
fig.show()


In [43]:
np.argmax(tpr-fpr)

28

In [44]:
thresh[28]

0.3330969615314099

In [45]:
roc_auc = roc_auc_score(y_test,y_proba)

In [49]:
# Plotting AUC
from plotly import graph_objects as go
import numpy as np
trace0=go.Scatter(
            x=fpr,
            y=tpr,
            mode="lines",
            name=f"ROC Curve(area : {roc_auc: .2f})",
)
# Every 10th point highlighting
n=10
indices=np.arange(len(thresh))%10 == 0

# Trace 1
trace1 = go.Scatter(
            x=fpr[indices],
            y=tpr[indices],
            mode="markers+text",
            name="Threshold Points",
            text=[f"Threshold: {t: .2f}" for t in thresh[indices]],
            textposition="top center"
)

# Trace 2
trace2 = go.Scatter(
            x=[0,1],
            y=[0,1],
            name="Area : 0.5",
            line=dict(dash="dash")
)

data=[trace0,trace1,trace2]

# Layout
layout=go.Layout(
title="ROC Curve",
    xaxis=dict(title="False Positive Rate"),
    yaxis=dict(title="True Positive Rate"),
    width=800,
    height=800    
)
fig=go.Figure(data=data,layout=layout)
fig.show()


In [51]:
from sklearn.tree import DecisionTreeClassifier

In [54]:
dtree = DecisionTreeClassifier()
dtree.fit(x_train,y_train)

In [56]:
y_proba = dtree.predict_proba(x_test)[:,1]
y_proba

array([0., 0., 0., 1., 0., 0., 1., 0., 0., 0., 1., 0., 1., 1., 0., 0., 0.,
       1., 0., 0., 1., 0., 1., 1., 0., 1., 1., 1., 1., 0., 0., 0., 0., 1.,
       1., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 1., 0., 0., 0., 0.,
       1., 1., 0., 1., 1., 1., 0., 0., 1., 1., 0., 1., 0., 1., 0., 0., 1.,
       1., 1., 1., 1., 1., 0., 0., 0., 1., 1., 0., 1., 0., 0., 0., 0., 0.,
       0., 1., 1., 0., 1., 0., 0., 1., 0., 1., 1., 1., 0., 1., 0., 1., 0.,
       1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       1., 1., 1., 0., 1., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
       0., 1., 0., 0., 1., 0., 0., 1., 1., 0., 0., 0., 1., 0., 0., 0., 0.,
       0.])

In [60]:
from sklearn.metrics import roc_curve,roc_auc_score
fpr,tpr,thresh=roc_curve(y_test,y_proba)

In [66]:
len(thresh)

3

In [58]:
# Plotting ROC
from plotly import graph_objects as go
import numpy as np
trace0=go.Scatter(
            x=fpr,
            y=tpr,
            mode="lines",
            name="ROC Curve",
)
# Every 10th point highlighting
n=10
indices=np.arange(len(thresh))%10 == 0

# Trace 1
trace1 = go.Scatter(
            x=fpr[indices],
            y=tpr[indices],
            mode="markers+text",
            name="Threshold Points",
            text=[f"Threshold: {t: .2f}" for t in thresh[indices]],
            textposition="top center"
)

# Trace 2
trace2 = go.Scatter(
            x=[0,1],
            y=[0,1],
            name="Area : 0.5",
            line=dict(dash="dash")
)

data=[trace0,trace1,trace2]

# Layout
layout=go.Layout(
title="ROC Curve",
    xaxis=dict(title="False Positive Rate"),
    yaxis=dict(title="True Positive Rate"),
    width=800,
    height=800    
)
fig=go.Figure(data=data,layout=layout)
fig.show()


In [63]:
np.argmax(tpr-fpr)

1

In [64]:
thresh[1]

1.0

# Apriori Algorithm

In [70]:
df = pd.read_csv("store_data.csv",header=None)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil
1,burgers,meatballs,eggs,,,,,,,,,,,,,,,,,
2,chutney,,,,,,,,,,,,,,,,,,,
3,turkey,avocado,,,,,,,,,,,,,,,,,,
4,mineral water,milk,energy bar,whole wheat rice,green tea,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7496,butter,light mayo,fresh bread,,,,,,,,,,,,,,,,,
7497,burgers,frozen vegetables,eggs,french fries,magazines,green tea,,,,,,,,,,,,,,
7498,chicken,,,,,,,,,,,,,,,,,,,
7499,escalope,green tea,,,,,,,,,,,,,,,,,,


In [71]:
pip install apyori

Collecting apyoriNote: you may need to restart the kernel to use updated packages.
  Downloading apyori-1.1.2.tar.gz (8.6 kB)
Building wheels for collected packages: apyori
  Building wheel for apyori (setup.py): started
  Building wheel for apyori (setup.py): finished with status 'done'
  Created wheel for apyori: filename=apyori-1.1.2-py3-none-any.whl size=5974 sha256=61f82b929676e2275669bc09813caaff58c96ca64b22c7446d702f805605a7f0
  Stored in directory: c:\users\asus\appdata\local\pip\cache\wheels\32\2a\54\10c595515f385f3726642b10c60bf788029e8f3a1323e3913a
Successfully built apyori
Installing collected packages: apyori
Successfully installed apyori-1.1.2



In [90]:
from apyori import apriori

In [91]:
records = []
for i in range(7501):
    t=[]
    for j in range(20):
        t.append(str(df.values[i,j]))
    records.append(t)
records

[['shrimp',
  'almonds',
  'avocado',
  'vegetables mix',
  'green grapes',
  'whole weat flour',
  'yams',
  'cottage cheese',
  'energy drink',
  'tomato juice',
  'low fat yogurt',
  'green tea',
  'honey',
  'salad',
  'mineral water',
  'salmon',
  'antioxydant juice',
  'frozen smoothie',
  'spinach',
  'olive oil'],
 ['burgers',
  'meatballs',
  'eggs',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan'],
 ['chutney',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan'],
 ['turkey',
  'avocado',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan'],
 ['mineral water',
  'milk',
  'energy bar',
  'whole wheat rice',
  'green tea',
  'nan',
  'nan',
  'nan',
 

In [92]:
#records = [str(df.values[i,j]) for j in range(20) for i in range(7501)]

In [93]:
association = list(apriori(records,min_support=0.004,min_lift=3,min_confidence=0.2,max_length=2))

In [118]:
association[0][2][0][3]

4.84395061728395

In [168]:
for i in association:
    print("Rule:", list(i[0]))#,"->",list(i[1]))
    print("Support:", i[1])
    print("Confidence:", i[2][0][2])
    print("Lift:", i[2][0][3])
    print("\n")

Rule: ['chicken', 'light cream']
Support: 0.004532728969470737
Confidence: 0.29059829059829057
Lift: 4.84395061728395


Rule: ['escalope', 'mushroom cream sauce']
Support: 0.005732568990801226
Confidence: 0.3006993006993007
Lift: 3.790832696715049


Rule: ['pasta', 'escalope']
Support: 0.005865884548726837
Confidence: 0.3728813559322034
Lift: 4.700811850163794


Rule: ['herb & pepper', 'ground beef']
Support: 0.015997866951073192
Confidence: 0.3234501347708895
Lift: 3.2919938411349285


Rule: ['tomato sauce', 'ground beef']
Support: 0.005332622317024397
Confidence: 0.3773584905660377
Lift: 3.840659481324083


Rule: ['olive oil', 'whole wheat pasta']
Support: 0.007998933475536596
Confidence: 0.2714932126696833
Lift: 4.122410097642296


Rule: ['pasta', 'shrimp']
Support: 0.005065991201173177
Confidence: 0.3220338983050847
Lift: 4.506672147735896


