In [1]:
import pandas as pd
from interpret.glassbox import (LogisticRegression,ClassificationTree,ExplainableBoostingClassifier)
from interpret import show
from sklearn.metrics import f1_score,accuracy_score
from sklearn.model_selection import train_test_split

In [2]:
df = pd.read_csv('heart.csv')
df.head()

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,ATA,140,289,0,Normal,172,N,0.0,Up,0
1,49,F,NAP,160,180,0,Normal,156,N,1.0,Flat,1
2,37,M,ATA,130,283,0,ST,98,N,0.0,Up,0
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,Flat,1
4,54,M,NAP,150,195,0,Normal,122,N,0.0,Up,0


In [7]:
#encoding data - string(object) to integer
df.replace({'Sex':{'M':0,'F':1}},inplace = True)
df.replace({'ChestPainType':{'TA':1,'ATA':2,'NAP':3,'ASY':4}},inplace=True)
df.replace({'RestingECG':{'Normal':0,'ST':1,'LVH':2}},inplace = True)
df.replace({'ExerciseAngina':{'N':0,'Y':1}},inplace = True)
df.replace({'ST_Slope':{'Up':1,'Flat':2,'Down':3}},inplace = True)

In [8]:
#spliting dataset 
x = df.iloc[:,:11].values
y = df['HeartDisease'].values
x.shape,y.shape

((918, 11), (918,))

In [10]:
X_train, X_test, y_train, y_test  = train_test_split(x,y,test_size=0.2)

In [11]:
#training model for given datapoints
lr = LogisticRegression(random_state=2021, feature_names=df.iloc[:,:11].columns, penalty='l1', solver='liblinear')
lr.fit(X_train, y_train)
print("Training finished.")

Training finished.


In [14]:
#predicting results
y_pred = lr.predict(X_test)
print(f"F1 Score {f1_score(y_test, y_pred, average='macro')}")
print(f"Accuracy {accuracy_score(y_test, y_pred)*100}")

F1 Score 0.837153233436079
Accuracy 84.23913043478261


In [13]:
#generating explanations
'''explain_local give explaination for every data point i.e.
gives interscept show which parameter affect the dataset ans how (positively or negatively)'''
lr_local = lr.explain_local(X_test[:100], y_test[:100], name='Logistic Regression')
show(lr_local)

In [15]:
lr_global = lr.explain_global(name='Logistic Regression')
show(lr_global)

In [16]:
tree = ClassificationTree()
tree.fit(X_train, y_train)
print("Training finished.")
y_pred = tree.predict(X_test)
print(f"F1 Score {f1_score(y_test, y_pred, average='macro')}")
print(f"Accuracy {accuracy_score(y_test, y_pred)}")

Training finished.
F1 Score 0.8278141697105081
Accuracy 0.8315217391304348


In [17]:
tree_local = tree.explain_local(X_test[:100], y_test[:100], name='Tree')
show(tree_local)

In [18]:
ebm = ExplainableBoostingClassifier(random_state=2021)
ebm.fit(X_train, y_train) 
print("Training finished.")
y_pred = ebm.predict(X_test)
print(f"F1 Score {f1_score(y_test, y_pred, average='macro')}")
print(f"Accuracy {accuracy_score(y_test, y_pred)}")

Training finished.
F1 Score 0.8762079510703364
Accuracy 0.8804347826086957


In [19]:
ebm_local = ebm.explain_local(X_test[:100], y_test[:100], name='EBM')
show(ebm_local)

In [20]:
ebm_global = ebm.explain_global(name='EBM')
show(ebm_global)