In [5]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV,RandomizedSearchCV
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import roc_auc_score
from xgboost import XGBClassifier
from sklearn.svm import SVC
import plotly.graph_objects as go 
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier


# Single seed passed as random_state to both train_test_split calls and to the
# XGBoost, SVC and gradient boosting estimators further down.
RANDOM_STATE = 55

In [4]:
pip install plotly

Collecting plotly
Note: you may need to restart the kernel to use updated packages.

  Downloading plotly-5.22.0-py3-none-any.whl.metadata (7.1 kB)
Downloading plotly-5.22.0-py3-none-any.whl (16.4 MB)
   ---------------------------------------- 0.0/16.4 MB ? eta -:--:--
   ---------------------------------------- 0.0/16.4 MB ? eta -:--:--
   ---------------------------------------- 0.0/16.4 MB 330.3 kB/s eta 0:00:50
   ---------------------------------------- 0.1/16.4 MB 751.6 kB/s eta 0:00:22
    --------------------------------------- 0.4/16.4 MB 2.3 MB/s eta 0:00:07
   - -------------------------------------- 0.7/16.4 MB 3.6 MB/s eta 0:00:05
   -- ------------------------------------- 0.9/16.4 MB 4.1 MB/s eta 0:00:04
   -- ------------------------------------- 1.0/16.4 MB 3.7 MB/s eta 0:00:05
   -- ------------------------------------- 1.2/16.4 MB 3.3 MB/s eta 0:00:05
   ---- ----------------------------------- 1.6/16.4 MB 4.2 MB/s eta 0:00:04
   ---- --------------------------------

In [7]:
# Read the raw dataset from the CSV file in the working directory
df = pd.read_csv(filepath_or_buffer="CAD Database.csv")

# Preview the first five rows
df.head(n=5)

Unnamed: 0,Age,Weight,Height,Sex,BMI,Diabetes Mellitus,Hypertension,Current Smoker,EX-Smoker,Obesity,...,Left Ventricular Hypertrophy,Low Density Lipoprotein,High Density Lipoprotein,Ejection Fraction,Regional Wall Motion Abnormalities,Valvular Heart Disease,Left Anterior Descending,Left Circumflex,Right Coronary Artery,Result
0,53,90,175,Male,29.387755,0,1,1,0,Y,...,N,155,30.0,50,0,N,Stenotic,Normal,Stenotic,CAD
1,67,70,157,Fmale,28.398718,0,1,0,0,Y,...,N,121,36.0,40,4,N,Stenotic,Stenotic,Normal,CAD
2,54,54,164,Male,20.077335,0,0,1,0,N,...,N,70,45.0,40,2,mild,Stenotic,Normal,Normal,CAD
3,66,67,158,Fmale,26.838648,0,1,0,0,Y,...,N,55,27.0,55,0,Severe,Normal,Normal,Normal,Normal
4,50,87,153,Fmale,37.165193,0,1,0,0,Y,...,N,110,50.0,50,0,Severe,Normal,Normal,Normal,Normal


In [8]:
# Keep only the model inputs plus the target column ('Result'); every other
# column of the raw frame is dropped here.
df = df.loc[:, [
    'Age',
    'Sex',
    'Diabetes Mellitus',
    'Hypertension',
    'Current Smoker',
    'EX-Smoker',
    'Obesity',
    'Typical Chest Pain',
    'Left Ventricular Hypertrophy',
    'Ejection Fraction',
    'Regional Wall Motion Abnormalities',
    'Valvular Heart Disease',
    'Result',
]]

In [9]:
# Columns holding string categories; these are the ones that get one-hot encoded
cat_variables = [
    'Sex',
    'Obesity',
    'Left Ventricular Hypertrophy',
    'Valvular Heart Disease',
]

In [10]:
# Replace each categorical column with one indicator column per category value
df = pd.get_dummies(df, columns=cat_variables)

In [11]:
# Display the frame after one-hot encoding: each categorical column has been
# replaced by boolean indicator columns named "<column>_<category>".
df

Unnamed: 0,Age,Diabetes Mellitus,Hypertension,Current Smoker,EX-Smoker,Typical Chest Pain,Ejection Fraction,Regional Wall Motion Abnormalities,Result,Sex_Fmale,Sex_Male,Obesity_N,Obesity_Y,Left Ventricular Hypertrophy_N,Left Ventricular Hypertrophy_Y,Valvular Heart Disease_Moderate,Valvular Heart Disease_N,Valvular Heart Disease_Severe,Valvular Heart Disease_mild
0,53,0,1,1,0,0,50,0,CAD,False,True,False,True,True,False,False,True,False,False
1,67,0,1,0,0,1,40,4,CAD,True,False,False,True,True,False,False,True,False,False
2,54,0,0,1,0,1,40,2,CAD,False,True,True,False,True,False,False,False,False,True
3,66,0,1,0,0,0,55,0,Normal,True,False,False,True,True,False,False,False,True,False
4,50,0,1,0,0,0,50,0,Normal,True,False,False,True,True,False,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
298,58,0,0,0,0,1,45,0,CAD,False,True,False,True,True,False,False,True,False,False
299,55,0,0,0,0,0,40,0,Normal,True,False,False,True,True,False,False,False,False,True
300,48,0,1,0,0,0,55,0,Normal,True,False,False,True,True,False,False,True,False,False
301,57,1,0,0,0,0,55,0,Normal,True,False,False,True,True,False,False,True,False,False


In [12]:
# Every column except the target ('Result') is a model input feature.
# The comparison must be an equality test: `x not in 'Result'` is a substring
# check against the string 'Result' and would also silently drop any column
# whose name happens to be a substring of it (e.g. 'Res').
features = [col for col in df.columns if col != 'Result']

In [13]:
# Cast every feature column to integers; the boolean indicator columns created
# by the one-hot encoding become 0/1.
df[features] = df[features].astype(dtype=int)

In [14]:
# Encode the target as a binary label: "CAD" is the positive class (1) and
# "Normal" the negative class (0).
df['Result'] = df['Result'].map({
    "CAD": 1,
    "Normal": 0,
})

# XGBoost

In [15]:
# First split: 70% of the rows go to training, the remaining 30% to a temporary pool.
X_train, X_temp, y_train, y_temp = train_test_split(
    df[features],
    df['Result'],
    train_size=0.7,
    random_state=RANDOM_STATE,
)
# Second split: halve the temporary pool into an evaluation set and a test set.
X_eval, X_test, y_eval, y_test = train_test_split(
    X_temp,
    y_temp,
    train_size=0.5,
    random_state=RANDOM_STATE,
)

In [16]:
# XGBoost classifier with early stopping monitored on the evaluation split.
# early_stopping_rounds is configured on the estimator instead of being passed
# to fit(): the fit() keyword has been deprecated since XGBoost 1.6 and is
# rejected by newer releases.
xgb_model = XGBClassifier(
    n_estimators=400,          # upper bound on boosting rounds; early stopping may use fewer
    learning_rate=0.01,
    max_depth=3,
    reg_alpha=0.1,
    reg_lambda=0.1,
    early_stopping_rounds=30,  # stop after 30 rounds without improvement on eval_set
    verbosity=1,
    random_state=RANDOM_STATE,
)
xgb_model.fit(X_train, y_train, eval_set=[(X_eval, y_eval)], verbose=False)



In [17]:
# Second column of predict_proba on the test set, i.e. the XGBoost score for
# class 1 (CAD); used below to build the ROC curve.
y_score = xgb_model.predict_proba(X_test)[:, 1]

In [18]:
# Test-set accuracy of the XGBoost model. accuracy_score takes (y_true, y_pred)
# in that order; the value is unchanged because accuracy is symmetric.
print(f"Metrics test:\n\tAccuracy score: {accuracy_score(y_test, xgb_model.predict(X_test)):.4f}")

Metrics test:
	Accuracy score: 0.8478


In [19]:
# ROC curve points and area under the curve for the XGBoost scores.
# NOTE(review): assigning to `auc` rebinds the name imported from
# sklearn.metrics at the top of the notebook; the later XGB trace cell reads
# this float under the same name, so it is kept as-is here.
fpr, tpr, thresholds = roc_curve(y_true=y_test, y_score=y_score)
auc = roc_auc_score(y_true=y_test, y_score=y_score)

# Logistic Regression

In [20]:
# Fit a logistic regression baseline on the unscaled training features;
# the iteration cap is raised to 650.
classifier = LogisticRegression(max_iter=650)
classifier.fit(X=X_train, y=y_train)

In [21]:
# Logistic regression probability of class 1 (CAD) for each test row
y_pred_proba = classifier.predict_proba(X=X_test)[:, 1]

In [22]:
# Test-set accuracy of the logistic regression model. accuracy_score takes
# (y_true, y_pred) in that order; the value is unchanged because accuracy is symmetric.
print(f"Metrics test:\n\tAccuracy score: {accuracy_score(y_test, classifier.predict(X_test)):.4f}")

Metrics test:
	Accuracy score: 0.7609


In [23]:
# ROC curve points and area under the curve for the logistic regression probabilities
fpr_lr, tpr_lr, thresholds_lr = roc_curve(y_true=y_test, y_score=y_pred_proba)
auc_lr = roc_auc_score(y_true=y_test, y_score=y_pred_proba)

# SVM 

In [24]:
# Standardise features for the SVM. The scaler is fitted on the training split
# only and then applied unchanged to both the training and the test split.
scaler = StandardScaler().fit(X_train)
X_train_normalized = scaler.transform(X_train)
X_test_normalized = scaler.transform(X_test)

In [25]:
# Linear-kernel support vector classifier trained on the standardised features
svm_classifier = SVC(
    kernel='linear',
    C=1.0,
    random_state=RANDOM_STATE,
)
svm_classifier.fit(X=X_train_normalized, y=y_train)

In [26]:
# Decision-function scores for the test rows. Despite the variable name these
# are not probabilities; they are used only as ranking scores for the ROC curve.
y_pred_proba_svm = svm_classifier.decision_function(X=X_test_normalized)

In [27]:
# Test-set accuracy of the SVM. accuracy_score takes (y_true, y_pred) in that
# order; the value is unchanged because accuracy is symmetric.
print(f"SVM Metrics test:\n\tAccuracy score: {accuracy_score(y_test, svm_classifier.predict(X_test_normalized)):.4f}")

SVM Metrics test:
	Accuracy score: 0.8043


In [28]:
# ROC curve points and area under the curve for the SVM decision scores
fpr_svm, tpr_svm, thresholds_svm = roc_curve(y_true=y_test, y_score=y_pred_proba_svm)
auc_svm = roc_auc_score(y_true=y_test, y_score=y_pred_proba_svm)

# Gradient Boosting Machines

In [29]:
# scikit-learn gradient boosting classifier trained on the unscaled training features
gbm_classifier = GradientBoostingClassifier(
    n_estimators=100,
    learning_rate=0.1,
    random_state=RANDOM_STATE,
)
gbm_classifier.fit(X=X_train, y=y_train)

In [30]:
# Gradient boosting probability of class 1 (CAD) for each test row
y_pred_proba_gbm = gbm_classifier.predict_proba(X=X_test)[:, 1]

In [31]:
# Test-set accuracy of the gradient boosting model. accuracy_score takes
# (y_true, y_pred) in that order; the value is unchanged because accuracy is symmetric.
print(f"GBM Metrics test:\n\tAccuracy score: {accuracy_score(y_test, gbm_classifier.predict(X_test)):.4f}")

GBM Metrics test:
	Accuracy score: 0.8478


In [32]:
# ROC curve points and area under the curve for the gradient boosting probabilities
fpr_gbm, tpr_gbm, thresholds_gbm = roc_curve(y_true=y_test, y_score=y_pred_proba_gbm)
auc_gbm = roc_auc_score(y_true=y_test, y_score=y_pred_proba_gbm)

# ROC Curve


In [33]:
# ROC line for the gradient boosting model; its AUC is shown in the legend label
trace_gbm = go.Scatter(
    name=f'GBM (Area = {auc_gbm:.2f})',
    mode='lines',
    x=fpr_gbm,
    y=tpr_gbm,
)

In [34]:
# ROC line for the XGBoost model; `auc` here is the float computed from the
# XGBoost scores, shown in the legend label
trace = go.Scatter(
    name=f'XGB (Area = {auc:.2f})',
    mode='lines',
    x=fpr,
    y=tpr,
)

In [35]:
# ROC line for the logistic regression model; its AUC is shown in the legend label
trace_lr = go.Scatter(
    name=f'Logistic Regression (Area = {auc_lr:.2f})',
    mode='lines',
    x=fpr_lr,
    y=tpr_lr,
)


In [36]:
# ROC line for the SVM; its AUC is shown in the legend label
trace_svm = go.Scatter(
    name=f'SVM (Area = {auc_svm:.2f})',
    mode='lines',
    x=fpr_svm,
    y=tpr_svm,
)


In [37]:
# Dashed diagonal from (0, 0) to (1, 1): the reference line for a random classifier
trace_random = go.Scatter(
    name='Random (Area = 0.5)',
    mode='lines',
    line={'dash': 'dash'},
    x=[0, 1],
    y=[0, 1],
)

In [33]:
# Traces in drawing order: the four models followed by the random-classifier diagonal
data = [trace, trace_lr, trace_svm, trace_gbm, trace_random]

# Fixed 800x800 px canvas (autosize off) so the figure is square
layout = go.Layout(
    title='Receiver Operating Characteristic',
    xaxis={'title': 'False Positive Rate'},
    yaxis={'title': 'True Positive Rate'},
    width=800,
    height=800,
    autosize=False,
    showlegend=True,
)

# Combine the traces and the layout into a single figure
fig = go.Figure(layout=layout, data=data)

# Render the figure in the notebook
fig.show()