Importing the Dependencies

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import  accuracy_score, f1_score, precision_score, recall_score, roc_auc_score
from sklearn.metrics import confusion_matrix

Data Collection and Analysis

Heart Disease Dataset

In [None]:
# loading the heart disease dataset into a pandas DataFrame
# NOTE(review): the absolute '/content/...' path looks Colab-specific — confirm before running elsewhere
heart_dataset = pd.read_csv('/content/heart.csv')

In [None]:
# preview the first 5 rows of the dataset
heart_dataset.head()

In [None]:
# (number of rows, number of columns) in this dataset
heart_dataset.shape

In [None]:
# summary statistics (count, mean, std, min, quartiles, max) for each numeric column
heart_dataset.describe()

In [None]:
# number of rows in each target class (shows how balanced the labels are)
heart_dataset['target'].value_counts()

0 --> No Heart Disease

1 --> Heart Disease

In [None]:
# mean of every column within each target class
heart_dataset.groupby('target').mean()

In [None]:
# split the frame into the label vector (Y) and the feature matrix (X);
# `columns=` already selects the column axis, so no separate `axis` argument is needed
Y = heart_dataset['target']
X = heart_dataset.drop(columns='target')

In [None]:
# preview the feature matrix: rich display of the first rows instead of printing the whole frame
X.head()

In [None]:
# preview the label vector: first rows only instead of printing the whole Series
Y.head()

Train Test Splits



In [None]:
def _stratified_split(test_size):
  """Split X/Y with the given test fraction, stratified on Y, using a fixed seed."""
  return train_test_split(X, Y, test_size=test_size, stratify=Y, random_state=2)

# 80% train / 20% test
X_train1, X_test1, Y_train1, Y_test1 = _stratified_split(0.2)

# 75% train / 25% test
X_train2, X_test2, Y_train2, Y_test2 = _stratified_split(0.25)

# 70% train / 30% test
X_train3, X_test3, Y_train3, Y_test3 = _stratified_split(0.3)

# 60% train / 40% test
X_train4, X_test4, Y_train4, Y_test4 = _stratified_split(0.4)

# 90% train / 10% test
X_train5, X_test5, Y_train5, Y_test5 = _stratified_split(0.1)

# 50% train / 50% test
X_train6, X_test6, Y_train6, Y_test6 = _stratified_split(0.5)


In [None]:
# full / train / test shapes for the first three splits
for train_part, test_part in ((X_train1, X_test1), (X_train2, X_test2), (X_train3, X_test3)):
  print(X.shape, train_part.shape, test_part.shape)

Training the Model

In [None]:
# train all four classifiers on one train/test split and report their metrics
def _positive_class_scores(model, X_test):
  """Return continuous scores for the positive class, suitable for ROC AUC.

  Uses `decision_function` when the estimator has one (SVC, LogisticRegression);
  otherwise falls back to the second column of `predict_proba` (tree models).
  """
  if hasattr(model, 'decision_function'):
    return model.decision_function(X_test)
  # assumes a binary target whose positive label is the second entry of model.classes_
  return model.predict_proba(X_test)[:, 1]


def result(X_train, X_test, Y_train, Y_test):
  """Fit SVM, logistic regression, decision tree and random forest, then print their metrics.

  Parameters:
    X_train, Y_train: features and labels used to fit every model.
    X_test, Y_test: features and labels used to score every model.

  Prints a DataFrame with one row per model and the columns
  'Model', 'Accuracy', 'Precision', 'Recall', 'F1 Score' and 'ROC'.
  Returns None.

  'ROC' is the area under the ROC curve computed from continuous scores
  (decision values or positive-class probabilities). Feeding hard 0/1
  predictions to `roc_auc_score` collapses the curve to a single threshold
  and does not measure ranking quality.
  """
  # NOTE(review): LogisticRegression runs with default solver settings; on unscaled
  # features it may not converge — confirm, and consider scaling or raising max_iter.
  models = [
      ('SVM', svm.SVC(kernel='linear')),
      ('Logistic Regression', LogisticRegression(random_state=1)),
      ('Decision Tree', DecisionTreeClassifier(criterion='entropy', random_state=0)),
      ('Random Forest', RandomForestClassifier(n_estimators=100, criterion='entropy', random_state=0)),
  ]

  rows = []
  for name, model in models:
    model.fit(X_train, Y_train)
    y_pred = model.predict(X_test)
    rows.append({
        'Model': name,
        'Accuracy': accuracy_score(Y_test, y_pred),
        'Precision': precision_score(Y_test, y_pred),
        'Recall': recall_score(Y_test, y_pred),
        'F1 Score': f1_score(Y_test, y_pred),
        # threshold-free scores, not y_pred, so this is a genuine ROC AUC
        'ROC': roc_auc_score(Y_test, _positive_class_scores(model, X_test)),
    })

  model_metrics = pd.DataFrame(rows)

  print(model_metrics)



Model Evaluation

Accuracy, Precision, Recall, F1 Score and ROC AUC

In [None]:
# metrics on the 80% train / 20% test split
result(X_train1, X_test1, Y_train1, Y_test1)

In [None]:
# metrics on the 75% train / 25% test split
result(X_train2, X_test2, Y_train2, Y_test2)

In [None]:
# metrics on the 70% train / 30% test split
result(X_train3, X_test3, Y_train3, Y_test3)

In [None]:
# metrics on the 60% train / 40% test split
result(X_train4, X_test4, Y_train4, Y_test4)


In [None]:
# metrics on the 90% train / 10% test split
result(X_train5, X_test5, Y_train5, Y_test5)


In [None]:
# metrics on the 50% train / 50% test split
result(X_train6, X_test6, Y_train6, Y_test6)