# Data Preprocessing

In [1]:
# importing libraries
import pandas as pd 
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the heart-disease table from the Kaggle input directory.
dataset = pd.read_csv('/kaggle/input/heart-disease-uci/heart.csv')

# Every column except the last is treated as a feature; the last column is the target.
X, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1].values

# Hold out 10% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=0,
)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both partitions so the test set never influences them.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

# Logistic Regression

In [2]:
# importing modules
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score

# Build a logistic-regression classifier with default settings and train it
# on the scaled training data (fit() returns the fitted estimator itself).
logisticRegression = LogisticRegression().fit(X_train, y_train)

# Predict labels for the held-out test rows.
y_pred = logisticRegression.predict(X_test)

# Evaluate: confusion matrix (rows = true labels, columns = predictions)
# followed by the overall accuracy on the test set.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
logisticRegression_score = accuracy_score(y_true=y_test, y_pred=y_pred)
print(cm, logisticRegression_score, sep='\n')

[[14  3]
 [ 2 12]]
0.8387096774193549


# Decision Tree Classifier

In [3]:
# importing modules
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, accuracy_score

# Build and train a decision tree: entropy split criterion, random splitter,
# depth capped at 50, and a fixed seed so the random splits are repeatable.
decision_tree = DecisionTreeClassifier(
    criterion='entropy',
    splitter='random',
    max_depth=50,
    random_state=0,
).fit(X_train, y_train)

# Predict labels for the held-out test rows.
y_pred = decision_tree.predict(X_test)

# Evaluate: confusion matrix followed by the overall test accuracy.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
DecisionTree_score = accuracy_score(y_true=y_test, y_pred=y_pred)
print(cm, DecisionTree_score, sep='\n')

[[14  3]
 [ 6  8]]
0.7096774193548387


# Random Forest Classifier

In [4]:
# importing modules
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score

# Build and train a random forest of 50 entropy-criterion trees;
# the fixed seed keeps the ensemble reproducible across runs.
random_forest = RandomForestClassifier(
    n_estimators=50,
    criterion='entropy',
    random_state=0,
).fit(X_train, y_train)

# Predict labels for the held-out test rows.
y_pred = random_forest.predict(X_test)

# Evaluate: confusion matrix followed by the overall test accuracy.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
RandomForest_score = accuracy_score(y_true=y_test, y_pred=y_pred)
print(cm, RandomForest_score, sep='\n')

[[14  3]
 [ 2 12]]
0.8387096774193549


# Naive Bayes Classifier

In [5]:
# importing modules
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, accuracy_score

# Build a Gaussian naive Bayes classifier with default settings and train it
# on the scaled training data.
naiveBayes = GaussianNB().fit(X_train, y_train)

# Predict labels for the held-out test rows.
y_pred = naiveBayes.predict(X_test)

# Evaluate: confusion matrix followed by the overall test accuracy.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
NaiveBayes_score = accuracy_score(y_true=y_test, y_pred=y_pred)
print(cm, NaiveBayes_score, sep='\n')

[[13  4]
 [ 2 12]]
0.8064516129032258


# Kernel SVM

In [6]:
# importing modules
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score

# Build and train a support-vector classifier with an RBF kernel
# (seed fixed at 0, matching the other models in this notebook).
classifier = SVC(kernel='rbf', random_state=0).fit(X_train, y_train)

# Predict labels for the held-out test rows.
y_pred = classifier.predict(X_test)

# Evaluate: confusion matrix followed by the overall test accuracy.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
kernelSVM_score = accuracy_score(y_true=y_test, y_pred=y_pred)
print(cm, kernelSVM_score, sep='\n')

[[13  4]
 [ 2 12]]
0.8064516129032258


# Summary

In [7]:
# Pair each model's display name with the test accuracy computed in its cell,
# keeping the same order in which the models were trained above.
model_scores = [
    ('LogisticRegression', logisticRegression_score),
    ('DecisionTreeClassifier', DecisionTree_score),
    ('RandomForestClassifier', RandomForest_score),
    ('NaiveBayes', NaiveBayes_score),
    ('KernelSVM', kernelSVM_score),
]

# Column-oriented mapping used to build the comparison table.
d = {
    'Algorithms': [name for name, _ in model_scores],
    'Accuracy': [score for _, score in model_scores],
}

print(pd.DataFrame(d))

               Algorithms  Accuracy
0      LogisticRegression  0.838710
1  DecisionTreeClassifier  0.709677
2  RandomForestClassifier  0.838710
3              NaiveBayes  0.806452
4               KernelSVM  0.806452
